yt_dlp/extractor/vidio.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 import re
   5
   6 from .common import InfoExtractor
   7 from ..utils import (
   8     clean_html,
   9     ExtractorError,
  10     get_element_by_class,
  11     int_or_none,
  12     parse_iso8601,
  13     smuggle_url,
  14     str_or_none,
  15     strip_or_none,
  16     try_get,
  17     unsmuggle_url,
  18     urlencode_postdata,
  19 )
  20
  21
  22 class VidioBaseIE(InfoExtractor):
  23     _LOGIN_URL = 'https://www.vidio.com/users/login'
  24     _NETRC_MACHINE = 'vidio'
  25
  26     def _login(self):
  27         username, password = self._get_login_info()
  28         if username is None:
  29             return
  30
  31         def is_logged_in():
  32             res = self._download_json(
  33                 'https://www.vidio.com/interactions.json', None, 'Checking if logged in', fatal=False) or {}
  34             return bool(res.get('current_user'))
  35
  36         if is_logged_in():
  37             return
  38
  39         login_page = self._download_webpage(
  40             self._LOGIN_URL, None, 'Downloading log in page')
  41
  42         login_form = self._form_hidden_inputs("login-form", login_page)
  43         login_form.update({
  44             'user[login]': username,
  45             'user[password]': password,
  46         })
  47         login_post, login_post_urlh = self._download_webpage_handle(
  48             self._LOGIN_URL, None, 'Logging in', data=urlencode_postdata(login_form), expected_status=[302, 401])
  49
  50         if login_post_urlh.status == 401:
  51             if get_element_by_class('onboarding-content-register-popup__title', login_post):
  52                 raise ExtractorError(
  53                     'Unable to log in: The provided email has not registered yet.', expected=True)
  54
  55             reason = get_element_by_class('onboarding-form__general-error', login_post) or get_element_by_class('onboarding-modal__title', login_post)
  56             if 'Akun terhubung ke' in reason:
  57                 raise ExtractorError(
  58                     'Unable to log in: Your account is linked to a social media account. '
  59                     'Use --cookies to provide account credentials instead', expected=True)
  60             elif reason:
  61                 subreason = get_element_by_class('onboarding-modal__description-text', login_post) or ''
  62                 raise ExtractorError(
  63                     'Unable to log in: %s. %s' % (reason, clean_html(subreason)), expected=True)
  64             raise ExtractorError('Unable to log in')
  65
  66     def _real_initialize(self):
  67         self._api_key = self._download_json(
  68             'https://www.vidio.com/auth', None, data=b'')['api_key']
  69         self._login()
  70
  71     def _call_api(self, url, video_id, note=None):
  72         return self._download_json(url, video_id, note=note, headers={
  73             'Content-Type': 'application/vnd.api+json',
  74             'X-API-KEY': self._api_key,
  75         })
  76
  77
  78 class VidioIE(VidioBaseIE):
  79     _VALID_URL = r'https?://(?:www\.)?vidio\.com/watch/(?P<id>\d+)-(?P<display_id>[^/?#&]+)'
  80     _TESTS = [{
  81         'url': 'http://www.vidio.com/watch/165683-dj_ambred-booyah-live-2015',
  82         'md5': 'cd2801394afc164e9775db6a140b91fe',
  83         'info_dict': {
  84             'id': '165683',
  85             'display_id': 'dj_ambred-booyah-live-2015',
  86             'ext': 'mp4',
  87             'title': 'DJ_AMBRED - Booyah (Live 2015)',
  88             'description': 'md5:27dc15f819b6a78a626490881adbadf8',
  89             'thumbnail': r're:^https?://.*\.jpg$',
  90             'duration': 149,
  91             'like_count': int,
  92             'uploader': 'TWELVE Pic',
  93             'timestamp': 1444902800,
  94             'upload_date': '20151015',
  95             'uploader_id': 'twelvepictures',
  96             'channel': 'Cover Music Video',
  97             'channel_id': '280236',
  98             'view_count': int,
  99             'dislike_count': int,
 100             'comment_count': int,
 101             'tags': 'count:4',
 102         },
 103     }, {
 104         'url': 'https://www.vidio.com/watch/77949-south-korea-test-fires-missile-that-can-strike-all-of-the-north',
 105         'only_matching': True,
 106     }, {
 107         # Premier-exclusive video
 108         'url': 'https://www.vidio.com/watch/1550718-stand-by-me-doraemon',
 109         'only_matching': True
 110     }]
 111
 112     def _real_extract(self, url):
 113         match = re.match(self._VALID_URL, url).groupdict()
 114         video_id, display_id = match.get('id'), match.get('display_id')
 115         data = self._call_api('https://api.vidio.com/videos/' + video_id, display_id)
 116         video = data['videos'][0]
 117         title = video['title'].strip()
 118         is_premium = video.get('is_premium')
 119
 120         if is_premium:
 121             sources = self._download_json(
 122                 'https://www.vidio.com/interactions_stream.json?video_id=%s&type=videos' % video_id,
 123                 display_id, note='Downloading premier API JSON')
 124             if not (sources.get('source') or sources.get('source_dash')):
 125                 self.raise_login_required('This video is only available for registered users with the appropriate subscription')
 126
 127             formats, subs = [], {}
 128             if sources.get('source'):
 129                 hls_formats, hls_subs = self._extract_m3u8_formats_and_subtitles(
 130                     sources['source'], display_id, 'mp4', 'm3u8_native')
 131                 formats.extend(hls_formats)
 132                 subs.update(hls_subs)
 133             if sources.get('source_dash'):  # TODO: Find video example with source_dash
 134                 dash_formats, dash_subs = self._extract_mpd_formats_and_subtitles(
 135                     sources['source_dash'], display_id, 'dash')
 136                 formats.extend(dash_formats)
 137                 subs.update(dash_subs)
 138         else:
 139             hls_url = data['clips'][0]['hls_url']
 140             formats, subs = self._extract_m3u8_formats_and_subtitles(
 141                 hls_url, display_id, 'mp4', 'm3u8_native')
 142
 143         self._sort_formats(formats)
 144
 145         get_first = lambda x: try_get(data, lambda y: y[x + 's'][0], dict) or {}
 146         channel = get_first('channel')
 147         user = get_first('user')
 148         username = user.get('username')
 149         get_count = lambda x: int_or_none(video.get('total_' + x))
 150
 151         return {
 152             'id': video_id,
 153             'display_id': display_id,
 154             'title': title,
 155             'description': strip_or_none(video.get('description')),
 156             'thumbnail': video.get('image_url_medium'),
 157             'duration': int_or_none(video.get('duration')),
 158             'like_count': get_count('likes'),
 159             'formats': formats,
 160             'subtitles': subs,
 161             'uploader': user.get('name'),
 162             'timestamp': parse_iso8601(video.get('created_at')),
 163             'uploader_id': username,
 164             'uploader_url': 'https://www.vidio.com/@' + username if username else None,
 165             'channel': channel.get('name'),
 166             'channel_id': str_or_none(channel.get('id')),
 167             'view_count': get_count('view_count'),
 168             'dislike_count': get_count('dislikes'),
 169             'comment_count': get_count('comments'),
 170             'tags': video.get('tag_list'),
 171         }
 172
 173
 174 class VidioPremierIE(VidioBaseIE):
 175     _VALID_URL = r'https?://(?:www\.)?vidio\.com/premier/(?P<id>\d+)/(?P<display_id>[^/?#&]+)'
 176     _TESTS = [{
 177         'url': 'https://www.vidio.com/premier/2885/badai-pasti-berlalu',
 178         'playlist_mincount': 14,
 179     }, {
 180         # Series with both free and premier-exclusive videos
 181         'url': 'https://www.vidio.com/premier/2567/sosmed',
 182         'only_matching': True,
 183     }]
 184
 185     def _playlist_entries(self, playlist_url, display_id):
 186         index = 1
 187         while playlist_url:
 188             playlist_json = self._call_api(playlist_url, display_id, 'Downloading API JSON page %s' % index)
 189             for video_json in playlist_json.get('data', []):
 190                 link = video_json['links']['watchpage']
 191                 yield self.url_result(link, 'Vidio', video_json['id'])
 192             playlist_url = try_get(playlist_json, lambda x: x['links']['next'])
 193             index += 1
 194
 195     def _real_extract(self, url):
 196         url, idata = unsmuggle_url(url, {})
 197         playlist_id, display_id = re.match(self._VALID_URL, url).groups()
 198
 199         playlist_url = idata.get('url')
 200         if playlist_url:  # Smuggled data contains an API URL. Download only that playlist
 201             playlist_id = idata['id']
 202             return self.playlist_result(
 203                 self._playlist_entries(playlist_url, playlist_id),
 204                 playlist_id=playlist_id, playlist_title=idata.get('title'))
 205
 206         playlist_data = self._call_api('https://api.vidio.com/content_profiles/%s/playlists' % playlist_id, display_id)
 207
 208         return self.playlist_from_matches(
 209             playlist_data.get('data', []), playlist_id=playlist_id, ie=self.ie_key(),
 210             getter=lambda data: smuggle_url(url, {
 211                 'url': data['relationships']['videos']['links']['related'],
 212                 'id': data['id'],
 213                 'title': try_get(data, lambda x: x['attributes']['name'])
 214             }))
 215
 216
 217 class VidioLiveIE(VidioBaseIE):
 218     _VALID_URL = r'https?://(?:www\.)?vidio\.com/live/(?P<id>\d+)-(?P<display_id>[^/?#&]+)'
 219     _TESTS = [{
 220         'url': 'https://www.vidio.com/live/204-sctv',
 221         'info_dict': {
 222             'id': '204',
 223             'title': 'SCTV',
 224             'uploader': 'SCTV',
 225             'uploader_id': 'sctv',
 226             'thumbnail': r're:^https?://.*\.jpg$',
 227         },
 228     }, {
 229         # Premier-exclusive livestream
 230         'url': 'https://www.vidio.com/live/6362-tvn',
 231         'only_matching': True,
 232     }, {
 233         # DRM premier-exclusive livestream
 234         'url': 'https://www.vidio.com/live/6299-bein-1',
 235         'only_matching': True,
 236     }]
 237
 238     def _real_extract(self, url):
 239         video_id, display_id = re.match(self._VALID_URL, url).groups()
 240         stream_data = self._call_api(
 241             'https://www.vidio.com/api/livestreamings/%s/detail' % video_id, display_id)
 242         stream_meta = stream_data['livestreamings'][0]
 243         user = stream_data.get('users', [{}])[0]
 244
 245         title = stream_meta.get('title')
 246         username = user.get('username')
 247
 248         formats = []
 249         if stream_meta.get('is_drm'):
 250             if not self.get_param('allow_unplayable_formats'):
 251                 self.raise_no_formats(
 252                     'This video is DRM protected.', expected=True)
 253         if stream_meta.get('is_premium'):
 254             sources = self._download_json(
 255                 'https://www.vidio.com/interactions_stream.json?video_id=%s&type=livestreamings' % video_id,
 256                 display_id, note='Downloading premier API JSON')
 257             if not (sources.get('source') or sources.get('source_dash')):
 258                 self.raise_login_required('This video is only available for registered users with the appropriate subscription')
 259
 260             if str_or_none(sources.get('source')):
 261                 token_json = self._download_json(
 262                     'https://www.vidio.com/live/%s/tokens' % video_id,
 263                     display_id, note='Downloading HLS token JSON', data=b'')
 264                 formats.extend(self._extract_m3u8_formats(
 265                     sources['source'] + '?' + token_json.get('token', ''), display_id, 'mp4', 'm3u8_native'))
 266             if str_or_none(sources.get('source_dash')):
 267                 pass
 268         else:
 269             if stream_meta.get('stream_token_url'):
 270                 token_json = self._download_json(
 271                     'https://www.vidio.com/live/%s/tokens' % video_id,
 272                     display_id, note='Downloading HLS token JSON', data=b'')
 273                 formats.extend(self._extract_m3u8_formats(
 274                     stream_meta['stream_token_url'] + '?' + token_json.get('token', ''),
 275                     display_id, 'mp4', 'm3u8_native'))
 276             if stream_meta.get('stream_dash_url'):
 277                 pass
 278             if stream_meta.get('stream_url'):
 279                 formats.extend(self._extract_m3u8_formats(
 280                     stream_meta['stream_url'], display_id, 'mp4', 'm3u8_native'))
 281         self._sort_formats(formats)
 282
 283         return {
 284             'id': video_id,
 285             'display_id': display_id,
 286             'title': title,
 287             'is_live': True,
 288             'description': strip_or_none(stream_meta.get('description')),
 289             'thumbnail': stream_meta.get('image'),
 290             'like_count': int_or_none(stream_meta.get('like')),
 291             'dislike_count': int_or_none(stream_meta.get('dislike')),
 292             'formats': formats,
 293             'uploader': user.get('name'),
 294             'timestamp': parse_iso8601(stream_meta.get('start_time')),
 295             'uploader_id': username,
 296             'uploader_url': 'https://www.vidio.com/@' + username if username else None,
 297         }