youtube_dlc/extractor/vlive.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 import re
   5 import time
   6 import itertools
   7
   8 from .common import InfoExtractor
   9 from .naver import NaverBaseIE
  10 from ..compat import compat_str
  11 from ..utils import (
  12     ExtractorError,
  13     merge_dicts,
  14     try_get,
  15     urlencode_postdata,
  16 )
  17
  18
  19 class VLiveIE(NaverBaseIE):
  20     IE_NAME = 'vlive'
  21     _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/(?:video|post)/(?P<id>(?:\d-)?[0-9]+)'
  22     _NETRC_MACHINE = 'vlive'
  23     _TESTS = [{
  24         'url': 'https://www.vlive.tv/video/1326',
  25         'md5': 'cc7314812855ce56de70a06a27314983',
  26         'info_dict': {
  27             'id': '1326',
  28             'ext': 'mp4',
  29             'title': "[V LIVE] Girl's Day's Broadcast",
  30             'creator': "Girl's Day",
  31             'view_count': int,
  32             'uploader_id': 'muploader_a',
  33         },
  34     },
  35         {
  36         'url': 'https://vlive.tv/post/1-18244258',
  37         'md5': 'cc7314812855ce56de70a06a27314983',
  38         'info_dict': {
  39             'id': '1326',
  40             'ext': 'mp4',
  41             'title': "[V LIVE] Girl's Day's Broadcast",
  42             'creator': "Girl's Day",
  43             'view_count': int,
  44             'uploader_id': 'muploader_a',
  45         },
  46     },
  47         {
  48         'url': 'https://www.vlive.tv/video/16937',
  49         'info_dict': {
  50             'id': '16937',
  51             'ext': 'mp4',
  52             'title': '[V LIVE] 첸백시 걍방',
  53             'creator': 'EXO',
  54             'view_count': int,
  55             'subtitles': 'mincount:12',
  56             'uploader_id': 'muploader_j',
  57         },
  58         'params': {
  59             'skip_download': True,
  60         },
  61     }, {
  62         'url': 'https://www.vlive.tv/video/129100',
  63         'md5': 'ca2569453b79d66e5b919e5d308bff6b',
  64         'info_dict': {
  65             'id': '129100',
  66             'ext': 'mp4',
  67             'title': '[V LIVE] [BTS+] Run BTS! 2019 - EP.71 :: Behind the scene',
  68             'creator': 'BTS+',
  69             'view_count': int,
  70             'subtitles': 'mincount:10',
  71         },
  72         'skip': 'This video is only available for CH+ subscribers',
  73     }]
  74
  75     @classmethod
  76     def suitable(cls, url):
  77         return False if VLivePlaylistIE.suitable(url) else super(VLiveIE, cls).suitable(url)
  78
  79     def _real_initialize(self):
  80         self._login()
  81
  82     def _login(self):
  83         email, password = self._get_login_info()
  84         if None in (email, password):
  85             return
  86
  87         def is_logged_in():
  88             login_info = self._download_json(
  89                 'https://www.vlive.tv/auth/loginInfo', None,
  90                 note='Downloading login info',
  91                 headers={'Referer': 'https://www.vlive.tv/home'})
  92             return try_get(
  93                 login_info, lambda x: x['message']['login'], bool) or False
  94
  95         LOGIN_URL = 'https://www.vlive.tv/auth/email/login'
  96         self._request_webpage(
  97             LOGIN_URL, None, note='Downloading login cookies')
  98
  99         self._download_webpage(
 100             LOGIN_URL, None, note='Logging in',
 101             data=urlencode_postdata({'email': email, 'pwd': password}),
 102             headers={
 103                 'Referer': LOGIN_URL,
 104                 'Content-Type': 'application/x-www-form-urlencoded'
 105             })
 106
 107         if not is_logged_in():
 108             raise ExtractorError('Unable to log in', expected=True)
 109
 110     def _real_extract(self, url):
 111         # url may match on a post or a video url with a post_id potentially matching a video_id
 112         working_id = self._match_id(url)
 113         webpage = self._download_webpage(url, working_id)
 114
 115         PARAMS_RE = r'window\.__PRELOADED_STATE__\s*=\s*({.*});?\s*</script>'
 116         PARAMS_FIELD = 'params'
 117
 118         params = self._search_regex(
 119             PARAMS_RE, webpage, PARAMS_FIELD, default='', flags=re.DOTALL)
 120         params = self._parse_json(params, working_id, fatal=False)
 121
 122         video_params = try_get(params, lambda x: x["postDetail"]["post"]["officialVideo"], dict)
 123
 124         if video_params is None:
 125             error = try_get(params, lambda x: x["postDetail"]["error"], dict)
 126             error_data = try_get(error, lambda x: x["data"], dict)
 127             error_video = try_get(error_data, lambda x: x["officialVideo"], dict)
 128             error_msg = try_get(error, lambda x: x["message"], compat_str)
 129             product_type = try_get(error_data,
 130                                    [lambda x: x["officialVideo"]["productType"],
 131                                     lambda x: x["board"]["boardType"]],
 132                                    compat_str)
 133
 134             if error_video is not None:
 135                 if product_type in ('VLIVE_PLUS', 'VLIVE+'):
 136                     self.raise_login_required('This video is only available with V LIVE+.')
 137                 elif error_msg is not None:
 138                     raise ExtractorError('V LIVE reported the following error: %s' % error_msg)
 139                 else:
 140                     raise ExtractorError('Failed to extract video parameters.')
 141             elif 'post' in url:
 142                 raise ExtractorError('Url does not appear to be a video post.', expected=True)
 143             else:
 144                 raise ExtractorError('Failed to extract video parameters.')
 145
 146         video_id = working_id if 'video' in url else str(video_params["videoSeq"])
 147
 148         video_type = video_params["type"]
 149         if video_type in ('VOD'):
 150             encoding_status = video_params["encodingStatus"]
 151             if encoding_status == 'COMPLETE':
 152                 return self._replay(video_id, webpage, params, video_params)
 153             else:
 154                 raise ExtractorError('VOD encoding not yet complete. Please try again later.',
 155                                      expected=True)
 156         elif video_type in ('LIVE'):
 157             video_status = video_params["status"]
 158             if video_status in ('RESERVED'):
 159                 raise ExtractorError('Coming soon!', expected=True)
 160             elif video_status in ('ENDED', 'END'):
 161                 raise ExtractorError('Uploading for replay. Please wait...', expected=True)
 162             else:
 163                 return self._live(video_id, webpage, params)
 164         else:
 165             raise ExtractorError('Unknown video type %s' % video_type)
 166
 167     def _get_common_fields(self, webpage, params):
 168         title = self._og_search_title(webpage)
 169         description = self._html_search_meta(
 170             ['og:description', 'description', 'twitter:description'],
 171             webpage, 'description', default=None)
 172         creator = (try_get(params, lambda x: x["channel"]["channel"]["channelName"], compat_str)
 173                    or self._search_regex(r'on (.*) channel', description or '', 'creator', fatal=False))
 174         thumbnail = self._og_search_thumbnail(webpage)
 175         return {
 176             'title': title,
 177             'creator': creator,
 178             'thumbnail': thumbnail,
 179         }
 180
 181     def _live(self, video_id, webpage, params):
 182         LIVE_INFO_ENDPOINT = 'https://www.vlive.tv/globalv-web/vam-web/old/v3/live/%s/playInfo' % video_id
 183         play_info = self._download_json(LIVE_INFO_ENDPOINT, video_id,
 184                                         headers={"referer": "https://www.vlive.tv"})
 185
 186         streams = try_get(play_info, lambda x: x["result"]["streamList"], list) or []
 187
 188         formats = []
 189         for stream in streams:
 190             formats.extend(self._extract_m3u8_formats(
 191                 stream['serviceUrl'], video_id, 'mp4',
 192                 fatal=False, live=True))
 193         self._sort_formats(formats)
 194
 195         info = self._get_common_fields(webpage, params)
 196         info.update({
 197             'title': self._live_title(info['title']),
 198             'id': video_id,
 199             'formats': formats,
 200             'is_live': True,
 201         })
 202         return info
 203
 204     def _replay(self, video_id, webpage, params, video_params):
 205         long_video_id = video_params["vodId"]
 206
 207         VOD_KEY_ENDPOINT = 'https://www.vlive.tv/globalv-web/vam-web/video/v1.0/vod/%s/inkey' % video_id
 208         key_json = self._download_json(VOD_KEY_ENDPOINT, video_id,
 209                                        headers={"referer": "https://www.vlive.tv"})
 210         key = key_json["inkey"]
 211
 212         return merge_dicts(
 213             self._get_common_fields(webpage, params),
 214             self._extract_video_info(video_id, long_video_id, key))
 215
 216
 217 class VLiveChannelIE(InfoExtractor):
 218     IE_NAME = 'vlive:channel'
 219     _VALID_URL = r'https?://(?:(?:www|m)\.)?(?:channels\.vlive\.tv/|vlive\.tv/channels?/)(?P<id>[0-9A-Z]+)'
 220     _TESTS = [{
 221         'url': 'https://channels.vlive.tv/FCD4B',
 222         'info_dict': {
 223             'id': 'FCD4B',
 224             'title': 'MAMAMOO',
 225         },
 226         'playlist_mincount': 110
 227     }, {
 228         'url': 'https://www.vlive.tv/channel/FCD4B',
 229         'info_dict': {
 230             'id': 'FCD4B',
 231             'title': 'MAMAMOO',
 232         },
 233         'playlist_mincount': 110
 234     }]
 235     _APP_ID = '8c6cc7b45d2568fb668be6e05b6e5a3b'
 236
 237     def _real_extract(self, url):
 238         channel_code = self._match_id(url)
 239
 240         webpage = self._download_webpage(
 241             'http://channels.vlive.tv/%s/video' % channel_code, channel_code)
 242
 243         app_id = None
 244
 245         app_js_url = self._search_regex(
 246             r'<script[^>]+src=(["\'])(?P<url>http.+?/app\.js.*?)\1',
 247             webpage, 'app js', default=None, group='url')
 248
 249         if app_js_url:
 250             app_js = self._download_webpage(
 251                 app_js_url, channel_code, 'Downloading app JS', fatal=False)
 252             if app_js:
 253                 app_id = self._search_regex(
 254                     r'Global\.VFAN_APP_ID\s*=\s*[\'"]([^\'"]+)[\'"]',
 255                     app_js, 'app id', default=None)
 256
 257         app_id = app_id or self._APP_ID
 258
 259         channel_info = self._download_json(
 260             'http://api.vfan.vlive.tv/vproxy/channelplus/decodeChannelCode',
 261             channel_code, note='Downloading decode channel code',
 262             query={
 263                 'app_id': app_id,
 264                 'channelCode': channel_code,
 265                 '_': int(time.time())
 266             })
 267
 268         channel_seq = channel_info['result']['channelSeq']
 269         channel_name = None
 270         entries = []
 271
 272         for page_num in itertools.count(1):
 273             video_list = self._download_json(
 274                 'http://api.vfan.vlive.tv/vproxy/channelplus/getChannelVideoList',
 275                 channel_code, note='Downloading channel list page #%d' % page_num,
 276                 query={
 277                     'app_id': app_id,
 278                     'channelSeq': channel_seq,
 279                     # Large values of maxNumOfRows (~300 or above) may cause
 280                     # empty responses (see [1]), e.g. this happens for [2] that
 281                     # has more than 300 videos.
 282                     # 1. https://github.com/ytdl-org/youtube-dl/issues/13830
 283                     # 2. http://channels.vlive.tv/EDBF.
 284                     'maxNumOfRows': 100,
 285                     '_': int(time.time()),
 286                     'pageNo': page_num
 287                 }
 288             )
 289
 290             if not channel_name:
 291                 channel_name = try_get(
 292                     video_list,
 293                     lambda x: x['result']['channelInfo']['channelName'],
 294                     compat_str)
 295
 296             videos = try_get(
 297                 video_list, lambda x: x['result']['videoList'], list)
 298             if not videos:
 299                 break
 300
 301             for video in videos:
 302                 video_id = video.get('videoSeq')
 303                 if not video_id:
 304                     continue
 305                 video_id = compat_str(video_id)
 306                 entries.append(
 307                     self.url_result(
 308                         'http://www.vlive.tv/video/%s' % video_id,
 309                         ie=VLiveIE.ie_key(), video_id=video_id))
 310
 311         return self.playlist_result(
 312             entries, channel_code, channel_name)
 313
 314
 315 class VLivePlaylistIE(InfoExtractor):
 316     IE_NAME = 'vlive:playlist'
 317     _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/video/(?P<video_id>[0-9]+)/playlist/(?P<id>[0-9]+)'
 318     _VIDEO_URL_TEMPLATE = 'http://www.vlive.tv/video/%s'
 319     _TESTS = [{
 320         # regular working playlist
 321         'url': 'https://www.vlive.tv/video/117956/playlist/117963',
 322         'info_dict': {
 323             'id': '117963',
 324             'title': '아이돌룸(IDOL ROOM) 41회 - (여자)아이들'
 325         },
 326         'playlist_mincount': 10
 327     }, {
 328         # playlist with no playlistVideoSeqs
 329         'url': 'http://www.vlive.tv/video/22867/playlist/22912',
 330         'info_dict': {
 331             'id': '22867',
 332             'ext': 'mp4',
 333             'title': '[V LIVE] Valentine Day Message from MINA',
 334             'creator': 'TWICE',
 335             'view_count': int
 336         },
 337         'params': {
 338             'skip_download': True,
 339         }
 340     }]
 341
 342     def _build_video_result(self, video_id, message):
 343         self.to_screen(message)
 344         return self.url_result(
 345             self._VIDEO_URL_TEMPLATE % video_id,
 346             ie=VLiveIE.ie_key(), video_id=video_id)
 347
 348     def _real_extract(self, url):
 349         mobj = re.match(self._VALID_URL, url)
 350         video_id, playlist_id = mobj.group('video_id', 'id')
 351
 352         if self._downloader.params.get('noplaylist'):
 353             return self._build_video_result(
 354                 video_id,
 355                 'Downloading just video %s because of --no-playlist'
 356                 % video_id)
 357
 358         self.to_screen(
 359             'Downloading playlist %s - add --no-playlist to just download video'
 360             % playlist_id)
 361
 362         webpage = self._download_webpage(
 363             'http://www.vlive.tv/video/%s/playlist/%s'
 364             % (video_id, playlist_id), playlist_id)
 365
 366         raw_item_ids = self._search_regex(
 367             r'playlistVideoSeqs\s*=\s*(\[[^]]+\])', webpage,
 368             'playlist video seqs', default=None, fatal=False)
 369
 370         if not raw_item_ids:
 371             return self._build_video_result(
 372                 video_id,
 373                 'Downloading just video %s because no playlist was found'
 374                 % video_id)
 375
 376         item_ids = self._parse_json(raw_item_ids, playlist_id)
 377
 378         entries = [
 379             self.url_result(
 380                 self._VIDEO_URL_TEMPLATE % item_id, ie=VLiveIE.ie_key(),
 381                 video_id=compat_str(item_id))
 382             for item_id in item_ids]
 383
 384         playlist_name = self._html_search_regex(
 385             r'<div[^>]+class="[^"]*multicam_playlist[^>]*>\s*<h3[^>]+>([^<]+)',
 386             webpage, 'playlist title', fatal=False)
 387
 388         return self.playlist_result(entries, playlist_id, playlist_name)