]> jfr.im git - yt-dlp.git/blame - youtube_dlc/extractor/vlive.py
[vlive] add: support new channel url format
[yt-dlp.git] / youtube_dlc / extractor / vlive.py
CommitLineData
061f62da 1# coding: utf-8
25bcd355 2from __future__ import unicode_literals
061f62da 3
b24d6336 4import re
b92d3c53 5import time
6import itertools
9d186afa 7
061f62da 8from .common import InfoExtractor
c88debff
RA
9from .naver import NaverBaseIE
10from ..compat import compat_str
061f62da 11from ..utils import (
9d186afa 12 ExtractorError,
c88debff 13 merge_dicts,
661cc229 14 try_get,
89c63cc5 15 urlencode_postdata,
061f62da 16)
061f62da 17
18
class VLiveIE(NaverBaseIE):
    # Extractor for a single V LIVE item addressed either as /video/<seq>
    # or as /post/<id>.  The named group `kind` records which of the two URL
    # forms matched so that _real_extract does not have to guess from the
    # raw URL text.
    IE_NAME = 'vlive'
    _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/(?P<kind>video|post)/(?P<id>(?:\d-)?[0-9]+)'
    _NETRC_MACHINE = 'vlive'
    _TESTS = [{
        'url': 'https://www.vlive.tv/video/1326',
        'md5': 'cc7314812855ce56de70a06a27314983',
        'info_dict': {
            'id': '1326',
            'ext': 'mp4',
            'title': "[V LIVE] Girl's Day's Broadcast",
            'creator': "Girl's Day",
            'view_count': int,
            'uploader_id': 'muploader_a',
        },
    }, {
        'url': 'https://vlive.tv/post/1-18244258',
        'md5': 'cc7314812855ce56de70a06a27314983',
        'info_dict': {
            'id': '1326',
            'ext': 'mp4',
            'title': "[V LIVE] Girl's Day's Broadcast",
            'creator': "Girl's Day",
            'view_count': int,
            'uploader_id': 'muploader_a',
        },
    }, {
        'url': 'https://www.vlive.tv/video/16937',
        'info_dict': {
            'id': '16937',
            'ext': 'mp4',
            'title': '[V LIVE] 첸백시 걍방',
            'creator': 'EXO',
            'view_count': int,
            'subtitles': 'mincount:12',
            'uploader_id': 'muploader_j',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.vlive.tv/video/129100',
        'md5': 'ca2569453b79d66e5b919e5d308bff6b',
        'info_dict': {
            'id': '129100',
            'ext': 'mp4',
            'title': '[V LIVE] [BTS+] Run BTS! 2019 - EP.71 :: Behind the scene',
            'creator': 'BTS+',
            'view_count': int,
            'subtitles': 'mincount:10',
        },
        'skip': 'This video is only available for CH+ subscribers',
    }]

    @classmethod
    def suitable(cls, url):
        """Return whether this extractor should handle `url`.

        A playlist URL (/video/<seq>/playlist/<id>) also matches this class's
        _VALID_URL as a prefix, so anything VLivePlaylistIE accepts is
        rejected here.
        """
        return False if VLivePlaylistIE.suitable(url) else super(VLiveIE, cls).suitable(url)

    def _real_initialize(self):
        self._login()

    def _login(self):
        """Log in with the configured credentials, if any were supplied.

        Does nothing when either the e-mail or the password is missing.
        Raises ExtractorError (expected) when the site does not report a
        logged-in session after the credentials were posted.
        """
        email, password = self._get_login_info()
        if None in (email, password):
            return

        def is_logged_in():
            login_info = self._download_json(
                'https://www.vlive.tv/auth/loginInfo', None,
                note='Downloading login info',
                headers={'Referer': 'https://www.vlive.tv/home'})
            # Anything other than a boolean at message.login counts as
            # "not logged in".
            return try_get(
                login_info, lambda x: x['message']['login'], bool) or False

        LOGIN_URL = 'https://www.vlive.tv/auth/email/login'
        # First request only collects cookies; the credentials go out in the
        # second request.
        self._request_webpage(
            LOGIN_URL, None, note='Downloading login cookies')

        self._download_webpage(
            LOGIN_URL, None, note='Logging in',
            data=urlencode_postdata({'email': email, 'pwd': password}),
            headers={
                'Referer': LOGIN_URL,
                'Content-Type': 'application/x-www-form-urlencoded'
            })

        if not is_logged_in():
            raise ExtractorError('Unable to log in', expected=True)

    def _real_extract(self, url):
        """Extract a VOD or live stream from a /video/ or /post/ URL.

        Raises ExtractorError when the page carries no official video, when
        a VOD is still being encoded, when a live stream is only scheduled,
        or when the reported video type is neither 'VOD' nor 'LIVE'.
        """
        # The URL may be a post or a video URL, and a post id can look like
        # a video id, so the kind is taken from the matched path segment
        # rather than from a substring search over the whole URL.
        url_kind, working_id = re.match(self._VALID_URL, url).group('kind', 'id')
        webpage = self._download_webpage(url, working_id)

        PARAMS_RE = r'window\.__PRELOADED_STATE__\s*=\s*({.*});?\s*</script>'
        PARAMS_FIELD = 'params'

        params = self._search_regex(
            PARAMS_RE, webpage, PARAMS_FIELD, default='', flags=re.DOTALL)
        # Non-fatal: on a parse failure try_get below simply yields None.
        params = self._parse_json(params, working_id, fatal=False)

        video_params = try_get(
            params, lambda x: x["postDetail"]["post"]["officialVideo"], dict)
        if video_params is None:
            if url_kind == 'post':
                raise ExtractorError('Url does not appear to be a video post.')
            else:
                raise ExtractorError('Failed to extract video parameters.')

        # For a post URL the matched id is the post id, so the real video
        # sequence number has to come from the page state.
        video_id = working_id if url_kind == 'video' else compat_str(video_params["videoSeq"])
        long_video_id = video_params.get("vodId")
        video_type = video_params.get("type")

        VOD_KEY_ENDPOINT = 'https://www.vlive.tv/globalv-web/vam-web/video/v1.0/vod/%s/inkey' % video_id
        key_json = self._download_json(VOD_KEY_ENDPOINT, video_id,
                                       headers={"referer": "https://www.vlive.tv"})
        key = key_json.get("inkey")

        # Plain equality: `x in ('VOD')` tests against a *string*, not a
        # tuple, and would accept any substring such as '' or 'V'.
        if video_type == 'VOD':
            if video_params.get("encodingStatus") == 'COMPLETE':
                return self._replay(video_id, webpage, long_video_id, key, params)
            raise ExtractorError('VOD encoding not yet complete. Please try again later.',
                                 expected=True)
        elif video_type == 'LIVE':
            if video_params.get("status") == 'RESERVED':
                raise ExtractorError('Coming soon!', expected=True)
            return self._live(video_id, webpage, params)
        else:
            raise ExtractorError('Unknown video type %s' % video_type)

    def _get_common_fields(self, webpage, params):
        """Return the title, creator and thumbnail shared by VOD and live results."""
        title = self._og_search_title(webpage)
        description = self._html_search_meta(
            ['og:description', 'description', 'twitter:description'],
            webpage, 'description', default=None)
        # Prefer the channel name from the page state; otherwise try to pull
        # it out of the description text.
        creator = (try_get(params, lambda x: x["channel"]["channel"]["channelName"], compat_str)
                   or self._search_regex(r'on (.*) channel', description or '', 'creator', fatal=False))
        thumbnail = self._og_search_thumbnail(webpage)
        return {
            'title': title,
            'creator': creator,
            'thumbnail': thumbnail,
        }

    def _live(self, video_id, webpage, params):
        """Build the info dict for a stream that is currently live."""
        init_page = self._download_init_page(video_id)

        live_params = self._search_regex(
            r'"liveStreamInfo"\s*:\s*(".*"),',
            init_page, 'live stream info')
        # The captured value is a JSON string literal whose content is itself
        # JSON, hence the two decoding passes.
        live_params = self._parse_json(live_params, video_id)
        live_params = self._parse_json(live_params, video_id)

        formats = []
        for vid in live_params.get('resolutions', []):
            formats.extend(self._extract_m3u8_formats(
                vid['cdnUrl'], video_id, 'mp4',
                m3u8_id=vid.get('name'),
                fatal=False, live=True))
        self._sort_formats(formats)

        info = self._get_common_fields(webpage, params)
        info.update({
            'title': self._live_title(info['title']),
            'id': video_id,
            'formats': formats,
            'is_live': True,
        })
        return info

    def _replay(self, video_id, webpage, long_video_id, key, params):
        """Build the info dict for a finished (VOD) broadcast.

        When either `long_video_id` or `key` is missing, both are re-read
        from the init page.  If that page reports NEED_CHANNEL_PLUS,
        self.raise_login_required is called.
        """
        # Treat None the same as '' so that a missing value also triggers the
        # init-page fallback.
        if not long_video_id or not key:
            init_page = self._download_init_page(video_id)
            video_info = self._parse_json(self._search_regex(
                (r'(?s)oVideoStatus\s*=\s*({.+?})\s*</script',
                 r'(?s)oVideoStatus\s*=\s*({.+})'), init_page, 'video info'),
                video_id)
            if video_info.get('status') == 'NEED_CHANNEL_PLUS':
                self.raise_login_required(
                    'This video is only available for CH+ subscribers')
            long_video_id, key = video_info['vid'], video_info['inkey']

        # _extract_video_info is not defined in this class; presumably it is
        # inherited from NaverBaseIE.
        return merge_dicts(
            self._get_common_fields(webpage, params),
            self._extract_video_info(video_id, long_video_id, key))

    def _download_init_page(self, video_id):
        """POST the video sequence number to the init endpoint and return the page text."""
        return self._download_webpage(
            'https://www.vlive.tv/video/init/view',
            video_id, note='Downloading live webpage',
            data=urlencode_postdata({'videoSeq': video_id}),
            headers={
                'Referer': 'https://www.vlive.tv/video/%s' % video_id,
                'Content-Type': 'application/x-www-form-urlencoded'
            })
219
b92d3c53 220
class VLiveChannelIE(InfoExtractor):
    # Extractor that turns a V LIVE channel URL into a playlist of the
    # channel's videos, each entry being handed over to VLiveIE.
    IE_NAME = 'vlive:channel'
    _VALID_URL = r'https?://(?:(?:www|m)\.)?(?:channels\.vlive\.tv/|vlive\.tv/channels?/)(?P<id>[0-9A-Z]+)'
    _TESTS = [{
        'url': 'https://channels.vlive.tv/FCD4B',
        'info_dict': {
            'id': 'FCD4B',
            'title': 'MAMAMOO',
        },
        'playlist_mincount': 110
    }, {
        'url': 'https://www.vlive.tv/channel/FCD4B',
        'info_dict': {
            'id': 'FCD4B',
            'title': 'MAMAMOO',
        },
        'playlist_mincount': 110
    }]
    # Used whenever no app id can be scraped from the channel page.
    _APP_ID = '8c6cc7b45d2568fb668be6e05b6e5a3b'

    def _find_app_id(self, channel_page, channel_code):
        """Return the app id found in the page's app.js, or None if unavailable."""
        script_url = self._search_regex(
            r'<script[^>]+src=(["\'])(?P<url>http.+?/app\.js.*?)\1',
            channel_page, 'app js', default=None, group='url')
        if not script_url:
            return None

        script = self._download_webpage(
            script_url, channel_code, 'Downloading app JS', fatal=False)
        if not script:
            return None

        return self._search_regex(
            r'Global\.VFAN_APP_ID\s*=\s*[\'"]([^\'"]+)[\'"]',
            script, 'app id', default=None)

    def _real_extract(self, url):
        """Collect every video of the channel into a playlist result."""
        channel_code = self._match_id(url)

        channel_page = self._download_webpage(
            'http://channels.vlive.tv/%s/video' % channel_code, channel_code)

        app_id = self._find_app_id(channel_page, channel_code) or self._APP_ID

        decoded = self._download_json(
            'http://api.vfan.vlive.tv/vproxy/channelplus/decodeChannelCode',
            channel_code, note='Downloading decode channel code',
            query={
                'app_id': app_id,
                'channelCode': channel_code,
                '_': int(time.time())
            })
        channel_seq = decoded['result']['channelSeq']

        playlist_title = None
        results = []

        # Keep requesting pages until one comes back without any videos.
        for page_no in itertools.count(1):
            page = self._download_json(
                'http://api.vfan.vlive.tv/vproxy/channelplus/getChannelVideoList',
                channel_code, note='Downloading channel list page #%d' % page_no,
                query={
                    'app_id': app_id,
                    'channelSeq': channel_seq,
                    # Large values of maxNumOfRows (~300 or above) may cause
                    # empty responses (see [1]), e.g. this happens for [2] that
                    # has more than 300 videos.
                    # 1. https://github.com/ytdl-org/youtube-dl/issues/13830
                    # 2. http://channels.vlive.tv/EDBF.
                    'maxNumOfRows': 100,
                    '_': int(time.time()),
                    'pageNo': page_no
                }
            )

            # Only look the name up until a usable one has been seen.
            playlist_title = playlist_title or try_get(
                page,
                lambda x: x['result']['channelInfo']['channelName'],
                compat_str)

            page_videos = try_get(
                page, lambda x: x['result']['videoList'], list)
            if not page_videos:
                break

            # Items without a videoSeq are skipped.
            seqs = [
                compat_str(item['videoSeq'])
                for item in page_videos if item.get('videoSeq')]
            for seq in seqs:
                results.append(self.url_result(
                    'http://www.vlive.tv/video/%s' % seq,
                    ie=VLiveIE.ie_key(), video_id=seq))

        return self.playlist_result(
            results, channel_code, playlist_title)
b71c18b4 317
318
class VLivePlaylistIE(InfoExtractor):
    # Extractor for /video/<seq>/playlist/<id> URLs.  Falls back to the single
    # video named in the URL when a playlist is not wanted or not present.
    IE_NAME = 'vlive:playlist'
    _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/video/(?P<video_id>[0-9]+)/playlist/(?P<id>[0-9]+)'
    _VIDEO_URL_TEMPLATE = 'http://www.vlive.tv/video/%s'
    _TESTS = [{
        # regular working playlist
        'url': 'https://www.vlive.tv/video/117956/playlist/117963',
        'info_dict': {
            'id': '117963',
            'title': '아이돌룸(IDOL ROOM) 41회 - (여자)아이들'
        },
        'playlist_mincount': 10
    }, {
        # playlist with no playlistVideoSeqs
        'url': 'http://www.vlive.tv/video/22867/playlist/22912',
        'info_dict': {
            'id': '22867',
            'ext': 'mp4',
            'title': '[V LIVE] Valentine Day Message from MINA',
            'creator': 'TWICE',
            'view_count': int
        },
        'params': {
            'skip_download': True,
        }
    }]

    def _build_video_result(self, video_id, message):
        """Print `message` and return a url_result that defers to VLiveIE."""
        self.to_screen(message)
        single_video_url = self._VIDEO_URL_TEMPLATE % video_id
        return self.url_result(
            single_video_url, ie=VLiveIE.ie_key(), video_id=video_id)

    def _real_extract(self, url):
        """Return the playlist, or just the URL's own video as a fallback."""
        match = re.match(self._VALID_URL, url)
        video_id = match.group('video_id')
        playlist_id = match.group('id')

        # --no-playlist: hand the single video over without fetching anything.
        if self._downloader.params.get('noplaylist'):
            return self._build_video_result(
                video_id,
                'Downloading just video %s because of --no-playlist'
                % video_id)

        self.to_screen(
            'Downloading playlist %s - add --no-playlist to just download video'
            % playlist_id)

        playlist_page = self._download_webpage(
            'http://www.vlive.tv/video/%s/playlist/%s'
            % (video_id, playlist_id), playlist_id)

        seqs_literal = self._search_regex(
            r'playlistVideoSeqs\s*=\s*(\[[^]]+\])', playlist_page,
            'playlist video seqs', default=None, fatal=False)

        # No sequence array in the page: treat the URL as a plain video.
        if not seqs_literal:
            return self._build_video_result(
                video_id,
                'Downloading just video %s because no playlist was found'
                % video_id)

        playlist_entries = []
        for seq in self._parse_json(seqs_literal, playlist_id):
            playlist_entries.append(self.url_result(
                self._VIDEO_URL_TEMPLATE % seq, ie=VLiveIE.ie_key(),
                video_id=compat_str(seq)))

        playlist_title = self._html_search_regex(
            r'<div[^>]+class="[^"]*multicam_playlist[^>]*>\s*<h3[^>]+>([^<]+)',
            playlist_page, 'playlist title', fatal=False)

        return self.playlist_result(
            playlist_entries, playlist_id, playlist_title)