]>
Commit | Line | Data |
---|---|---|
061f62da | 1 | # coding: utf-8 |
25bcd355 | 2 | from __future__ import unicode_literals |
061f62da | 3 | |
b24d6336 | 4 | import re |
b92d3c53 | 5 | import time |
6 | import itertools | |
9d186afa | 7 | |
061f62da | 8 | from .common import InfoExtractor |
19a107f2 AG |
9 | from ..compat import ( |
10 | compat_urllib_parse_urlencode, | |
11 | compat_str, | |
12 | ) | |
061f62da | 13 | from ..utils import ( |
19a107f2 | 14 | dict_get, |
9d186afa | 15 | ExtractorError, |
19a107f2 AG |
16 | float_or_none, |
17 | int_or_none, | |
345dec93 | 18 | remove_start, |
661cc229 | 19 | try_get, |
89c63cc5 | 20 | urlencode_postdata, |
061f62da | 21 | ) |
061f62da | 22 | |
23 | ||
class VLiveIE(InfoExtractor):
    """Extractor for a single vlive.tv video page (live stream or replay)."""

    IE_NAME = 'vlive'
    _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/video/(?P<id>[0-9]+)'
    _NETRC_MACHINE = 'vlive'
    _TESTS = [{
        'url': 'http://www.vlive.tv/video/1326',
        'md5': 'cc7314812855ce56de70a06a27314983',
        'info_dict': {
            'id': '1326',
            'ext': 'mp4',
            'title': "[V LIVE] Girl's Day's Broadcast",
            'creator': "Girl's Day",
            'view_count': int,
        },
    }, {
        'url': 'http://www.vlive.tv/video/16937',
        'info_dict': {
            'id': '16937',
            'ext': 'mp4',
            'title': '[V LIVE] 첸백시 걍방',
            'creator': 'EXO',
            'view_count': int,
            'subtitles': 'mincount:12',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.vlive.tv/video/129100',
        'md5': 'ca2569453b79d66e5b919e5d308bff6b',
        'info_dict': {
            'id': '129100',
            'ext': 'mp4',
            'title': '[V LIVE] [BTS+] Run BTS! 2019 - EP.71 :: Behind the scene',
            'creator': 'BTS+',
            'view_count': int,
            'subtitles': 'mincount:10',
        },
        'skip': 'This video is only available for CH+ subscribers',
    }]

    @classmethod
    def suitable(cls, url):
        """Return whether this extractor handles *url*.

        Playlist URLs also match ``_VALID_URL`` (they share the
        ``/video/<id>`` prefix), so they are explicitly left to
        VLivePlaylistIE.
        """
        return False if VLivePlaylistIE.suitable(url) else super(VLiveIE, cls).suitable(url)

    def _real_initialize(self):
        self._login()

    def _login(self):
        """Log in with the configured credentials, if any were supplied.

        Does nothing when either the e-mail or the password is missing.
        Raises ExtractorError (expected) when the site does not report a
        logged-in session after the credentials were posted.
        """
        email, password = self._get_login_info()
        if None in (email, password):
            return

        def is_logged_in():
            login_info = self._download_json(
                'https://www.vlive.tv/auth/loginInfo', None,
                note='Downloading login info',
                headers={'Referer': 'https://www.vlive.tv/home'})
            # Anything other than a real boolean is treated as "not logged in"
            return try_get(
                login_info, lambda x: x['message']['login'], bool) or False

        LOGIN_URL = 'https://www.vlive.tv/auth/email/login'
        # The login page is requested once before posting the credentials
        self._request_webpage(
            LOGIN_URL, None, note='Downloading login cookies')

        self._download_webpage(
            LOGIN_URL, None, note='Logging in',
            data=urlencode_postdata({'email': email, 'pwd': password}),
            headers={
                'Referer': LOGIN_URL,
                'Content-Type': 'application/x-www-form-urlencoded'
            })

        if not is_logged_in():
            raise ExtractorError('Unable to log in', expected=True)

    def _real_extract(self, url):
        """Extract the video referenced by *url*.

        Reads the arguments of the ``vlive.video.init(...)`` call embedded
        in the video page and dispatches on the status argument to either
        the live or the replay extraction path.

        Raises ExtractorError for statuses that cannot be downloaded and
        when the init arguments cannot be located or are incomplete.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(
            'https://www.vlive.tv/video/%s' % video_id, video_id)

        VIDEO_PARAMS_RE = r'\bvlive\.video\.init\(([^)]+)'
        VIDEO_PARAMS_FIELD = 'video params'

        # First attempt: treat the argument list as the body of a JSON array
        params = self._parse_json(self._search_regex(
            VIDEO_PARAMS_RE, webpage, VIDEO_PARAMS_FIELD, default=''), video_id,
            transform_source=lambda s: '[' + s + ']', fatal=False)

        if not params or len(params) < 7:
            # Fallback: split on commas and strip the surrounding quotes
            params = self._search_regex(
                VIDEO_PARAMS_RE, webpage, VIDEO_PARAMS_FIELD)
            params = [p.strip(r'"') for p in re.split(r'\s*,\s*', params)]

        # Indexes 2, 5 and 6 are read below; fail with a proper extractor
        # error instead of a bare IndexError when the list is too short.
        if len(params) < 7:
            raise ExtractorError(
                'Unable to extract %s' % VIDEO_PARAMS_FIELD)

        status, long_video_id, key = params[2], params[5], params[6]
        status = remove_start(status, 'PRODUCT_')

        if status in ('LIVE_ON_AIR', 'BIG_EVENT_ON_AIR'):
            return self._live(video_id, webpage)
        elif status in ('VOD_ON_AIR', 'BIG_EVENT_INTRO'):
            return self._replay(video_id, webpage, long_video_id, key)

        if status == 'LIVE_END':
            raise ExtractorError('Uploading for replay. Please wait...',
                                 expected=True)
        elif status == 'COMING_SOON':
            raise ExtractorError('Coming soon!', expected=True)
        elif status == 'CANCELED':
            raise ExtractorError('We are sorry, '
                                 'but the live broadcast has been canceled.',
                                 expected=True)
        elif status == 'ONLY_APP':
            raise ExtractorError('Unsupported video type', expected=True)
        else:
            raise ExtractorError('Unknown status %s' % status)

    def _get_common_fields(self, webpage):
        """Return title, creator and thumbnail scraped from the video page."""
        title = self._og_search_title(webpage)
        creator = self._html_search_regex(
            r'<div[^>]+class="info_area"[^>]*>\s*(?:<em[^>]*>.*?</em\s*>\s*)?<a\s+[^>]*>([^<]+)',
            webpage, 'creator', fatal=False)
        thumbnail = self._og_search_thumbnail(webpage)
        return {
            'title': title,
            'creator': creator,
            'thumbnail': thumbnail,
        }

    def _live(self, video_id, webpage):
        """Build the info dict for a broadcast that is currently on air."""
        init_page = self._download_init_page(video_id)

        live_params = self._search_regex(
            r'"liveStreamInfo"\s*:\s*(".*"),',
            init_page, 'live stream info')
        # The value is a JSON string that itself contains JSON, hence the
        # two decoding passes.
        live_params = self._parse_json(live_params, video_id)
        live_params = self._parse_json(live_params, video_id)

        formats = []
        for vid in live_params.get('resolutions') or []:
            cdn_url = vid.get('cdnUrl')
            if not cdn_url:
                # Skip malformed entries instead of aborting on KeyError
                continue
            formats.extend(self._extract_m3u8_formats(
                cdn_url, video_id, 'mp4',
                m3u8_id=vid.get('name'),
                fatal=False, live=True))
        self._sort_formats(formats)

        info = self._get_common_fields(webpage)
        info.update({
            'title': self._live_title(info['title']),
            'id': video_id,
            'formats': formats,
            'is_live': True,
        })
        return info

    def _replay(self, video_id, webpage, long_video_id, key):
        """Build the info dict for a recorded (VOD) video.

        *long_video_id* and *key* are the values taken from the video page;
        when either is missing they are looked up on the init page instead.
        """
        # Treat None (e.g. a JSON null) like the empty string
        if not (long_video_id and key):
            init_page = self._download_init_page(video_id)
            video_info = self._parse_json(self._search_regex(
                (r'(?s)oVideoStatus\s*=\s*({.+?})\s*</script',
                 r'(?s)oVideoStatus\s*=\s*({.+})'), init_page, 'video info'),
                video_id)
            if video_info.get('status') == 'NEED_CHANNEL_PLUS':
                self.raise_login_required(
                    'This video is only available for CH+ subscribers')
            long_video_id, key = video_info['vid'], video_info['inkey']

        playinfo = self._download_json(
            'http://global.apis.naver.com/rmcnmv/rmcnmv/vod_play_videoInfo.json?%s'
            % compat_urllib_parse_urlencode({
                'videoId': long_video_id,
                'key': key,
                'ptc': 'http',
                'doct': 'json',  # document type (xml or json)
                'cpt': 'vtt',  # captions type (vtt or ttml)
            }), video_id)

        formats = []
        for vid in try_get(
                playinfo, lambda x: x['videos']['list'], list) or []:
            source = vid.get('source')
            if not source:
                continue
            # `or {}` also covers keys that are present but null
            encoding = vid.get('encodingOption') or {}
            bitrate = vid.get('bitrate') or {}
            formats.append({
                'url': source,
                'format_id': encoding.get('name'),
                'abr': float_or_none(bitrate.get('audio')),
                'vbr': float_or_none(bitrate.get('video')),
                'width': int_or_none(encoding.get('width')),
                'height': int_or_none(encoding.get('height')),
                'filesize': int_or_none(vid.get('size')),
            })
        self._sort_formats(formats)

        view_count = int_or_none(
            try_get(playinfo, lambda x: x['meta']['count']))

        subtitles = {}
        for caption in try_get(
                playinfo, lambda x: x['captions']['list'], list) or []:
            lang = dict_get(caption, ('locale', 'language', 'country', 'label'))
            if lang and caption.get('source'):
                # Append so that several tracks sharing a language are all
                # kept rather than the last one overwriting the others.
                subtitles.setdefault(lang, []).append({
                    'ext': 'vtt',
                    'url': caption['source']})

        info = self._get_common_fields(webpage)
        info.update({
            'id': video_id,
            'formats': formats,
            'view_count': view_count,
            'subtitles': subtitles,
        })
        return info

    def _download_init_page(self, video_id):
        """POST the video sequence number to the init endpoint and return the page."""
        return self._download_webpage(
            'https://www.vlive.tv/video/init/view',
            video_id, note='Downloading live webpage',
            data=urlencode_postdata({'videoSeq': video_id}),
            headers={
                'Referer': 'https://www.vlive.tv/video/%s' % video_id,
                'Content-Type': 'application/x-www-form-urlencoded'
            })
239 | ||
b92d3c53 | 240 | |
class VLiveChannelIE(InfoExtractor):
    """Extractor that turns a channels.vlive.tv channel into a playlist."""

    IE_NAME = 'vlive:channel'
    _VALID_URL = r'https?://channels\.vlive\.tv/(?P<id>[0-9A-Z]+)'
    _TEST = {
        'url': 'http://channels.vlive.tv/FCD4B',
        'info_dict': {
            'id': 'FCD4B',
            'title': 'MAMAMOO',
        },
        'playlist_mincount': 110
    }
    # Used when no app id can be read from the site's app.js
    _APP_ID = '8c6cc7b45d2568fb668be6e05b6e5a3b'

    def _find_app_id(self, webpage, channel_code):
        """Return the app id found in the page's app.js, else ``_APP_ID``."""
        script_url = self._search_regex(
            r'<script[^>]+src=(["\'])(?P<url>http.+?/app\.js.*?)\1',
            webpage, 'app js', default=None, group='url')
        if not script_url:
            return self._APP_ID

        script = self._download_webpage(
            script_url, channel_code, 'Downloading app JS', fatal=False)
        if not script:
            return self._APP_ID

        found = self._search_regex(
            r'Global\.VFAN_APP_ID\s*=\s*[\'"]([^\'"]+)[\'"]',
            script, 'app id', default=None)
        return found or self._APP_ID

    def _fetch_video_page(self, channel_code, app_id, channel_seq, page_num):
        """Download one page of the channel's video list as parsed JSON."""
        return self._download_json(
            'http://api.vfan.vlive.tv/vproxy/channelplus/getChannelVideoList',
            channel_code, note='Downloading channel list page #%d' % page_num,
            query={
                'app_id': app_id,
                'channelSeq': channel_seq,
                # Large values of maxNumOfRows (~300 or above) may cause
                # empty responses (see [1]), e.g. this happens for [2] that
                # has more than 300 videos.
                # 1. https://github.com/ytdl-org/youtube-dl/issues/13830
                # 2. http://channels.vlive.tv/EDBF.
                'maxNumOfRows': 100,
                '_': int(time.time()),
                'pageNo': page_num
            }
        )

    def _real_extract(self, url):
        """Collect every video of the channel into a playlist result."""
        channel_code = self._match_id(url)

        webpage = self._download_webpage(
            'http://channels.vlive.tv/%s/video' % channel_code, channel_code)

        app_id = self._find_app_id(webpage, channel_code)

        decoded = self._download_json(
            'http://api.vfan.vlive.tv/vproxy/channelplus/decodeChannelCode',
            channel_code, note='Downloading decode channel code',
            query={
                'app_id': app_id,
                'channelCode': channel_code,
                '_': int(time.time())
            })
        channel_seq = decoded['result']['channelSeq']

        channel_name = None
        entries = []

        # Keep requesting pages until one comes back without any videos
        for page_num in itertools.count(1):
            page = self._fetch_video_page(
                channel_code, app_id, channel_seq, page_num)

            # The name is taken from the first page that provides one
            channel_name = channel_name or try_get(
                page,
                lambda x: x['result']['channelInfo']['channelName'],
                compat_str)

            page_videos = try_get(
                page, lambda x: x['result']['videoList'], list)
            if not page_videos:
                break

            for item in page_videos:
                seq = item.get('videoSeq')
                if seq:
                    seq = compat_str(seq)
                    entries.append(self.url_result(
                        'http://www.vlive.tv/video/%s' % seq,
                        ie=VLiveIE.ie_key(), video_id=seq))

        return self.playlist_result(
            entries, channel_code, channel_name)
b71c18b4 | 330 | |
331 | ||
class VLivePlaylistIE(InfoExtractor):
    """Extractor for vlive.tv ``/video/<id>/playlist/<id>`` URLs."""

    IE_NAME = 'vlive:playlist'
    _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/video/(?P<video_id>[0-9]+)/playlist/(?P<id>[0-9]+)'
    _VIDEO_URL_TEMPLATE = 'http://www.vlive.tv/video/%s'
    _TESTS = [{
        # regular working playlist
        'url': 'https://www.vlive.tv/video/117956/playlist/117963',
        'info_dict': {
            'id': '117963',
            'title': '아이돌룸(IDOL ROOM) 41회 - (여자)아이들'
        },
        'playlist_mincount': 10
    }, {
        # playlist with no playlistVideoSeqs
        'url': 'http://www.vlive.tv/video/22867/playlist/22912',
        'info_dict': {
            'id': '22867',
            'ext': 'mp4',
            'title': '[V LIVE] Valentine Day Message from MINA',
            'creator': 'TWICE',
            'view_count': int
        },
        'params': {
            'skip_download': True,
        }
    }]

    def _build_video_result(self, video_id, message):
        """Print *message* and delegate the single video to VLiveIE."""
        self.to_screen(message)
        single_url = self._VIDEO_URL_TEMPLATE % video_id
        return self.url_result(
            single_url, ie=VLiveIE.ie_key(), video_id=video_id)

    def _real_extract(self, url):
        """Return the playlist, or just its video when no playlist applies.

        The single video is returned when ``noplaylist`` is set or when the
        page carries no ``playlistVideoSeqs`` list.
        """
        url_match = re.match(self._VALID_URL, url)
        video_id = url_match.group('video_id')
        playlist_id = url_match.group('id')

        if self._downloader.params.get('noplaylist'):
            notice = (
                'Downloading just video %s because of --no-playlist'
                % video_id)
            return self._build_video_result(video_id, notice)

        self.to_screen(
            'Downloading playlist %s - add --no-playlist to just download video'
            % playlist_id)

        playlist_url = (
            'http://www.vlive.tv/video/%s/playlist/%s'
            % (video_id, playlist_id))
        webpage = self._download_webpage(playlist_url, playlist_id)

        seqs_json = self._search_regex(
            r'playlistVideoSeqs\s*=\s*(\[[^]]+\])', webpage,
            'playlist video seqs', default=None, fatal=False)

        if not seqs_json:
            notice = (
                'Downloading just video %s because no playlist was found'
                % video_id)
            return self._build_video_result(video_id, notice)

        entries = []
        for seq in self._parse_json(seqs_json, playlist_id):
            entries.append(self.url_result(
                self._VIDEO_URL_TEMPLATE % seq, ie=VLiveIE.ie_key(),
                video_id=compat_str(seq)))

        playlist_title = self._html_search_regex(
            r'<div[^>]+class="[^"]*multicam_playlist[^>]*>\s*<h3[^>]+>([^<]+)',
            webpage, 'playlist title', fatal=False)

        return self.playlist_result(entries, playlist_id, playlist_title)