]>
Commit | Line | Data |
---|---|---|
061f62da | 1 | # coding: utf-8 |
25bcd355 | 2 | from __future__ import unicode_literals |
061f62da | 3 | |
b24d6336 | 4 | import re |
b92d3c53 | 5 | import time |
6 | import itertools | |
9d186afa | 7 | |
061f62da | 8 | from .common import InfoExtractor |
c88debff RA |
9 | from .naver import NaverBaseIE |
10 | from ..compat import compat_str | |
061f62da | 11 | from ..utils import ( |
9d186afa | 12 | ExtractorError, |
c88debff | 13 | merge_dicts, |
345dec93 | 14 | remove_start, |
661cc229 | 15 | try_get, |
89c63cc5 | 16 | urlencode_postdata, |
061f62da | 17 | ) |
061f62da | 18 | |
19 | ||
class VLiveIE(NaverBaseIE):
    """Extractor for a single vlive.tv video page (live stream or replay).

    The video page embeds a ``vlive.video.init(...)`` call whose arguments
    carry the broadcast status and, for replays, the identifiers that are
    handed to ``_extract_video_info`` (not defined in this class; presumably
    inherited from ``NaverBaseIE`` - confirm against that base class).
    """
    IE_NAME = 'vlive'
    _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/video/(?P<id>[0-9]+)'
    _NETRC_MACHINE = 'vlive'
    _TESTS = [{
        'url': 'http://www.vlive.tv/video/1326',
        'md5': 'cc7314812855ce56de70a06a27314983',
        'info_dict': {
            'id': '1326',
            'ext': 'mp4',
            'title': "[V LIVE] Girl's Day's Broadcast",
            'creator': "Girl's Day",
            'view_count': int,
            'uploader_id': 'muploader_a',
        },
    }, {
        'url': 'http://www.vlive.tv/video/16937',
        'info_dict': {
            'id': '16937',
            'ext': 'mp4',
            'title': '[V LIVE] 첸백시 걍방',
            'creator': 'EXO',
            'view_count': int,
            'subtitles': 'mincount:12',
            'uploader_id': 'muploader_j',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.vlive.tv/video/129100',
        'md5': 'ca2569453b79d66e5b919e5d308bff6b',
        'info_dict': {
            'id': '129100',
            'ext': 'mp4',
            'title': '[V LIVE] [BTS+] Run BTS! 2019 - EP.71 :: Behind the scene',
            'creator': 'BTS+',
            'view_count': int,
            'subtitles': 'mincount:10',
        },
        'skip': 'This video is only available for CH+ subscribers',
    }]

    @classmethod
    def suitable(cls, url):
        """Return False for URLs that VLivePlaylistIE accepts.

        Playlist URLs also start with ``/video/<id>``, so without this
        check they would match ``_VALID_URL`` of this extractor as well.
        """
        return False if VLivePlaylistIE.suitable(url) else super(VLiveIE, cls).suitable(url)

    def _real_initialize(self):
        self._login()

    def _login(self):
        """Log in with the configured credentials, if any were supplied.

        Does nothing when either the e-mail or the password is missing.

        Raises:
            ExtractorError: (expected) when the site does not report a
                logged-in session after the credentials were posted.
        """
        email, password = self._get_login_info()
        if None in (email, password):
            return

        def is_logged_in():
            # Ask the site for the login state of the current session;
            # anything other than a boolean under message.login is
            # treated as "not logged in".
            login_info = self._download_json(
                'https://www.vlive.tv/auth/loginInfo', None,
                note='Downloading login info',
                headers={'Referer': 'https://www.vlive.tv/home'})
            return try_get(
                login_info, lambda x: x['message']['login'], bool) or False

        LOGIN_URL = 'https://www.vlive.tv/auth/email/login'
        # First request only serves to obtain cookies before posting.
        self._request_webpage(
            LOGIN_URL, None, note='Downloading login cookies')

        self._download_webpage(
            LOGIN_URL, None, note='Logging in',
            data=urlencode_postdata({'email': email, 'pwd': password}),
            headers={
                'Referer': LOGIN_URL,
                'Content-Type': 'application/x-www-form-urlencoded'
            })

        if not is_logged_in():
            raise ExtractorError('Unable to log in', expected=True)

    def _real_extract(self, url):
        """Dispatch to live or replay extraction based on the page status.

        Raises:
            ExtractorError: when the init parameters cannot be extracted,
                or when the status denotes a video that cannot be
                downloaded (ended, upcoming, canceled, app-only, unknown).
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(
            'https://www.vlive.tv/video/%s' % video_id, video_id)

        VIDEO_PARAMS_RE = r'\bvlive\.video\.init\(([^)]+)'
        VIDEO_PARAMS_FIELD = 'video params'

        # Preferred path: treat the argument list as a JSON array.
        params = self._parse_json(self._search_regex(
            VIDEO_PARAMS_RE, webpage, VIDEO_PARAMS_FIELD, default=''), video_id,
            transform_source=lambda s: '[' + s + ']', fatal=False)

        if not params or len(params) < 7:
            # Fallback: split the raw argument string on commas.
            params = self._search_regex(
                VIDEO_PARAMS_RE, webpage, VIDEO_PARAMS_FIELD)
            params = [p.strip(r'"') for p in re.split(r'\s*,\s*', params)]
            if len(params) < 7:
                # Indices 2, 5 and 6 are read below; fail with a proper
                # extractor error rather than a bare IndexError.
                raise ExtractorError(
                    'Unable to extract %s' % VIDEO_PARAMS_FIELD)

        status, long_video_id, key = params[2], params[5], params[6]
        status = remove_start(status, 'PRODUCT_')

        if status in ('LIVE_ON_AIR', 'BIG_EVENT_ON_AIR'):
            return self._live(video_id, webpage)
        elif status in ('VOD_ON_AIR', 'BIG_EVENT_INTRO'):
            return self._replay(video_id, webpage, long_video_id, key)

        if status == 'LIVE_END':
            raise ExtractorError('Uploading for replay. Please wait...',
                                 expected=True)
        elif status == 'COMING_SOON':
            raise ExtractorError('Coming soon!', expected=True)
        elif status == 'CANCELED':
            raise ExtractorError('We are sorry, '
                                 'but the live broadcast has been canceled.',
                                 expected=True)
        elif status == 'ONLY_APP':
            raise ExtractorError('Unsupported video type', expected=True)
        else:
            raise ExtractorError('Unknown status %s' % status)

    def _get_common_fields(self, webpage):
        """Return title, creator and thumbnail scraped from the video page."""
        title = self._og_search_title(webpage)
        creator = self._html_search_regex(
            r'<div[^>]+class="info_area"[^>]*>\s*(?:<em[^>]*>.*?</em\s*>\s*)?<a\s+[^>]*>([^<]+)',
            webpage, 'creator', fatal=False)
        thumbnail = self._og_search_thumbnail(webpage)
        return {
            'title': title,
            'creator': creator,
            'thumbnail': thumbnail,
        }

    def _live(self, video_id, webpage):
        """Build the info dict for a broadcast that is currently on air."""
        init_page = self._download_init_page(video_id)

        live_params = self._search_regex(
            r'"liveStreamInfo"\s*:\s*(".*"),',
            init_page, 'live stream info')
        # The value is a JSON string that itself contains JSON, hence the
        # two decoding passes.
        live_params = self._parse_json(live_params, video_id)
        live_params = self._parse_json(live_params, video_id)

        formats = []
        for vid in live_params.get('resolutions', []):
            # Format extraction is best-effort (fatal=False), so an entry
            # without a playlist URL is skipped instead of raising KeyError.
            cdn_url = vid.get('cdnUrl')
            if not cdn_url:
                continue
            formats.extend(self._extract_m3u8_formats(
                cdn_url, video_id, 'mp4',
                m3u8_id=vid.get('name'),
                fatal=False, live=True))
        self._sort_formats(formats)

        info = self._get_common_fields(webpage)
        info.update({
            'title': self._live_title(info['title']),
            'id': video_id,
            'formats': formats,
            'is_live': True,
        })
        return info

    def _replay(self, video_id, webpage, long_video_id, key):
        """Build the info dict for a recorded video.

        When the page did not provide ``long_video_id`` or ``key`` (either
        is an empty string), both are read from the init page instead.
        """
        if '' in (long_video_id, key):
            init_page = self._download_init_page(video_id)
            video_info = self._parse_json(self._search_regex(
                (r'(?s)oVideoStatus\s*=\s*({.+?})\s*</script',
                 r'(?s)oVideoStatus\s*=\s*({.+})'), init_page, 'video info'),
                video_id)
            if video_info.get('status') == 'NEED_CHANNEL_PLUS':
                self.raise_login_required(
                    'This video is only available for CH+ subscribers')
            long_video_id, key = video_info['vid'], video_info['inkey']

        return merge_dicts(
            self._get_common_fields(webpage),
            self._extract_video_info(video_id, long_video_id, key))

    def _download_init_page(self, video_id):
        """POST the video sequence number and return the init view page."""
        return self._download_webpage(
            'https://www.vlive.tv/video/init/view',
            video_id, note='Downloading live webpage',
            data=urlencode_postdata({'videoSeq': video_id}),
            headers={
                'Referer': 'https://www.vlive.tv/video/%s' % video_id,
                'Content-Type': 'application/x-www-form-urlencoded'
            })
201 | ||
b92d3c53 | 202 | |
class VLiveChannelIE(InfoExtractor):
    """Extractor that lists every video of a channels.vlive.tv channel."""
    IE_NAME = 'vlive:channel'
    _VALID_URL = r'https?://channels\.vlive\.tv/(?P<id>[0-9A-Z]+)'
    _TEST = {
        'url': 'http://channels.vlive.tv/FCD4B',
        'info_dict': {
            'id': 'FCD4B',
            'title': 'MAMAMOO',
        },
        'playlist_mincount': 110
    }
    # Used when no app id can be read from the site's app.js.
    _APP_ID = '8c6cc7b45d2568fb668be6e05b6e5a3b'

    def _real_extract(self, url):
        """Return a playlist result with one URL entry per channel video."""
        channel_code = self._match_id(url)

        webpage = self._download_webpage(
            'http://channels.vlive.tv/%s/video' % channel_code, channel_code)

        # Try to read the current app id from app.js; every step here is
        # optional and falls through to the built-in id.
        js_url = self._search_regex(
            r'<script[^>]+src=(["\'])(?P<url>http.+?/app\.js.*?)\1',
            webpage, 'app js', default=None, group='url')

        js_source = None
        if js_url:
            js_source = self._download_webpage(
                js_url, channel_code, 'Downloading app JS', fatal=False)

        scraped_app_id = None
        if js_source:
            scraped_app_id = self._search_regex(
                r'Global\.VFAN_APP_ID\s*=\s*[\'"]([^\'"]+)[\'"]',
                js_source, 'app id', default=None)

        app_id = scraped_app_id or self._APP_ID

        decode_query = {
            'app_id': app_id,
            'channelCode': channel_code,
            '_': int(time.time())
        }
        channel_info = self._download_json(
            'http://api.vfan.vlive.tv/vproxy/channelplus/decodeChannelCode',
            channel_code, note='Downloading decode channel code',
            query=decode_query)

        channel_seq = channel_info['result']['channelSeq']
        channel_name = None
        entries = []

        for page_no in itertools.count(1):
            page_query = {
                'app_id': app_id,
                'channelSeq': channel_seq,
                # Keep maxNumOfRows well below ~300: larger values may
                # produce empty responses (see [1]), which happens e.g.
                # for [2], a channel with more than 300 videos.
                # 1. https://github.com/ytdl-org/youtube-dl/issues/13830
                # 2. http://channels.vlive.tv/EDBF.
                'maxNumOfRows': 100,
                '_': int(time.time()),
                'pageNo': page_no
            }
            page = self._download_json(
                'http://api.vfan.vlive.tv/vproxy/channelplus/getChannelVideoList',
                channel_code,
                note='Downloading channel list page #%d' % page_no,
                query=page_query)

            # The channel name is taken from the first page that has one.
            if not channel_name:
                channel_name = try_get(
                    page,
                    lambda x: x['result']['channelInfo']['channelName'],
                    compat_str)

            page_videos = try_get(
                page, lambda x: x['result']['videoList'], list)
            if not page_videos:
                # An empty or missing list marks the end of the channel.
                break

            for item in page_videos:
                seq = item.get('videoSeq')
                if seq:
                    seq = compat_str(seq)
                    entries.append(self.url_result(
                        'http://www.vlive.tv/video/%s' % seq,
                        ie=VLiveIE.ie_key(), video_id=seq))

        return self.playlist_result(
            entries, channel_code, channel_name)
b71c18b4 | 292 | |
293 | ||
class VLivePlaylistIE(InfoExtractor):
    """Extractor for vlive.tv ``/video/<id>/playlist/<id>`` URLs."""
    IE_NAME = 'vlive:playlist'
    _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/video/(?P<video_id>[0-9]+)/playlist/(?P<id>[0-9]+)'
    _VIDEO_URL_TEMPLATE = 'http://www.vlive.tv/video/%s'
    _TESTS = [{
        # regular working playlist
        'url': 'https://www.vlive.tv/video/117956/playlist/117963',
        'info_dict': {
            'id': '117963',
            'title': '아이돌룸(IDOL ROOM) 41회 - (여자)아이들'
        },
        'playlist_mincount': 10
    }, {
        # playlist with no playlistVideoSeqs
        'url': 'http://www.vlive.tv/video/22867/playlist/22912',
        'info_dict': {
            'id': '22867',
            'ext': 'mp4',
            'title': '[V LIVE] Valentine Day Message from MINA',
            'creator': 'TWICE',
            'view_count': int
        },
        'params': {
            'skip_download': True,
        }
    }]

    def _build_video_result(self, video_id, message):
        """Print ``message`` and return a URL result for a single video."""
        self.to_screen(message)
        single_url = self._VIDEO_URL_TEMPLATE % video_id
        return self.url_result(
            single_url, ie=VLiveIE.ie_key(), video_id=video_id)

    def _real_extract(self, url):
        """Return the playlist, or only its video when that is all there is.

        A single-video result is returned when the ``noplaylist`` option
        is set or when the page contains no ``playlistVideoSeqs`` array.
        """
        match = re.match(self._VALID_URL, url)
        video_id = match.group('video_id')
        playlist_id = match.group('id')

        if self._downloader.params.get('noplaylist'):
            notice = (
                'Downloading just video %s because of --no-playlist'
                % video_id)
            return self._build_video_result(video_id, notice)

        self.to_screen(
            'Downloading playlist %s - add --no-playlist to just download video'
            % playlist_id)

        page_url = (
            'http://www.vlive.tv/video/%s/playlist/%s'
            % (video_id, playlist_id))
        webpage = self._download_webpage(page_url, playlist_id)

        seqs_json = self._search_regex(
            r'playlistVideoSeqs\s*=\s*(\[[^]]+\])', webpage,
            'playlist video seqs', default=None, fatal=False)

        if not seqs_json:
            notice = (
                'Downloading just video %s because no playlist was found'
                % video_id)
            return self._build_video_result(video_id, notice)

        entries = []
        for seq in self._parse_json(seqs_json, playlist_id):
            entries.append(self.url_result(
                self._VIDEO_URL_TEMPLATE % seq, ie=VLiveIE.ie_key(),
                video_id=compat_str(seq)))

        playlist_name = self._html_search_regex(
            r'<div[^>]+class="[^"]*multicam_playlist[^>]*>\s*<h3[^>]+>([^<]+)',
            webpage, 'playlist title', fatal=False)

        return self.playlist_result(entries, playlist_id, playlist_name)