]>
Commit | Line | Data |
---|---|---|
061f62da | 1 | # coding: utf-8 |
25bcd355 | 2 | from __future__ import unicode_literals |
061f62da | 3 | |
b24d6336 | 4 | import re |
b92d3c53 | 5 | import time |
6 | import itertools | |
9d186afa | 7 | |
061f62da | 8 | from .common import InfoExtractor |
c88debff RA |
9 | from .naver import NaverBaseIE |
10 | from ..compat import compat_str | |
061f62da | 11 | from ..utils import ( |
9d186afa | 12 | ExtractorError, |
c88debff | 13 | merge_dicts, |
661cc229 | 14 | try_get, |
89c63cc5 | 15 | urlencode_postdata, |
061f62da | 16 | ) |
061f62da | 17 | |
18 | ||
class VLiveIE(NaverBaseIE):
    """Extractor for a single V LIVE video given a ``/video/`` or ``/post/`` URL.

    The page embeds a ``window.__PRELOADED_STATE__`` JSON blob; the
    ``postDetail.post.officialVideo`` entry of that blob describes the video
    and decides whether it is handled as a VOD replay or as a live stream.
    """
    IE_NAME = 'vlive'
    _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/(?:video|post)/(?P<id>(?:\d-)?[0-9]+)'
    _NETRC_MACHINE = 'vlive'
    _TESTS = [{
        'url': 'https://www.vlive.tv/video/1326',
        'md5': 'cc7314812855ce56de70a06a27314983',
        'info_dict': {
            'id': '1326',
            'ext': 'mp4',
            'title': "[V LIVE] Girl's Day's Broadcast",
            'creator': "Girl's Day",
            'view_count': int,
            'uploader_id': 'muploader_a',
        },
    }, {
        'url': 'https://vlive.tv/post/1-18244258',
        'md5': 'cc7314812855ce56de70a06a27314983',
        'info_dict': {
            'id': '1326',
            'ext': 'mp4',
            'title': "[V LIVE] Girl's Day's Broadcast",
            'creator': "Girl's Day",
            'view_count': int,
            'uploader_id': 'muploader_a',
        },
    }, {
        'url': 'https://www.vlive.tv/video/16937',
        'info_dict': {
            'id': '16937',
            'ext': 'mp4',
            'title': '[V LIVE] 첸백시 걍방',
            'creator': 'EXO',
            'view_count': int,
            'subtitles': 'mincount:12',
            'uploader_id': 'muploader_j',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.vlive.tv/video/129100',
        'md5': 'ca2569453b79d66e5b919e5d308bff6b',
        'info_dict': {
            'id': '129100',
            'ext': 'mp4',
            'title': '[V LIVE] [BTS+] Run BTS! 2019 - EP.71 :: Behind the scene',
            'creator': 'BTS+',
            'view_count': int,
            'subtitles': 'mincount:10',
        },
        'skip': 'This video is only available for CH+ subscribers',
    }]

    @classmethod
    def suitable(cls, url):
        """Return whether this extractor should handle ``url``.

        Playlist URLs also start with ``/video/<id>``, so anything that
        VLivePlaylistIE accepts is explicitly rejected here.
        """
        return False if VLivePlaylistIE.suitable(url) else super(VLiveIE, cls).suitable(url)

    def _real_initialize(self):
        """Log in (when credentials are configured) before any extraction."""
        self._login()

    def _login(self):
        """Log in with the configured e-mail/password, if both are available.

        Does nothing when either credential is missing.

        Raises:
            ExtractorError: (expected) if the login-info endpoint does not
                report a logged-in session after the credentials were posted.
        """
        email, password = self._get_login_info()
        if None in (email, password):
            return

        def is_logged_in():
            # Ask the site for the session state; anything other than a
            # boolean at message.login is treated as "not logged in".
            login_info = self._download_json(
                'https://www.vlive.tv/auth/loginInfo', None,
                note='Downloading login info',
                headers={'Referer': 'https://www.vlive.tv/home'})
            return try_get(
                login_info, lambda x: x['message']['login'], bool) or False

        LOGIN_URL = 'https://www.vlive.tv/auth/email/login'
        # First request only collects the cookies the login form hands out.
        self._request_webpage(
            LOGIN_URL, None, note='Downloading login cookies')

        self._download_webpage(
            LOGIN_URL, None, note='Logging in',
            data=urlencode_postdata({'email': email, 'pwd': password}),
            headers={
                'Referer': LOGIN_URL,
                'Content-Type': 'application/x-www-form-urlencoded'
            })

        if not is_logged_in():
            raise ExtractorError('Unable to log in', expected=True)

    def _real_extract(self, url):
        """Extract the video behind ``url`` and dispatch on its type.

        Args:
            url: A URL matching ``_VALID_URL`` (``/video/<id>`` or
                ``/post/<id>``).

        Returns:
            The info dict produced by ``_replay`` (type ``VOD``) or ``_live``
            (type ``LIVE``).

        Raises:
            ExtractorError: if the page carries no usable video parameters,
                the VOD is not fully encoded yet, the live stream is only
                reserved, or the video type is unknown.
        """
        # The matched id is a video id for /video/ URLs but a post id for
        # /post/ URLs, so it must not be used as the video id blindly.
        working_id = self._match_id(url)
        # Look at the URL path only: a plain substring test on the whole URL
        # would be fooled by "video"/"post" appearing in a query or fragment.
        is_post_url = re.match(r'https?://[^/?#]+/post/', url) is not None
        webpage = self._download_webpage(url, working_id)

        PARAMS_RE = r'window\.__PRELOADED_STATE__\s*=\s*({.*});?\s*</script>'
        PARAMS_FIELD = 'params'

        params = self._search_regex(
            PARAMS_RE, webpage, PARAMS_FIELD, default='', flags=re.DOTALL)
        params = self._parse_json(params, working_id, fatal=False)

        video_params = try_get(
            params, lambda x: x['postDetail']['post']['officialVideo'], dict)
        if not video_params:
            if is_post_url:
                raise ExtractorError('Url does not appear to be a video post.')
            else:
                raise ExtractorError('Failed to extract video parameters.')

        if is_post_url:
            video_seq = video_params.get('videoSeq')
            if video_seq is None:
                raise ExtractorError('Failed to extract video parameters.')
            # compat_str rather than str: this module uses unicode_literals.
            video_id = compat_str(video_seq)
        else:
            video_id = working_id

        video_type = video_params.get('type')

        # Equality, not ``in ('VOD')``: a parenthesised string is not a tuple,
        # so ``in`` would perform a substring test and accept '' or 'V'.
        if video_type == 'VOD':
            if video_params.get('encodingStatus') != 'COMPLETE':
                raise ExtractorError(
                    'VOD encoding not yet complete. Please try again later.',
                    expected=True)
            # The inkey and the VOD id are only needed for replays, so they
            # are resolved here instead of before the type dispatch.
            VOD_KEY_ENDPOINT = 'https://www.vlive.tv/globalv-web/vam-web/video/v1.0/vod/%s/inkey' % video_id
            key_json = self._download_json(
                VOD_KEY_ENDPOINT, video_id,
                headers={"referer": "https://www.vlive.tv"})
            key = try_get(key_json, lambda x: x['inkey'], compat_str)
            long_video_id = video_params.get('vodId')
            return self._replay(video_id, webpage, long_video_id, key, params)
        elif video_type == 'LIVE':
            if video_params.get('status') == 'RESERVED':
                raise ExtractorError('Coming soon!', expected=True)
            return self._live(video_id, webpage, params)
        else:
            raise ExtractorError('Unknown video type %s' % video_type)

    def _get_common_fields(self, webpage, params):
        """Collect the metadata shared by live and replay results.

        Args:
            webpage: HTML of the video/post page.
            params: Parsed ``__PRELOADED_STATE__`` data (may be ``None``).

        Returns:
            A dict with ``title``, ``creator`` and ``thumbnail``.
        """
        title = self._og_search_title(webpage)
        description = self._html_search_meta(
            ['og:description', 'description', 'twitter:description'],
            webpage, 'description', default=None)
        # Prefer the channel name from the page state; otherwise fall back to
        # the "on <name> channel" wording of the description.
        creator = (try_get(params, lambda x: x["channel"]["channel"]["channelName"], compat_str)
                   or self._search_regex(r'on (.*) channel', description or '', 'creator', fatal=False))
        thumbnail = self._og_search_thumbnail(webpage)
        return {
            'title': title,
            'creator': creator,
            'thumbnail': thumbnail,
        }

    def _live(self, video_id, webpage, params):
        """Build the info dict for a running live stream.

        Args:
            video_id: Numeric video id (as a string).
            webpage: HTML of the video/post page.
            params: Parsed ``__PRELOADED_STATE__`` data.

        Returns:
            An info dict with HLS formats and ``is_live`` set to ``True``.
        """
        init_page = self._download_init_page(video_id)

        live_params = self._search_regex(
            r'"liveStreamInfo"\s*:\s*(".*"),',
            init_page, 'live stream info')
        # The captured value is a quoted JSON string whose content is itself
        # JSON, hence two decoding passes.
        live_params = self._parse_json(live_params, video_id)
        live_params = self._parse_json(live_params, video_id)

        formats = []
        for vid in live_params.get('resolutions', []):
            formats.extend(self._extract_m3u8_formats(
                vid['cdnUrl'], video_id, 'mp4',
                m3u8_id=vid.get('name'),
                fatal=False, live=True))
        self._sort_formats(formats)

        info = self._get_common_fields(webpage, params)
        info.update({
            'title': self._live_title(info['title']),
            'id': video_id,
            'formats': formats,
            'is_live': True,
        })
        return info

    def _replay(self, video_id, webpage, long_video_id, key, params):
        """Build the info dict for a VOD replay.

        Args:
            video_id: Numeric video id (as a string).
            webpage: HTML of the video/post page.
            long_video_id: VOD id taken from the page state; may be empty or
                ``None``.
            key: Playback inkey; may be empty or ``None``.
            params: Parsed ``__PRELOADED_STATE__`` data.

        Returns:
            The common fields merged with the result of
            ``_extract_video_info``.
        """
        # Fall back to the init page whenever either value is missing, not
        # only when it is the empty string.
        if not (long_video_id and key):
            init_page = self._download_init_page(video_id)
            video_info = self._parse_json(self._search_regex(
                (r'(?s)oVideoStatus\s*=\s*({.+?})\s*</script',
                 r'(?s)oVideoStatus\s*=\s*({.+})'), init_page, 'video info'),
                video_id)
            if video_info.get('status') == 'NEED_CHANNEL_PLUS':
                self.raise_login_required(
                    'This video is only available for CH+ subscribers')
            long_video_id, key = video_info['vid'], video_info['inkey']

        return merge_dicts(
            self._get_common_fields(webpage, params),
            self._extract_video_info(video_id, long_video_id, key))

    def _download_init_page(self, video_id):
        """POST ``videoSeq`` to the init/view endpoint and return the page text."""
        return self._download_webpage(
            'https://www.vlive.tv/video/init/view',
            video_id, note='Downloading live webpage',
            data=urlencode_postdata({'videoSeq': video_id}),
            headers={
                'Referer': 'https://www.vlive.tv/video/%s' % video_id,
                'Content-Type': 'application/x-www-form-urlencoded'
            })
219 | ||
b92d3c53 | 220 | |
class VLiveChannelIE(InfoExtractor):
    """Extractor that turns a V LIVE channel into a playlist of its videos."""
    IE_NAME = 'vlive:channel'
    _VALID_URL = r'https?://(?:(?:www|m)\.)?(?:channels\.vlive\.tv/|vlive\.tv/channels?/)(?P<id>[0-9A-Z]+)'
    _TESTS = [
        {
            'url': 'https://channels.vlive.tv/FCD4B',
            'info_dict': {
                'id': 'FCD4B',
                'title': 'MAMAMOO',
            },
            'playlist_mincount': 110
        },
        {
            'url': 'https://www.vlive.tv/channel/FCD4B',
            'info_dict': {
                'id': 'FCD4B',
                'title': 'MAMAMOO',
            },
            'playlist_mincount': 110
        },
    ]
    # Fallback application id, used when none can be read from the site's JS.
    _APP_ID = '8c6cc7b45d2568fb668be6e05b6e5a3b'

    def _find_app_id(self, webpage, channel_code):
        """Return the app id advertised by the page's app.js, or ``_APP_ID``.

        Both the script lookup and the script download are best-effort: any
        miss along the way yields the class-level default.
        """
        script_url = self._search_regex(
            r'<script[^>]+src=(["\'])(?P<url>http.+?/app\.js.*?)\1',
            webpage, 'app js', default=None, group='url')
        if not script_url:
            return self._APP_ID

        script = self._download_webpage(
            script_url, channel_code, 'Downloading app JS', fatal=False)
        if not script:
            return self._APP_ID

        found = self._search_regex(
            r'Global\.VFAN_APP_ID\s*=\s*[\'"]([^\'"]+)[\'"]',
            script, 'app id', default=None)
        return found or self._APP_ID

    def _real_extract(self, url):
        """Return a playlist result holding every video of the channel.

        The channel code from the URL is first decoded into a channel
        sequence number, then the video list is walked page by page until a
        page comes back without videos.
        """
        channel_code = self._match_id(url)

        webpage = self._download_webpage(
            'http://channels.vlive.tv/%s/video' % channel_code, channel_code)

        app_id = self._find_app_id(webpage, channel_code)

        decoded = self._download_json(
            'http://api.vfan.vlive.tv/vproxy/channelplus/decodeChannelCode',
            channel_code, note='Downloading decode channel code',
            query={
                'app_id': app_id,
                'channelCode': channel_code,
                '_': int(time.time())
            })
        channel_seq = decoded['result']['channelSeq']

        channel_name = None
        entries = []
        page_num = 0

        while True:
            page_num += 1
            page = self._download_json(
                'http://api.vfan.vlive.tv/vproxy/channelplus/getChannelVideoList',
                channel_code, note='Downloading channel list page #%d' % page_num,
                query={
                    'app_id': app_id,
                    'channelSeq': channel_seq,
                    # Large values of maxNumOfRows (~300 or above) may cause
                    # empty responses (see [1]), e.g. this happens for [2] that
                    # has more than 300 videos.
                    # 1. https://github.com/ytdl-org/youtube-dl/issues/13830
                    # 2. http://channels.vlive.tv/EDBF.
                    'maxNumOfRows': 100,
                    '_': int(time.time()),
                    'pageNo': page_num
                }
            )

            # Keep the first channel name any page is able to provide.
            channel_name = channel_name or try_get(
                page,
                lambda x: x['result']['channelInfo']['channelName'],
                compat_str)

            page_videos = try_get(
                page, lambda x: x['result']['videoList'], list)
            if not page_videos:
                break

            for item in page_videos:
                seq = item.get('videoSeq')
                if seq:
                    seq = compat_str(seq)
                    entries.append(self.url_result(
                        'http://www.vlive.tv/video/%s' % seq,
                        ie=VLiveIE.ie_key(), video_id=seq))

        return self.playlist_result(
            entries, channel_code, channel_name)
b71c18b4 | 317 | |
318 | ||
class VLivePlaylistIE(InfoExtractor):
    """Extractor for ``/video/<video_id>/playlist/<playlist_id>`` URLs.

    Yields the whole playlist, or only the addressed video when playlists
    are disabled or the page lists no playlist members.
    """
    IE_NAME = 'vlive:playlist'
    _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/video/(?P<video_id>[0-9]+)/playlist/(?P<id>[0-9]+)'
    _VIDEO_URL_TEMPLATE = 'http://www.vlive.tv/video/%s'
    _TESTS = [
        {
            # regular working playlist
            'url': 'https://www.vlive.tv/video/117956/playlist/117963',
            'info_dict': {
                'id': '117963',
                'title': '아이돌룸(IDOL ROOM) 41회 - (여자)아이들'
            },
            'playlist_mincount': 10
        },
        {
            # playlist with no playlistVideoSeqs
            'url': 'http://www.vlive.tv/video/22867/playlist/22912',
            'info_dict': {
                'id': '22867',
                'ext': 'mp4',
                'title': '[V LIVE] Valentine Day Message from MINA',
                'creator': 'TWICE',
                'view_count': int
            },
            'params': {
                'skip_download': True,
            }
        },
    ]

    def _build_video_result(self, video_id, message):
        """Print ``message`` and return a url result for the single video."""
        self.to_screen(message)
        single_url = self._VIDEO_URL_TEMPLATE % video_id
        return self.url_result(
            single_url, ie=VLiveIE.ie_key(), video_id=video_id)

    def _real_extract(self, url):
        """Return a playlist result, or a single-video result as fallback."""
        match = re.match(self._VALID_URL, url)
        video_id = match.group('video_id')
        playlist_id = match.group('id')

        # Honour --no-playlist before touching the network.
        if self._downloader.params.get('noplaylist'):
            return self._build_video_result(
                video_id,
                'Downloading just video %s because of --no-playlist'
                % video_id)

        self.to_screen(
            'Downloading playlist %s - add --no-playlist to just download video'
            % playlist_id)

        playlist_url = 'http://www.vlive.tv/video/%s/playlist/%s' % (
            video_id, playlist_id)
        webpage = self._download_webpage(playlist_url, playlist_id)

        # The member ids are embedded in the page as a JS array literal.
        seqs_literal = self._search_regex(
            r'playlistVideoSeqs\s*=\s*(\[[^]]+\])', webpage,
            'playlist video seqs', default=None, fatal=False)
        if not seqs_literal:
            return self._build_video_result(
                video_id,
                'Downloading just video %s because no playlist was found'
                % video_id)

        entries = []
        for seq in self._parse_json(seqs_literal, playlist_id):
            entries.append(self.url_result(
                self._VIDEO_URL_TEMPLATE % seq, ie=VLiveIE.ie_key(),
                video_id=compat_str(seq)))

        playlist_title = self._html_search_regex(
            r'<div[^>]+class="[^"]*multicam_playlist[^>]*>\s*<h3[^>]+>([^<]+)',
            webpage, 'playlist title', fatal=False)

        return self.playlist_result(entries, playlist_id, playlist_title)