]>
Commit | Line | Data |
---|---|---|
061f62da | 1 | # coding: utf-8 |
25bcd355 | 2 | from __future__ import unicode_literals |
061f62da | 3 | |
b24d6336 | 4 | import re |
b92d3c53 | 5 | import time |
6 | import itertools | |
9d186afa | 7 | |
061f62da | 8 | from .common import InfoExtractor |
661cc229 S |
9 | from ..compat import ( |
10 | compat_urllib_parse_urlencode, | |
11 | compat_str, | |
12 | ) | |
061f62da | 13 | from ..utils import ( |
52f5889f | 14 | dict_get, |
9d186afa | 15 | ExtractorError, |
52f5889f S |
16 | float_or_none, |
17 | int_or_none, | |
345dec93 | 18 | remove_start, |
661cc229 | 19 | try_get, |
89c63cc5 | 20 | urlencode_postdata, |
061f62da | 21 | ) |
061f62da | 22 | |
23 | ||
class VLiveIE(InfoExtractor):
    """Extractor for a single V LIVE video page (``vlive.tv/video/<id>``).

    The ``vlive.video.init(...)`` call embedded in the page supplies a status
    string together with the identifiers needed for playback.  Depending on
    that status the video is handled as a live broadcast (``_live``), as a
    replay (``_replay``), or rejected with an explanatory ``ExtractorError``.
    """

    IE_NAME = 'vlive'
    _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/video/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'http://www.vlive.tv/video/1326',
        'md5': 'cc7314812855ce56de70a06a27314983',
        'info_dict': {
            'id': '1326',
            'ext': 'mp4',
            'title': "[V LIVE] Girl's Day's Broadcast",
            'creator': "Girl's Day",
            'view_count': int,
        },
    }, {
        'url': 'http://www.vlive.tv/video/16937',
        'info_dict': {
            'id': '16937',
            'ext': 'mp4',
            'title': '[V LIVE] 첸백시 걍방',
            'creator': 'EXO',
            'view_count': int,
            'subtitles': 'mincount:12',
        },
        'params': {
            'skip_download': True,
        },
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs that VLivePlaylistIE accepts; otherwise defer to the base check."""
        return False if VLivePlaylistIE.suitable(url) else super(VLiveIE, cls).suitable(url)

    def _real_extract(self, url):
        """Download the video page and dispatch on the status it reports.

        Returns the info dict produced by ``_live`` or ``_replay``.

        Raises ExtractorError when the init parameters cannot be located,
        when the video is not (yet) playable, or when the status is unknown.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(
            'http://www.vlive.tv/video/%s' % video_id, video_id)

        VIDEO_PARAMS_RE = r'\bvlive\.video\.init\(([^)]+)'
        VIDEO_PARAMS_FIELD = 'video params'

        # First attempt: treat the argument list as the body of a JSON array.
        params = self._parse_json(self._search_regex(
            VIDEO_PARAMS_RE, webpage, VIDEO_PARAMS_FIELD, default=''), video_id,
            transform_source=lambda s: '[' + s + ']', fatal=False)

        # Fallback: split the raw argument list on commas and drop the
        # surrounding double quotes of each argument.
        if not params or len(params) < 7:
            params = self._search_regex(
                VIDEO_PARAMS_RE, webpage, VIDEO_PARAMS_FIELD)
            params = [p.strip(r'"') for p in re.split(r'\s*,\s*', params)]

        # Indices 2, 5 and 6 are read below; fail with a clear message
        # instead of an IndexError when the fallback yields too few items.
        if len(params) < 7:
            raise ExtractorError(
                'Unable to extract %s' % VIDEO_PARAMS_FIELD)

        status, long_video_id, key = params[2], params[5], params[6]
        status = remove_start(status, 'PRODUCT_')

        if status in ('LIVE_ON_AIR', 'BIG_EVENT_ON_AIR'):
            return self._live(video_id, webpage)
        elif status in ('VOD_ON_AIR', 'BIG_EVENT_INTRO'):
            if long_video_id and key:
                return self._replay(video_id, webpage, long_video_id, key)
            else:
                # Without both identifiers the replay cannot be requested,
                # so report it the same way as an upcoming video.
                status = 'COMING_SOON'

        if status == 'LIVE_END':
            raise ExtractorError('Uploading for replay. Please wait...',
                                 expected=True)
        elif status == 'COMING_SOON':
            raise ExtractorError('Coming soon!', expected=True)
        elif status == 'CANCELED':
            raise ExtractorError('We are sorry, '
                                 'but the live broadcast has been canceled.',
                                 expected=True)
        else:
            raise ExtractorError('Unknown status %s' % status)

    def _get_common_fields(self, webpage):
        """Return the ``title``, ``creator`` and ``thumbnail`` scraped from *webpage*.

        The title and thumbnail come from ``_og_search_title`` and
        ``_og_search_thumbnail``; the creator lookup is non-fatal.
        """
        title = self._og_search_title(webpage)
        creator = self._html_search_regex(
            r'<div[^>]+class="info_area"[^>]*>\s*<a\s+[^>]*>([^<]+)',
            webpage, 'creator', fatal=False)
        thumbnail = self._og_search_thumbnail(webpage)
        return {
            'title': title,
            'creator': creator,
            'thumbnail': thumbnail,
        }

    def _live(self, video_id, webpage):
        """Build the info dict for a broadcast that is currently on air.

        POSTs ``videoSeq`` to the ``/video/init/view`` endpoint, reads the
        ``liveStreamInfo`` value from the response and extracts one set of
        m3u8 formats per listed resolution.
        """
        init_page = self._download_webpage(
            'http://www.vlive.tv/video/init/view',
            video_id, note='Downloading live webpage',
            data=urlencode_postdata({'videoSeq': video_id}),
            headers={
                'Referer': 'http://www.vlive.tv/video/%s' % video_id,
                'Content-Type': 'application/x-www-form-urlencoded'
            })

        live_params = self._search_regex(
            r'"liveStreamInfo"\s*:\s*(".*"),',
            init_page, 'live stream info')
        # The captured value is a quoted JSON string whose content is itself
        # JSON, hence the two parsing passes.
        live_params = self._parse_json(live_params, video_id)
        live_params = self._parse_json(live_params, video_id)

        formats = []
        # "or []" also covers an explicit null for "resolutions".
        for vid in live_params.get('resolutions') or []:
            cdn_url = vid.get('cdnUrl')
            if not cdn_url:
                # Skip an entry without a URL rather than aborting the whole
                # extraction; the m3u8 extraction is non-fatal as well.
                continue
            formats.extend(self._extract_m3u8_formats(
                cdn_url, video_id, 'mp4',
                m3u8_id=vid.get('name'),
                fatal=False, live=True))
        self._sort_formats(formats)

        info = self._get_common_fields(webpage)
        info.update({
            'title': self._live_title(info['title']),
            'id': video_id,
            'formats': formats,
            'is_live': True,
        })
        return info

    def _replay(self, video_id, webpage, long_video_id, key):
        """Build the info dict for a replay (VOD).

        Queries the ``vod_play_videoInfo.json`` endpoint with *long_video_id*
        and *key*, then collects formats, the view count and VTT subtitles
        from the response.  Nested objects that are missing or null are
        treated as empty.
        """
        playinfo = self._download_json(
            'http://global.apis.naver.com/rmcnmv/rmcnmv/vod_play_videoInfo.json?%s'
            % compat_urllib_parse_urlencode({
                'videoId': long_video_id,
                'key': key,
                'ptc': 'http',
                'doct': 'json',  # document type (xml or json)
                'cpt': 'vtt',  # captions type (vtt or ttml)
            }), video_id)

        formats = []
        for vid in (playinfo.get('videos') or {}).get('list') or []:
            source = vid.get('source')
            if not source:
                continue
            encoding = vid.get('encodingOption') or {}
            bitrate = vid.get('bitrate') or {}
            formats.append({
                'url': source,
                'format_id': encoding.get('name'),
                'abr': float_or_none(bitrate.get('audio')),
                'vbr': float_or_none(bitrate.get('video')),
                'width': int_or_none(encoding.get('width')),
                'height': int_or_none(encoding.get('height')),
                'filesize': int_or_none(vid.get('size')),
            })
        self._sort_formats(formats)

        view_count = int_or_none((playinfo.get('meta') or {}).get('count'))

        subtitles = {}
        for caption in (playinfo.get('captions') or {}).get('list') or []:
            # Use the first available of these keys as the language code.
            lang = dict_get(caption, ('locale', 'language', 'country', 'label'))
            if lang and caption.get('source'):
                subtitles[lang] = [{
                    'ext': 'vtt',
                    'url': caption['source']}]

        info = self._get_common_fields(webpage)
        info.update({
            'id': video_id,
            'formats': formats,
            'view_count': view_count,
            'subtitles': subtitles,
        })
        return info
b92d3c53 | 182 | |
183 | ||
class VLiveChannelIE(InfoExtractor):
    """Extractor that turns a ``channels.vlive.tv`` channel into a playlist.

    The channel code from the URL is decoded into a channel sequence number
    through the vfan API, after which the channel's video list is fetched
    page by page and every video is emitted as a ``VLiveIE`` URL result.
    """

    IE_NAME = 'vlive:channel'
    _VALID_URL = r'https?://channels\.vlive\.tv/(?P<id>[0-9A-Z]+)'
    _TEST = {
        'url': 'http://channels.vlive.tv/FCD4B',
        'info_dict': {
            'id': 'FCD4B',
            'title': 'MAMAMOO',
        },
        'playlist_mincount': 110
    }
    # Application id used when none can be read from the site's app.js.
    _APP_ID = '8c6cc7b45d2568fb668be6e05b6e5a3b'

    def _real_extract(self, url):
        """Return a playlist result holding every video listed for the channel."""
        channel_code = self._match_id(url)

        channel_page = self._download_webpage(
            'http://channels.vlive.tv/%s/video' % channel_code, channel_code)

        script_url = self._search_regex(
            r'<script[^>]+src=(["\'])(?P<url>http.+?/app\.js.*?)\1',
            channel_page, 'app js', default=None, group='url')

        # Each step of the app id lookup is optional: no script URL, a failed
        # download or a missing assignment all lead to the class-level id.
        script = self._download_webpage(
            script_url, channel_code, 'Downloading app JS',
            fatal=False) if script_url else None
        scraped_app_id = self._search_regex(
            r'Global\.VFAN_APP_ID\s*=\s*[\'"]([^\'"]+)[\'"]',
            script, 'app id', default=None) if script else None
        app_id = scraped_app_id or self._APP_ID

        decoded = self._download_json(
            'http://api.vfan.vlive.tv/vproxy/channelplus/decodeChannelCode',
            channel_code, note='Downloading decode channel code',
            query={
                'app_id': app_id,
                'channelCode': channel_code,
                '_': int(time.time())
            })
        channel_seq = decoded['result']['channelSeq']

        channel_name = None
        entries = []
        page_num = 1

        # Keep requesting pages until one comes back without any videos.
        while True:
            page = self._download_json(
                'http://api.vfan.vlive.tv/vproxy/channelplus/getChannelVideoList',
                channel_code, note='Downloading channel list page #%d' % page_num,
                query={
                    'app_id': app_id,
                    'channelSeq': channel_seq,
                    # Large values of maxNumOfRows (~300 or above) may cause
                    # empty responses (see [1]), e.g. this happens for [2] that
                    # has more than 300 videos.
                    # 1. https://github.com/rg3/youtube-dl/issues/13830
                    # 2. http://channels.vlive.tv/EDBF.
                    'maxNumOfRows': 100,
                    '_': int(time.time()),
                    'pageNo': page_num
                }
            )

            # The channel name is taken from the first page that provides it.
            channel_name = channel_name or try_get(
                page,
                lambda x: x['result']['channelInfo']['channelName'],
                compat_str)

            page_videos = try_get(
                page, lambda x: x['result']['videoList'], list)
            if not page_videos:
                break

            for video in page_videos:
                video_seq = video.get('videoSeq')
                if video_seq:
                    video_id = compat_str(video_seq)
                    entries.append(
                        self.url_result(
                            'http://www.vlive.tv/video/%s' % video_id,
                            ie=VLiveIE.ie_key(), video_id=video_id))

            page_num += 1

        return self.playlist_result(
            entries, channel_code, channel_name)
b71c18b4 | 273 | |
274 | ||
class VLivePlaylistIE(InfoExtractor):
    """Extractor for V LIVE playlist URLs (``/video/<video_id>/playlist/<id>``).

    With the ``noplaylist`` option set only the video named in the URL is
    returned; otherwise every sequence number listed in the page's
    ``playlistVideoSeqs`` array becomes a ``VLiveIE`` entry.
    """

    IE_NAME = 'vlive:playlist'
    _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/video/(?P<video_id>[0-9]+)/playlist/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.vlive.tv/video/22867/playlist/22912',
        'info_dict': {
            'id': '22912',
            'title': 'Valentine Day Message from TWICE'
        },
        'playlist_mincount': 9
    }

    def _real_extract(self, url):
        """Return either a single video URL result or the full playlist result."""
        url_match = re.match(self._VALID_URL, url)
        video_id = url_match.group('video_id')
        playlist_id = url_match.group('id')

        video_url_template = 'http://www.vlive.tv/video/%s'

        if self._downloader.params.get('noplaylist'):
            self.to_screen(
                'Downloading just video %s because of --no-playlist' % video_id)
            return self.url_result(
                video_url_template % video_id,
                ie=VLiveIE.ie_key(), video_id=video_id)

        self.to_screen(
            'Downloading playlist %s - add --no-playlist to just download video'
            % playlist_id)

        playlist_page = self._download_webpage(
            'http://www.vlive.tv/video/%s/playlist/%s'
            % (video_id, playlist_id), playlist_id)

        # The page embeds the member videos as a JavaScript array literal.
        seqs_literal = self._search_regex(
            r'playlistVideoSeqs\s*=\s*(\[[^]]+\])', playlist_page,
            'playlist video seqs')
        item_ids = self._parse_json(seqs_literal, playlist_id)

        entries = []
        for item_id in item_ids:
            entries.append(self.url_result(
                video_url_template % item_id, ie=VLiveIE.ie_key(),
                video_id=compat_str(item_id)))

        # The title is optional; a playlist without one is still returned.
        playlist_name = self._html_search_regex(
            r'<div[^>]+class="[^"]*multicam_playlist[^>]*>\s*<h3[^>]+>([^<]+)',
            playlist_page, 'playlist title', fatal=False)

        return self.playlist_result(entries, playlist_id, playlist_name)