# jfr.im git - yt-dlp.git/blob - youtube_dlc/extractor/vlive.py
# Merge pull request #117 from nixxo/la7-proto-fix
# [yt-dlp.git] / youtube_dlc / extractor / vlive.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5 import time
6 import itertools
7
8 from .common import InfoExtractor
9 from .naver import NaverBaseIE
10 from ..compat import compat_str
11 from ..utils import (
12 ExtractorError,
13 merge_dicts,
14 try_get,
15 urlencode_postdata,
16 )
17
18
class VLiveIE(NaverBaseIE):
    """Extractor for a single V LIVE video, addressed by video or post URL.

    The page's ``window.__PRELOADED_STATE__`` JSON is parsed to find the
    ``officialVideo`` record, which then decides whether the item is handled
    as a finished VOD (``_replay``) or as a running broadcast (``_live``).
    """
    IE_NAME = 'vlive'
    _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/(?:video|post)/(?P<id>(?:\d-)?[0-9]+)'
    # Same prefix as _VALID_URL, but capturing which kind of page was requested.
    # Anchored on purpose so text in a query string cannot influence the result.
    _URL_KIND_RE = r'https?://(?:(?:www|m)\.)?vlive\.tv/(?P<kind>video|post)/'
    _NETRC_MACHINE = 'vlive'
    _TESTS = [{
        'url': 'https://www.vlive.tv/video/1326',
        'md5': 'cc7314812855ce56de70a06a27314983',
        'info_dict': {
            'id': '1326',
            'ext': 'mp4',
            'title': "[V LIVE] Girl's Day's Broadcast",
            'creator': "Girl's Day",
            'view_count': int,
            'uploader_id': 'muploader_a',
        },
    }, {
        'url': 'https://vlive.tv/post/1-18244258',
        'md5': 'cc7314812855ce56de70a06a27314983',
        'info_dict': {
            'id': '1326',
            'ext': 'mp4',
            'title': "[V LIVE] Girl's Day's Broadcast",
            'creator': "Girl's Day",
            'view_count': int,
            'uploader_id': 'muploader_a',
        },
    }, {
        'url': 'https://www.vlive.tv/video/16937',
        'info_dict': {
            'id': '16937',
            'ext': 'mp4',
            'title': '[V LIVE] 첸백시 걍방',
            'creator': 'EXO',
            'view_count': int,
            'subtitles': 'mincount:12',
            'uploader_id': 'muploader_j',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.vlive.tv/video/129100',
        'md5': 'ca2569453b79d66e5b919e5d308bff6b',
        'info_dict': {
            'id': '129100',
            'ext': 'mp4',
            'title': '[V LIVE] [BTS+] Run BTS! 2019 - EP.71 :: Behind the scene',
            'creator': 'BTS+',
            'view_count': int,
            'subtitles': 'mincount:10',
        },
        'skip': 'This video is only available for CH+ subscribers',
    }]

    @classmethod
    def suitable(cls, url):
        """Return whether this extractor should handle *url*.

        Playlist URLs (``/video/<id>/playlist/<id>``) also match
        ``_VALID_URL``, so they are handed to VLivePlaylistIE instead.
        """
        return False if VLivePlaylistIE.suitable(url) else super(VLiveIE, cls).suitable(url)

    def _real_initialize(self):
        self._login()

    def _login(self):
        """Log in with the configured credentials, if there are any.

        Returns silently when no email/password pair is available.

        Raises:
            ExtractorError: if the loginInfo endpoint does not report a
                logged-in session after the credentials were posted.
        """
        email, password = self._get_login_info()
        if None in (email, password):
            return

        def is_logged_in():
            login_info = self._download_json(
                'https://www.vlive.tv/auth/loginInfo', None,
                note='Downloading login info',
                headers={'Referer': 'https://www.vlive.tv/home'})
            return try_get(
                login_info, lambda x: x['message']['login'], bool) or False

        LOGIN_URL = 'https://www.vlive.tv/auth/email/login'
        # First request is a plain GET of the login page (noted as fetching cookies).
        self._request_webpage(
            LOGIN_URL, None, note='Downloading login cookies')

        self._download_webpage(
            LOGIN_URL, None, note='Logging in',
            data=urlencode_postdata({'email': email, 'pwd': password}),
            headers={
                'Referer': LOGIN_URL,
                'Content-Type': 'application/x-www-form-urlencoded'
            })

        if not is_logged_in():
            raise ExtractorError('Unable to log in', expected=True)

    def _real_extract(self, url):
        """Extract the info dict for the video or video post at *url*.

        Raises:
            ExtractorError: when the page carries no usable video record,
                the site reports an error, the VOD is not fully encoded,
                the broadcast has not started or has just ended, or the
                video type is not recognised.
        """
        # url may match on a post or a video url with a post_id potentially matching a video_id
        working_id = self._match_id(url)
        webpage = self._download_webpage(url, working_id)

        # Decide post vs. video from the URL path only; a substring test on the
        # whole URL could be fooled by e.g. a query string containing "video".
        kind_mobj = re.match(self._URL_KIND_RE, url)
        url_kind = kind_mobj.group('kind') if kind_mobj else None

        PARAMS_RE = r'window\.__PRELOADED_STATE__\s*=\s*({.*});?\s*</script>'
        PARAMS_FIELD = 'params'

        params = self._search_regex(
            PARAMS_RE, webpage, PARAMS_FIELD, default='', flags=re.DOTALL)
        params = self._parse_json(params, working_id, fatal=False)

        video_params = try_get(params, lambda x: x['postDetail']['post']['officialVideo'], dict)

        if video_params is None:
            self._raise_missing_video_error(params, url_kind)

        # For post URLs the id in the URL is the post id, so take the video id
        # from the video record instead.
        if url_kind == 'video':
            video_id = working_id
        else:
            video_id = compat_str(video_params['videoSeq'])

        # NOTE: these must be equality checks. ``x in ('VOD')`` is a substring
        # test against the string 'VOD', not a tuple membership test.
        video_type = video_params.get('type')
        if video_type == 'VOD':
            encoding_status = video_params['encodingStatus']
            if encoding_status == 'COMPLETE':
                return self._replay(video_id, webpage, params, video_params)
            raise ExtractorError(
                'VOD encoding not yet complete. Please try again later.',
                expected=True)
        elif video_type == 'LIVE':
            video_status = video_params['status']
            if video_status == 'RESERVED':
                raise ExtractorError('Coming soon!', expected=True)
            elif video_status in ('ENDED', 'END'):
                raise ExtractorError('Uploading for replay. Please wait...', expected=True)
            return self._live(video_id, webpage, params)

        raise ExtractorError('Unknown video type %s' % video_type)

    def _raise_missing_video_error(self, params, url_kind):
        """Raise the most specific error for a page without a video record.

        *params* is the parsed preloaded state (may be None) and *url_kind*
        is 'video', 'post' or None. This method never returns normally.
        """
        error = try_get(params, lambda x: x['postDetail']['error'], dict)
        error_data = try_get(error, lambda x: x['data'], dict)
        error_video = try_get(error_data, lambda x: x['officialVideo'], dict)
        error_msg = try_get(error, lambda x: x['message'], compat_str)
        product_type = try_get(
            error_data,
            [lambda x: x['officialVideo']['productType'],
             lambda x: x['board']['boardType']],
            compat_str)

        if error_video is not None:
            if product_type in ('VLIVE_PLUS', 'VLIVE+'):
                self.raise_login_required('This video is only available with V LIVE+.')
            elif error_msg is not None:
                # The message comes from the site itself, so it is not an extractor bug.
                raise ExtractorError(
                    'V LIVE reported the following error: %s' % error_msg,
                    expected=True)
            raise ExtractorError('Failed to extract video parameters.')
        elif url_kind == 'post':
            raise ExtractorError('Url does not appear to be a video post.', expected=True)
        raise ExtractorError('Failed to extract video parameters.')

    def _get_common_fields(self, webpage, params):
        """Return title, creator and thumbnail shared by live and VOD results.

        The creator is read from the preloaded state when present; otherwise
        it is guessed from an "on <name> channel" phrase in the page
        description. The description itself is not part of the result.
        """
        title = self._og_search_title(webpage)
        description = self._html_search_meta(
            ['og:description', 'description', 'twitter:description'],
            webpage, 'description', default=None)
        creator = (try_get(params, lambda x: x['channel']['channel']['channelName'], compat_str)
                   or self._search_regex(r'on (.*) channel', description or '', 'creator', fatal=False))
        thumbnail = self._og_search_thumbnail(webpage)
        return {
            'title': title,
            'creator': creator,
            'thumbnail': thumbnail,
        }

    def _live(self, video_id, webpage, params):
        """Build the info dict for a running broadcast.

        Every entry of ``result.streamList`` in the playInfo response that
        has a ``serviceUrl`` is expanded as an HLS manifest; entries that
        fail to download are skipped (``fatal=False``).
        """
        LIVE_INFO_ENDPOINT = 'https://www.vlive.tv/globalv-web/vam-web/old/v3/live/%s/playInfo' % video_id
        play_info = self._download_json(
            LIVE_INFO_ENDPOINT, video_id,
            headers={'referer': 'https://www.vlive.tv'})

        streams = try_get(play_info, lambda x: x['result']['streamList'], list) or []

        formats = []
        for stream in streams:
            # Skip malformed entries instead of aborting the whole extraction.
            stream_url = try_get(stream, lambda x: x['serviceUrl'], compat_str)
            if not stream_url:
                continue
            formats.extend(self._extract_m3u8_formats(
                stream_url, video_id, 'mp4',
                fatal=False, live=True))
        self._sort_formats(formats)

        info = self._get_common_fields(webpage, params)
        info.update({
            'title': self._live_title(info['title']),
            'id': video_id,
            'formats': formats,
            'is_live': True,
        })
        return info

    def _replay(self, video_id, webpage, params, video_params):
        """Build the info dict for a finished (VOD) video.

        Fetches the ``inkey`` for *video_id* and passes it, together with the
        record's ``vodId``, to ``_extract_video_info`` (inherited; presumably
        from NaverBaseIE - confirm). The result is merged with the common
        page fields, which take precedence in ``merge_dicts`` argument order.
        """
        long_video_id = video_params['vodId']

        VOD_KEY_ENDPOINT = 'https://www.vlive.tv/globalv-web/vam-web/video/v1.0/vod/%s/inkey' % video_id
        key_json = self._download_json(
            VOD_KEY_ENDPOINT, video_id,
            headers={'referer': 'https://www.vlive.tv'})
        key = key_json['inkey']

        return merge_dicts(
            self._get_common_fields(webpage, params),
            self._extract_video_info(video_id, long_video_id, key))
215
216
class VLiveChannelIE(InfoExtractor):
    """Extractor that lists every video of a V LIVE channel as a playlist.

    The channel code from the URL is decoded to a channel sequence number
    through the vfan API, then the channel's video list is paged through
    until an empty page is returned.
    """
    IE_NAME = 'vlive:channel'
    _VALID_URL = r'https?://(?:(?:www|m)\.)?(?:channels\.vlive\.tv/|vlive\.tv/channels?/)(?P<id>[0-9A-Z]+)'
    _TESTS = [{
        'url': 'https://channels.vlive.tv/FCD4B',
        'info_dict': {
            'id': 'FCD4B',
            'title': 'MAMAMOO',
        },
        'playlist_mincount': 110
    }, {
        'url': 'https://www.vlive.tv/channel/FCD4B',
        'info_dict': {
            'id': 'FCD4B',
            'title': 'MAMAMOO',
        },
        'playlist_mincount': 110
    }]
    # Fallback app id, used when none can be read from the site's app.js.
    _APP_ID = '8c6cc7b45d2568fb668be6e05b6e5a3b'

    def _real_extract(self, url):
        """Return a playlist result holding one url_result per channel video."""
        channel_code = self._match_id(url)

        channel_page = self._download_webpage(
            'http://channels.vlive.tv/%s/video' % channel_code, channel_code)

        # Try to read the current app id from the page's app.js; every step
        # is optional and falls back to the hard-coded id.
        discovered_app_id = None
        script_url = self._search_regex(
            r'<script[^>]+src=(["\'])(?P<url>http.+?/app\.js.*?)\1',
            channel_page, 'app js', default=None, group='url')
        if script_url:
            script_body = self._download_webpage(
                script_url, channel_code, 'Downloading app JS', fatal=False)
            if script_body:
                discovered_app_id = self._search_regex(
                    r'Global\.VFAN_APP_ID\s*=\s*[\'"]([^\'"]+)[\'"]',
                    script_body, 'app id', default=None)
        app_id = discovered_app_id or self._APP_ID

        decoded = self._download_json(
            'http://api.vfan.vlive.tv/vproxy/channelplus/decodeChannelCode',
            channel_code, note='Downloading decode channel code',
            query={
                'app_id': app_id,
                'channelCode': channel_code,
                '_': int(time.time())
            })
        channel_seq = decoded['result']['channelSeq']

        channel_name = None
        entries = []
        for page_num in itertools.count(1):
            page = self._download_json(
                'http://api.vfan.vlive.tv/vproxy/channelplus/getChannelVideoList',
                channel_code, note='Downloading channel list page #%d' % page_num,
                query={
                    'app_id': app_id,
                    'channelSeq': channel_seq,
                    # Large values of maxNumOfRows (~300 or above) may cause
                    # empty responses (see [1]), e.g. this happens for [2] that
                    # has more than 300 videos.
                    # 1. https://github.com/ytdl-org/youtube-dl/issues/13830
                    # 2. http://channels.vlive.tv/EDBF.
                    'maxNumOfRows': 100,
                    '_': int(time.time()),
                    'pageNo': page_num
                }
            )

            # Keep the first non-empty channel name seen on any page.
            channel_name = channel_name or try_get(
                page,
                lambda x: x['result']['channelInfo']['channelName'],
                compat_str)

            page_videos = try_get(
                page, lambda x: x['result']['videoList'], list)
            if not page_videos:
                # An empty or missing list marks the end of the channel.
                break

            for item in page_videos:
                video_seq = item.get('videoSeq')
                if video_seq:
                    video_seq = compat_str(video_seq)
                    entries.append(self.url_result(
                        'http://www.vlive.tv/video/%s' % video_seq,
                        ie=VLiveIE.ie_key(), video_id=video_seq))

        return self.playlist_result(entries, channel_code, channel_name)
313
314
class VLivePlaylistIE(InfoExtractor):
    """Extractor for V LIVE playlist URLs (``/video/<id>/playlist/<id>``).

    Falls back to the single video named in the URL when ``noplaylist`` is
    set or when the page exposes no ``playlistVideoSeqs`` array.
    """
    IE_NAME = 'vlive:playlist'
    _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/video/(?P<video_id>[0-9]+)/playlist/(?P<id>[0-9]+)'
    _VIDEO_URL_TEMPLATE = 'http://www.vlive.tv/video/%s'
    _TESTS = [{
        # regular working playlist
        'url': 'https://www.vlive.tv/video/117956/playlist/117963',
        'info_dict': {
            'id': '117963',
            'title': '아이돌룸(IDOL ROOM) 41회 - (여자)아이들'
        },
        'playlist_mincount': 10
    }, {
        # playlist with no playlistVideoSeqs
        'url': 'http://www.vlive.tv/video/22867/playlist/22912',
        'info_dict': {
            'id': '22867',
            'ext': 'mp4',
            'title': '[V LIVE] Valentine Day Message from MINA',
            'creator': 'TWICE',
            'view_count': int
        },
        'params': {
            'skip_download': True,
        }
    }]

    def _build_video_result(self, video_id, message):
        """Print *message* and return a url_result for the single video."""
        self.to_screen(message)
        single_video_url = self._VIDEO_URL_TEMPLATE % video_id
        return self.url_result(
            single_video_url, ie=VLiveIE.ie_key(), video_id=video_id)

    def _real_extract(self, url):
        """Return the playlist at *url*, or just its video as a fallback."""
        url_match = re.match(self._VALID_URL, url)
        video_id = url_match.group('video_id')
        playlist_id = url_match.group('id')

        if self._downloader.params.get('noplaylist'):
            return self._build_video_result(
                video_id,
                'Downloading just video %s because of --no-playlist'
                % video_id)

        self.to_screen(
            'Downloading playlist %s - add --no-playlist to just download video'
            % playlist_id)

        playlist_page = self._download_webpage(
            'http://www.vlive.tv/video/%s/playlist/%s'
            % (video_id, playlist_id), playlist_id)

        # The page embeds the member ids as a JS array literal.
        seqs_literal = self._search_regex(
            r'playlistVideoSeqs\s*=\s*(\[[^]]+\])', playlist_page,
            'playlist video seqs', default=None, fatal=False)
        if not seqs_literal:
            return self._build_video_result(
                video_id,
                'Downloading just video %s because no playlist was found'
                % video_id)

        entries = []
        for item_id in self._parse_json(seqs_literal, playlist_id):
            entries.append(self.url_result(
                self._VIDEO_URL_TEMPLATE % item_id, ie=VLiveIE.ie_key(),
                video_id=compat_str(item_id)))

        playlist_title = self._html_search_regex(
            r'<div[^>]+class="[^"]*multicam_playlist[^>]*>\s*<h3[^>]+>([^<]+)',
            playlist_page, 'playlist title', fatal=False)

        return self.playlist_result(entries, playlist_id, playlist_title)
386 webpage, 'playlist title', fatal=False)
387
388 return self.playlist_result(entries, playlist_id, playlist_name)