]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/vlive.py
[VLive] Add upload_date and thumbnail (#1486)
[yt-dlp.git] / yt_dlp / extractor / vlive.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import itertools
5 import json
6
7 from .naver import NaverBaseIE
8 from ..compat import (
9 compat_HTTPError,
10 compat_str,
11 )
12 from ..utils import (
13 ExtractorError,
14 int_or_none,
15 merge_dicts,
16 str_or_none,
17 strip_or_none,
18 try_get,
19 urlencode_postdata,
20 url_or_none,
21 )
22
23
class VLiveBaseIE(NaverBaseIE):
    """Base class holding the login flow and API helper used by the VLive extractors."""

    _NETRC_MACHINE = 'vlive'
    # Kept on the class (not the instance) so that a successful login made by
    # one extractor instance is seen by every other VLiveBaseIE subclass.
    _logged_in = False

    def _real_initialize(self):
        """Run the login once and store its result on VLiveBaseIE."""
        if not self._logged_in:
            VLiveBaseIE._logged_in = self._login()

    def _login(self):
        """Log in with the e-mail/password returned by ``_get_login_info``.

        Returns:
            False when no e-mail is configured (nothing is requested),
            True once the site reports an active login.

        Raises:
            ExtractorError: (expected) if the login-status endpoint does not
                report ``message.login`` as a true boolean.
        """
        email, password = self._get_login_info()
        if email is None:
            return False

        LOGIN_URL = 'https://www.vlive.tv/auth/email/login'
        # First request only fetches the login page so cookies get set
        # before the credentials are posted.
        self._request_webpage(
            LOGIN_URL, None, note='Downloading login cookies')

        self._download_webpage(
            LOGIN_URL, None, note='Logging in',
            data=urlencode_postdata({'email': email, 'pwd': password}),
            headers={
                'Referer': LOGIN_URL,
                'Content-Type': 'application/x-www-form-urlencoded'
            })

        # The POST response itself is not inspected; the outcome is read
        # from a separate status endpoint.
        login_info = self._download_json(
            'https://www.vlive.tv/auth/loginInfo', None,
            note='Checking login status',
            headers={'Referer': 'https://www.vlive.tv/home'})

        if not try_get(login_info, lambda x: x['message']['login'], bool):
            raise ExtractorError('Unable to log in', expected=True)
        return True

    def _call_api(self, path_template, video_id, fields=None, query_add=None, note=None):
        """Download JSON from the ``globalv-web/vam-web`` API.

        Args:
            path_template: API path containing one ``%s`` placeholder that is
                filled with ``video_id``.
            video_id: identifier substituted into the path and used for
                progress messages.
            fields: optional value for the ``fields`` query parameter.
            query_add: optional mapping of extra query parameters; it is
                copied into the request query and never modified.
            note: progress message; when omitted it is derived from the last
                path segment (the text before its first ``-``).

        Returns:
            The decoded JSON response.

        Raises:
            ExtractorError: re-raised unchanged for any failure other than
                HTTP 403. A 403 is handed to ``raise_login_required`` with
                the ``message`` field of the JSON error body.
        """
        if note is None:
            note = 'Downloading %s JSON metadata' % path_template.split('/')[-1].split('-')[0]
        query = {'appId': '8c6cc7b45d2568fb668be6e05b6e5a3b', 'gcc': 'KR', 'platformType': 'PC'}
        if fields:
            query['fields'] = fields
        # ``None`` replaces the former mutable ``{}`` default; an empty or
        # missing mapping simply adds nothing.
        if query_add:
            query.update(query_add)
        try:
            return self._download_json(
                'https://www.vlive.tv/globalv-web/vam-web/' + path_template % video_id, video_id,
                note, headers={'Referer': 'https://www.vlive.tv/'}, query=query)
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
                self.raise_login_required(json.loads(e.cause.read().decode('utf-8'))['message'])
            raise
74
75
class VLiveIE(VLiveBaseIE):
    """Extractor for ``vlive.tv/video/<id>`` and ``vlive.tv/embed/<id>`` URLs.

    A URL yields a single video, or the whole playlist the video belongs to
    unless the ``noplaylist`` parameter is set.
    """

    IE_NAME = 'vlive'
    _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/(?:video|embed)/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'http://www.vlive.tv/video/1326',
        'md5': 'cc7314812855ce56de70a06a27314983',
        'info_dict': {
            'id': '1326',
            'ext': 'mp4',
            'title': "Girl's Day's Broadcast",
            'creator': "Girl's Day",
            'view_count': int,
            'uploader_id': 'muploader_a',
            'upload_date': '20150817',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
            'timestamp': 1439816449,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://www.vlive.tv/video/16937',
        'info_dict': {
            'id': '16937',
            'ext': 'mp4',
            'title': '첸백시 걍방',
            'creator': 'EXO',
            'view_count': int,
            'subtitles': 'mincount:12',
            'uploader_id': 'muploader_j',
            'upload_date': '20161112',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
            'timestamp': 1478923074,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.vlive.tv/video/129100',
        'md5': 'ca2569453b79d66e5b919e5d308bff6b',
        'info_dict': {
            'id': '129100',
            'ext': 'mp4',
            'title': '[V LIVE] [BTS+] Run BTS! 2019 - EP.71 :: Behind the scene',
            'creator': 'BTS+',
            'view_count': int,
            'subtitles': 'mincount:10',
        },
        'skip': 'This video is only available for CH+ subscribers',
    }, {
        'url': 'https://www.vlive.tv/embed/1326',
        'only_matching': True,
    }, {
        # works only with gcc=KR
        'url': 'https://www.vlive.tv/video/225019',
        'only_matching': True,
    }, {
        'url': 'https://www.vlive.tv/video/223906',
        'info_dict': {
            'id': '58',
            'title': 'RUN BTS!'
        },
        'playlist_mincount': 120
    }]

    def _real_extract(self, url):
        """Return info for the video in ``url``, or for its whole playlist.

        The single-video path is taken when the post has no playlist or the
        ``noplaylist`` parameter is set; otherwise every post of the playlist
        is fetched in one API call and extracted.
        """
        video_id = self._match_id(url)

        post = self._call_api(
            'post/v1.0/officialVideoPost-%s', video_id,
            'author{nickname},channel{channelCode,channelName},officialVideo{commentCount,exposeStatus,likeCount,playCount,playTime,status,title,type,vodId},playlist{playlistSeq,totalCount,name}')

        playlist = post.get('playlist')
        if not playlist or self.get_param('noplaylist'):
            if playlist:
                self.to_screen(
                    'Downloading just video %s because of --no-playlist'
                    % video_id)

            video = post['officialVideo']
            return self._get_vlive_info(post, video, video_id)

        playlist_name = playlist.get('name')
        playlist_id = str_or_none(playlist.get('playlistSeq'))
        # The reported total is passed as the page limit so that a single
        # request returns the complete playlist.
        playlist_count = str_or_none(playlist.get('totalCount'))

        playlist = self._call_api(
            'playlist/v1.0/playlist-%s/posts', playlist_id, 'data', {'limit': playlist_count})

        entries = []
        for video_data in playlist['data']:
            video = video_data.get('officialVideo')
            if not video:
                # A post without video metadata cannot be extracted; skip it
                # instead of aborting the whole playlist.
                continue
            video_id = str_or_none(video.get('videoSeq'))
            entries.append(self._get_vlive_info(video_data, video, video_id))

        return self.playlist_result(entries, playlist_id, playlist_name)

    def _get_vlive_info(self, post, video, video_id):
        """Build the info dict for one video.

        Args:
            post: post JSON object; ``author`` and ``channel`` are read from it.
            video: the post's ``officialVideo`` JSON object.
            video_id: identifier used for API calls and as the result ``id``.

        Returns:
            An info dict for ``VOD`` videos and for ``LIVE`` videos that are
            ``ON_AIR``. For any other video type nothing is returned.

        Raises:
            ExtractorError: for a ``LIVE`` video that is not on air (ended,
                reserved, cancelled or in an unknown state).
        """
        def get_common_fields():
            # ``or {}`` guards against the key being present but null.
            channel = post.get('channel') or {}
            author = post.get('author') or {}
            return {
                'title': video.get('title'),
                'creator': author.get('nickname'),
                'channel': channel.get('channelName'),
                'channel_id': channel.get('channelCode'),
                'duration': int_or_none(video.get('playTime')),
                'view_count': int_or_none(video.get('playCount')),
                'like_count': int_or_none(video.get('likeCount')),
                'comment_count': int_or_none(video.get('commentCount')),
                'timestamp': int_or_none(video.get('createdAt'), scale=1000),
                'thumbnail': video.get('thumb'),
            }

        video_type = video.get('type')
        if video_type == 'VOD':
            inkey = self._call_api('video/v1.0/vod/%s/inkey', video_id)['inkey']
            vod_id = video['vodId']
            info_dict = merge_dicts(
                get_common_fields(),
                self._extract_video_info(video_id, vod_id, inkey))
            thumbnail = video.get('thumb')
            if thumbnail:
                # Move a lone 'thumbnail' into the 'thumbnails' list first so
                # it is kept alongside the post's own thumbnail added below.
                if not info_dict.get('thumbnails') and info_dict.get('thumbnail'):
                    info_dict['thumbnails'] = [{'url': info_dict.pop('thumbnail')}]
                info_dict.setdefault('thumbnails', []).append({'url': thumbnail, 'preference': 1})
            return info_dict
        elif video_type == 'LIVE':
            status = video.get('status')
            if status == 'ON_AIR':
                stream_url = self._call_api(
                    'old/v3/live/%s/playInfo',
                    video_id)['result']['adaptiveStreamUrl']
                formats = self._extract_m3u8_formats(stream_url, video_id, 'mp4')
                self._sort_formats(formats)
                info = get_common_fields()
                info.update({
                    'title': self._live_title(video['title']),
                    'id': video_id,
                    'formats': formats,
                    'is_live': True,
                })
                return info
            elif status == 'ENDED':
                raise ExtractorError(
                    'Uploading for replay. Please wait...', expected=True)
            elif status == 'RESERVED':
                raise ExtractorError('Coming soon!', expected=True)
            elif video.get('exposeStatus') == 'CANCEL':
                raise ExtractorError(
                    'We are sorry, but the live broadcast has been canceled.',
                    expected=True)
            else:
                # %-formatting instead of '+' so that a missing status (None)
                # still produces this error rather than a TypeError.
                raise ExtractorError('Unknown status %s' % status)
229
230
class VLivePostIE(VLiveBaseIE):
    """Extractor for ``vlive.tv/post/<id>`` URLs.

    A post that wraps an official video is delegated to ``VLiveIE``; any
    other post is returned as a playlist of its attached videos.
    """

    IE_NAME = 'vlive:post'
    _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/post/(?P<id>\d-\d+)'
    _TESTS = [{
        # uploadType = SOS
        'url': 'https://www.vlive.tv/post/1-20088044',
        'info_dict': {
            'id': '1-20088044',
            'title': 'Hola estrellitas la tierra les dice hola (si era así no?) Ha...',
            'description': 'md5:fab8a1e50e6e51608907f46c7fa4b407',
        },
        'playlist_count': 3,
    }, {
        # uploadType = V
        'url': 'https://www.vlive.tv/post/1-20087926',
        'info_dict': {
            'id': '1-20087926',
            'title': 'James Corden: And so, the baby becamos the Papa💜😭💪😭',
        },
        'playlist_count': 1,
    }]
    # Formatted twice: first with the endpoint name here (the escaped '%%s'
    # becomes '%s'), then with the video id inside _call_api.
    _FVIDEO_TMPL = 'fvideo/v1.0/fvideo-%%s/%s'

    def _real_extract(self, url):
        """Return a ``VLiveIE`` URL result or a playlist of attached videos.

        Attachments without a ``videoId``, with an unrecognised
        ``uploadType``, or of type ``V`` without an upload ``videoId`` are
        skipped. A post with no video attachments yields an empty playlist.
        """
        post_id = self._match_id(url)

        post = self._call_api(
            'post/v1.0/post-%s', post_id,
            'attachments{video},officialVideo{videoSeq},plainBody,title')

        video_seq = str_or_none(try_get(
            post, lambda x: x['officialVideo']['videoSeq']))
        if video_seq:
            return self.url_result(
                'http://www.vlive.tv/video/' + video_seq,
                VLiveIE.ie_key(), video_seq)

        title = post['title']
        # Guarded lookup: a post may carry no 'attachments' or no 'video'
        # mapping, which previously crashed with KeyError/TypeError.
        attached_videos = try_get(
            post, lambda x: x['attachments']['video'], dict) or {}
        entries = []
        for idx, video in enumerate(attached_videos.values()):
            video_id = video.get('videoId')
            if not video_id:
                continue
            upload_type = video.get('uploadType')
            upload_info = video.get('uploadInfo') or {}
            entry = None
            if upload_type == 'SOS':
                download = self._call_api(
                    self._FVIDEO_TMPL % 'sosPlayInfo', video_id)['videoUrl']['download']
                formats = []
                for f_id, f_url in download.items():
                    formats.append({
                        'format_id': f_id,
                        'url': f_url,
                        # The format id minus its last character is parsed
                        # as the height (None when it is not numeric).
                        'height': int_or_none(f_id[:-1]),
                    })
                self._sort_formats(formats)
                entry = {
                    'formats': formats,
                    'id': video_id,
                    'thumbnail': upload_info.get('imageUrl'),
                }
            elif upload_type == 'V':
                vod_id = upload_info.get('videoId')
                if not vod_id:
                    continue
                inkey = self._call_api(self._FVIDEO_TMPL % 'inKey', video_id)['inKey']
                entry = self._extract_video_info(video_id, vod_id, inkey)
            if entry:
                # idx counts every attachment, including skipped ones, so
                # part numbers follow the position within the post.
                entry['title'] = '%s_part%s' % (title, idx)
                entries.append(entry)
        return self.playlist_result(
            entries, post_id, title, strip_or_none(post.get('plainBody')))
304
305
class VLiveChannelIE(VLiveBaseIE):
    """Extractor for channel URLs, optionally narrowed to one board.

    Matches ``channels.vlive.tv/<code>`` and
    ``vlive.tv/channel/<code>[/board/<id>]``.
    """

    IE_NAME = 'vlive:channel'
    _VALID_URL = r'https?://(?:channels\.vlive\.tv|(?:(?:www|m)\.)?vlive\.tv/channel)/(?P<channel_id>[0-9A-Z]+)(?:/board/(?P<posts_id>\d+))?'
    _TESTS = [{
        'url': 'http://channels.vlive.tv/FCD4B',
        'info_dict': {
            'id': 'FCD4B',
            'title': 'MAMAMOO',
        },
        'playlist_mincount': 110
    }, {
        'url': 'https://www.vlive.tv/channel/FCD4B',
        'only_matching': True,
    }, {
        'url': 'https://www.vlive.tv/channel/FCD4B/board/3546',
        'info_dict': {
            'id': 'FCD4B-3546',
            'title': 'MAMAMOO - Star Board',
        },
        'playlist_mincount': 880
    }]

    def _entries(self, posts_id, board_name):
        """Yield a ``VLivePostIE`` URL result for each video post, page by page.

        Args:
            posts_id: board id when ``board_name`` is set, otherwise the
                channel code.
            board_name: truthy to list a board's posts (latest first);
                falsy to list the channel's star posts.

        Posts whose ``contentType`` is not ``VIDEO`` or that lack an id,
        title or valid URL are skipped. Paging stops when the response has
        no ``paging.nextParams.after`` cursor.
        """
        if board_name:
            posts_path = 'post/v1.0/board-%s/posts'
            query_add = {'limit': 100, 'sortType': 'LATEST'}
        else:
            posts_path = 'post/v1.0/channel-%s/starPosts'
            query_add = {'limit': 100}

        for page_num in itertools.count(1):
            video_list = self._call_api(
                posts_path, posts_id, 'channel{channelName},contentType,postId,title,url', query_add,
                note=f'Downloading playlist page {page_num}')

            for video in try_get(video_list, lambda x: x['data'], list) or []:
                # str_or_none rather than str(): str(None) is the truthy
                # string 'None', which would slip past the check below.
                video_id = str_or_none(video.get('postId'))
                video_title = str_or_none(video.get('title'))
                video_url = url_or_none(video.get('url'))
                if not all((video_id, video_title, video_url)) or video.get('contentType') != 'VIDEO':
                    continue
                channel_name = try_get(video, lambda x: x['channel']['channelName'], compat_str)
                yield self.url_result(video_url, VLivePostIE.ie_key(), video_id, video_title, channel=channel_name)

            after = try_get(video_list, lambda x: x['paging']['nextParams']['after'], compat_str)
            if not after:
                break
            # Cursor for the next page.
            query_add['after'] = after

    def _real_extract(self, url):
        """Return a playlist of the channel's (or board's) video posts.

        The playlist id is ``<channel>-<board>`` for board URLs and the
        channel code otherwise. The title uses the channel name carried by
        the first entry, so it is None when there are no entries.

        Raises:
            ExtractorError: (expected) when the board's ``boardType`` is
                neither ``STAR`` nor ``VLIVE_PLUS``.
        """
        channel_id, posts_id = self._match_valid_url(url).groups()

        board_name = None
        if posts_id:
            board = self._call_api(
                'board/v1.0/board-%s', posts_id, 'title,boardType')
            board_name = board.get('title') or 'Unknown'
            if board.get('boardType') not in ('STAR', 'VLIVE_PLUS'):
                raise ExtractorError(f'Board {board_name!r} is not supported', expected=True)

        entries = self._entries(posts_id or channel_id, board_name)
        # The first entry is pulled eagerly only to learn the channel name.
        # A default avoids a bare StopIteration when nothing is listed.
        first_video = next(entries, None)
        if first_video is None:
            channel_name = None
            all_entries = []
        else:
            channel_name = first_video['channel']
            # Put the consumed entry back in front of the remaining lazy ones.
            all_entries = itertools.chain([first_video], entries)

        return self.playlist_result(
            all_entries,
            f'{channel_id}-{posts_id}' if posts_id else channel_id,
            f'{channel_name} - {board_name}' if channel_name and board_name else channel_name)