]> jfr.im git - yt-dlp.git/blame - youtube_dlc/extractor/vlive.py
Merge 'ytdl-org/youtube-dl/master' release 2020.11.19
[yt-dlp.git] / youtube_dlc / extractor / vlive.py
CommitLineData
061f62da 1# coding: utf-8
25bcd355 2from __future__ import unicode_literals
061f62da 3
b24d6336 4import re
b92d3c53 5import time
6import itertools
8bdd16b4 7import json
9d186afa 8
061f62da 9from .common import InfoExtractor
c88debff 10from .naver import NaverBaseIE
8bdd16b4 11from ..compat import (
12 compat_HTTPError,
13 compat_str,
14)
061f62da 15from ..utils import (
9d186afa 16 ExtractorError,
8bdd16b4 17 int_or_none,
c88debff 18 merge_dicts,
661cc229 19 try_get,
89c63cc5 20 urlencode_postdata,
061f62da 21)
061f62da 22
23
class VLiveBaseIE(NaverBaseIE):
    """Common base class for the V LIVE extractors defined in this module."""

    # Application id that the subclasses send along with their API requests
    # (as the ``appId`` / ``app_id`` query parameter).
    _APP_ID = '8c6cc7b45d2568fb668be6e05b6e5a3b'
26
27
class VLiveIE(VLiveBaseIE):
    """Extractor for a single V LIVE video page (``/video/<id>`` or ``/embed/<id>``).

    Handles both ``VOD`` posts and ``LIVE`` broadcasts; optionally logs in
    first when credentials are available.
    """
    IE_NAME = 'vlive'
    _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/(?:video|embed)/(?P<id>[0-9]+)'
    _NETRC_MACHINE = 'vlive'
    _TESTS = [{
        'url': 'http://www.vlive.tv/video/1326',
        'md5': 'cc7314812855ce56de70a06a27314983',
        'info_dict': {
            'id': '1326',
            'ext': 'mp4',
            'title': "Girl's Day's Broadcast",
            'creator': "Girl's Day",
            'view_count': int,
            'uploader_id': 'muploader_a',
        },
    }, {
        'url': 'http://www.vlive.tv/video/16937',
        'info_dict': {
            'id': '16937',
            'ext': 'mp4',
            'title': '첸백시 걍방',
            'creator': 'EXO',
            'view_count': int,
            'subtitles': 'mincount:12',
            'uploader_id': 'muploader_j',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.vlive.tv/video/129100',
        'md5': 'ca2569453b79d66e5b919e5d308bff6b',
        'info_dict': {
            'id': '129100',
            'ext': 'mp4',
            'title': '[V LIVE] [BTS+] Run BTS! 2019 - EP.71 :: Behind the scene',
            'creator': 'BTS+',
            'view_count': int,
            'subtitles': 'mincount:10',
        },
        'skip': 'This video is only available for CH+ subscribers',
    }, {
        'url': 'https://www.vlive.tv/embed/1326',
        'only_matching': True,
    }]

    def _real_initialize(self):
        """Attempt to log in before any extraction takes place."""
        self._login()

    def _login(self):
        """Log in with the configured e-mail/password, if both are available.

        Does nothing when either credential is missing.

        Raises:
            ExtractorError: (expected) when the site does not report a
                logged-in session after the login request.
        """
        email, password = self._get_login_info()
        if None in (email, password):
            return

        def is_logged_in():
            # Ask the site for the current session state; anything other than
            # a boolean at message.login is treated as "not logged in".
            login_info = self._download_json(
                'https://www.vlive.tv/auth/loginInfo', None,
                note='Downloading login info',
                headers={'Referer': 'https://www.vlive.tv/home'})
            return try_get(
                login_info, lambda x: x['message']['login'], bool) or False

        LOGIN_URL = 'https://www.vlive.tv/auth/email/login'
        # First request is made only for its cookies; the credentials are
        # posted by the second request.
        self._request_webpage(
            LOGIN_URL, None, note='Downloading login cookies')

        self._download_webpage(
            LOGIN_URL, None, note='Logging in',
            data=urlencode_postdata({'email': email, 'pwd': password}),
            headers={
                'Referer': LOGIN_URL,
                'Content-Type': 'application/x-www-form-urlencoded'
            })

        if not is_logged_in():
            raise ExtractorError('Unable to log in', expected=True)

    def _call_api(self, path_template, video_id, fields=None):
        """Download JSON from the ``vam-web`` API.

        Args:
            path_template: API path containing one ``%s`` placeholder that is
                filled with ``video_id``.
            video_id: id substituted into the path and used for logging.
            fields: optional value for the ``fields`` query parameter.

        Returns:
            The decoded JSON response.
        """
        query = {'appId': self._APP_ID}
        if fields:
            query['fields'] = fields
        # The progress note is derived from the last path component, cut at
        # its first '-' (e.g. 'officialVideoPost-%s' -> 'officialVideoPost').
        return self._download_json(
            'https://www.vlive.tv/globalv-web/vam-web/' + path_template % video_id, video_id,
            'Downloading %s JSON metadata' % path_template.split('/')[-1].split('-')[0],
            headers={'Referer': 'https://www.vlive.tv/'}, query=query)

    def _real_extract(self, url):
        """Extract the info dict for a VOD post or an on-air live broadcast.

        Raises:
            ExtractorError: when the live broadcast is not currently playable
                (ended, reserved, cancelled or in an unknown state), or via
                ``raise_login_required`` when the API answers HTTP 403.
        """
        video_id = self._match_id(url)

        try:
            post = self._call_api(
                'post/v1.0/officialVideoPost-%s', video_id,
                'author{nickname},channel{channelCode,channelName},officialVideo{commentCount,exposeStatus,likeCount,playCount,playTime,status,title,type,vodId}')
        except ExtractorError as e:
            # A 403 response carries a JSON body whose 'message' is shown to
            # the user as the login-required reason.
            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
                self.raise_login_required(json.loads(e.cause.read().decode())['message'])
            raise

        video = post['officialVideo']

        def get_common_fields():
            # 'author' / 'channel' may be absent or null in the response;
            # fall back to an empty dict in both cases.
            author = post.get('author') or {}
            channel = post.get('channel') or {}
            return {
                'title': video.get('title'),
                'creator': author.get('nickname'),
                'channel': channel.get('channelName'),
                'channel_id': channel.get('channelCode'),
                'duration': int_or_none(video.get('playTime')),
                'view_count': int_or_none(video.get('playCount')),
                'like_count': int_or_none(video.get('likeCount')),
                'comment_count': int_or_none(video.get('commentCount')),
            }

        # NOTE: any type other than 'VOD' or 'LIVE' falls through both
        # branches and the method returns None.
        video_type = video.get('type')
        if video_type == 'VOD':
            inkey = self._call_api('video/v1.0/vod/%s/inkey', video_id)['inkey']
            vod_id = video['vodId']
            return merge_dicts(
                get_common_fields(),
                self._extract_video_info(video_id, vod_id, inkey))
        elif video_type == 'LIVE':
            status = video.get('status')
            if status == 'ON_AIR':
                stream_url = self._call_api(
                    'old/v3/live/%s/playInfo',
                    video_id)['result']['adaptiveStreamUrl']
                formats = self._extract_m3u8_formats(stream_url, video_id, 'mp4')
                info = get_common_fields()
                info.update({
                    'title': self._live_title(video['title']),
                    'id': video_id,
                    'formats': formats,
                    'is_live': True,
                })
                return info
            elif status == 'ENDED':
                raise ExtractorError(
                    'Uploading for replay. Please wait...', expected=True)
            elif status == 'RESERVED':
                raise ExtractorError('Coming soon!', expected=True)
            elif video.get('exposeStatus') == 'CANCEL':
                raise ExtractorError(
                    'We are sorry, but the live broadcast has been canceled.',
                    expected=True)
            else:
                # %-formatting instead of '+' so a missing status (None)
                # still produces an ExtractorError rather than a TypeError.
                raise ExtractorError('Unknown status %s' % status)
57774807 174
57774807 175
class VLiveChannelIE(VLiveBaseIE):
    """Extractor that turns a V LIVE channel into a playlist of its videos."""
    IE_NAME = 'vlive:channel'
    _VALID_URL = r'https?://(?:channels\.vlive\.tv|(?:(?:www|m)\.)?vlive\.tv/channel)/(?P<id>[0-9A-Z]+)'
    _TESTS = [{
        'url': 'http://channels.vlive.tv/FCD4B',
        'info_dict': {
            'id': 'FCD4B',
            'title': 'MAMAMOO',
        },
        'playlist_mincount': 110
    }, {
        'url': 'https://www.vlive.tv/channel/FCD4B',
        'only_matching': True,
    }]

    def _call_api(self, path, channel_key_suffix, channel_value, note, query):
        """Query the ``channelplus`` API and return the ``result`` member.

        The channel is identified by the query parameter
        ``'channel' + channel_key_suffix`` set to ``channel_value``; entries
        of ``query`` are merged on top of the base parameters.
        """
        params = {'app_id': self._APP_ID}
        params['channel' + channel_key_suffix] = channel_value
        params.update(query)
        response = self._download_json(
            'http://api.vfan.vlive.tv/vproxy/channelplus/' + path,
            channel_value, note='Downloading ' + note, query=params)
        return response['result']

    def _real_extract(self, url):
        """Page through the channel's video list and build a playlist result."""
        channel_code = self._match_id(url)

        decoded = self._call_api(
            'decodeChannelCode', 'Code', channel_code,
            'decode channel code', {})
        channel_seq = decoded['channelSeq']

        playlist_title = None
        playlist_entries = []

        for page_no in itertools.count(1):
            # maxNumOfRows is kept at 100: large values (~300 or above) may
            # produce empty responses, see
            # https://github.com/ytdl-org/youtube-dl/issues/13830
            # (observed for http://channels.vlive.tv/EDBF, a channel with
            # more than 300 videos).
            page_query = {
                'maxNumOfRows': 100,
                'pageNo': page_no
            }
            page = self._call_api(
                'getChannelVideoList', 'Seq', channel_seq,
                'channel list page #%d' % page_no, page_query)

            # Keep trying to pick up the channel name until a page has one.
            if not playlist_title:
                playlist_title = try_get(
                    page, lambda x: x['channelInfo']['channelName'],
                    compat_str)

            page_videos = try_get(page, lambda x: x['videoList'], list)
            if not page_videos:
                # An empty or missing list marks the end of the channel.
                break

            for item in page_videos:
                seq = item.get('videoSeq')
                if seq:
                    seq = compat_str(seq)
                    playlist_entries.append(
                        self.url_result(
                            'http://www.vlive.tv/video/%s' % seq,
                            ie=VLiveIE.ie_key(), video_id=seq))

        return self.playlist_result(
            playlist_entries, channel_code, playlist_title)
b71c18b4 248
249
8bdd16b4 250# old extractor. Rewrite?
251
class VLivePlaylistIE(VLiveBaseIE):
    """Extractor for V LIVE playlist URLs (``/video/<video>/playlist/<id>``)."""
    IE_NAME = 'vlive:playlist'
    _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/video/(?P<video_id>[0-9]+)/playlist/(?P<id>[0-9]+)'
    _VIDEO_URL_TEMPLATE = 'http://www.vlive.tv/video/%s'
    _TESTS = [{
        # regular working playlist
        'url': 'https://www.vlive.tv/video/117956/playlist/117963',
        'info_dict': {
            'id': '117963',
            'title': '아이돌룸(IDOL ROOM) 41회 - (여자)아이들'
        },
        'playlist_mincount': 10
    }, {
        # playlist with no playlistVideoSeqs
        'url': 'http://www.vlive.tv/video/22867/playlist/22912',
        'info_dict': {
            'id': '22867',
            'ext': 'mp4',
            'title': '[V LIVE] Valentine Day Message from MINA',
            'creator': 'TWICE',
            'view_count': int
        },
        'params': {
            'skip_download': True,
        }
    }]

    def _build_video_result(self, video_id, message):
        """Print ``message`` and return a url result for the single video."""
        self.to_screen(message)
        single_video_url = self._VIDEO_URL_TEMPLATE % video_id
        return self.url_result(
            single_video_url, ie=VLiveIE.ie_key(), video_id=video_id)

    def _real_extract(self, url):
        """Return the playlist, or just the video when no playlist applies.

        The single video from the URL is returned instead of a playlist when
        the ``noplaylist`` option is set or when the page contains no
        ``playlistVideoSeqs`` array.
        """
        match = re.match(self._VALID_URL, url)
        video_id = match.group('video_id')
        playlist_id = match.group('id')

        if self._downloader.params.get('noplaylist'):
            return self._build_video_result(
                video_id,
                'Downloading just video %s because of --no-playlist'
                % video_id)

        self.to_screen(
            'Downloading playlist %s - add --no-playlist to just download video'
            % playlist_id)

        page_url = 'http://www.vlive.tv/video/%s/playlist/%s' % (
            video_id, playlist_id)
        webpage = self._download_webpage(page_url, playlist_id)

        # The ids of the playlist items are embedded in the page as a
        # JavaScript array literal.
        seqs_literal = self._search_regex(
            r'playlistVideoSeqs\s*=\s*(\[[^]]+\])', webpage,
            'playlist video seqs', default=None, fatal=False)
        if not seqs_literal:
            return self._build_video_result(
                video_id,
                'Downloading just video %s because no playlist was found'
                % video_id)

        entries = []
        for item_id in self._parse_json(seqs_literal, playlist_id):
            entries.append(self.url_result(
                self._VIDEO_URL_TEMPLATE % item_id, ie=VLiveIE.ie_key(),
                video_id=compat_str(item_id)))

        playlist_title = self._html_search_regex(
            r'<div[^>]+class="[^"]*multicam_playlist[^>]*>\s*<h3[^>]+>([^<]+)',
            webpage, 'playlist title', fatal=False)

        return self.playlist_result(entries, playlist_id, playlist_title)