]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/vk.py
[cleanup] Misc cleanup
[yt-dlp.git] / yt_dlp / extractor / vk.py
CommitLineData
51815886 1import collections
60d142aa 2import re
60d142aa
JMF
3
4from .common import InfoExtractor
059cd768 5from ..compat import compat_urlparse
60d142aa 6from ..utils import (
2d19fb50 7 clean_html,
9032dc28 8 ExtractorError,
2d19fb50 9 get_element_by_class,
bf4b3b6b 10 int_or_none,
1cc79574 11 orderedSet,
ad1bc71a 12 str_or_none,
8117df4c 13 str_to_int,
60d142aa 14 unescapeHTML,
a7ee8a00 15 unified_timestamp,
3052a30d 16 url_or_none,
6e6bc8da 17 urlencode_postdata,
1cc79574 18)
e3845525 19from .dailymotion import DailymotionIE
3c989818 20from .odnoklassniki import OdnoklassnikiIE
c4737bea 21from .pladform import PladformIE
e3845525 22from .vimeo import VimeoIE
5113b691 23from .youtube import YoutubeIE
60d142aa
JMF
24
25
2d19fb50
S
class VKBaseIE(InfoExtractor):
    """Base class for the VK extractors: login and AJAX payload download."""

    _NETRC_MACHINE = 'vk'

    def _perform_login(self, username, password):
        """Submit the vk.com login form with the given credentials.

        Raises:
            ExtractorError: (expected) if the login response contains the
                ``onLoginFailed`` marker.
        """
        login_page, url_handle = self._download_webpage_handle(
            'https://vk.com', None, 'Downloading login page')

        # Start from the hidden inputs of the landing page and add credentials
        login_form = self._hidden_inputs(login_page)
        login_form.update({
            'email': username.encode('cp1251'),
            'pass': password.encode('cp1251'),
        })

        # vk serves two same remixlhk cookies in Set-Cookie header and expects
        # first one to be actually set
        self._apply_first_set_cookie_header(url_handle, 'remixlhk')

        login_page = self._download_webpage(
            'https://vk.com/login', None,
            note='Logging in',
            data=urlencode_postdata(login_form))

        if re.search(r'onLoginFailed', login_page):
            raise ExtractorError(
                'Unable to login, incorrect username and/or password', expected=True)

    def _download_payload(self, path, video_id, data, fatal=True):
        """POST ``data`` to ``https://vk.com/<path>.php`` and return the payload.

        The response JSON is expected to have a ``payload`` entry holding a
        ``(code, payload)`` pair; only the second element is returned.

        Args:
            path: endpoint name without the ``.php`` suffix.
            video_id: identifier passed through to the download helper.
            data: form fields; ``al=1`` is added to a copy, the caller's dict
                is left untouched.
            fatal: forwarded to ``_download_json``.

        Returns:
            The payload, or ``None`` when ``fatal`` is false and the download
            helper produced no response.

        Raises:
            ExtractorError: for code ``'8'`` (message taken from the payload);
                code ``'3'`` goes through ``raise_login_required()``.
        """
        # Copy instead of mutating the dict owned by the caller
        form = dict(data, al=1)
        response = self._download_json(
            'https://vk.com/%s.php' % path, video_id,
            data=urlencode_postdata(form), fatal=fatal,
            headers={'X-Requested-With': 'XMLHttpRequest'})
        if not fatal and not response:
            # Non-fatal download failure: nothing to unpack
            # NOTE(review): assumes the helper returns a falsy value on
            # non-fatal failure - confirm against InfoExtractor._download_json
            return None

        code, payload = response['payload']
        if code == '3':
            self.raise_login_required()
        elif code == '8':
            # First payload item holds the message wrapped in one extra
            # character on each side, which is stripped here
            raise ExtractorError(clean_html(payload[0][1:-1]), expected=True)
        return payload
64
2d19fb50
S
65
class VKIE(VKBaseIE):
    """Extractor for single VK videos, clips and ``video_ext.php`` embeds."""

    IE_NAME = 'vk'
    IE_DESC = 'VK'
    # The dots in "daxab.com" are escaped so that only the literal host matches
    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            (?:
                                (?:(?:m|new)\.)?vk\.com/video_|
                                (?:www\.)?daxab\.com/
                            )
                            ext\.php\?(?P<embed_query>.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+).*)|
                            (?:
                                (?:(?:m|new)\.)?vk\.com/(?:.+?\?.*?z=)?(?:video|clip)|
                                (?:www\.)?daxab\.com/embed/
                            )
                            (?P<videoid>-?\d+_\d+)(?:.*\blist=(?P<list_id>([\da-f]+)|(ln-[\da-zA-Z]+)))?
                        )
                    '''
    _TESTS = [
        {
            'url': 'http://vk.com/videos-77521?z=video-77521_162222515%2Fclub77521',
            'md5': '7babad3b85ea2e91948005b1b8b0cb84',
            'info_dict': {
                'id': '-77521_162222515',
                'ext': 'mp4',
                'title': 'ProtivoGunz - Хуёвая песня',
                'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*',
                'uploader_id': '-77521',
                'duration': 195,
                'timestamp': 1329049880,
                'upload_date': '20120212',
            },
        },
        {
            'url': 'http://vk.com/video205387401_165548505',
            'info_dict': {
                'id': '205387401_165548505',
                'ext': 'mp4',
                'title': 'No name',
                'uploader': 'Tom Cruise',
                'uploader_id': '205387401',
                'duration': 9,
                'timestamp': 1374364108,
                'upload_date': '20130720',
            }
        },
        {
            'note': 'Embedded video',
            'url': 'https://vk.com/video_ext.php?oid=-77521&id=162222515&hash=87b046504ccd8bfa',
            'md5': '7babad3b85ea2e91948005b1b8b0cb84',
            'info_dict': {
                'id': '-77521_162222515',
                'ext': 'mp4',
                'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*',
                'title': 'ProtivoGunz - Хуёвая песня',
                'duration': 195,
                'upload_date': '20120212',
                'timestamp': 1329049880,
                'uploader_id': '-77521',
            },
        },
        {
            # VIDEO NOW REMOVED
            # please update if you find a video whose URL follows the same pattern
            'url': 'http://vk.com/video-8871596_164049491',
            'md5': 'a590bcaf3d543576c9bd162812387666',
            'note': 'Only available for registered users',
            'info_dict': {
                'id': '-8871596_164049491',
                'ext': 'mp4',
                'uploader': 'Триллеры',
                'title': '► Бойцовский клуб / Fight Club 1999 [HD 720]',
                'duration': 8352,
                'upload_date': '20121218',
                'view_count': int,
            },
            'skip': 'Removed',
        },
        {
            'url': 'http://vk.com/hd_kino_mania?z=video-43215063_168067957%2F15c66b9b533119788d',
            'info_dict': {
                'id': '-43215063_168067957',
                'ext': 'mp4',
                'uploader': 'Bro Mazter',
                'title': ' ',
                'duration': 7291,
                'upload_date': '20140328',
                'uploader_id': '223413403',
                'timestamp': 1396018030,
            },
            'skip': 'Requires vk account credentials',
        },
        {
            'url': 'http://m.vk.com/video-43215063_169084319?list=125c627d1aa1cebb83&from=wall-43215063_2566540',
            'md5': '0c45586baa71b7cb1d0784ee3f4e00a6',
            'note': 'ivi.ru embed',
            'info_dict': {
                'id': '-43215063_169084319',
                'ext': 'mp4',
                'title': 'Книга Илая',
                'duration': 6771,
                'upload_date': '20140626',
                'view_count': int,
            },
            'skip': 'Removed',
        },
        {
            'url': 'https://vk.com/video-93049196_456239755?list=ln-cBjJ7S4jYYx3ADnmDT',
            'info_dict': {
                'id': '-93049196_456239755',
                'ext': 'mp4',
                'title': '8 серия (озвучка)',
                'duration': 8383,
                'upload_date': '20211222',
                'view_count': int,
            },
        },
        {
            # video (removed?) only available with list id
            'url': 'https://vk.com/video30481095_171201961?list=8764ae2d21f14088d4',
            'md5': '091287af5402239a1051c37ec7b92913',
            'info_dict': {
                'id': '30481095_171201961',
                'ext': 'mp4',
                'title': 'ТюменцевВВ_09.07.2015',
                'uploader': 'Anton Ivanov',
                'duration': 109,
                'upload_date': '20150709',
                'view_count': int,
            },
            'skip': 'Removed',
        },
        {
            # youtube embed
            'url': 'https://vk.com/video276849682_170681728',
            'info_dict': {
                'id': 'V3K4mi0SYkc',
                'ext': 'mp4',
                'title': "DSWD Awards 'Children's Joy Foundation, Inc.' Certificate of Registration and License to Operate",
                'description': 'md5:bf9c26cfa4acdfb146362682edd3827a',
                'duration': 178,
                'upload_date': '20130116',
                'uploader': "Children's Joy Foundation Inc.",
                'uploader_id': 'thecjf',
                'view_count': int,
            },
        },
        {
            # dailymotion embed
            'url': 'https://vk.com/video-37468416_456239855',
            'info_dict': {
                'id': 'k3lz2cmXyRuJQSjGHUv',
                'ext': 'mp4',
                'title': 'md5:d52606645c20b0ddbb21655adaa4f56f',
                'description': 'md5:424b8e88cc873217f520e582ba28bb36',
                'uploader': 'AniLibria.Tv',
                'upload_date': '20160914',
                'uploader_id': 'x1p5vl5',
                'timestamp': 1473877246,
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            # video key is extra_data not url\d+
            'url': 'http://vk.com/video-110305615_171782105',
            'md5': 'e13fcda136f99764872e739d13fac1d1',
            'info_dict': {
                'id': '-110305615_171782105',
                'ext': 'mp4',
                'title': 'S-Dance, репетиции к The way show',
                'uploader': 'THE WAY SHOW | 17 апреля',
                'uploader_id': '-110305615',
                'timestamp': 1454859345,
                'upload_date': '20160207',
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            # finished live stream, postlive_mp4
            'url': 'https://vk.com/videos-387766?z=video-387766_456242764%2Fpl_-387766_-2',
            'info_dict': {
                'id': '-387766_456242764',
                'ext': 'mp4',
                'title': 'ИгроМир 2016 День 1 — Игромания Утром',
                'uploader': 'Игромания',
                'duration': 5239,
                # TODO: use act=show to extract view_count
                # 'view_count': int,
                'upload_date': '20160929',
                'uploader_id': '-387766',
                'timestamp': 1475137527,
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            # live stream, hls and rtmp links, most likely already finished live
            # stream by the time you are reading this comment
            'url': 'https://vk.com/video-140332_456239111',
            'only_matching': True,
        },
        {
            # removed video, just testing that we match the pattern
            'url': 'http://vk.com/feed?z=video-43215063_166094326%2Fbb50cacd3177146d7a',
            'only_matching': True,
        },
        {
            # age restricted video, requires vk account credentials
            'url': 'https://vk.com/video205387401_164765225',
            'only_matching': True,
        },
        {
            # pladform embed
            'url': 'https://vk.com/video-76116461_171554880',
            'only_matching': True,
        },
        {
            'url': 'http://new.vk.com/video205387401_165548505',
            'only_matching': True,
        },
        {
            # This video is no longer available, because its author has been blocked.
            'url': 'https://vk.com/video-10639516_456240611',
            'only_matching': True,
        },
        {
            # The video is not available in your region.
            'url': 'https://vk.com/video-51812607_171445436',
            'only_matching': True,
        },
        {
            'url': 'https://vk.com/clip30014565_456240946',
            'only_matching': True,
        }]

    _ERROR_COPYRIGHT = 'Video %s has been removed from public access due to rightholder complaint.'
    _ERROR_AUTHOR_BLOCKED = 'Video %s is no longer available, because its author has been blocked.'

    # (page regex, message template) pairs checked in order against the
    # video_ext.php page; each template takes the video id
    _EMBED_ERRORS = (
        (r'>Видеозапись .*? была изъята из публичного доступа в связи с обращением правообладателя.<',
         _ERROR_COPYRIGHT),
        (r'>The video .*? was removed from public access by request of the copyright holder.<',
         _ERROR_COPYRIGHT),
        (r'<!>Please log in or <',
         'Video %s is only available for registered users, '
         'use --username and --password options to provide account credentials.'),
        (r'<!>Unknown error',
         'Video %s does not exist.'),
        (r'<!>Видео временно недоступно',
         'Video %s is temporarily unavailable.'),
        (r'<!>Access denied',
         'Access denied to video %s.'),
        (r'<!>Видеозапись недоступна, так как её автор был заблокирован.',
         _ERROR_AUTHOR_BLOCKED),
        (r'<!>This video is no longer available, because its author has been blocked.',
         _ERROR_AUTHOR_BLOCKED),
        (r'<!>This video is no longer available, because it has been deleted.',
         'Video %s is no longer available, because it has been deleted.'),
        (r'<!>The video .+? is not available in your region.',
         'Video %s is not available in your region.'),
    )

    @staticmethod
    def _extract_sibnet_urls(webpage):
        """Return the (HTML-unescaped) sibnet iframe URLs found in ``webpage``."""
        # https://help.sibnet.ru/?sibnet_video_embed
        return [unescapeHTML(mobj.group('url')) for mobj in re.finditer(
            r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//video\.sibnet\.ru/shell\.php\?.*?\bvideoid=\d+.*?)\1',
            webpage)]

    def _raise_for_embed_errors(self, info_page, video_id):
        """Raise an expected ExtractorError if the embed page reports a known error."""
        error_message = self._html_search_regex(
            [r'(?s)<!><div[^>]+class="video_layer_message"[^>]*>(.+?)</div>',
             r'(?s)<div[^>]+id="video_ext_msg"[^>]*>(.+?)</div>'],
            info_page, 'error message', default=None)
        if error_message:
            raise ExtractorError(error_message, expected=True)

        if re.search(r'<!>/login\.php\?.*\bact=security_check', info_page):
            raise ExtractorError(
                'You are trying to log in from an unusual location. You should confirm ownership at vk.com to log in with this IP.',
                expected=True)

        for error_re, error_msg in self._EMBED_ERRORS:
            if re.search(error_re, info_page):
                raise ExtractorError(error_msg % video_id, expected=True)

    def _find_embedded_player(self, url, info_page):
        """Return a url_result for the first third-party embed in ``info_page``, else None."""
        youtube_url = YoutubeIE._extract_url(info_page)
        if youtube_url:
            return self.url_result(youtube_url, YoutubeIE.ie_key())

        vimeo_url = VimeoIE._extract_url(url, info_page)
        if vimeo_url is not None:
            return self.url_result(vimeo_url, VimeoIE.ie_key())

        pladform_url = PladformIE._extract_url(info_page)
        if pladform_url:
            return self.url_result(pladform_url, PladformIE.ie_key())

        m_rutube = re.search(
            r'\ssrc="((?:https?:)?//rutube\.ru\\?/(?:video|play)\\?/embed(?:.*?))\\?"', info_page)
        if m_rutube is not None:
            # The URL may be backslash-escaped inside the page; drop the escapes
            rutube_url = self._proto_relative_url(
                m_rutube.group(1).replace('\\', ''))
            return self.url_result(rutube_url)

        dailymotion_urls = DailymotionIE._extract_urls(info_page)
        if dailymotion_urls:
            return self.url_result(dailymotion_urls[0], DailymotionIE.ie_key())

        odnoklassniki_url = OdnoklassnikiIE._extract_url(info_page)
        if odnoklassniki_url:
            return self.url_result(odnoklassniki_url, OdnoklassnikiIE.ie_key())

        sibnet_urls = self._extract_sibnet_urls(info_page)
        if sibnet_urls:
            return self.url_result(sibnet_urls[0])

        # Generic fallback: a ``url: '...'`` entry inside a ``var opts = {...};`` block
        m_opts = re.search(r'(?s)var\s+opts\s*=\s*({.+?});', info_page)
        if m_opts:
            m_opts_url = re.search(r"url\s*:\s*'((?!/\b)[^']+)", m_opts.group(1))
            if m_opts_url:
                opts_url = m_opts_url.group(1)
                if opts_url.startswith('//'):
                    opts_url = 'http:' + opts_url
                return self.url_result(opts_url)

        return None

    def _build_formats(self, data, video_id, is_live):
        """Turn the URL-valued entries of the player params into a sorted format list."""
        formats = []
        for format_id, format_url in data.items():
            format_url = url_or_none(format_url)
            if not format_url or not format_url.startswith(('http', '//', 'rtmp')):
                continue
            if (format_id.startswith(('url', 'cache'))
                    or format_id in ('extra_data', 'live_mp4', 'postlive_mp4')):
                # Keys such as url720 / cache480 carry the height as a suffix
                height = int_or_none(self._search_regex(
                    r'^(?:url|cache)(\d+)', format_id, 'height', default=None))
                formats.append({
                    'format_id': format_id,
                    'url': format_url,
                    'height': height,
                })
            elif format_id == 'hls':
                formats.extend(self._extract_m3u8_formats(
                    format_url, video_id, 'mp4', 'm3u8_native',
                    m3u8_id=format_id, fatal=False, live=is_live))
            elif format_id == 'rtmp':
                formats.append({
                    'format_id': format_id,
                    'url': format_url,
                    'ext': 'flv',
                })
        self._sort_formats(formats)
        return formats

    def _real_extract(self, url):
        """Extract a VK video, or delegate to another extractor for third-party embeds.

        URLs that match the ``videoid`` group are resolved through the
        ``al_video`` payload; ``video_ext.php`` embeds are downloaded as a page,
        checked for known error messages and parsed for ``playerParams``.
        """
        mobj = self._match_valid_url(url)
        video_id = mobj.group('videoid')

        mv_data = {}
        if video_id:
            data = {
                'act': 'show_inline',
                'video': video_id,
            }
            # Some videos (removed?) can only be downloaded with list id specified
            list_id = mobj.group('list_id')
            if list_id:
                data['list'] = list_id

            payload = self._download_payload('al_video', video_id, data)
            info_page = payload[1]
            opts = payload[-1]
            mv_data = opts.get('mvData') or {}
            player = opts.get('player') or {}
        else:
            video_id = '%s_%s' % (mobj.group('oid'), mobj.group('id'))

            info_page = self._download_webpage(
                'http://vk.com/video_ext.php?' + mobj.group('embed_query'), video_id)

            self._raise_for_embed_errors(info_page, video_id)

            player = self._parse_json(self._search_regex(
                r'var\s+playerParams\s*=\s*({.+?})\s*;\s*\n',
                info_page, 'player params'), video_id)

        embed_result = self._find_embedded_player(url, info_page)
        if embed_result is not None:
            return embed_result

        data = player['params'][0]
        title = unescapeHTML(data['md_title'])

        # 2 = live
        # 3 = post live (finished live)
        is_live = data.get('live') == 2

        timestamp = unified_timestamp(self._html_search_regex(
            r'class=["\']mv_info_date[^>]+>([^<]+)(?:<|from)', info_page,
            'upload date', default=None)) or int_or_none(data.get('date'))

        view_count = str_to_int(self._search_regex(
            r'class=["\']mv_views_count[^>]+>\s*([\d,.]+)',
            info_page, 'view count', default=None))

        formats = self._build_formats(data, video_id, is_live)

        subtitles = {}
        for sub in data.get('subs') or {}:
            sub_url = url_or_none(sub.get('url'))
            if not sub_url:
                # An entry without a usable URL cannot be downloaded; skip it
                continue
            subtitles.setdefault(sub.get('lang', 'en'), []).append({
                'ext': sub.get('title', '.srt').split('.')[-1],
                'url': sub_url,
            })

        return {
            'id': video_id,
            'formats': formats,
            'title': title,
            'thumbnail': data.get('jpg'),
            'uploader': data.get('md_author'),
            'uploader_id': str_or_none(data.get('author_id') or mv_data.get('authorId')),
            'duration': int_or_none(data.get('duration') or mv_data.get('duration')),
            'timestamp': timestamp,
            'view_count': view_count,
            'like_count': int_or_none(mv_data.get('likes')),
            'comment_count': int_or_none(mv_data.get('commcount')),
            'is_live': is_live,
            'subtitles': subtitles,
        }
469d4c89
WS
497
498
class VKUserVideosIE(VKBaseIE):
    """Playlist extractor for the video list of a VK user or community page."""

    IE_NAME = 'vk:uservideos'
    IE_DESC = "VK - User's Videos"
    _VALID_URL = r'https?://(?:(?:m|new)\.)?vk\.com/video/@(?P<id>[^?$#/&]+)(?!\?.*\bz=video)(?:[/?#&](?:.*?\bsection=(?P<section>\w+))?|$)'
    _TEMPLATE_URL = 'https://vk.com/videos'
    _TESTS = [{
        'url': 'https://vk.com/video/@mobidevices',
        'info_dict': {
            'id': '-17892518_all',
        },
        'playlist_mincount': 1355,
    }, {
        'url': 'https://vk.com/video/@mobidevices?section=uploaded',
        'info_dict': {
            'id': '-17892518_uploaded',
        },
        'playlist_mincount': 182,
    }]
    # Names for the first two fields of each item in a video list
    _VIDEO = collections.namedtuple('Video', ['owner_id', 'id'])

    def _entries(self, page_id, section):
        """Yield a url_result for every video of ``section``, paging via ``offset``.

        The total is taken from the first page. Paging stops once the summed
        page counts reach that total, or when a page reports a count of zero
        (otherwise the offset would never advance and the loop would spin
        forever).
        """
        offset = 0
        total = None
        while True:
            page = self._download_payload('al_video', page_id, {
                'act': 'load_videos_silent',
                'offset': offset,
                'oid': page_id,
                'section': section,
            })[0][section]
            if total is None:
                total = page['total']

            for video in page['list']:
                v = self._VIDEO._make(video[:2])
                video_id = '%d_%d' % (v.owner_id, v.id)
                yield self.url_result(
                    'http://vk.com/video' + video_id, VKIE.ie_key(), video_id)

            fetched = page['count']
            offset += fetched
            if offset >= total or not fetched:
                break

    def _real_extract(self, url):
        """Resolve the page's owner id and return its videos as a playlist."""
        u_id, section = self._match_valid_url(url).groups()
        webpage = self._download_webpage(url, u_id)
        page_id = self._search_regex(r'data-owner-id\s?=\s?"([^"]+)"', webpage, 'page_id')
        if not section:
            # No ?section= in the URL: list everything
            section = 'all'

        return self.playlist_result(self._entries(page_id, section), '%s_%s' % (page_id, section))
2d19fb50
S
555
556
class VKWallPostIE(VKBaseIE):
    """Playlist extractor for the audio tracks and videos attached to a VK wall post."""

    IE_NAME = 'vk:wallpost'
    _VALID_URL = r'https?://(?:(?:(?:(?:m|new)\.)?vk\.com/(?:[^?]+\?.*\bw=)?wall(?P<id>-?\d+_\d+)))'
    _TESTS = [{
        # public page URL, audio playlist
        'url': 'https://vk.com/bs.official?w=wall-23538238_35',
        'info_dict': {
            'id': '-23538238_35',
            'title': 'Black Shadow - Wall post -23538238_35',
            'description': 'md5:3f84b9c4f9ef499731cf1ced9998cc0c',
        },
        'playlist': [{
            'md5': '5ba93864ec5b85f7ce19a9af4af080f6',
            'info_dict': {
                'id': '135220665_111806521',
                'ext': 'mp4',
                'title': 'Black Shadow - Слепое Верование',
                'duration': 370,
                'uploader': 'Black Shadow',
                'artist': 'Black Shadow',
                'track': 'Слепое Верование',
            },
        }, {
            'md5': '4cc7e804579122b17ea95af7834c9233',
            'info_dict': {
                'id': '135220665_111802303',
                'ext': 'mp4',
                'title': 'Black Shadow - Война - Негасимое Бездны Пламя!',
                'duration': 423,
                'uploader': 'Black Shadow',
                'artist': 'Black Shadow',
                'track': 'Война - Негасимое Бездны Пламя!',
            },
        }],
        'params': {
            'skip_download': True,
        },
        'skip': 'Requires vk account credentials',
    }, {
        # single YouTube embed, no leading -
        'url': 'https://vk.com/wall85155021_6319',
        'info_dict': {
            'id': '85155021_6319',
            'title': 'Сергей Горбунов - Wall post 85155021_6319',
        },
        'playlist_count': 1,
        'skip': 'Requires vk account credentials',
    }, {
        # wall page URL
        'url': 'https://vk.com/wall-23538238_35',
        'only_matching': True,
    }, {
        # mobile wall page URL
        'url': 'https://m.vk.com/wall-23538238_35',
        'only_matching': True,
    }]
    # Alphabet used by _decode. Note that '0' and 'O' sit in each other's usual
    # base64 positions; reproduced exactly as given - do not "normalize" it.
    _BASE64_CHARS = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMN0PQRSTUVWXYZO123456789+/='
    # Field names for the first 16 items of a data-audio JSON array
    _AUDIO = collections.namedtuple('Audio', ['id', 'owner_id', 'url', 'title', 'performer', 'duration', 'album_id', 'unk', 'author_link', 'lyrics', 'flags', 'context', 'extra', 'hashes', 'cover_url', 'ads'])

    def _decode(self, enc):
        """Decode ``enc`` with the _BASE64_CHARS alphabet into a str of byte-valued chars.

        Symbols are consumed in groups of four; every symbol except the first
        of a group emits one output character.
        """
        decoded = []
        acc = 0
        for pos, symbol in enumerate(enc):
            value = self._BASE64_CHARS.index(symbol)
            if pos % 4:
                acc = 64 * acc + value
                # Shift amount is derived from the 1-based symbol position
                decoded.append(chr(255 & acc >> (-2 * (pos + 1) & 6)))
            else:
                # First symbol of a group restarts the accumulator
                acc = value
        return ''.join(decoded)

    def _unmask_url(self, mask_url, vk_id):
        """Return the real URL for a masked ``audio_api_unavailable`` URL.

        URLs that do not contain the ``audio_api_unavailable`` marker are
        returned unchanged. Otherwise the part after ``?extra=`` is decoded and
        its characters are un-shuffled with a permutation seeded by ``vk_id``.
        """
        if 'audio_api_unavailable' not in mask_url:
            return mask_url

        extra = mask_url.split('?extra=')[1].split('#')
        # The fragment decodes to "<func>\x0b<base>"; only the base is used
        _func, base = self._decode(extra[1]).split(chr(11))
        chars = list(self._decode(extra[0]))
        url_len = len(chars)

        # Build the swap table from the last position down to the first
        indexes = [None] * url_len
        index = int(base) ^ vk_id
        for n in range(url_len - 1, -1, -1):
            index = (url_len * (n + 1) ^ index + n) % url_len
            indexes[n] = index

        # Apply the swaps, reading the table back to front
        for n in range(1, url_len):
            target = indexes[url_len - 1 - n]
            chars[n], chars[target] = chars[target], chars[n]

        return ''.join(chars)

    def _real_extract(self, url):
        """Collect the post's audio tracks and linked videos into a playlist."""
        post_id = self._match_id(url)

        payload = self._download_payload('wkview', post_id, {
            'act': 'show',
            'w': 'wall' + post_id,
        })
        webpage = payload[1]

        description = clean_html(get_element_by_class('wall_post_text', webpage))
        uploader = clean_html(get_element_by_class('author', webpage))

        entries = []

        # Audio attachments: one JSON array per data-audio attribute
        for raw_audio in re.findall(r'data-audio="([^"]+)', webpage):
            fields = self._parse_json(unescapeHTML(raw_audio), post_id)
            a = self._AUDIO._make(fields[:16])
            if not a.url:
                continue
            track = unescapeHTML(a.title)
            performer = unescapeHTML(a.performer)
            entries.append({
                'id': '%s_%s' % (a.owner_id, a.id),
                'url': self._unmask_url(a.url, a.ads['vk_id']),
                'title': '%s - %s' % (performer, track) if performer else track,
                'thumbnails': [{'url': c_url} for c_url in a.cover_url.split(',')] if a.cover_url else None,
                'duration': int_or_none(a.duration),
                'uploader': uploader,
                'artist': performer,
                'track': track,
                'ext': 'mp4',
                'protocol': 'm3u8_native',
            })

        # Video attachments: hand each /video... link over to VKIE
        for video in re.finditer(
                r'<a[^>]+href=(["\'])(?P<url>/video(?:-?[\d_]+).*?)\1', webpage):
            video_url = compat_urlparse.urljoin(url, video.group('url'))
            entries.append(self.url_result(video_url, VKIE.ie_key()))

        title = 'Wall post %s' % post_id
        playlist_title = '%s - %s' % (uploader, title) if uploader else title

        return self.playlist_result(
            orderedSet(entries), post_id, playlist_title, description)