]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/vk.py
Tolerate failure to `--write-link` due to unknown URL
[yt-dlp.git] / yt_dlp / extractor / vk.py
CommitLineData
dcdb292f 1# coding: utf-8
94a23d2a
PH
2from __future__ import unicode_literals
3
51815886 4import collections
60d142aa 5import re
60d142aa
JMF
6
7from .common import InfoExtractor
059cd768 8from ..compat import compat_urlparse
60d142aa 9from ..utils import (
2d19fb50 10 clean_html,
9032dc28 11 ExtractorError,
2d19fb50 12 get_element_by_class,
bf4b3b6b 13 int_or_none,
1cc79574 14 orderedSet,
ad1bc71a 15 str_or_none,
8117df4c 16 str_to_int,
60d142aa 17 unescapeHTML,
a7ee8a00 18 unified_timestamp,
3052a30d 19 url_or_none,
6e6bc8da 20 urlencode_postdata,
1cc79574 21)
e3845525 22from .dailymotion import DailymotionIE
3c989818 23from .odnoklassniki import OdnoklassnikiIE
c4737bea 24from .pladform import PladformIE
e3845525 25from .vimeo import VimeoIE
5113b691 26from .youtube import YoutubeIE
60d142aa
JMF
27
28
2d19fb50
S
class VKBaseIE(InfoExtractor):
    """Shared login and AJAX helpers for the VK extractors."""

    _NETRC_MACHINE = 'vk'

    def _login(self):
        """Log in to vk.com with the configured credentials.

        Does nothing when no username is configured.  Raises ExtractorError
        (expected) when the response page contains ``onLoginFailed``.
        """
        username, password = self._get_login_info()
        if username is None:
            return

        login_page, url_handle = self._download_webpage_handle(
            'https://vk.com', None, 'Downloading login page')

        # Start from the hidden inputs of the landing page and add credentials
        login_form = self._hidden_inputs(login_page)
        login_form.update({
            'email': username.encode('cp1251'),
            'pass': password.encode('cp1251'),
        })

        # vk serves two same remixlhk cookies in Set-Cookie header and expects
        # first one to be actually set
        self._apply_first_set_cookie_header(url_handle, 'remixlhk')

        login_page = self._download_webpage(
            'https://vk.com/login', None,
            note='Logging in',
            data=urlencode_postdata(login_form))

        if re.search(r'onLoginFailed', login_page):
            raise ExtractorError(
                'Unable to login, incorrect username and/or password', expected=True)

    def _real_initialize(self):
        self._login()

    def _download_payload(self, path, video_id, data, fatal=True):
        """POST ``data`` to ``https://vk.com/<path>.php`` and return its payload.

        The JSON response is expected to carry a ``payload`` pair of
        ``(code, payload)``.  Code ``'3'`` triggers ``raise_login_required()``,
        code ``'8'`` raises an expected ExtractorError built from the first
        payload item.

        Returns the payload, or None when ``fatal`` is false and the request
        did not yield a response.
        """
        # Work on a copy so the caller's dict is not modified behind its back
        query = dict(data)
        query['al'] = 1
        response = self._download_json(
            'https://vk.com/%s.php' % path, video_id,
            data=urlencode_postdata(query), fatal=fatal,
            headers={'X-Requested-With': 'XMLHttpRequest'})
        if not response:
            # Only reachable with fatal=False: fail softly instead of crashing
            # on the subscript below
            return None
        code, payload = response['payload']
        if code == '3':
            self.raise_login_required()
        elif code == '8':
            raise ExtractorError(clean_html(payload[0][1:-1]), expected=True)
        return payload
74
2d19fb50
S
75
class VKIE(VKBaseIE):
    """Extractor for single VK videos.

    Handles vk.com video/clip pages, ``video_ext.php`` embeds and the
    corresponding daxab.com URLs (see ``_VALID_URL``).
    """

    IE_NAME = 'vk'
    IE_DESC = 'VK'
    # NB: the dots in "daxab\.com" are escaped so that only that host matches
    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            (?:
                                (?:(?:m|new)\.)?vk\.com/video_|
                                (?:www\.)?daxab\.com/
                            )
                            ext\.php\?(?P<embed_query>.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+).*)|
                            (?:
                                (?:(?:m|new)\.)?vk\.com/(?:.+?\?.*?z=)?(?:video|clip)|
                                (?:www\.)?daxab\.com/embed/
                            )
                            (?P<videoid>-?\d+_\d+)(?:.*\blist=(?P<list_id>([\da-f]+)|(ln-[\da-zA-Z]+)))?
                        )
                    '''
    _TESTS = [
        {
            'url': 'http://vk.com/videos-77521?z=video-77521_162222515%2Fclub77521',
            'md5': '7babad3b85ea2e91948005b1b8b0cb84',
            'info_dict': {
                'id': '-77521_162222515',
                'ext': 'mp4',
                'title': 'ProtivoGunz - Хуёвая песня',
                'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*',
                'uploader_id': '-77521',
                'duration': 195,
                'timestamp': 1329049880,
                'upload_date': '20120212',
            },
        },
        {
            'url': 'http://vk.com/video205387401_165548505',
            'info_dict': {
                'id': '205387401_165548505',
                'ext': 'mp4',
                'title': 'No name',
                'uploader': 'Tom Cruise',
                'uploader_id': '205387401',
                'duration': 9,
                'timestamp': 1374364108,
                'upload_date': '20130720',
            }
        },
        {
            'note': 'Embedded video',
            'url': 'https://vk.com/video_ext.php?oid=-77521&id=162222515&hash=87b046504ccd8bfa',
            'md5': '7babad3b85ea2e91948005b1b8b0cb84',
            'info_dict': {
                'id': '-77521_162222515',
                'ext': 'mp4',
                'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*',
                'title': 'ProtivoGunz - Хуёвая песня',
                'duration': 195,
                'upload_date': '20120212',
                'timestamp': 1329049880,
                'uploader_id': '-77521',
            },
        },
        {
            # VIDEO NOW REMOVED
            # please update if you find a video whose URL follows the same pattern
            'url': 'http://vk.com/video-8871596_164049491',
            'md5': 'a590bcaf3d543576c9bd162812387666',
            'note': 'Only available for registered users',
            'info_dict': {
                'id': '-8871596_164049491',
                'ext': 'mp4',
                'uploader': 'Триллеры',
                'title': '► Бойцовский клуб / Fight Club 1999 [HD 720]',
                'duration': 8352,
                'upload_date': '20121218',
                'view_count': int,
            },
            'skip': 'Removed',
        },
        {
            'url': 'http://vk.com/hd_kino_mania?z=video-43215063_168067957%2F15c66b9b533119788d',
            'info_dict': {
                'id': '-43215063_168067957',
                'ext': 'mp4',
                'uploader': 'Bro Mazter',
                'title': ' ',
                'duration': 7291,
                'upload_date': '20140328',
                'uploader_id': '223413403',
                'timestamp': 1396018030,
            },
            'skip': 'Requires vk account credentials',
        },
        {
            'url': 'http://m.vk.com/video-43215063_169084319?list=125c627d1aa1cebb83&from=wall-43215063_2566540',
            'md5': '0c45586baa71b7cb1d0784ee3f4e00a6',
            'note': 'ivi.ru embed',
            'info_dict': {
                'id': '-43215063_169084319',
                'ext': 'mp4',
                'title': 'Книга Илая',
                'duration': 6771,
                'upload_date': '20140626',
                'view_count': int,
            },
            'skip': 'Removed',
        },
        {
            'url': 'https://vk.com/video-93049196_456239755?list=ln-cBjJ7S4jYYx3ADnmDT',
            'info_dict': {
                'id': '-93049196_456239755',
                'ext': 'mp4',
                'title': '8 серия (озвучка)',
                'duration': 8383,
                'upload_date': '20211222',
                'view_count': int,
            },
        },
        {
            # video (removed?) only available with list id
            'url': 'https://vk.com/video30481095_171201961?list=8764ae2d21f14088d4',
            'md5': '091287af5402239a1051c37ec7b92913',
            'info_dict': {
                'id': '30481095_171201961',
                'ext': 'mp4',
                'title': 'ТюменцевВВ_09.07.2015',
                'uploader': 'Anton Ivanov',
                'duration': 109,
                'upload_date': '20150709',
                'view_count': int,
            },
            'skip': 'Removed',
        },
        {
            # youtube embed
            'url': 'https://vk.com/video276849682_170681728',
            'info_dict': {
                'id': 'V3K4mi0SYkc',
                'ext': 'mp4',
                'title': "DSWD Awards 'Children's Joy Foundation, Inc.' Certificate of Registration and License to Operate",
                'description': 'md5:bf9c26cfa4acdfb146362682edd3827a',
                'duration': 178,
                'upload_date': '20130116',
                'uploader': "Children's Joy Foundation Inc.",
                'uploader_id': 'thecjf',
                'view_count': int,
            },
        },
        {
            # dailymotion embed
            'url': 'https://vk.com/video-37468416_456239855',
            'info_dict': {
                'id': 'k3lz2cmXyRuJQSjGHUv',
                'ext': 'mp4',
                'title': 'md5:d52606645c20b0ddbb21655adaa4f56f',
                'description': 'md5:424b8e88cc873217f520e582ba28bb36',
                'uploader': 'AniLibria.Tv',
                'upload_date': '20160914',
                'uploader_id': 'x1p5vl5',
                'timestamp': 1473877246,
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            # video key is extra_data not url\d+
            'url': 'http://vk.com/video-110305615_171782105',
            'md5': 'e13fcda136f99764872e739d13fac1d1',
            'info_dict': {
                'id': '-110305615_171782105',
                'ext': 'mp4',
                'title': 'S-Dance, репетиции к The way show',
                'uploader': 'THE WAY SHOW | 17 апреля',
                'uploader_id': '-110305615',
                'timestamp': 1454859345,
                'upload_date': '20160207',
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            # finished live stream, postlive_mp4
            'url': 'https://vk.com/videos-387766?z=video-387766_456242764%2Fpl_-387766_-2',
            'info_dict': {
                'id': '-387766_456242764',
                'ext': 'mp4',
                'title': 'ИгроМир 2016 День 1 — Игромания Утром',
                'uploader': 'Игромания',
                'duration': 5239,
                # TODO: use act=show to extract view_count
                # 'view_count': int,
                'upload_date': '20160929',
                'uploader_id': '-387766',
                'timestamp': 1475137527,
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            # live stream, hls and rtmp links, most likely already finished live
            # stream by the time you are reading this comment
            'url': 'https://vk.com/video-140332_456239111',
            'only_matching': True,
        },
        {
            # removed video, just testing that we match the pattern
            'url': 'http://vk.com/feed?z=video-43215063_166094326%2Fbb50cacd3177146d7a',
            'only_matching': True,
        },
        {
            # age restricted video, requires vk account credentials
            'url': 'https://vk.com/video205387401_164765225',
            'only_matching': True,
        },
        {
            # pladform embed
            'url': 'https://vk.com/video-76116461_171554880',
            'only_matching': True,
        },
        {
            'url': 'http://new.vk.com/video205387401_165548505',
            'only_matching': True,
        },
        {
            # This video is no longer available, because its author has been blocked.
            'url': 'https://vk.com/video-10639516_456240611',
            'only_matching': True,
        },
        {
            # The video is not available in your region.
            'url': 'https://vk.com/video-51812607_171445436',
            'only_matching': True,
        },
        {
            'url': 'https://vk.com/clip30014565_456240946',
            'only_matching': True,
        }]

    @staticmethod
    def _extract_sibnet_urls(webpage):
        """Return the URLs of all video.sibnet.ru iframes found in ``webpage``."""
        # https://help.sibnet.ru/?sibnet_video_embed
        return [unescapeHTML(mobj.group('url')) for mobj in re.finditer(
            r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//video\.sibnet\.ru/shell\.php\?.*?\bvideoid=\d+.*?)\1',
            webpage)]

    def _real_extract(self, url):
        """Extract a single video.

        Regular video/clip URLs are resolved through the ``al_video`` AJAX
        endpoint; ``video_ext.php`` embed URLs are resolved by downloading the
        embed page and scanning it for known error messages.  If the page
        embeds a third-party player, extraction is delegated to it via a URL
        result; otherwise an info dict with the formats found in the player
        parameters is returned.
        """
        mobj = self._match_valid_url(url)
        video_id = mobj.group('videoid')

        mv_data = {}
        if video_id:
            data = {
                'act': 'show_inline',
                'video': video_id,
            }
            # Some videos (removed?) can only be downloaded with list id specified
            list_id = mobj.group('list_id')
            if list_id:
                data['list'] = list_id

            payload = self._download_payload('al_video', video_id, data)
            info_page = payload[1]
            opts = payload[-1]
            mv_data = opts.get('mvData') or {}
            player = opts.get('player') or {}
        else:
            # Embed URL: the id is assembled from the oid/id query parameters
            video_id = '%s_%s' % (mobj.group('oid'), mobj.group('id'))

            info_page = self._download_webpage(
                'http://vk.com/video_ext.php?' + mobj.group('embed_query'), video_id)

            error_message = self._html_search_regex(
                [r'(?s)<!><div[^>]+class="video_layer_message"[^>]*>(.+?)</div>',
                 r'(?s)<div[^>]+id="video_ext_msg"[^>]*>(.+?)</div>'],
                info_page, 'error message', default=None)
            if error_message:
                raise ExtractorError(error_message, expected=True)

            if re.search(r'<!>/login\.php\?.*\bact=security_check', info_page):
                raise ExtractorError(
                    'You are trying to log in from an unusual location. You should confirm ownership at vk.com to log in with this IP.',
                    expected=True)

            ERROR_COPYRIGHT = 'Video %s has been removed from public access due to rightholder complaint.'

            # Page pattern -> message template (formatted with the video id)
            ERRORS = {
                r'>Видеозапись .*? была изъята из публичного доступа в связи с обращением правообладателя.<':
                ERROR_COPYRIGHT,

                r'>The video .*? was removed from public access by request of the copyright holder.<':
                ERROR_COPYRIGHT,

                r'<!>Please log in or <':
                'Video %s is only available for registered users, '
                'use --username and --password options to provide account credentials.',

                r'<!>Unknown error':
                'Video %s does not exist.',

                r'<!>Видео временно недоступно':
                'Video %s is temporarily unavailable.',

                r'<!>Access denied':
                'Access denied to video %s.',

                r'<!>Видеозапись недоступна, так как её автор был заблокирован.':
                'Video %s is no longer available, because its author has been blocked.',

                r'<!>This video is no longer available, because its author has been blocked.':
                'Video %s is no longer available, because its author has been blocked.',

                r'<!>This video is no longer available, because it has been deleted.':
                'Video %s is no longer available, because it has been deleted.',

                r'<!>The video .+? is not available in your region.':
                'Video %s is not available in your region.',
            }

            for error_re, error_msg in ERRORS.items():
                if re.search(error_re, info_page):
                    raise ExtractorError(error_msg % video_id, expected=True)

            player = self._parse_json(self._search_regex(
                r'var\s+playerParams\s*=\s*({.+?})\s*;\s*\n',
                info_page, 'player params'), video_id)

        # Third-party players embedded in the page are delegated to their own
        # extractors; the first one found wins.
        youtube_url = YoutubeIE._extract_url(info_page)
        if youtube_url:
            return self.url_result(youtube_url, YoutubeIE.ie_key())

        vimeo_url = VimeoIE._extract_url(url, info_page)
        if vimeo_url is not None:
            return self.url_result(vimeo_url, VimeoIE.ie_key())

        pladform_url = PladformIE._extract_url(info_page)
        if pladform_url:
            return self.url_result(pladform_url, PladformIE.ie_key())

        m_rutube = re.search(
            r'\ssrc="((?:https?:)?//rutube\.ru\\?/(?:video|play)\\?/embed(?:.*?))\\?"', info_page)
        if m_rutube is not None:
            # The URL may be backslash-escaped in the page source
            rutube_url = self._proto_relative_url(
                m_rutube.group(1).replace('\\', ''))
            return self.url_result(rutube_url)

        dailymotion_urls = DailymotionIE._extract_urls(info_page)
        if dailymotion_urls:
            return self.url_result(dailymotion_urls[0], DailymotionIE.ie_key())

        odnoklassniki_url = OdnoklassnikiIE._extract_url(info_page)
        if odnoklassniki_url:
            return self.url_result(odnoklassniki_url, OdnoklassnikiIE.ie_key())

        sibnet_urls = self._extract_sibnet_urls(info_page)
        if sibnet_urls:
            return self.url_result(sibnet_urls[0])

        # Generic fallback: an external url inside a "var opts = {...}" object
        m_opts = re.search(r'(?s)var\s+opts\s*=\s*({.+?});', info_page)
        if m_opts:
            m_opts_url = re.search(r"url\s*:\s*'((?!/\b)[^']+)", m_opts.group(1))
            if m_opts_url:
                opts_url = m_opts_url.group(1)
                if opts_url.startswith('//'):
                    opts_url = 'http:' + opts_url
                return self.url_result(opts_url)

        data = player['params'][0]
        title = unescapeHTML(data['md_title'])

        # 2 = live
        # 3 = post live (finished live)
        is_live = data.get('live') == 2

        timestamp = unified_timestamp(self._html_search_regex(
            r'class=["\']mv_info_date[^>]+>([^<]+)(?:<|from)', info_page,
            'upload date', default=None)) or int_or_none(data.get('date'))

        view_count = str_to_int(self._search_regex(
            r'class=["\']mv_views_count[^>]+>\s*([\d,.]+)',
            info_page, 'view count', default=None))

        formats = []
        for format_id, format_url in data.items():
            format_url = url_or_none(format_url)
            if not format_url or not format_url.startswith(('http', '//', 'rtmp')):
                continue
            if (format_id.startswith(('url', 'cache'))
                    or format_id in ('extra_data', 'live_mp4', 'postlive_mp4')):
                # Keys such as "url720"/"cache720" carry the height as a suffix
                height = int_or_none(self._search_regex(
                    r'^(?:url|cache)(\d+)', format_id, 'height', default=None))
                formats.append({
                    'format_id': format_id,
                    'url': format_url,
                    'height': height,
                })
            elif format_id == 'hls':
                formats.extend(self._extract_m3u8_formats(
                    format_url, video_id, 'mp4', 'm3u8_native',
                    m3u8_id=format_id, fatal=False, live=is_live))
            elif format_id == 'rtmp':
                formats.append({
                    'format_id': format_id,
                    'url': format_url,
                    'ext': 'flv',
                })
        self._sort_formats(formats)

        subtitles = {}
        for sub in data.get('subs') or []:
            sub_url = url_or_none(sub.get('url'))
            if not sub_url:
                # A subtitle entry without a usable URL cannot be downloaded
                continue
            # "or" (rather than a .get default) also covers keys that are
            # present but null
            subtitles.setdefault(sub.get('lang') or 'en', []).append({
                'ext': (sub.get('title') or '.srt').split('.')[-1],
                'url': sub_url,
            })

        return {
            'id': video_id,
            'formats': formats,
            'title': title,
            'thumbnail': data.get('jpg'),
            'uploader': data.get('md_author'),
            'uploader_id': str_or_none(data.get('author_id') or mv_data.get('authorId')),
            'duration': int_or_none(data.get('duration') or mv_data.get('duration')),
            'timestamp': timestamp,
            'view_count': view_count,
            'like_count': int_or_none(mv_data.get('likes')),
            'comment_count': int_or_none(mv_data.get('commcount')),
            'is_live': is_live,
            'subtitles': subtitles,
        }
469d4c89
WS
507
508
class VKUserVideosIE(VKBaseIE):
    """Playlist extractor for the videos listed on a ``vk.com/video/@<name>`` page."""

    IE_NAME = 'vk:uservideos'
    IE_DESC = "VK - User's Videos"
    _VALID_URL = r'https?://(?:(?:m|new)\.)?vk\.com/video/@(?P<id>[^?$#/&]+)(?!\?.*\bz=video)(?:[/?#&](?:.*?\bsection=(?P<section>\w+))?|$)'
    _TEMPLATE_URL = 'https://vk.com/videos'
    _TESTS = [{
        'url': 'https://vk.com/video/@mobidevices',
        'info_dict': {
            'id': '-17892518_all',
        },
        'playlist_mincount': 1355,
    }, {
        'url': 'https://vk.com/video/@mobidevices?section=uploaded',
        'info_dict': {
            'id': '-17892518_uploaded',
        },
        'playlist_mincount': 182,
    }]
    # Names for the first two fields of each item in a video list
    _VIDEO = collections.namedtuple('Video', ['owner_id', 'id'])

    def _load_videos_page(self, page_id, section, offset):
        """Download one page of the video list of ``section`` starting at ``offset``."""
        return self._download_payload('al_video', page_id, {
            'act': 'load_videos_silent',
            'offset': offset,
            'oid': page_id,
            'section': section,
        })[0][section]

    def _entries(self, page_id, section):
        """Yield a URL result for every video of ``section``, page by page.

        Paging stops once the accumulated ``count`` values reach the ``total``
        reported by the first page, or as soon as a page reports no items.
        """
        offset = 0
        total = None
        while True:
            video_list_json = self._load_videos_page(page_id, section, offset)
            if total is None:
                # The total is taken from the first page only
                total = video_list_json['total']

            for video in video_list_json['list']:
                v = self._VIDEO._make(video[:2])
                video_id = '%d_%d' % (v.owner_id, v.id)
                yield self.url_result(
                    'http://vk.com/video' + video_id, VKIE.ie_key(), video_id)

            page_count = video_list_json['count']
            offset += page_count
            # A page that makes no progress would otherwise be requested
            # forever with the same offset
            if offset >= total or not page_count:
                break

    def _real_extract(self, url):
        """Resolve the owner id from the page and return the section playlist."""
        u_id, section = self._match_valid_url(url).groups()
        webpage = self._download_webpage(url, u_id)
        page_id = self._search_regex(r'data-owner-id\s?=\s?"([^"]+)"', webpage, 'page_id')
        if not section:
            section = 'all'

        return self.playlist_result(self._entries(page_id, section), '%s_%s' % (page_id, section))
2d19fb50
S
565
566
class VKWallPostIE(VKBaseIE):
    """Playlist extractor for the audio tracks and videos attached to a wall post."""

    IE_NAME = 'vk:wallpost'
    _VALID_URL = r'https?://(?:(?:(?:(?:m|new)\.)?vk\.com/(?:[^?]+\?.*\bw=)?wall(?P<id>-?\d+_\d+)))'
    _TESTS = [{
        # public page URL, audio playlist
        'url': 'https://vk.com/bs.official?w=wall-23538238_35',
        'info_dict': {
            'id': '-23538238_35',
            'title': 'Black Shadow - Wall post -23538238_35',
            'description': 'md5:3f84b9c4f9ef499731cf1ced9998cc0c',
        },
        'playlist': [{
            'md5': '5ba93864ec5b85f7ce19a9af4af080f6',
            'info_dict': {
                'id': '135220665_111806521',
                'ext': 'mp4',
                'title': 'Black Shadow - Слепое Верование',
                'duration': 370,
                'uploader': 'Black Shadow',
                'artist': 'Black Shadow',
                'track': 'Слепое Верование',
            },
        }, {
            'md5': '4cc7e804579122b17ea95af7834c9233',
            'info_dict': {
                'id': '135220665_111802303',
                'ext': 'mp4',
                'title': 'Black Shadow - Война - Негасимое Бездны Пламя!',
                'duration': 423,
                'uploader': 'Black Shadow',
                'artist': 'Black Shadow',
                'track': 'Война - Негасимое Бездны Пламя!',
            },
        }],
        'params': {
            'skip_download': True,
            'usenetrc': True,
        },
        'skip': 'Requires vk account credentials',
    }, {
        # single YouTube embed, no leading -
        'url': 'https://vk.com/wall85155021_6319',
        'info_dict': {
            'id': '85155021_6319',
            'title': 'Сергей Горбунов - Wall post 85155021_6319',
        },
        'playlist_count': 1,
        'params': {
            'usenetrc': True,
        },
        'skip': 'Requires vk account credentials',
    }, {
        # wall page URL
        'url': 'https://vk.com/wall-23538238_35',
        'only_matching': True,
    }, {
        # mobile wall page URL
        'url': 'https://m.vk.com/wall-23538238_35',
        'only_matching': True,
    }]
    # Alphabet used by _decode.  Note that '0' and 'O' are swapped compared to
    # the standard base64 alphabet; presumably this mirrors the site's own
    # table, so do not "correct" it without verifying.
    _BASE64_CHARS = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMN0PQRSTUVWXYZO123456789+/='
    # Field names for the first 16 items of a data-audio JSON array
    _AUDIO = collections.namedtuple('Audio', ['id', 'owner_id', 'url', 'title', 'performer', 'duration', 'album_id', 'unk', 'author_link', 'lyrics', 'flags', 'context', 'extra', 'hashes', 'cover_url', 'ads'])

    def _decode(self, enc):
        """Decode ``enc`` using the ``_BASE64_CHARS`` alphabet.

        Every character is mapped to its index in the alphabet; within each
        group of four characters, every character but the first produces one
        output character.  A character outside the alphabet raises ValueError.
        """
        decoded = []
        acc = 0
        for pos, symbol in enumerate(enc):
            value = self._BASE64_CHARS.index(symbol)
            if pos % 4:
                acc = 64 * acc + value
                # The shift amount depends on the 1-based position
                decoded.append(chr(255 & acc >> (-2 * (pos + 1) & 6)))
            else:
                # First character of a group restarts the accumulator
                acc = value
        return ''.join(decoded)

    def _unmask_url(self, mask_url, vk_id):
        """Return the real URL behind ``mask_url``.

        URLs that do not contain ``audio_api_unavailable`` are returned as is.
        Otherwise the ``?extra=`` value is split on ``#``: the first part holds
        the encoded URL characters and the second a function name and a base
        number (separated by ``chr(11)``).  The characters are then permuted
        using indexes derived from the base number and ``vk_id``.
        """
        if 'audio_api_unavailable' not in mask_url:
            return mask_url

        encoded_url, encoded_params = mask_url.split('?extra=')[1].split('#')[:2]
        # The function name is not used; the unpacking still requires exactly
        # two chr(11)-separated fields
        _func, base = self._decode(encoded_params).split(chr(11))
        chars = list(self._decode(encoded_url))
        url_len = len(chars)

        # Build the swap table back to front
        indexes = [None] * url_len
        index = int(base) ^ vk_id
        for n in reversed(range(url_len)):
            index = (url_len * (n + 1) ^ index + n) % url_len
            indexes[n] = index

        for n in range(1, url_len):
            target = indexes[url_len - 1 - n]
            chars[n], chars[target] = chars[target], chars[n]
        return ''.join(chars)

    def _real_extract(self, url):
        """Return a playlist of the audio tracks and videos of a wall post."""
        post_id = self._match_id(url)

        payload = self._download_payload('wkview', post_id, {
            'act': 'show',
            'w': 'wall' + post_id,
        })
        webpage = payload[1]

        description = clean_html(get_element_by_class('wall_post_text', webpage))
        uploader = clean_html(get_element_by_class('author', webpage))

        entries = []

        # Audio tracks are serialized as JSON arrays in data-audio attributes
        for raw_audio in re.findall(r'data-audio="([^"]+)', webpage):
            fields = self._parse_json(unescapeHTML(raw_audio), post_id)
            a = self._AUDIO._make(fields[:16])
            if not a.url:
                continue
            track = unescapeHTML(a.title)
            performer = unescapeHTML(a.performer)
            if performer:
                audio_title = '%s - %s' % (performer, track)
            else:
                audio_title = track
            if a.cover_url:
                thumbnails = [{'url': c_url} for c_url in a.cover_url.split(',')]
            else:
                thumbnails = None
            entries.append({
                'id': '%s_%s' % (a.owner_id, a.id),
                'url': self._unmask_url(a.url, a.ads['vk_id']),
                'title': audio_title,
                'thumbnails': thumbnails,
                'duration': int_or_none(a.duration),
                'uploader': uploader,
                'artist': performer,
                'track': track,
                'ext': 'mp4',
                'protocol': 'm3u8_native',
            })

        # Linked videos are handed over to the single-video extractor
        entries.extend(
            self.url_result(
                compat_urlparse.urljoin(url, video.group('url')), VKIE.ie_key())
            for video in re.finditer(
                r'<a[^>]+href=(["\'])(?P<url>/video(?:-?[\d_]+).*?)\1', webpage))

        title = 'Wall post %s' % post_id
        if uploader:
            playlist_title = '%s - %s' % (uploader, title)
        else:
            playlist_title = title

        return self.playlist_result(
            orderedSet(entries), post_id, playlist_title, description)