4 from .common
import InfoExtractor
5 from ..compat
import compat_urlparse
19 from .dailymotion
import DailymotionIE
20 from .odnoklassniki
import OdnoklassnikiIE
21 from .pladform
import PladformIE
22 from .vimeo
import VimeoIE
23 from .youtube
import YoutubeIE
26 class VKBaseIE(InfoExtractor
):
29 def _perform_login(self
, username
, password
):
30 login_page
, url_handle
= self
._download
_webpage
_handle
(
31 'https://vk.com', None, 'Downloading login page')
33 login_form
= self
._hidden
_inputs
(login_page
)
36 'email': username
.encode('cp1251'),
37 'pass': password
.encode('cp1251'),
40 # vk serves two same remixlhk cookies in Set-Cookie header and expects
41 # first one to be actually set
42 self
._apply
_first
_set
_cookie
_header
(url_handle
, 'remixlhk')
44 login_page
= self
._download
_webpage
(
45 'https://vk.com/login', None,
47 data
=urlencode_postdata(login_form
))
49 if re
.search(r
'onLoginFailed', login_page
):
51 'Unable to login, incorrect username and/or password', expected
=True)
53 def _download_payload(self
, path
, video_id
, data
, fatal
=True):
55 code
, payload
= self
._download
_json
(
56 'https://vk.com/%s.php' % path
, video_id
,
57 data
=urlencode_postdata(data
), fatal
=fatal
,
58 headers
={'X-Requested-With': 'XMLHttpRequest'}
)['payload']
60 self
.raise_login_required()
62 raise ExtractorError(clean_html(payload
[0][1:-1]), expected
=True)
73 (?:(?:m|new)\.)?vk\.com/video_|
76 ext\.php\?(?P<embed_query>.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+).*)|
78 (?:(?:m|new)\.)?vk\.com/(?:.+?\?.*?z=)?(?:video|clip)|
79 (?:www\.)?daxab.com/embed/
81 (?P<videoid>-?\d+_\d+)(?:.*\blist=(?P<list_id>([\da-f]+)|(ln-[\da-zA-Z]+)))?
86 'url': 'http://vk.com/videos-77521?z=video-77521_162222515%2Fclub77521',
87 'md5': '7babad3b85ea2e91948005b1b8b0cb84',
89 'id': '-77521_162222515',
91 'title': 'ProtivoGunz - Хуёвая песня',
92 'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*',
93 'uploader_id': '-77521',
95 'timestamp': 1329049880,
96 'upload_date': '20120212',
100 'url': 'http://vk.com/video205387401_165548505',
102 'id': '205387401_165548505',
105 'uploader': 'Tom Cruise',
106 'uploader_id': '205387401',
108 'timestamp': 1374364108,
109 'upload_date': '20130720',
113 'note': 'Embedded video',
114 'url': 'https://vk.com/video_ext.php?oid=-77521&id=162222515&hash=87b046504ccd8bfa',
115 'md5': '7babad3b85ea2e91948005b1b8b0cb84',
117 'id': '-77521_162222515',
119 'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*',
120 'title': 'ProtivoGunz - Хуёвая песня',
122 'upload_date': '20120212',
123 'timestamp': 1329049880,
124 'uploader_id': '-77521',
129 # please update if you find a video whose URL follows the same pattern
130 'url': 'http://vk.com/video-8871596_164049491',
131 'md5': 'a590bcaf3d543576c9bd162812387666',
132 'note': 'Only available for registered users',
134 'id': '-8871596_164049491',
136 'uploader': 'Триллеры',
137 'title': '► Бойцовский клуб / Fight Club 1999 [HD 720]',
139 'upload_date': '20121218',
145 'url': 'http://vk.com/hd_kino_mania?z=video-43215063_168067957%2F15c66b9b533119788d',
147 'id': '-43215063_168067957',
149 'uploader': 'Bro Mazter',
152 'upload_date': '20140328',
153 'uploader_id': '223413403',
154 'timestamp': 1396018030,
156 'skip': 'Requires vk account credentials',
159 'url': 'http://m.vk.com/video-43215063_169084319?list=125c627d1aa1cebb83&from=wall-43215063_2566540',
160 'md5': '0c45586baa71b7cb1d0784ee3f4e00a6',
161 'note': 'ivi.ru embed',
163 'id': '-43215063_169084319',
165 'title': 'Книга Илая',
167 'upload_date': '20140626',
173 'url': 'https://vk.com/video-93049196_456239755?list=ln-cBjJ7S4jYYx3ADnmDT',
175 'id': '-93049196_456239755',
177 'title': '8 серия (озвучка)',
179 'upload_date': '20211222',
184 # video (removed?) only available with list id
185 'url': 'https://vk.com/video30481095_171201961?list=8764ae2d21f14088d4',
186 'md5': '091287af5402239a1051c37ec7b92913',
188 'id': '30481095_171201961',
190 'title': 'ТюменцевВВ_09.07.2015',
191 'uploader': 'Anton Ivanov',
193 'upload_date': '20150709',
200 'url': 'https://vk.com/video276849682_170681728',
204 'title': "DSWD Awards 'Children's Joy Foundation, Inc.' Certificate of Registration and License to Operate",
205 'description': 'md5:bf9c26cfa4acdfb146362682edd3827a',
207 'upload_date': '20130116',
208 'uploader': "Children's Joy Foundation Inc.",
209 'uploader_id': 'thecjf',
215 'url': 'https://vk.com/video-37468416_456239855',
217 'id': 'k3lz2cmXyRuJQSjGHUv',
219 'title': 'md5:d52606645c20b0ddbb21655adaa4f56f',
220 'description': 'md5:424b8e88cc873217f520e582ba28bb36',
221 'uploader': 'AniLibria.Tv',
222 'upload_date': '20160914',
223 'uploader_id': 'x1p5vl5',
224 'timestamp': 1473877246,
227 'skip_download': True,
231 # video key is extra_data not url\d+
232 'url': 'http://vk.com/video-110305615_171782105',
233 'md5': 'e13fcda136f99764872e739d13fac1d1',
235 'id': '-110305615_171782105',
237 'title': 'S-Dance, репетиции к The way show',
238 'uploader': 'THE WAY SHOW | 17 апреля',
239 'uploader_id': '-110305615',
240 'timestamp': 1454859345,
241 'upload_date': '20160207',
244 'skip_download': True,
248 # finished live stream, postlive_mp4
249 'url': 'https://vk.com/videos-387766?z=video-387766_456242764%2Fpl_-387766_-2',
251 'id': '-387766_456242764',
253 'title': 'ИгроМир 2016 День 1 — Игромания Утром',
254 'uploader': 'Игромания',
256 # TODO: use act=show to extract view_count
258 'upload_date': '20160929',
259 'uploader_id': '-387766',
260 'timestamp': 1475137527,
263 'skip_download': True,
267 # live stream, hls and rtmp links, most likely already finished live
268 # stream by the time you are reading this comment
269 'url': 'https://vk.com/video-140332_456239111',
270 'only_matching': True,
273 # removed video, just testing that we match the pattern
274 'url': 'http://vk.com/feed?z=video-43215063_166094326%2Fbb50cacd3177146d7a',
275 'only_matching': True,
278 # age restricted video, requires vk account credentials
279 'url': 'https://vk.com/video205387401_164765225',
280 'only_matching': True,
284 'url': 'https://vk.com/video-76116461_171554880',
285 'only_matching': True,
288 'url': 'http://new.vk.com/video205387401_165548505',
289 'only_matching': True,
292 # This video is no longer available, because its author has been blocked.
293 'url': 'https://vk.com/video-10639516_456240611',
294 'only_matching': True,
297 # The video is not available in your region.
298 'url': 'https://vk.com/video-51812607_171445436',
299 'only_matching': True,
302 'url': 'https://vk.com/clip30014565_456240946',
303 'only_matching': True,
307 def _extract_sibnet_urls(webpage
):
308 # https://help.sibnet.ru/?sibnet_video_embed
309 return [unescapeHTML(mobj
.group('url')) for mobj
in re
.finditer(
310 r
'<iframe\b[^>]+\bsrc=(["\'])(?P
<url
>(?
:https?
:)?
//video\
.sibnet\
.ru
/shell\
.php
\?.*?
\bvideoid
=\d
+.*?
)\
1',
313 def _real_extract(self, url):
314 mobj = self._match_valid_url(url)
315 video_id = mobj.group('videoid
')
320 'act
': 'show_inline
',
323 # Some videos (removed?) can only be downloaded with list id specified
324 list_id = mobj.group('list_id
')
326 data['list'] = list_id
328 payload = self._download_payload('al_video
', video_id, data)
329 info_page = payload[1]
331 mv_data = opts.get('mvData
') or {}
332 player = opts.get('player
') or {}
334 video_id = '%s_%s' % (mobj.group('oid
'), mobj.group('id'))
336 info_page = self._download_webpage(
337 'http
://vk
.com
/video_ext
.php?
' + mobj.group('embed_query
'), video_id)
339 error_message = self._html_search_regex(
340 [r'(?s
)<!><div
[^
>]+class="video_layer_message"[^
>]*>(.+?
)</div
>',
341 r'(?s
)<div
[^
>]+id="video_ext_msg"[^
>]*>(.+?
)</div
>'],
342 info_page, 'error message
', default=None)
344 raise ExtractorError(error_message, expected=True)
346 if re.search(r'<!>/login\
.php
\?.*\bact
=security_check
', info_page):
347 raise ExtractorError(
348 'You are trying to log
in from an unusual location
. You should confirm ownership at vk
.com to log
in with this IP
.',
351 ERROR_COPYRIGHT = 'Video
%s has been removed
from public access due to rightholder complaint
.'
354 r'>Видеозапись
.*? была изъята из публичного доступа в связи с обращением правообладателя
.<':
357 r'>The video
.*? was removed
from public access by request of the copyright holder
.<':
360 r'<!>Please log
in or <':
361 'Video
%s is only available
for registered users
, '
362 'use
--username
and --password options to provide account credentials
.',
365 'Video
%s does
not exist
.',
367 r'<!>Видео временно недоступно
':
368 'Video
%s is temporarily unavailable
.',
371 'Access denied to video
%s.',
373 r'<!>Видеозапись недоступна
, так как её автор был заблокирован
.':
374 'Video
%s is no longer available
, because its author has been blocked
.',
376 r'<!>This video
is no longer available
, because its author has been blocked
.':
377 'Video
%s is no longer available
, because its author has been blocked
.',
379 r'<!>This video
is no longer available
, because it has been deleted
.':
380 'Video
%s is no longer available
, because it has been deleted
.',
382 r'<!>The video
.+?
is not available
in your region
.':
383 'Video
%s is not available
in your region
.',
386 for error_re, error_msg in ERRORS.items():
387 if re.search(error_re, info_page):
388 raise ExtractorError(error_msg % video_id, expected=True)
390 player = self._parse_json(self._search_regex(
391 r'var\s
+playerParams\s
*=\s
*({.+?}
)\s
*;\s
*\n',
392 info_page, 'player params
'), video_id)
394 youtube_url = YoutubeIE._extract_url(info_page)
396 return self.url_result(youtube_url, YoutubeIE.ie_key())
398 vimeo_url = VimeoIE._extract_url(url, info_page)
399 if vimeo_url is not None:
400 return self.url_result(vimeo_url, VimeoIE.ie_key())
402 pladform_url = PladformIE._extract_url(info_page)
404 return self.url_result(pladform_url, PladformIE.ie_key())
406 m_rutube = re.search(
407 r'\ssrc
="((?:https?:)?//rutube\.ru\\?/(?:video|play)\\?/embed(?:.*?))\\?"', info_page)
408 if m_rutube is not None:
409 rutube_url = self._proto_relative_url(
410 m_rutube.group(1).replace('\\', ''))
411 return self.url_result(rutube_url)
413 dailymotion_urls = DailymotionIE._extract_urls(info_page)
415 return self.url_result(dailymotion_urls[0], DailymotionIE.ie_key())
417 odnoklassniki_url = OdnoklassnikiIE._extract_url(info_page)
418 if odnoklassniki_url:
419 return self.url_result(odnoklassniki_url, OdnoklassnikiIE.ie_key())
421 sibnet_urls = self._extract_sibnet_urls(info_page)
423 return self.url_result(sibnet_urls[0])
425 m_opts = re.search(r'(?s
)var\s
+opts\s
*=\s
*({.+?}
);', info_page)
427 m_opts_url = re.search(r"url\s*:\s*'((?
!/\b)[^
']+)", m_opts.group(1))
429 opts_url = m_opts_url.group(1)
430 if opts_url.startswith('//'):
431 opts_url = 'http
:' + opts_url
432 return self.url_result(opts_url)
434 data = player['params
'][0]
435 title = unescapeHTML(data['md_title
'])
438 # 3 = post live (finished live)
439 is_live = data.get('live
') == 2
441 timestamp = unified_timestamp(self._html_search_regex(
442 r'class=["\']mv_info_date[^>]+>([^<]+)(?:<|from)', info_page,
443 'upload date', default=None)) or int_or_none(data.get('date'))
445 view_count = str_to_int(self._search_regex(
446 r'class=["\']mv_views_count
[^
>]+>\s
*([\d
,.]+)',
447 info_page, 'view count
', default=None))
450 for format_id, format_url in data.items():
451 format_url = url_or_none(format_url)
452 if not format_url or not format_url.startswith(('http
', '//', 'rtmp
')):
454 if (format_id.startswith(('url
', 'cache
'))
455 or format_id in ('extra_data
', 'live_mp4
', 'postlive_mp4
')):
456 height = int_or_none(self._search_regex(
457 r'^
(?
:url|cache
)(\d
+)', format_id, 'height
', default=None))
459 'format_id
': format_id,
463 elif format_id == 'hls
':
464 formats.extend(self._extract_m3u8_formats(
465 format_url, video_id, 'mp4
', 'm3u8_native
',
466 m3u8_id=format_id, fatal=False, live=is_live))
467 elif format_id == 'rtmp
':
469 'format_id
': format_id,
473 self._sort_formats(formats)
476 for sub in data.get('subs
') or {}:
477 subtitles.setdefault(sub.get('lang
', 'en
'), []).append({
478 'ext
': sub.get('title
', '.srt
').split('.')[-1],
479 'url
': url_or_none(sub.get('url
')),
486 'thumbnail
': data.get('jpg
'),
487 'uploader
': data.get('md_author
'),
488 'uploader_id
': str_or_none(data.get('author_id
') or mv_data.get('authorId
')),
489 'duration
': int_or_none(data.get('duration
') or mv_data.get('duration
')),
490 'timestamp
': timestamp,
491 'view_count
': view_count,
492 'like_count
': int_or_none(mv_data.get('likes
')),
493 'comment_count
': int_or_none(mv_data.get('commcount
')),
495 'subtitles
': subtitles,
499 class VKUserVideosIE(VKBaseIE):
500 IE_NAME = 'vk
:uservideos
'
501 IE_DESC = "VK - User's Videos
"
502 _VALID_URL = r'https?://(?:(?:m|new)\.)?vk\.com/video/@(?P<id>[^?$#/&]+)(?!\?.*\bz=video)(?:[/?#&](?:.*?\bsection=(?P<section>\w+))?|$)'
503 _TEMPLATE_URL = 'https://vk.com/videos'
505 'url': 'https://vk.com/video/@mobidevices',
507 'id': '-17892518_all',
509 'playlist_mincount': 1355,
511 'url': 'https://vk.com/video/@mobidevices?section=uploaded',
513 'id': '-17892518_uploaded',
515 'playlist_mincount': 182,
517 _VIDEO = collections.namedtuple('Video', ['owner_id', 'id'])
519 def _entries(self, page_id, section):
520 video_list_json = self._download_payload('al_video', page_id, {
521 'act': 'load_videos_silent',
526 count = video_list_json['count']
527 total = video_list_json['total']
528 video_list = video_list_json['list']
531 for video in video_list:
532 v = self._VIDEO._make(video[:2])
533 video_id = '%d_%d' % (v.owner_id, v.id)
534 yield self.url_result(
535 'http://vk.com/video' + video_id, VKIE.ie_key(), video_id)
538 video_list_json = self._download_payload('al_video', page_id, {
539 'act': 'load_videos_silent',
544 count += video_list_json['count']
545 video_list = video_list_json['list']
547 def _real_extract(self, url):
548 u_id, section = self._match_valid_url(url).groups()
549 webpage = self._download_webpage(url, u_id)
550 page_id = self._search_regex(r'data-owner-id\s?=\s?"([^
"]+)"', webpage, 'page_id
')
554 return self.playlist_result(self._entries(page_id, section), '%s_%s' % (page_id, section))
557 class VKWallPostIE(VKBaseIE):
558 IE_NAME = 'vk
:wallpost
'
559 _VALID_URL = r'https?
://(?
:(?
:(?
:(?
:m|new
)\
.)?vk\
.com
/(?
:[^?
]+\?.*\bw
=)?
wall(?P
<id>-?\d
+_\d
+)))'
561 # public page URL, audio playlist
562 'url
': 'https
://vk
.com
/bs
.official?w
=wall
-23538238_35',
564 'id': '-23538238_35',
565 'title
': 'Black Shadow
- Wall post
-23538238_35',
566 'description
': 'md5
:3f84b9c4f9ef499731cf1ced9998cc0c
',
569 'md5
': '5ba93864ec5b85f7ce19a9af4af080f6
',
571 'id': '135220665_111806521',
573 'title
': 'Black Shadow
- Слепое Верование
',
575 'uploader
': 'Black Shadow
',
576 'artist
': 'Black Shadow
',
577 'track
': 'Слепое Верование
',
580 'md5
': '4cc7e804579122b17ea95af7834c9233
',
582 'id': '135220665_111802303',
584 'title
': 'Black Shadow
- Война
- Негасимое Бездны Пламя
!',
586 'uploader
': 'Black Shadow
',
587 'artist
': 'Black Shadow
',
588 'track
': 'Война
- Негасимое Бездны Пламя
!',
592 'skip_download
': True,
595 'skip
': 'Requires vk account credentials
',
597 # single YouTube embed, no leading -
598 'url
': 'https
://vk
.com
/wall85155021_6319
',
600 'id': '85155021_6319',
601 'title
': 'Сергей Горбунов
- Wall post
85155021_6319',
607 'skip
': 'Requires vk account credentials
',
610 'url
': 'https
://vk
.com
/wall
-23538238_35',
611 'only_matching
': True,
613 # mobile wall page URL
614 'url
': 'https
://m
.vk
.com
/wall
-23538238_35',
615 'only_matching
': True,
617 _BASE64_CHARS = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMN0PQRSTUVWXYZO123456789
+/='
618 _AUDIO = collections.namedtuple('Audio
', ['id', 'owner_id
', 'url
', 'title
', 'performer
', 'duration
', 'album_id
', 'unk
', 'author_link
', 'lyrics
', 'flags
', 'context
', 'extra
', 'hashes
', 'cover_url
', 'ads
'])
620 def _decode(self, enc):
624 r = self._BASE64_CHARS.index(c)
626 e = 64 * e + r if cond else r
629 dec += chr(255 & e >> (-2 * n & 6))
632 def _unmask_url(self, mask_url, vk_id):
633 if 'audio_api_unavailable
' in mask_url:
634 extra = mask_url.split('?extra
=')[1].split('#')
635 func
, base
= self
._decode
(extra
[1]).split(chr(11))
636 mask_url
= list(self
._decode
(extra
[0]))
637 url_len
= len(mask_url
)
638 indexes
= [None] * url_len
639 index
= int(base
) ^ vk_id
640 for n
in range(url_len
- 1, -1, -1):
641 index
= (url_len
* (n
+ 1) ^ index
+ n
) % url_len
643 for n
in range(1, url_len
):
645 index
= indexes
[url_len
- 1 - n
]
646 mask_url
[n
] = mask_url
[index
]
648 mask_url
= ''.join(mask_url
)
651 def _real_extract(self
, url
):
652 post_id
= self
._match
_id
(url
)
654 webpage
= self
._download
_payload
('wkview', post_id
, {
656 'w': 'wall' + post_id
,
659 description
= clean_html(get_element_by_class('wall_post_text', webpage
))
660 uploader
= clean_html(get_element_by_class('author', webpage
))
664 for audio
in re
.findall(r
'data-audio="([^"]+)', webpage
):
665 audio
= self
._parse
_json
(unescapeHTML(audio
), post_id
)
666 a
= self
._AUDIO
._make
(audio
[:16])
669 title
= unescapeHTML(a
.title
)
670 performer
= unescapeHTML(a
.performer
)
672 'id': '%s_%s' % (a
.owner_id
, a
.id),
673 'url': self
._unmask
_url
(a
.url
, a
.ads
['vk_id']),
674 'title': '%s - %s' % (performer
, title
) if performer
else title
,
675 'thumbnails': [{'url': c_url}
for c_url
in a
.cover_url
.split(',')] if a
.cover_url
else None,
676 'duration': int_or_none(a
.duration
),
677 'uploader': uploader
,
681 'protocol': 'm3u8_native',
684 for video
in re
.finditer(
685 r
'<a[^>]+href=(["\'])(?P
<url
>/video(?
:-?
[\d_
]+).*?
)\
1', webpage):
686 entries.append(self.url_result(
687 compat_urlparse.urljoin(url, video.group('url
')), VKIE.ie_key()))
689 title = 'Wall post
%s' % post_id
691 return self.playlist_result(
692 orderedSet(entries), post_id,
693 '%s - %s' % (uploader, title) if uploader else title,