jfr.im git - yt-dlp.git/blame_incremental

... / ...

Commit	Line	Data
	1	import collections
	2	import hashlib
	3	import re
	4
	5	from .common import InfoExtractor
	6	from .dailymotion import DailymotionIE
	7	from .odnoklassniki import OdnoklassnikiIE
	8	from .pladform import PladformIE
	9	from .sibnet import SibnetEmbedIE
	10	from .vimeo import VimeoIE
	11	from .youtube import YoutubeIE
	12	from ..utils import (
	13	ExtractorError,
	14	UserNotLive,
	15	clean_html,
	16	get_element_by_class,
	17	get_element_html_by_id,
	18	int_or_none,
	19	join_nonempty,
	20	parse_resolution,
	21	str_or_none,
	22	str_to_int,
	23	try_call,
	24	unescapeHTML,
	25	unified_timestamp,
	26	update_url_query,
	27	url_or_none,
	28	urlencode_postdata,
	29	urljoin,
	30	traverse_obj,
	31	)
	32
	33
	34	class VKBaseIE(InfoExtractor):
	35	_NETRC_MACHINE = 'vk'
	36
	37	def _download_webpage_handle(self, url_or_request, video_id, args, fatal=True, *kwargs):
	38	response = super()._download_webpage_handle(url_or_request, video_id, args, fatal=fatal, *kwargs)
	39	challenge_url, cookie = response[1].geturl() if response else '', None
	40	if challenge_url.startswith('https://vk.com/429.html?'):
	41	cookie = self._get_cookies(challenge_url).get('hash429')
	42	if not cookie:
	43	return response
	44
	45	hash429 = hashlib.md5(cookie.value.encode('ascii')).hexdigest()
	46	self._request_webpage(
	47	update_url_query(challenge_url, {'key': hash429}), video_id, fatal=fatal,
	48	note='Resolving WAF challenge', errnote='Failed to bypass WAF challenge')
	49	return super()._download_webpage_handle(url_or_request, video_id, args, fatal=True, *kwargs)
	50
	51	def _perform_login(self, username, password):
	52	login_page, url_handle = self._download_webpage_handle(
	53	'https://vk.com', None, 'Downloading login page')
	54
	55	login_form = self._hidden_inputs(login_page)
	56
	57	login_form.update({
	58	'email': username.encode('cp1251'),
	59	'pass': password.encode('cp1251'),
	60	})
	61
	62	# vk serves two same remixlhk cookies in Set-Cookie header and expects
	63	# first one to be actually set
	64	self._apply_first_set_cookie_header(url_handle, 'remixlhk')
	65
	66	login_page = self._download_webpage(
	67	'https://vk.com/login', None,
	68	note='Logging in',
	69	data=urlencode_postdata(login_form))
	70
	71	if re.search(r'onLoginFailed', login_page):
	72	raise ExtractorError(
	73	'Unable to login, incorrect username and/or password', expected=True)
	74
	75	def _download_payload(self, path, video_id, data, fatal=True):
	76	endpoint = f'https://vk.com/{path}.php'
	77	data['al'] = 1
	78	code, payload = self._download_json(
	79	endpoint, video_id, data=urlencode_postdata(data), fatal=fatal,
	80	headers={
	81	'Referer': endpoint,
	82	'X-Requested-With': 'XMLHttpRequest',
	83	})['payload']
	84	if code == '3':
	85	self.raise_login_required()
	86	elif code == '8':
	87	raise ExtractorError(clean_html(payload[0][1:-1]), expected=True)
	88	return payload
	89
	90
	91	class VKIE(VKBaseIE):
	92	IE_NAME = 'vk'
	93	IE_DESC = 'VK'
	94	_EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1']
	95	_VALID_URL = r'''(?x)
	96	https?://
	97	(?:
	98	(?:
	99	(?:(?:m\|new)\.)?vk\.com/video_\|
	100	(?:www\.)?daxab.com/
	101	)
	102	ext\.php\?(?P<embed_query>.?\boid=(?P<oid>-?\d+).?\bid=(?P<id>\d+).*)\|
	103	(?:
	104	(?:(?:m\|new)\.)?vk\.com/(?:.+?\?.*?z=)?(?:video\|clip)\|
	105	(?:www\.)?daxab.com/embed/
	106	)
	107	(?P<videoid>-?\d+_\d+)(?:.*\blist=(?P<list_id>([\da-f]+)\|(ln-[\da-zA-Z]+)))?
	108	)
	109	'''
	110
	111	_TESTS = [
	112	{
	113	'url': 'http://vk.com/videos-77521?z=video-77521_162222515%2Fclub77521',
	114	'info_dict': {
	115	'id': '-77521_162222515',
	116	'ext': 'mp4',
	117	'title': 'ProtivoGunz - Хуёвая песня',
	118	'uploader': 're:(?:Noize MC\|Alexander Ilyashenko).*',
	119	'uploader_id': '39545378',
	120	'duration': 195,
	121	'timestamp': 1329049880,
	122	'upload_date': '20120212',
	123	'comment_count': int,
	124	'like_count': int,
	125	'thumbnail': r're:https?://.+(?:\.jpg\|getVideoPreview.*)$',
	126	},
	127	'params': {'skip_download': 'm3u8'},
	128	},
	129	{
	130	'url': 'http://vk.com/video205387401_165548505',
	131	'info_dict': {
	132	'id': '205387401_165548505',
	133	'ext': 'mp4',
	134	'title': 'No name',
	135	'uploader': 'Tom Cruise',
	136	'uploader_id': '205387401',
	137	'duration': 9,
	138	'timestamp': 1374364108,
	139	'upload_date': '20130720',
	140	'comment_count': int,
	141	'like_count': int,
	142	'thumbnail': r're:https?://.+(?:\.jpg\|getVideoPreview.*)$',
	143	}
	144	},
	145	{
	146	'note': 'Embedded video',
	147	'url': 'https://vk.com/video_ext.php?oid=-77521&id=162222515&hash=87b046504ccd8bfa',
	148	'info_dict': {
	149	'id': '-77521_162222515',
	150	'ext': 'mp4',
	151	'uploader': 're:(?:Noize MC\|Alexander Ilyashenko).*',
	152	'title': 'ProtivoGunz - Хуёвая песня',
	153	'duration': 195,
	154	'upload_date': '20120212',
	155	'timestamp': 1329049880,
	156	'uploader_id': '39545378',
	157	'thumbnail': r're:https?://.+(?:\.jpg\|getVideoPreview.*)$',
	158	},
	159	'params': {'skip_download': 'm3u8'},
	160	},
	161	{
	162	'url': 'https://vk.com/video-93049196_456239755?list=ln-cBjJ7S4jYYx3ADnmDT',
	163	'info_dict': {
	164	'id': '-93049196_456239755',
	165	'ext': 'mp4',
	166	'title': '8 серия (озвучка)',
	167	'duration': 8383,
	168	'comment_count': int,
	169	'uploader': 'Dizi2021',
	170	'like_count': int,
	171	'timestamp': 1640162189,
	172	'upload_date': '20211222',
	173	'uploader_id': '-93049196',
	174	'thumbnail': r're:https?://.+(?:\.jpg\|getVideoPreview.*)$',
	175	},
	176	},
	177	{
	178	'note': 'youtube embed',
	179	'url': 'https://vk.com/video276849682_170681728',
	180	'info_dict': {
	181	'id': 'V3K4mi0SYkc',
	182	'ext': 'mp4',
	183	'title': "DSWD Awards 'Children's Joy Foundation, Inc.' Certificate of Registration and License to Operate",
	184	'description': 'md5:bf9c26cfa4acdfb146362682edd3827a',
	185	'duration': 178,
	186	'upload_date': '20130117',
	187	'uploader': "Children's Joy Foundation Inc.",
	188	'uploader_id': 'thecjf',
	189	'view_count': int,
	190	'channel_id': 'UCgzCNQ11TmR9V97ECnhi3gw',
	191	'availability': 'public',
	192	'like_count': int,
	193	'live_status': 'not_live',
	194	'playable_in_embed': True,
	195	'channel': 'Children\'s Joy Foundation Inc.',
	196	'uploader_url': 'http://www.youtube.com/user/thecjf',
	197	'thumbnail': r're:https?://.+\.jpg$',
	198	'tags': 'count:27',
	199	'start_time': 0.0,
	200	'categories': ['Nonprofits & Activism'],
	201	'channel_url': 'https://www.youtube.com/channel/UCgzCNQ11TmR9V97ECnhi3gw',
	202	'channel_follower_count': int,
	203	'age_limit': 0,
	204	},
	205	},
	206	{
	207	'note': 'dailymotion embed',
	208	'url': 'https://vk.com/video-95168827_456239103?list=cca524a0f0d5557e16',
	209	'info_dict': {
	210	'id': 'x8gfli0',
	211	'ext': 'mp4',
	212	'title': 'md5:45410f60ccd4b2760da98cb5fc777d70',
	213	'description': 'md5:2e71c5c9413735cfa06cf1a166f16c84',
	214	'uploader': 'Movies and cinema.',
	215	'upload_date': '20221218',
	216	'uploader_id': 'x1jdavv',
	217	'timestamp': 1671387617,
	218	'age_limit': 0,
	219	'duration': 2918,
	220	'like_count': int,
	221	'view_count': int,
	222	'thumbnail': r're:https?://.+x1080$',
	223	'tags': list
	224	},
	225	},
	226	{
	227	'url': 'https://vk.com/clips-74006511?z=clip-74006511_456247211',
	228	'info_dict': {
	229	'id': '-74006511_456247211',
	230	'ext': 'mp4',
	231	'comment_count': int,
	232	'duration': 9,
	233	'like_count': int,
	234	'thumbnail': r're:https?://.+(?:\.jpg\|getVideoPreview.*)$',
	235	'timestamp': 1664995597,
	236	'title': 'Clip by @madempress',
	237	'upload_date': '20221005',
	238	'uploader': 'Шальная императрица',
	239	'uploader_id': '-74006511',
	240	},
	241	},
	242	{
	243	# video key is extra_data not url\d+
	244	'url': 'http://vk.com/video-110305615_171782105',
	245	'md5': 'e13fcda136f99764872e739d13fac1d1',
	246	'info_dict': {
	247	'id': '-110305615_171782105',
	248	'ext': 'mp4',
	249	'title': 'S-Dance, репетиции к The way show',
	250	'uploader': 'THE WAY SHOW \| 17 апреля',
	251	'uploader_id': '-110305615',
	252	'timestamp': 1454859345,
	253	'upload_date': '20160207',
	254	},
	255	'skip': 'Removed',
	256	},
	257	{
	258	'note': 'finished live stream, postlive_mp4',
	259	'url': 'https://vk.com/videos-387766?z=video-387766_456242764%2Fpl_-387766_-2',
	260	'info_dict': {
	261	'id': '-387766_456242764',
	262	'ext': 'mp4',
	263	'title': 'ИгроМир 2016 День 1 — Игромания Утром',
	264	'uploader': 'Игромания',
	265	'duration': 5239,
	266	'upload_date': '20160929',
	267	'uploader_id': '-387766',
	268	'timestamp': 1475137527,
	269	'thumbnail': r're:https?://.+\.jpg$',
	270	'comment_count': int,
	271	'like_count': int,
	272	},
	273	'params': {
	274	'skip_download': True,
	275	},
	276	},
	277	{
	278	# live stream, hls and rtmp links, most likely already finished live
	279	# stream by the time you are reading this comment
	280	'url': 'https://vk.com/video-140332_456239111',
	281	'only_matching': True,
	282	},
	283	{
	284	# removed video, just testing that we match the pattern
	285	'url': 'http://vk.com/feed?z=video-43215063_166094326%2Fbb50cacd3177146d7a',
	286	'only_matching': True,
	287	},
	288	{
	289	# age restricted video, requires vk account credentials
	290	'url': 'https://vk.com/video205387401_164765225',
	291	'only_matching': True,
	292	},
	293	{
	294	# pladform embed
	295	'url': 'https://vk.com/video-76116461_171554880',
	296	'only_matching': True,
	297	},
	298	{
	299	'url': 'http://new.vk.com/video205387401_165548505',
	300	'only_matching': True,
	301	},
	302	{
	303	# This video is no longer available, because its author has been blocked.
	304	'url': 'https://vk.com/video-10639516_456240611',
	305	'only_matching': True,
	306	},
	307	{
	308	# The video is not available in your region.
	309	'url': 'https://vk.com/video-51812607_171445436',
	310	'only_matching': True,
	311	},
	312	{
	313	'url': 'https://vk.com/clip30014565_456240946',
	314	'only_matching': True,
	315	}]
	316
	317	def _real_extract(self, url):
	318	mobj = self._match_valid_url(url)
	319	video_id = mobj.group('videoid')
	320
	321	mv_data = {}
	322	if video_id:
	323	data = {
	324	'act': 'show',
	325	'video': video_id,
	326	}
	327	# Some videos (removed?) can only be downloaded with list id specified
	328	list_id = mobj.group('list_id')
	329	if list_id:
	330	data['list'] = list_id
	331
	332	payload = self._download_payload('al_video', video_id, data)
	333	info_page = payload[1]
	334	opts = payload[-1]
	335	mv_data = opts.get('mvData') or {}
	336	player = opts.get('player') or {}
	337	else:
	338	video_id = '%s_%s' % (mobj.group('oid'), mobj.group('id'))
	339
	340	info_page = self._download_webpage(
	341	'http://vk.com/video_ext.php?' + mobj.group('embed_query'), video_id)
	342
	343	error_message = self._html_search_regex(
	344	[r'(?s)<!><div[^>]+class="video_layer_message"[^>]*>(.+?)</div>',
	345	r'(?s)<div[^>]+id="video_ext_msg"[^>]*>(.+?)</div>'],
	346	info_page, 'error message', default=None)
	347	if error_message:
	348	raise ExtractorError(error_message, expected=True)
	349
	350	if re.search(r'<!>/login\.php\?.*\bact=security_check', info_page):
	351	raise ExtractorError(
	352	'You are trying to log in from an unusual location. You should confirm ownership at vk.com to log in with this IP.',
	353	expected=True)
	354
	355	ERROR_COPYRIGHT = 'Video %s has been removed from public access due to rightholder complaint.'
	356
	357	ERRORS = {
	358	r'>Видеозапись .*? была изъята из публичного доступа в связи с обращением правообладателя.<':
	359	ERROR_COPYRIGHT,
	360
	361	r'>The video .*? was removed from public access by request of the copyright holder.<':
	362	ERROR_COPYRIGHT,
	363
	364	r'<!>Please log in or <':
	365	'Video %s is only available for registered users, '
	366	'use --username and --password options to provide account credentials.',
	367
	368	r'<!>Unknown error':
	369	'Video %s does not exist.',
	370
	371	r'<!>Видео временно недоступно':
	372	'Video %s is temporarily unavailable.',
	373
	374	r'<!>Access denied':
	375	'Access denied to video %s.',
	376
	377	r'<!>Видеозапись недоступна, так как её автор был заблокирован.':
	378	'Video %s is no longer available, because its author has been blocked.',
	379
	380	r'<!>This video is no longer available, because its author has been blocked.':
	381	'Video %s is no longer available, because its author has been blocked.',
	382
	383	r'<!>This video is no longer available, because it has been deleted.':
	384	'Video %s is no longer available, because it has been deleted.',
	385
	386	r'<!>The video .+? is not available in your region.':
	387	'Video %s is not available in your region.',
	388	}
	389
	390	for error_re, error_msg in ERRORS.items():
	391	if re.search(error_re, info_page):
	392	raise ExtractorError(error_msg % video_id, expected=True)
	393
	394	player = self._parse_json(self._search_regex(
	395	r'var\s+playerParams\s=\s({.+?})\s;\s\n',
	396	info_page, 'player params'), video_id)
	397
	398	youtube_url = YoutubeIE._extract_url(info_page)
	399	if youtube_url:
	400	return self.url_result(youtube_url, YoutubeIE.ie_key())
	401
	402	vimeo_url = VimeoIE._extract_url(url, info_page)
	403	if vimeo_url is not None:
	404	return self.url_result(vimeo_url, VimeoIE.ie_key())
	405
	406	pladform_url = PladformIE._extract_url(info_page)
	407	if pladform_url:
	408	return self.url_result(pladform_url, PladformIE.ie_key())
	409
	410	m_rutube = re.search(
	411	r'\ssrc="((?:https?:)?//rutube\.ru\\?/(?:video\|play)\\?/embed(?:.*?))\\?"', info_page)
	412	if m_rutube is not None:
	413	rutube_url = self._proto_relative_url(
	414	m_rutube.group(1).replace('\\', ''))
	415	return self.url_result(rutube_url)
	416
	417	dailymotion_url = next(DailymotionIE._extract_embed_urls(url, info_page), None)
	418	if dailymotion_url:
	419	return self.url_result(dailymotion_url, DailymotionIE.ie_key())
	420
	421	odnoklassniki_url = OdnoklassnikiIE._extract_url(info_page)
	422	if odnoklassniki_url:
	423	return self.url_result(odnoklassniki_url, OdnoklassnikiIE.ie_key())
	424
	425	sibnet_url = next(SibnetEmbedIE._extract_embed_urls(url, info_page), None)
	426	if sibnet_url:
	427	return self.url_result(sibnet_url)
	428
	429	m_opts = re.search(r'(?s)var\s+opts\s=\s({.+?});', info_page)
	430	if m_opts:
	431	m_opts_url = re.search(r"url\s:\s'((?!/\b)[^']+)", m_opts.group(1))
	432	if m_opts_url:
	433	opts_url = m_opts_url.group(1)
	434	if opts_url.startswith('//'):
	435	opts_url = 'http:' + opts_url
	436	return self.url_result(opts_url)
	437
	438	data = player['params'][0]
	439	title = unescapeHTML(data['md_title'])
	440
	441	# 2 = live
	442	# 3 = post live (finished live)
	443	is_live = data.get('live') == 2
	444
	445	timestamp = unified_timestamp(self._html_search_regex(
	446	r'class=["\']mv_info_date[^>]+>([^<]+)(?:<\|from)', info_page,
	447	'upload date', default=None)) or int_or_none(data.get('date'))
	448
	449	view_count = str_to_int(self._search_regex(
	450	r'class=["\']mv_views_count[^>]+>\s*([\d,.]+)',
	451	info_page, 'view count', default=None))
	452
	453	formats = []
	454	for format_id, format_url in data.items():
	455	format_url = url_or_none(format_url)
	456	if not format_url or not format_url.startswith(('http', '//', 'rtmp')):
	457	continue
	458	if (format_id.startswith(('url', 'cache'))
	459	or format_id in ('extra_data', 'live_mp4', 'postlive_mp4')):
	460	height = int_or_none(self._search_regex(
	461	r'^(?:url\|cache)(\d+)', format_id, 'height', default=None))
	462	formats.append({
	463	'format_id': format_id,
	464	'url': format_url,
	465	'height': height,
	466	})
	467	elif format_id == 'hls':
	468	formats.extend(self._extract_m3u8_formats(
	469	format_url, video_id, 'mp4', 'm3u8_native',
	470	m3u8_id=format_id, fatal=False, live=is_live))
	471	elif format_id == 'rtmp':
	472	formats.append({
	473	'format_id': format_id,
	474	'url': format_url,
	475	'ext': 'flv',
	476	})
	477
	478	subtitles = {}
	479	for sub in data.get('subs') or {}:
	480	subtitles.setdefault(sub.get('lang', 'en'), []).append({
	481	'ext': sub.get('title', '.srt').split('.')[-1],
	482	'url': url_or_none(sub.get('url')),
	483	})
	484
	485	return {
	486	'id': video_id,
	487	'formats': formats,
	488	'title': title,
	489	'thumbnail': data.get('jpg'),
	490	'uploader': data.get('md_author'),
	491	'uploader_id': str_or_none(data.get('author_id') or mv_data.get('authorId')),
	492	'duration': int_or_none(data.get('duration') or mv_data.get('duration')),
	493	'timestamp': timestamp,
	494	'view_count': view_count,
	495	'like_count': int_or_none(mv_data.get('likes')),
	496	'comment_count': int_or_none(mv_data.get('commcount')),
	497	'is_live': is_live,
	498	'subtitles': subtitles,
	499	}
	500
	501
	502	class VKUserVideosIE(VKBaseIE):
	503	IE_NAME = 'vk:uservideos'
	504	IE_DESC = "VK - User's Videos"
	505	_VALID_URL = r'https?://(?:(?:m\|new)\.)?vk\.com/video/(?:playlist/)?(?P<id>[^?$#/&]+)(?!\?.\bz=video)(?:[/?#&](?:.?\bsection=(?P<section>\w+))?\|$)'
	506	_TEMPLATE_URL = 'https://vk.com/videos'
	507	_TESTS = [{
	508	'url': 'https://vk.com/video/@mobidevices',
	509	'info_dict': {
	510	'id': '-17892518_all',
	511	},
	512	'playlist_mincount': 1355,
	513	}, {
	514	'url': 'https://vk.com/video/@mobidevices?section=uploaded',
	515	'info_dict': {
	516	'id': '-17892518_uploaded',
	517	},
	518	'playlist_mincount': 182,
	519	}, {
	520	'url': 'https://vk.com/video/playlist/-174476437_2',
	521	'info_dict': {
	522	'id': '-174476437_playlist_2',
	523	'title': 'Анонсы'
	524	},
	525	'playlist_mincount': 108,
	526	}]
	527	_VIDEO = collections.namedtuple('Video', ['owner_id', 'id'])
	528
	529	def _entries(self, page_id, section):
	530	video_list_json = self._download_payload('al_video', page_id, {
	531	'act': 'load_videos_silent',
	532	'offset': 0,
	533	'oid': page_id,
	534	'section': section,
	535	})[0][section]
	536	count = video_list_json['count']
	537	total = video_list_json['total']
	538	video_list = video_list_json['list']
	539
	540	while True:
	541	for video in video_list:
	542	v = self._VIDEO._make(video[:2])
	543	video_id = '%d_%d' % (v.owner_id, v.id)
	544	yield self.url_result(
	545	'http://vk.com/video' + video_id, VKIE.ie_key(), video_id)
	546	if count >= total:
	547	break
	548	video_list_json = self._download_payload('al_video', page_id, {
	549	'act': 'load_videos_silent',
	550	'offset': count,
	551	'oid': page_id,
	552	'section': section,
	553	})[0][section]
	554	count += video_list_json['count']
	555	video_list = video_list_json['list']
	556
	557	def _real_extract(self, url):
	558	u_id, section = self._match_valid_url(url).groups()
	559	webpage = self._download_webpage(url, u_id)
	560
	561	if u_id.startswith('@'):
	562	page_id = self._search_regex(r'data-owner-id\s?=\s?"([^"]+)"', webpage, 'page_id')
	563	elif '_' in u_id:
	564	page_id, section = u_id.split('_', 1)
	565	section = f'playlist_{section}'
	566	else:
	567	raise ExtractorError('Invalid URL', expected=True)
	568
	569	if not section:
	570	section = 'all'
	571
	572	playlist_title = clean_html(get_element_by_class('VideoInfoPanel__title', webpage))
	573	return self.playlist_result(self._entries(page_id, section), '%s_%s' % (page_id, section), playlist_title)
	574
	575
	576	class VKWallPostIE(VKBaseIE):
	577	IE_NAME = 'vk:wallpost'
	578	_VALID_URL = r'https?://(?:(?:(?:(?:m\|new)\.)?vk\.com/(?:[^?]+\?.*\bw=)?wall(?P<id>-?\d+_\d+)))'
	579	_TESTS = [{
	580	# public page URL, audio playlist
	581	'url': 'https://vk.com/bs.official?w=wall-23538238_35',
	582	'info_dict': {
	583	'id': '-23538238_35',
	584	'title': 'Black Shadow - Wall post -23538238_35',
	585	'description': 'md5:190c78f905a53e0de793d83933c6e67f',
	586	},
	587	'playlist': [{
	588	'md5': '5ba93864ec5b85f7ce19a9af4af080f6',
	589	'info_dict': {
	590	'id': '135220665_111806521',
	591	'ext': 'm4a',
	592	'title': 'Black Shadow - Слепое Верование',
	593	'duration': 370,
	594	'uploader': 'Black Shadow',
	595	'artist': 'Black Shadow',
	596	'track': 'Слепое Верование',
	597	},
	598	}, {
	599	'md5': '4cc7e804579122b17ea95af7834c9233',
	600	'info_dict': {
	601	'id': '135220665_111802303',
	602	'ext': 'm4a',
	603	'title': 'Black Shadow - Война - Негасимое Бездны Пламя!',
	604	'duration': 423,
	605	'uploader': 'Black Shadow',
	606	'artist': 'Black Shadow',
	607	'track': 'Война - Негасимое Бездны Пламя!',
	608	},
	609	}],
	610	'params': {
	611	'skip_download': True,
	612	},
	613	}, {
	614	# single YouTube embed with irrelevant reaction videos
	615	'url': 'https://vk.com/wall-32370614_7173954',
	616	'info_dict': {
	617	'id': '-32370614_7173954',
	618	'title': 'md5:9f93c405bbc00061d34007d78c75e3bc',
	619	'description': 'md5:953b811f26fa9f21ee5856e2ea8e68fc',
	620	},
	621	'playlist_count': 1,
	622	}, {
	623	# wall page URL
	624	'url': 'https://vk.com/wall-23538238_35',
	625	'only_matching': True,
	626	}, {
	627	# mobile wall page URL
	628	'url': 'https://m.vk.com/wall-23538238_35',
	629	'only_matching': True,
	630	}]
	631	_BASE64_CHARS = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMN0PQRSTUVWXYZO123456789+/='
	632	_AUDIO = collections.namedtuple('Audio', ['id', 'owner_id', 'url', 'title', 'performer', 'duration', 'album_id', 'unk', 'author_link', 'lyrics', 'flags', 'context', 'extra', 'hashes', 'cover_url', 'ads'])
	633
	634	def _decode(self, enc):
	635	dec = ''
	636	e = n = 0
	637	for c in enc:
	638	r = self._BASE64_CHARS.index(c)
	639	cond = n % 4
	640	e = 64 * e + r if cond else r
	641	n += 1
	642	if cond:
	643	dec += chr(255 & e >> (-2 * n & 6))
	644	return dec
	645
	646	def _unmask_url(self, mask_url, vk_id):
	647	if 'audio_api_unavailable' in mask_url:
	648	extra = mask_url.split('?extra=')[1].split('#')
	649	func, base = self._decode(extra[1]).split(chr(11))
	650	mask_url = list(self._decode(extra[0]))
	651	url_len = len(mask_url)
	652	indexes = [None] * url_len
	653	index = int(base) ^ vk_id
	654	for n in range(url_len - 1, -1, -1):
	655	index = (url_len * (n + 1) ^ index + n) % url_len
	656	indexes[n] = index
	657	for n in range(1, url_len):
	658	c = mask_url[n]
	659	index = indexes[url_len - 1 - n]
	660	mask_url[n] = mask_url[index]
	661	mask_url[index] = c
	662	mask_url = ''.join(mask_url)
	663	return mask_url
	664
	665	def _real_extract(self, url):
	666	post_id = self._match_id(url)
	667
	668	webpage = self._download_payload('wkview', post_id, {
	669	'act': 'show',
	670	'w': 'wall' + post_id,
	671	})[1]
	672
	673	uploader = clean_html(get_element_by_class('PostHeaderTitle__authorName', webpage))
	674
	675	entries = []
	676
	677	for audio in re.findall(r'data-audio="([^"]+)', webpage):
	678	audio = self._parse_json(unescapeHTML(audio), post_id)
	679	if not audio['url']:
	680	continue
	681	title = unescapeHTML(audio.get('title'))
	682	artist = unescapeHTML(audio.get('artist'))
	683	entries.append({
	684	'id': f'{audio["owner_id"]}_{audio["id"]}',
	685	'title': join_nonempty(artist, title, delim=' - '),
	686	'thumbnails': try_call(lambda: [{'url': u} for u in audio['coverUrl'].split(',')]),
	687	'duration': int_or_none(audio.get('duration')),
	688	'uploader': uploader,
	689	'artist': artist,
	690	'track': title,
	691	'formats': [{
	692	'url': audio['url'],
	693	'ext': 'm4a',
	694	'vcodec': 'none',
	695	'acodec': 'mp3',
	696	'container': 'm4a_dash',
	697	}],
	698	})
	699
	700	entries.extend(self.url_result(urljoin(url, entry), VKIE) for entry in set(re.findall(
	701	r'<a[^>]+href=(?:["\'])(/video(?:-?[\d_]+)[^"\']*)',
	702	get_element_html_by_id('wl_post_body', webpage))))
	703
	704	return self.playlist_result(
	705	entries, post_id, join_nonempty(uploader, f'Wall post {post_id}', delim=' - '),
	706	clean_html(get_element_by_class('wall_post_text', webpage)))
	707
	708
	709	class VKPlayBaseIE(InfoExtractor):
	710	_RESOLUTIONS = {
	711	'tiny': '256x144',
	712	'lowest': '426x240',
	713	'low': '640x360',
	714	'medium': '852x480',
	715	'high': '1280x720',
	716	'full_hd': '1920x1080',
	717	'quad_hd': '2560x1440',
	718	}
	719
	720	def _extract_from_initial_state(self, url, video_id, path):
	721	webpage = self._download_webpage(url, video_id)
	722	video_info = traverse_obj(self._search_json(
	723	r'<script[^>]+\bid="initial-state"[^>]*>', webpage, 'initial state', video_id),
	724	path, expected_type=dict)
	725	if not video_info:
	726	raise ExtractorError('Unable to extract video info from html inline initial state')
	727	return video_info
	728
	729	def _extract_formats(self, stream_info, video_id):
	730	formats = []
	731	for stream in traverse_obj(stream_info, (
	732	'data', 0, 'playerUrls', lambda _, v: url_or_none(v['url']) and v['type'])):
	733	url = stream['url']
	734	format_id = str_or_none(stream['type'])
	735	if format_id in ('hls', 'live_hls', 'live_playback_hls') or '.m3u8' in url:
	736	formats.extend(self._extract_m3u8_formats(url, video_id, m3u8_id=format_id, fatal=False))
	737	elif format_id == 'dash':
	738	formats.extend(self._extract_mpd_formats(url, video_id, mpd_id=format_id, fatal=False))
	739	elif format_id in ('live_dash', 'live_playback_dash'):
	740	self.write_debug(f'Not extracting unsupported format "{format_id}"')
	741	else:
	742	formats.append({
	743	'url': url,
	744	'ext': 'mp4',
	745	'format_id': format_id,
	746	**parse_resolution(self._RESOLUTIONS.get(format_id)),
	747	})
	748	return formats
	749
	750	def _extract_common_meta(self, stream_info):
	751	return traverse_obj(stream_info, {
	752	'id': ('id', {str_or_none}),
	753	'title': ('title', {str}),
	754	'release_timestamp': ('startTime', {int_or_none}),
	755	'thumbnail': ('previewUrl', {url_or_none}),
	756	'view_count': ('count', 'views', {int_or_none}),
	757	'like_count': ('count', 'likes', {int_or_none}),
	758	'categories': ('category', 'title', {str}, {lambda x: [x] if x else None}),
	759	'uploader': (('user', ('blog', 'owner')), 'nick', {str}),
	760	'uploader_id': (('user', ('blog', 'owner')), 'id', {str_or_none}),
	761	'duration': ('duration', {int_or_none}),
	762	'is_live': ('isOnline', {bool}),
	763	'concurrent_view_count': ('count', 'viewers', {int_or_none}),
	764	}, get_all=False)
	765
	766
	767	class VKPlayIE(VKPlayBaseIE):
	768	_VALID_URL = r'https?://vkplay\.live/(?P<username>[^/]+)/record/(?P<id>[a-f0-9\-]+)'
	769	_TESTS = [{
	770	'url': 'https://vkplay.live/zitsmann/record/f5e6e3b5-dc52-4d14-965d-0680dd2882da',
	771	'info_dict': {
	772	'id': 'f5e6e3b5-dc52-4d14-965d-0680dd2882da',
	773	'ext': 'mp4',
	774	'title': 'Atomic Heart (пробуем!) спасибо подписчику EKZO!',
	775	'uploader': 'ZitsmanN',
	776	'uploader_id': '13159830',
	777	'release_timestamp': 1683461378,
	778	'release_date': '20230507',
	779	'thumbnail': r're:https://images.vkplay.live/public_video_stream/record/f5e6e3b5-dc52-4d14-965d-0680dd2882da/preview\?change_time=\d+',
	780	'duration': 10608,
	781	'view_count': int,
	782	'like_count': int,
	783	'categories': ['Atomic Heart'],
	784	},
	785	'params': {'skip_download': 'm3u8'},
	786	}]
	787
	788	def _real_extract(self, url):
	789	username, video_id = self._match_valid_url(url).groups()
	790
	791	record_info = traverse_obj(self._download_json(
	792	f'https://api.vkplay.live/v1/blog/{username}/public_video_stream/record/{video_id}', video_id, fatal=False),
	793	('data', 'record', {dict}))
	794	if not record_info:
	795	record_info = self._extract_from_initial_state(url, video_id, ('record', 'currentRecord', 'data'))
	796
	797	return {
	798	**self._extract_common_meta(record_info),
	799	'id': video_id,
	800	'formats': self._extract_formats(record_info, video_id),
	801	}
	802
	803
	804	class VKPlayLiveIE(VKPlayBaseIE):
	805	_VALID_URL = r'https?://vkplay\.live/(?P<id>[^/]+)/?(?:[#?]\|$)'
	806	_TESTS = [{
	807	'url': 'https://vkplay.live/bayda',
	808	'info_dict': {
	809	'id': 'f02c321e-427b-408d-b12f-ae34e53e0ea2',
	810	'ext': 'mp4',
	811	'title': r're:эскапизм крута .*',
	812	'uploader': 'Bayda',
	813	'uploader_id': 12279401,
	814	'release_timestamp': 1687209962,
	815	'release_date': '20230619',
	816	'thumbnail': r're:https://images.vkplay.live/public_video_stream/12279401/preview\?change_time=\d+',
	817	'view_count': int,
	818	'concurrent_view_count': int,
	819	'like_count': int,
	820	'categories': ['EVE Online'],
	821	'live_status': 'is_live',
	822	},
	823	'skip': 'livestream',
	824	'params': {'skip_download': True},
	825	}]
	826
	827	def _real_extract(self, url):
	828	username = self._match_id(url)
	829
	830	stream_info = self._download_json(
	831	f'https://api.vkplay.live/v1/blog/{username}/public_video_stream', username, fatal=False)
	832	if not stream_info:
	833	stream_info = self._extract_from_initial_state(url, username, ('stream', 'stream', 'data', 'stream'))
	834
	835	formats = self._extract_formats(stream_info, username)
	836	if not formats and not traverse_obj(stream_info, ('isOnline', {bool})):
	837	raise UserNotLive(video_id=username)
	838
	839	return {
	840	**self._extract_common_meta(stream_info),
	841	'formats': formats,
	842	}