jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/yandexdisk.py

Commit	Line	Data
29f7c58a	1	import json
29f7c58a	2
cbbe6663	3	from .common import InfoExtractor
cbbe6663 S	4	from ..utils import (
	5	determine_ext,
	6	float_or_none,
	7	int_or_none,
6aaf96a3	8	join_nonempty,
29f7c58a	9	mimetype2ext,
cbbe6663	10	try_get,
29f7c58a	11	urljoin,
cbbe6663 S	12	)
	13
	14
	15	class YandexDiskIE(InfoExtractor):
29f7c58a	16	_VALID_URL = r'''(?x)https?://
	17	(?P<domain>
	18	yadi\.sk\|
	19	disk\.yandex\.
	20	(?:
	21	az\|
	22	by\|
	23	co(?:m(?:\.(?:am\|ge\|tr))?\|\.il)\|
	24	ee\|
	25	fr\|
	26	k[gz]\|
	27	l[tv]\|
	28	md\|
	29	t[jm]\|
	30	u[az]\|
	31	ru
	32	)
	33	)/(?:[di]/\|public.*?\bhash=)(?P<id>[^/?#&]+)'''
cbbe6663	34
8519b88f	35	_TESTS = [{
cbbe6663	36	'url': 'https://yadi.sk/i/VdOeDou8eZs6Y',
29f7c58a	37	'md5': 'a4a8d52958c8fddcf9845935070402ae',
cbbe6663 S	38	'info_dict': {
	39	'id': 'VdOeDou8eZs6Y',
	40	'ext': 'mp4',
	41	'title': '4.mp4',
	42	'duration': 168.6,
	43	'uploader': 'y.botova',
	44	'uploader_id': '300043621',
	45	'view_count': int,
	46	},
29f7c58a	47	'expected_warnings': ['Unable to download JSON metadata'],
8519b88f S	48	}, {
	49	'url': 'https://yadi.sk/d/h3WAXvDS3Li3Ce',
	50	'only_matching': True,
29f7c58a	51	}, {
	52	'url': 'https://yadi.sk/public?hash=5DZ296JK9GWCLp02f6jrObjnctjRxMs8L6%2B%2FuhNqk38%3D',
	53	'only_matching': True,
8519b88f	54	}]
cbbe6663 S	55
cbbe6663 S	56	def _real_extract(self, url):
5ad28e7f	57	domain, video_id = self._match_valid_url(url).groups()
cbbe6663 S	58
cbbe6663 S	59	webpage = self._download_webpage(url, video_id)
29f7c58a	60	store = self._parse_json(self._search_regex(
	61	r'<script[^>]+id="store-prefetch"[^>]>\s({.+?})\s*</script>',
	62	webpage, 'store'), video_id)
	63	resource = store['resources'][store['rootResourceId']]
	64
	65	title = resource['name']
	66	meta = resource.get('meta') or {}
	67
	68	public_url = meta.get('short_url')
	69	if public_url:
	70	video_id = self._match_id(public_url)
	71
	72	source_url = (self._download_json(
	73	'https://cloud-api.yandex.net/v1/disk/public/resources/download',
	74	video_id, query={'public_key': url}, fatal=False) or {}).get('href')
	75	video_streams = resource.get('videoStreams') or {}
	76	video_hash = resource.get('hash') or url
	77	environment = store.get('environment') or {}
	78	sk = environment.get('sk')
	79	yandexuid = environment.get('yandexuid')
	80	if sk and yandexuid and not (source_url and video_streams):
	81	self._set_cookie(domain, 'yandexuid', yandexuid)
	82
	83	def call_api(action):
	84	return (self._download_json(
	85	urljoin(url, '/public/api/') + action, video_id, data=json.dumps({
	86	'hash': video_hash,
	87	'sk': sk,
	88	}).encode(), headers={
	89	'Content-Type': 'text/plain',
	90	}, fatal=False) or {}).get('data') or {}
	91	if not source_url:
	92	# TODO: figure out how to detect if download limit has
	93	# been reached and then avoid unnecessary source format
	94	# extraction requests
	95	source_url = call_api('download-url').get('url')
	96	if not video_streams:
	97	video_streams = call_api('get-video-streams')
cbbe6663 S	98
	99	formats = []
	100	if source_url:
	101	formats.append({
	102	'url': source_url,
	103	'format_id': 'source',
29f7c58a	104	'ext': determine_ext(title, meta.get('ext') or mimetype2ext(meta.get('mime_type')) or 'mp4'),
cbbe6663	105	'quality': 1,
add96eb9	106	'filesize': int_or_none(meta.get('size')),
cbbe6663	107	})
29f7c58a	108
29f7c58a	109	for video in (video_streams.get('videos') or []):
cbbe6663 S	110	format_url = video.get('url')
	111	if not format_url:
	112	continue
29f7c58a	113	if video.get('dimension') == 'adaptive':
cbbe6663	114	formats.extend(self._extract_m3u8_formats(
29f7c58a	115	format_url, video_id, 'mp4', 'm3u8_native',
cbbe6663 S	116	m3u8_id='hls', fatal=False))
cbbe6663 S	117	else:
29f7c58a	118	size = video.get('size') or {}
29f7c58a	119	height = int_or_none(size.get('height'))
cbbe6663	120	formats.append({
29f7c58a	121	'ext': 'mp4',
6aaf96a3	122	'format_id': join_nonempty('hls', height and f'{height}p'),
29f7c58a	123	'height': height,
29f7c58a	124	'protocol': 'm3u8_native',
cbbe6663	125	'url': format_url,
29f7c58a	126	'width': int_or_none(size.get('width')),
cbbe6663	127	})
cbbe6663	128
29f7c58a	129	uid = resource.get('uid')
29f7c58a	130	display_name = try_get(store, lambda x: x['users'][uid]['displayName'])
cbbe6663 S	131
	132	return {
	133	'id': video_id,
	134	'title': title,
29f7c58a	135	'duration': float_or_none(video_streams.get('duration'), 1000),
	136	'uploader': display_name,
	137	'uploader_id': uid,
	138	'view_count': int_or_none(meta.get('views_counter')),
cbbe6663 S	139	'formats': formats,
cbbe6663 S	140	}