]>
jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/yandexdisk.py
3 from .common
import InfoExtractor
14 class YandexDiskIE(InfoExtractor
):
15 _VALID_URL
= r
'''(?x)https?://
22 co(?:m(?:\.(?:am|ge|tr))?|\.il)|
32 )/(?:[di]/|public.*?\bhash=)(?P<id>[^/?#&]+)'''
35 'url': 'https://yadi.sk/i/VdOeDou8eZs6Y',
36 'md5': 'a4a8d52958c8fddcf9845935070402ae',
38 'id': 'VdOeDou8eZs6Y',
42 'uploader': 'y.botova',
43 'uploader_id': '300043621',
46 'expected_warnings': ['Unable to download JSON metadata'],
48 'url': 'https://yadi.sk/d/h3WAXvDS3Li3Ce',
49 'only_matching': True,
51 'url': 'https://yadi.sk/public?hash=5DZ296JK9GWCLp02f6jrObjnctjRxMs8L6%2B%2FuhNqk38%3D',
52 'only_matching': True,
55 def _real_extract(self
, url
):
56 domain
, video_id
= self
._match
_valid
_url
(url
).groups()
58 webpage
= self
._download
_webpage
(url
, video_id
)
59 store
= self
._parse
_json
(self
._search
_regex
(
60 r
'<script[^>]+id="store-prefetch"[^>]*>\s*({.+?})\s*</script>',
61 webpage
, 'store'), video_id
)
62 resource
= store
['resources'][store
['rootResourceId']]
64 title
= resource
['name']
65 meta
= resource
.get('meta') or {}
67 public_url
= meta
.get('short_url')
69 video_id
= self
._match
_id
(public_url
)
71 source_url
= (self
._download
_json
(
72 'https://cloud-api.yandex.net/v1/disk/public/resources/download',
73 video_id
, query
={'public_key': url}
, fatal
=False) or {}).get('href')
74 video_streams
= resource
.get('videoStreams') or {}
75 video_hash
= resource
.get('hash') or url
76 environment
= store
.get('environment') or {}
77 sk
= environment
.get('sk')
78 yandexuid
= environment
.get('yandexuid')
79 if sk
and yandexuid
and not (source_url
and video_streams
):
80 self
._set
_cookie
(domain
, 'yandexuid', yandexuid
)
83 return (self
._download
_json
(
84 urljoin(url
, '/public/api/') + action
, video_id
, data
=json
.dumps({
87 }).encode(), headers
={
88 'Content-Type': 'text/plain',
89 }, fatal
=False) or {}).get('data') or {}
91 # TODO: figure out how to detect if download limit has
92 # been reached and then avoid unnecessary source format
94 source_url
= call_api('download-url').get('url')
96 video_streams
= call_api('get-video-streams')
102 'format_id': 'source',
103 'ext': determine_ext(title
, meta
.get('ext') or mimetype2ext(meta
.get('mime_type')) or 'mp4'),
105 'filesize': int_or_none(meta
.get('size'))
108 for video
in (video_streams
.get('videos') or []):
109 format_url
= video
.get('url')
112 if video
.get('dimension') == 'adaptive':
113 formats
.extend(self
._extract
_m
3u8_formats
(
114 format_url
, video_id
, 'mp4', 'm3u8_native',
115 m3u8_id
='hls', fatal
=False))
117 size
= video
.get('size') or {}
118 height
= int_or_none(size
.get('height'))
121 format_id
+= '-%dp' % height
124 'format_id': format_id
,
126 'protocol': 'm3u8_native',
128 'width': int_or_none(size
.get('width')),
131 uid
= resource
.get('uid')
132 display_name
= try_get(store
, lambda x
: x
['users'][uid
]['displayName'])
137 'duration': float_or_none(video_streams
.get('duration'), 1000),
138 'uploader': display_name
,
140 'view_count': int_or_none(meta
.get('views_counter')),