]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/vshare.py
[extractors] Use new framework for existing embeds (#4307)
[yt-dlp.git] / yt_dlp / extractor / vshare.py
1 from .common import InfoExtractor
2 from ..utils import ExtractorError, decode_packed_codes
3
4
class VShareIE(InfoExtractor):
    """Extractor for vshare.io video pages (``/d/<id>`` and ``/v/<id>`` URLs)."""

    _VALID_URL = r'https?://(?:www\.)?vshare\.io/[dv]/(?P<id>[^/?#&]+)'
    _EMBED_REGEX = [r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?vshare\.io/v/[^/?#&]+)']
    _TESTS = [{
        'url': 'https://vshare.io/d/0f64ce6',
        'md5': '17b39f55b5497ae8b59f5fbce8e35886',
        'info_dict': {
            'id': '0f64ce6',
            'title': 'vl14062007715967',
            'ext': 'mp4',
        }
    }, {
        'url': 'https://vshare.io/v/0f64ce6/width-650/height-430/1',
        'only_matching': True,
    }]

    def _extract_packed(self, webpage):
        """Return the text hidden in the page's packed ``eval(function...`` script.

        The packed script is run through ``decode_packed_codes``; the result is
        expected to contain a bracketed, comma-separated list of integers and a
        ``fromCharCode(...)`` call ending in a numeric key.  Each integer minus
        that key is a character code of the returned string.

        The ``_search_regex`` lookups use their default failure handling when
        the packed script, the integer list or the key cannot be found.
        """
        packed = self._search_regex(
            r'(eval\(function.+)', webpage, 'packed code')
        unpacked = decode_packed_codes(packed)
        digits = self._search_regex(r'\[((?:\d+,?)+)\]', unpacked, 'digits')
        # Convert the key once instead of once per character.
        key = int(self._search_regex(
            r'fromCharCode\(.+?(\d+)\)}', unpacked, 'key digit'))
        # The digits pattern accepts a trailing comma (e.g. "[1,2,]"), which
        # leaves an empty piece after split(); skip it rather than crash on
        # int('').
        return ''.join(
            chr(int(digit) - key) for digit in digits.split(',') if digit)

    def _real_extract(self, url):
        """Build the info dict for a vshare.io URL.

        Downloads the embed page for the video id, decodes the obfuscated
        player markup and parses it as HTML5 media.

        Raises ExtractorError (expected) when the page shows an ``xxx-error``
        block, and ExtractorError when the decoded markup yields no media
        entries.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(
            'https://vshare.io/v/%s/width-650/height-430/1' % video_id,
            video_id, headers={'Referer': url})

        # Check for a site-reported error before touching anything else, so a
        # missing title on an error page cannot hide the real message.
        error = self._html_search_regex(
            r'(?s)<div[^>]+\bclass=["\']xxx-error[^>]+>(.+?)</div', webpage,
            'error', default=None)
        if error:
            raise ExtractorError(error, expected=True)

        # NOTE(review): _html_extract_title appears to be a non-fatal lookup
        # that can return None; guard so we never call .split() on None.
        title = self._html_extract_title(webpage)
        if title:
            # Keep only the part before the first ' - ' separator.
            title = title.split(' - ')[0]

        entries = self._parse_html5_media_entries(
            url, '<video>%s</video>' % self._extract_packed(webpage),
            video_id)
        if not entries:
            # Previously an IndexError; report it as an extraction failure.
            raise ExtractorError(
                'Unable to find any media entries in the decoded player markup')
        info = entries[0]

        self._sort_formats(info['formats'])

        info.update({
            'id': video_id,
            'title': title,
        })

        return info