]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/vshare.py
[youtube] De-prioritize auto-generated thumbnails
[yt-dlp.git] / yt_dlp / extractor / vshare.py
CommitLineData
0987f2dd
T
1import re
2
2ab0bfcd 3from .common import InfoExtractor
0987f2dd 4from ..compat import compat_chr
ff31f2d5
S
5from ..utils import (
6 decode_packed_codes,
7 ExtractorError,
8)
2ab0bfcd
S
9
10
class VShareIE(InfoExtractor):
    """Information extractor for videos hosted on vshare.io.

    Accepts both the ``/d/<id>`` and ``/v/<id>`` URL forms (see ``_VALID_URL``)
    and always fetches the ``/v/<id>/width-650/height-430/1`` page to locate
    the media.
    """

    _VALID_URL = r'https?://(?:www\.)?vshare\.io/[dv]/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://vshare.io/d/0f64ce6',
        'md5': '17b39f55b5497ae8b59f5fbce8e35886',
        'info_dict': {
            'id': '0f64ce6',
            'title': 'vl14062007715967',
            'ext': 'mp4',
        }
    }, {
        'url': 'https://vshare.io/v/0f64ce6/width-650/height-430/1',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_urls(webpage):
        """Return the vshare.io ``/v/<id>`` URLs used as ``<iframe>`` sources.

        :param webpage: HTML text to scan.
        :returns: list of matched URL strings (scheme may be omitted, i.e.
            protocol-relative ``//vshare.io/v/...``), in document order.
        """
        return re.findall(
            r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?vshare\.io/v/[^/?#&]+)',
            webpage)

    def _extract_packed(self, webpage):
        """Decode the obfuscated markup embedded in the page's packed script.

        The page carries an ``eval(function...`` packed script.  Once unpacked
        it contains a bracketed list of integers plus a ``fromCharCode`` call
        ending in a numeric key; subtracting the key from every integer yields
        the character codes of the hidden text.

        :param webpage: HTML text of the vshare.io player page.
        :returns: the decoded string (the caller wraps it in ``<video>`` tags,
            so it is presumably HTML ``<source>`` markup -- verify against a
            live page).
        """
        packed = self._search_regex(
            r'(eval\(function.+)', webpage, 'packed code')
        unpacked = decode_packed_codes(packed)

        digits = self._search_regex(r'\[((?:\d+,?)+)\]', unpacked, 'digits')
        key_digit = int(self._search_regex(
            r'fromCharCode\(.+?(\d+)\)}', unpacked, 'key digit'))

        # The digits pattern also accepts a trailing comma ("[1,2,]"), which
        # leaves an empty fragment after splitting; skip it rather than let
        # int('') raise ValueError.
        return ''.join(
            compat_chr(int(digit) - key_digit)
            for digit in digits.split(',') if digit)

    def _real_extract(self, url):
        """Extract the info dict for a single vshare.io video URL.

        :param url: a URL matching ``_VALID_URL``.
        :returns: info dict built from the decoded HTML5 media entry, with
            ``id`` and ``title`` filled in.
        :raises ExtractorError: (``expected=True``) when the page contains an
            ``xxx-error`` block; its text is used as the message.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(
            'https://vshare.io/v/%s/width-650/height-430/1' % video_id,
            video_id, headers={'Referer': url})

        # Check for a site-reported error *before* touching the title, so an
        # error page surfaces the site's own message instead of failing while
        # the title is being processed.
        error = self._html_search_regex(
            r'(?s)<div[^>]+\bclass=["\']xxx-error[^>]+>(.+?)</div', webpage,
            'error', default=None)
        if error:
            raise ExtractorError(error, expected=True)

        # Keep only the part of the page title before the first ' - '.
        # NOTE(review): if _html_extract_title can return None for a page
        # without <title>, the split below still fails -- confirm its default.
        title = self._html_extract_title(webpage)
        title = title.split(' - ')[0]

        # The decoded markup is wrapped in <video> so the generic HTML5 media
        # parser can pick the formats out of it; only the first entry is used.
        info = self._parse_html5_media_entries(
            url, '<video>%s</video>' % self._extract_packed(webpage),
            video_id)[0]

        self._sort_formats(info['formats'])

        info.update({
            'id': video_id,
            'title': title,
        })

        return info