yt_dlp/extractor/vshare.py

   1 from .common import InfoExtractor
   2 from ..utils import ExtractorError, decode_packed_codes
   3
   4
   5 class VShareIE(InfoExtractor):
   6     _VALID_URL = r'https?://(?:www\.)?vshare\.io/[dv]/(?P<id>[^/?#&]+)'
   7     _EMBED_REGEX = [r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?vshare\.io/v/[^/?#&]+)']
   8     _TESTS = [{
   9         'url': 'https://vshare.io/d/0f64ce6',
  10         'md5': '17b39f55b5497ae8b59f5fbce8e35886',
  11         'info_dict': {
  12             'id': '0f64ce6',
  13             'title': 'vl14062007715967',
  14             'ext': 'mp4',
  15         }
  16     }, {
  17         'url': 'https://vshare.io/v/0f64ce6/width-650/height-430/1',
  18         'only_matching': True,
  19     }]
  20
  21     def _extract_packed(self, webpage):
  22         packed = self._search_regex(
  23             r'(eval\(function.+)', webpage, 'packed code')
  24         unpacked = decode_packed_codes(packed)
  25         digits = self._search_regex(r'\[((?:\d+,?)+)\]', unpacked, 'digits')
  26         digits = [int(digit) for digit in digits.split(',')]
  27         key_digit = self._search_regex(
  28             r'fromCharCode\(.+?(\d+)\)}', unpacked, 'key digit')
  29         chars = [chr(d - int(key_digit)) for d in digits]
  30         return ''.join(chars)
  31
  32     def _real_extract(self, url):
  33         video_id = self._match_id(url)
  34
  35         webpage = self._download_webpage(
  36             'https://vshare.io/v/%s/width-650/height-430/1' % video_id,
  37             video_id, headers={'Referer': url})
  38
  39         title = self._html_extract_title(webpage)
  40         title = title.split(' - ')[0]
  41
  42         error = self._html_search_regex(
  43             r'(?s)<div[^>]+\bclass=["\']xxx-error[^>]+>(.+?)</div', webpage,
  44             'error', default=None)
  45         if error:
  46             raise ExtractorError(error, expected=True)
  47
  48         info = self._parse_html5_media_entries(
  49             url, '<video>%s</video>' % self._extract_packed(webpage),
  50             video_id)[0]
  51
  52         self._sort_formats(info['formats'])
  53
  54         info.update({
  55             'id': video_id,
  56             'title': title,
  57         })
  58
  59         return info