yt_dlp/extractor/doodstream.py

   1 import string
   2 import random
   3 import time
   4
   5 from .common import InfoExtractor
   6
   7
   8 class DoodStreamIE(InfoExtractor):
   9     _VALID_URL = r'https?://(?:www\.)?dood\.(?:to|watch|so|pm)/[ed]/(?P<id>[a-z0-9]+)'
  10     _TESTS = [{
  11         'url': 'http://dood.to/e/5s1wmbdacezb',
  12         'md5': '4568b83b31e13242b3f1ff96c55f0595',
  13         'info_dict': {
  14             'id': '5s1wmbdacezb',
  15             'ext': 'mp4',
  16             'title': 'Kat Wonders - Monthly May 2020',
  17             'description': 'Kat Wonders - Monthly May 2020 | DoodStream.com',
  18             'thumbnail': 'https://img.doodcdn.com/snaps/flyus84qgl2fsk4g.jpg',
  19         }
  20     }, {
  21         'url': 'http://dood.watch/d/5s1wmbdacezb',
  22         'md5': '4568b83b31e13242b3f1ff96c55f0595',
  23         'info_dict': {
  24             'id': '5s1wmbdacezb',
  25             'ext': 'mp4',
  26             'title': 'Kat Wonders - Monthly May 2020',
  27             'description': 'Kat Wonders - Monthly May 2020 | DoodStream.com',
  28             'thumbnail': 'https://img.doodcdn.com/snaps/flyus84qgl2fsk4g.jpg',
  29         }
  30     }, {
  31         'url': 'https://dood.to/d/jzrxn12t2s7n',
  32         'md5': '3207e199426eca7c2aa23c2872e6728a',
  33         'info_dict': {
  34             'id': 'jzrxn12t2s7n',
  35             'ext': 'mp4',
  36             'title': 'Stacy Cruz Cute ALLWAYSWELL',
  37             'description': 'Stacy Cruz Cute ALLWAYSWELL | DoodStream.com',
  38             'thumbnail': 'https://img.doodcdn.com/snaps/8edqd5nppkac3x8u.jpg',
  39         }
  40     }, {
  41         'url': 'https://dood.so/d/jzrxn12t2s7n',
  42         'only_matching': True
  43     }]
  44
  45     def _real_extract(self, url):
  46         video_id = self._match_id(url)
  47         url = f'https://dood.to/e/{video_id}'
  48         webpage = self._download_webpage(url, video_id)
  49
  50         title = self._html_search_meta(
  51             ('og:title', 'twitter:title'), webpage, default=None) or self._html_extract_title(webpage)
  52         thumb = self._html_search_meta(['og:image', 'twitter:image'], webpage, default=None)
  53         token = self._html_search_regex(r'[?&]token=([a-z0-9]+)[&\']', webpage, 'token')
  54         description = self._html_search_meta(
  55             ['og:description', 'description', 'twitter:description'], webpage, default=None)
  56
  57         headers = {
  58             'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:53.0) Gecko/20100101 Firefox/66.0',
  59             'referer': url
  60         }
  61
  62         pass_md5 = self._html_search_regex(r'(/pass_md5.*?)\'', webpage, 'pass_md5')
  63         final_url = ''.join((
  64             self._download_webpage(f'https://dood.to{pass_md5}', video_id, headers=headers),
  65             *(random.choice(string.ascii_letters + string.digits) for _ in range(10)),
  66             f'?token={token}&expiry={int(time.time() * 1000)}',
  67         ))
  68
  69         return {
  70             'id': video_id,
  71             'title': title,
  72             'url': final_url,
  73             'http_headers': headers,
  74             'ext': 'mp4',
  75             'description': description,
  76             'thumbnail': thumb,
  77         }