yt_dlp/extractor/manyvids.py

   1 from .common import InfoExtractor
   2 from ..utils import (
   3     determine_ext,
   4     int_or_none,
   5     str_to_int,
   6     urlencode_postdata,
   7 )
   8
   9
  10 class ManyVidsIE(InfoExtractor):
  11     _VALID_URL = r'(?i)https?://(?:www\.)?manyvids\.com/video/(?P<id>\d+)'
  12     _TESTS = [{
  13         # preview video
  14         'url': 'https://www.manyvids.com/Video/133957/everthing-about-me/',
  15         'md5': '03f11bb21c52dd12a05be21a5c7dcc97',
  16         'info_dict': {
  17             'id': '133957',
  18             'ext': 'mp4',
  19             'title': 'everthing about me (Preview)',
  20             'view_count': int,
  21             'like_count': int,
  22         },
  23     }, {
  24         # full video
  25         'url': 'https://www.manyvids.com/Video/935718/MY-FACE-REVEAL/',
  26         'md5': 'f3e8f7086409e9b470e2643edb96bdcc',
  27         'info_dict': {
  28             'id': '935718',
  29             'ext': 'mp4',
  30             'title': 'MY FACE REVEAL',
  31             'view_count': int,
  32             'like_count': int,
  33         },
  34     }]
  35
  36     def _real_extract(self, url):
  37         video_id = self._match_id(url)
  38
  39         webpage = self._download_webpage(url, video_id)
  40
  41         video_url = self._search_regex(
  42             r'data-(?:video-filepath|meta-video)\s*=s*(["\'])(?P<url>(?:(?!\1).)+)\1',
  43             webpage, 'video URL', group='url')
  44
  45         title = self._html_search_regex(
  46             (r'<span[^>]+class=["\']item-title[^>]+>([^<]+)',
  47              r'<h2[^>]+class=["\']h2 m-0["\'][^>]*>([^<]+)'),
  48             webpage, 'title', default=None) or self._html_search_meta(
  49             'twitter:title', webpage, 'title', fatal=True)
  50
  51         if any(p in webpage for p in ('preview_videos', '_preview.mp4')):
  52             title += ' (Preview)'
  53
  54         mv_token = self._search_regex(
  55             r'data-mvtoken=(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
  56             'mv token', default=None, group='value')
  57
  58         if mv_token:
  59             # Sets some cookies
  60             self._download_webpage(
  61                 'https://www.manyvids.com/includes/ajax_repository/you_had_me_at_hello.php',
  62                 video_id, fatal=False, data=urlencode_postdata({
  63                     'mvtoken': mv_token,
  64                     'vid': video_id,
  65                 }), headers={
  66                     'Referer': url,
  67                     'X-Requested-With': 'XMLHttpRequest'
  68                 })
  69
  70         if determine_ext(video_url) == 'm3u8':
  71             formats = self._extract_m3u8_formats(
  72                 video_url, video_id, 'mp4', entry_protocol='m3u8_native',
  73                 m3u8_id='hls')
  74         else:
  75             formats = [{'url': video_url}]
  76
  77         like_count = int_or_none(self._search_regex(
  78             r'data-likes=["\'](\d+)', webpage, 'like count', default=None))
  79         view_count = str_to_int(self._html_search_regex(
  80             r'(?s)<span[^>]+class="views-wrapper"[^>]*>(.+?)</span', webpage,
  81             'view count', default=None))
  82
  83         return {
  84             'id': video_id,
  85             'title': title,
  86             'view_count': view_count,
  87             'like_count': like_count,
  88             'formats': formats,
  89             'uploader': self._html_search_regex(r'<meta[^>]+name="author"[^>]*>([^<]+)', webpage, 'uploader'),
  90         }