yt_dlp/extractor/extremetube.py

   1 from ..utils import str_to_int
   2 from .keezmovies import KeezMoviesIE
   3
   4
   5 class ExtremeTubeIE(KeezMoviesIE):  # XXX: Do not subclass from concrete IE
   6     _VALID_URL = r'https?://(?:www\.)?extremetube\.com/(?:[^/]+/)?video/(?P<id>[^/#?&]+)'
   7     _TESTS = [{
   8         'url': 'http://www.extremetube.com/video/music-video-14-british-euro-brit-european-cumshots-swallow-652431',
   9         'md5': '92feaafa4b58e82f261e5419f39c60cb',
  10         'info_dict': {
  11             'id': 'music-video-14-british-euro-brit-european-cumshots-swallow-652431',
  12             'ext': 'mp4',
  13             'title': 'Music Video 14 british euro brit european cumshots swallow',
  14             'uploader': 'anonim',
  15             'view_count': int,
  16             'age_limit': 18,
  17         }
  18     }, {
  19         'url': 'http://www.extremetube.com/gay/video/abcde-1234',
  20         'only_matching': True,
  21     }, {
  22         'url': 'http://www.extremetube.com/video/latina-slut-fucked-by-fat-black-dick',
  23         'only_matching': True,
  24     }, {
  25         'url': 'http://www.extremetube.com/video/652431',
  26         'only_matching': True,
  27     }]
  28
  29     def _real_extract(self, url):
  30         webpage, info = self._extract_info(url)
  31
  32         if not info['title']:
  33             info['title'] = self._search_regex(
  34                 r'<h1[^>]+title="([^"]+)"[^>]*>', webpage, 'title')
  35
  36         uploader = self._html_search_regex(
  37             r'Uploaded by:\s*</[^>]+>\s*<a[^>]+>(.+?)</a>',
  38             webpage, 'uploader', fatal=False)
  39         view_count = str_to_int(self._search_regex(
  40             r'Views:\s*</[^>]+>\s*<[^>]+>([\d,\.]+)</',
  41             webpage, 'view count', fatal=False))
  42
  43         info.update({
  44             'uploader': uploader,
  45             'view_count': view_count,
  46         })
  47
  48         return info