yt_dlp/extractor/beeg.py

   1 from .common import InfoExtractor
   2
   3 from ..utils import (
   4     int_or_none,
   5     str_or_none,
   6     traverse_obj,
   7     try_get,
   8     unified_timestamp,
   9 )
  10
  11
  12 class BeegIE(InfoExtractor):
  13     _VALID_URL = r'https?://(?:www\.)?beeg\.(?:com(?:/video)?)/-?(?P<id>\d+)'
  14     _TESTS = [{
  15         'url': 'https://beeg.com/-0983946056129650',
  16         'md5': '51d235147c4627cfce884f844293ff88',
  17         'info_dict': {
  18             'id': '0983946056129650',
  19             'ext': 'mp4',
  20             'title': 'sucked cock and fucked in a private plane',
  21             'duration': 927,
  22             'tags': list,
  23             'age_limit': 18,
  24             'upload_date': '20220131',
  25             'timestamp': 1643656455,
  26             'display_id': '2540839',
  27         }
  28     }, {
  29         'url': 'https://beeg.com/-0599050563103750?t=4-861',
  30         'md5': 'bd8b5ea75134f7f07fad63008db2060e',
  31         'info_dict': {
  32             'id': '0599050563103750',
  33             'ext': 'mp4',
  34             'title': 'Bad Relatives',
  35             'duration': 2060,
  36             'tags': list,
  37             'age_limit': 18,
  38             'description': 'md5:b4fc879a58ae6c604f8f259155b7e3b9',
  39             'timestamp': 1643623200,
  40             'display_id': '2569965',
  41             'upload_date': '20220131',
  42         }
  43     }, {
  44         # api/v6 v2
  45         'url': 'https://beeg.com/1941093077?t=911-1391',
  46         'only_matching': True,
  47     }, {
  48         # api/v6 v2 w/o t
  49         'url': 'https://beeg.com/1277207756',
  50         'only_matching': True,
  51     }]
  52
  53     def _real_extract(self, url):
  54         video_id = self._match_id(url)
  55
  56         webpage = self._download_webpage(url, video_id)
  57
  58         video = self._download_json(
  59             'https://store.externulls.com/facts/file/%s' % video_id,
  60             video_id, 'Downloading JSON for %s' % video_id)
  61
  62         fc_facts = video.get('fc_facts')
  63         first_fact = {}
  64         for fact in fc_facts:
  65             if not first_fact or try_get(fact, lambda x: x['id'] < first_fact['id']):
  66                 first_fact = fact
  67
  68         resources = traverse_obj(video, ('file', 'hls_resources')) or first_fact.get('hls_resources')
  69
  70         formats = []
  71         for format_id, video_uri in resources.items():
  72             if not video_uri:
  73                 continue
  74             height = int_or_none(self._search_regex(r'fl_cdn_(\d+)', format_id, 'height', default=None))
  75             current_formats = self._extract_m3u8_formats(f'https://video.beeg.com/{video_uri}', video_id, ext='mp4', m3u8_id=str(height))
  76             for f in current_formats:
  77                 f['height'] = height
  78             formats.extend(current_formats)
  79
  80         return {
  81             'id': video_id,
  82             'display_id': str_or_none(first_fact.get('id')),
  83             'title': traverse_obj(video, ('file', 'stuff', 'sf_name')),
  84             'description': traverse_obj(video, ('file', 'stuff', 'sf_story')),
  85             'timestamp': unified_timestamp(first_fact.get('fc_created')),
  86             'duration': int_or_none(traverse_obj(video, ('file', 'fl_duration'))),
  87             'tags': traverse_obj(video, ('tags', ..., 'tg_name')),
  88             'formats': formats,
  89             'age_limit': self._rta_search(webpage),
  90         }