]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/beeg.py
[cleanup] Fix infodict returned fields (#8906)
[yt-dlp.git] / yt_dlp / extractor / beeg.py
1 from .common import InfoExtractor
2
3 from ..utils import (
4 int_or_none,
5 str_or_none,
6 traverse_obj,
7 try_get,
8 unified_timestamp,
9 )
10
11
class BeegIE(InfoExtractor):
    """Extractor for beeg.com video pages.

    The numeric id captured from the page URL is used to query the
    ``store.externulls.com`` JSON endpoint; HLS playlists listed in that
    response are expanded into formats.
    """

    _VALID_URL = r'https?://(?:www\.)?beeg\.(?:com(?:/video)?)/-?(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://beeg.com/-0983946056129650',
        'md5': '51d235147c4627cfce884f844293ff88',
        'info_dict': {
            'id': '0983946056129650',
            'ext': 'mp4',
            'title': 'sucked cock and fucked in a private plane',
            'duration': 927,
            'tags': list,
            'age_limit': 18,
            'upload_date': '20220131',
            'timestamp': 1643656455,
            'display_id': '2540839',
        }
    }, {
        'url': 'https://beeg.com/-0599050563103750?t=4-861',
        'md5': 'bd8b5ea75134f7f07fad63008db2060e',
        'info_dict': {
            'id': '0599050563103750',
            'ext': 'mp4',
            'title': 'Bad Relatives',
            'duration': 2060,
            'tags': list,
            'age_limit': 18,
            'description': 'md5:b4fc879a58ae6c604f8f259155b7e3b9',
            'timestamp': 1643623200,
            'display_id': '2569965',
            'upload_date': '20220131',
        }
    }, {
        # api/v6 v2
        'url': 'https://beeg.com/1941093077?t=911-1391',
        'only_matching': True,
    }, {
        # api/v6 v2 w/o t
        'url': 'https://beeg.com/1277207756',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for a single beeg.com video URL.

        Downloads the page (used only for the RTA age-limit lookup) and the
        JSON description of the file, then turns every non-empty entry of
        the HLS resource mapping into m3u8 formats.

        Missing or null ``fc_facts`` / ``hls_resources`` entries are treated
        as empty instead of raising, and a playlist that fails to download
        is skipped rather than aborting the whole extraction.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        video = self._download_json(
            'https://store.externulls.com/facts/file/%s' % video_id,
            video_id, 'Downloading JSON for %s' % video_id)

        # Pick the fact with the smallest 'id'. 'fc_facts' may be absent or
        # null, and non-dict entries are ignored so that the .get() calls
        # on first_fact below stay safe.
        first_fact = {}
        for fact in video.get('fc_facts') or []:
            if not isinstance(fact, dict):
                continue
            # try_get swallows the KeyError/TypeError raised when either
            # side has no comparable 'id'
            if not first_fact or try_get(fact, lambda x: x['id'] < first_fact['id']):
                first_fact = fact

        # Prefer the file-level resources, fall back to the ones attached to
        # the chosen fact, and finally to an empty mapping.
        resources = (
            traverse_obj(video, ('file', 'hls_resources'))
            or first_fact.get('hls_resources')
            or {})

        formats = []
        for format_id, video_uri in resources.items():
            if not video_uri:
                continue
            # Keys look like 'fl_cdn_<height>'; height is None otherwise
            height = int_or_none(self._search_regex(
                r'fl_cdn_(\d+)', format_id, 'height', default=None))
            current_formats = self._extract_m3u8_formats(
                f'https://video.beeg.com/{video_uri}', video_id, ext='mp4',
                # str_or_none avoids a literal 'None' format-id prefix
                m3u8_id=str_or_none(height),
                # one broken playlist must not discard the other formats
                fatal=False)
            if height is not None:
                # Only override when the key actually carried a height, so
                # any height detected from the playlist is not clobbered
                for f in current_formats:
                    f['height'] = height
            formats.extend(current_formats)

        return {
            'id': video_id,
            'display_id': str_or_none(first_fact.get('id')),
            'title': traverse_obj(video, ('file', 'stuff', 'sf_name')),
            'description': traverse_obj(video, ('file', 'stuff', 'sf_story')),
            'timestamp': unified_timestamp(first_fact.get('fc_created')),
            'duration': int_or_none(traverse_obj(video, ('file', 'fl_duration'))),
            'tags': traverse_obj(video, ('tags', ..., 'tg_name')),
            'formats': formats,
            'age_limit': self._rta_search(webpage),
        }