[yt-dlp.git] / yt_dlp / extractor / beeg.py

from .common import InfoExtractor

from ..utils import (
    int_or_none,
    str_or_none,
    traverse_obj,
    try_get,
    unified_timestamp,
)


class BeegIE(InfoExtractor):
    """Extractor for single videos on beeg.com.

    The numeric id is taken from the URL path (an optional leading ``-`` and an
    optional ``/video`` prefix are accepted) and is used to request the video's
    JSON description from store.externulls.com.
    """

    _VALID_URL = r'https?://(?:www\.)?beeg\.(?:com(?:/video)?)/-?(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://beeg.com/-0983946056129650',
        'md5': '51d235147c4627cfce884f844293ff88',
        'info_dict': {
            'id': '0983946056129650',
            'ext': 'mp4',
            'title': 'sucked cock and fucked in a private plane',
            'duration': 927,
            'tags': list,
            'age_limit': 18,
            'upload_date': '20220131',
            'timestamp': 1643656455,
            'display_id': '2540839',
        }
    }, {
        'url': 'https://beeg.com/-0599050563103750?t=4-861',
        'md5': 'bd8b5ea75134f7f07fad63008db2060e',
        'info_dict': {
            'id': '0599050563103750',
            'ext': 'mp4',
            'title': 'Bad Relatives',
            'duration': 2060,
            'tags': list,
            'age_limit': 18,
            'description': 'md5:b4fc879a58ae6c604f8f259155b7e3b9',
            'timestamp': 1643623200,
            'display_id': '2569965',
            'upload_date': '20220131',
        }
    }, {
        # api/v6 v2
        'url': 'https://beeg.com/1941093077?t=911-1391',
        'only_matching': True,
    }, {
        # api/v6 v2 w/o t
        'url': 'https://beeg.com/1277207756',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the video at ``url``.

        Downloads the video page (only used for the age-limit lookup) and the
        JSON description of the video, selects the entry of ``fc_facts`` with
        the lowest ``id``, and expands every non-empty HLS resource into
        formats.

        Returns a dict with ``id``, ``display_id``, ``title``, ``description``,
        ``timestamp``, ``duration``, ``tags``, ``formats`` and ``age_limit``.
        Apart from ``id``, fields whose source data is missing come out as
        ``None`` (or an empty ``formats`` list) rather than raising here.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        video = self._download_json(
            'https://store.externulls.com/facts/file/%s' % video_id,
            video_id, 'Downloading JSON for %s' % video_id)

        # Select the fact with the smallest id. 'fc_facts' may be absent or
        # null, and entries that are not dicts are skipped so that the
        # .get() lookups on first_fact below stay safe.
        first_fact = {}
        for fact in video.get('fc_facts') or []:
            if not isinstance(fact, dict):
                continue
            # try_get swallows the lookup/comparison error when either fact
            # has no usable 'id'; such a fact never replaces the current one.
            if not first_fact or try_get(fact, lambda x: x['id'] < first_fact['id']):
                first_fact = fact

        # Prefer the file-level resources, fall back to the selected fact, and
        # finally to an empty mapping so a missing key yields no formats
        # instead of an AttributeError on None.
        resources = (
            traverse_obj(video, ('file', 'hls_resources'))
            or first_fact.get('hls_resources')
            or {})

        formats = []
        for format_id, video_uri in resources.items():
            if not video_uri:
                continue
            # Resource keys of the form 'fl_cdn_<n>' carry the height.
            height = int_or_none(self._search_regex(r'fl_cdn_(\d+)', format_id, 'height', default=None))
            # str_or_none keeps m3u8_id unset when the height is unknown,
            # rather than labelling the formats with the literal 'None'.
            current_formats = self._extract_m3u8_formats(
                f'https://video.beeg.com/{video_uri}', video_id, ext='mp4',
                m3u8_id=str_or_none(height))
            if height is not None:
                # Only override when the key gave a height; otherwise keep
                # whatever the manifest parser determined.
                for f in current_formats:
                    f['height'] = height
            formats.extend(current_formats)

        return {
            'id': video_id,
            'display_id': str_or_none(first_fact.get('id')),
            'title': traverse_obj(video, ('file', 'stuff', 'sf_name')),
            'description': traverse_obj(video, ('file', 'stuff', 'sf_story')),
            'timestamp': unified_timestamp(first_fact.get('fc_created')),
            'duration': int_or_none(traverse_obj(video, ('file', 'fl_duration'))),
            'tags': traverse_obj(video, ('tags', ..., 'tg_name')),
            'formats': formats,
            'age_limit': self._rta_search(webpage),
        }
Commit	Line	Data
2aebbcce	1	from .common import InfoExtractor
cd170e81	2
5946cda7 S	3	from ..utils import (
5946cda7 S	4	int_or_none,
f4f9f6d0	5	str_or_none,
cd170e81 B	6	traverse_obj,
cd170e81 B	7	try_get,
e4d51e75	8	unified_timestamp,
5946cda7	9	)
2aebbcce	10
	11
	12	class BeegIE(InfoExtractor):
cd170e81	13	_VALID_URL = r'https?://(?:www\.)?beeg\.(?:com(?:/video)?)/-?(?P<id>\d+)'
cdc7baff	14	_TESTS = [{
cd170e81 B	15	'url': 'https://beeg.com/-0983946056129650',
cd170e81 B	16	'md5': '51d235147c4627cfce884f844293ff88',
2aebbcce	17	'info_dict': {
cd170e81	18	'id': '0983946056129650',
2aebbcce	19	'ext': 'mp4',
cd170e81 B	20	'title': 'sucked cock and fucked in a private plane',
cd170e81 B	21	'duration': 927,
5946cda7	22	'tags': list,
7ca2e11f	23	'age_limit': 18,
cd170e81 B	24	'upload_date': '20220131',
cd170e81 B	25	'timestamp': 1643656455,
f4f9f6d0	26	'display_id': '2540839',
cd170e81 B	27	}
	28	}, {
	29	'url': 'https://beeg.com/-0599050563103750?t=4-861',
	30	'md5': 'bd8b5ea75134f7f07fad63008db2060e',
	31	'info_dict': {
	32	'id': '0599050563103750',
	33	'ext': 'mp4',
	34	'title': 'Bad Relatives',
	35	'duration': 2060,
	36	'tags': list,
	37	'age_limit': 18,
	38	'description': 'md5:b4fc879a58ae6c604f8f259155b7e3b9',
	39	'timestamp': 1643623200,
f4f9f6d0	40	'display_id': '2569965',
cd170e81	41	'upload_date': '20220131',
2aebbcce	42	}
27cef888 S	43	}, {
	44	# api/v6 v2
	45	'url': 'https://beeg.com/1941093077?t=911-1391',
	46	'only_matching': True,
5fc08961 S	47	}, {
	48	# api/v6 v2 w/o t
	49	'url': 'https://beeg.com/1277207756',
	50	'only_matching': True,
cdc7baff	51	}]
2aebbcce	52
2aebbcce	53	def _real_extract(self, url):
5946cda7	54	video_id = self._match_id(url)
3baa62e8	55
3afef2e3 S	56	webpage = self._download_webpage(url, video_id)
3afef2e3 S	57
cd170e81 B	58	video = self._download_json(
	59	'https://store.externulls.com/facts/file/%s' % video_id,
	60	video_id, 'Downloading JSON for %s' % video_id)
3afef2e3	61
cd170e81 B	62	fc_facts = video.get('fc_facts')
	63	first_fact = {}
	64	for fact in fc_facts:
	65	if not first_fact or try_get(fact, lambda x: x['id'] < first_fact['id']):
	66	first_fact = fact
27cef888	67
cd170e81	68	resources = traverse_obj(video, ('file', 'hls_resources')) or first_fact.get('hls_resources')
d63cfc3f	69
5946cda7	70	formats = []
cd170e81 B	71	for format_id, video_uri in resources.items():
cd170e81 B	72	if not video_uri:
5946cda7	73	continue
cd170e81 B	74	height = int_or_none(self._search_regex(r'fl_cdn_(\d+)', format_id, 'height', default=None))
	75	current_formats = self._extract_m3u8_formats(f'https://video.beeg.com/{video_uri}', video_id, ext='mp4', m3u8_id=str(height))
	76	for f in current_formats:
	77	f['height'] = height
	78	formats.extend(current_formats)
5f6a1245	79
2aebbcce	80	return {
2aebbcce	81	'id': video_id,
f4f9f6d0	82	'display_id': str_or_none(first_fact.get('id')),
cd170e81 B	83	'title': traverse_obj(video, ('file', 'stuff', 'sf_name')),
	84	'description': traverse_obj(video, ('file', 'stuff', 'sf_story')),
	85	'timestamp': unified_timestamp(first_fact.get('fc_created')),
	86	'duration': int_or_none(traverse_obj(video, ('file', 'fl_duration'))),
	87	'tags': traverse_obj(video, ('tags', ..., 'tg_name')),
3baa62e8	88	'formats': formats,
3afef2e3	89	'age_limit': self._rta_search(webpage),
2aebbcce	90	}