jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/pornbox.py
[ie/pornbox] Add extractor (#7386)
[yt-dlp.git] / yt_dlp / extractor / pornbox.py
1 from .common import InfoExtractor
2 from ..compat import functools
3 from ..utils import (
4 int_or_none,
5 parse_duration,
6 parse_iso8601,
7 qualities,
8 str_or_none,
9 traverse_obj,
10 url_or_none,
11 )
12
13
class PornboxIE(InfoExtractor):
    """Extractor for pornbox.com ``/application/watch-page/<id>`` URLs."""

    _VALID_URL = r'https?://(?:www\.)?pornbox\.com/application/watch-page/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'https://pornbox.com/application/watch-page/212108',
        'md5': '3ff6b6e206f263be4c5e987a3162ac6e',
        'info_dict': {
            'id': '212108',
            'ext': 'mp4',
            'title': 'md5:ececc5c6e6c9dd35d290c45fed05fd49',
            'uploader': 'Lily Strong',
            'timestamp': 1665871200,
            'upload_date': '20221015',
            'age_limit': 18,
            'availability': 'needs_auth',
            'duration': 1505,
            'cast': ['Lily Strong', 'John Strong'],
            'tags': 'count:11',
            'description': 'md5:589c7f33e183aa8aa939537300efb859',
            'thumbnail': r're:^https?://cdn-image\.gtflixtv\.com.*\.jpg.*$',
        },
    }, {
        'url': 'https://pornbox.com/application/watch-page/216045',
        'info_dict': {
            'id': '216045',
            'title': 'md5:3e48528e73a9a2b12f7a2772ed0b26a2',
            'description': 'md5:3e631dcaac029f15ed434e402d1b06c7',
            'uploader': 'VK Studio',
            'timestamp': 1618264800,
            'upload_date': '20210412',
            'age_limit': 18,
            'availability': 'premium_only',
            'duration': 2710,
            'cast': 'count:3',
            'tags': 'count:29',
            'thumbnail': r're:^https?://cdn-image\.gtflixtv\.com.*\.jpg.*$',
            'subtitles': 'count:6',
        },
        'params': {
            'skip_download': True,
            'ignore_no_formats_error': True,
        },
        'expected_warnings': [
            'You are either not logged in or do not have access to this scene',
            'No video formats found',
            'Requested format is not available',
        ],
    }]

    def _real_extract(self, url):
        """Build the info dict for a single watch page.

        Scene metadata is read from ``https://pornbox.com/contents/<id>``.
        If the scene is not flagged as both purchased and free,
        ``raise_login_required(..., metadata_available=True)`` is called and,
        should that call return, the metadata collected so far is returned
        without a ``formats`` entry.  Otherwise the "Full video" media entry
        is looked up and its stream manifest is turned into ``formats``.
        """
        video_id = self._match_id(url)
        scene = self._download_json(f'https://pornbox.com/contents/{video_id}', video_id)

        # One entry per string found under "subtitles"; each points at the
        # per-language subtitle endpoint and is labelled with the srt extension.
        subtitles = {}
        for lang in traverse_obj(scene, ('subtitles', ..., {str})):
            subtitles[lang] = [{
                'url': f'https://pornbox.com/contents/{video_id}/subtitles/{lang}',
                'ext': 'srt',
            }]

        free_scene = traverse_obj(
            scene, ('price', 'is_available_for_free', {bool}), default=False)

        info = {'id': video_id}
        info.update(traverse_obj(scene, {
            'title': ('scene_name', {str.strip}),
            'description': ('small_description', {str.strip}),
            'uploader': 'studio',
            'duration': ('runtime', {parse_duration}),
            'cast': (('models', 'male_models'), ..., 'model_name'),
            'thumbnail': ('player_poster', {url_or_none}),
            'tags': ('niches', ..., 'niche'),
        }))

        # The studio release date is tried first, then the top-level publish date.
        release = traverse_obj(scene, ('studios', 'release_date'), 'publish_date')
        info.update({
            'age_limit': 18,
            'timestamp': parse_iso8601(release),
            'availability': self._availability(needs_auth=True, needs_premium=not free_scene),
            'subtitles': subtitles,
        })

        # NOTE(review): as written, extraction only continues when the scene
        # is flagged purchased AND free; a purchased premium scene is stopped
        # here as well - confirm that this is the intended gate.
        has_access = scene.get('is_purchased') and free_scene
        if not has_access:
            self.raise_login_required(
                'You are either not logged in or do not have access to this scene', metadata_available=True)
            return info

        def is_full_video(_, media):
            return media['title'] == 'Full video'

        media_id = traverse_obj(
            scene, ('medias', is_full_video, 'media_id', {int}), get_all=False)
        if not media_id:
            self.raise_no_formats('Could not find stream id', video_id=video_id)

        stream_data = self._download_json(
            f'https://pornbox.com/media/{media_id}/stream', video_id=video_id, note='Getting manifest urls')

        # Known quality labels, in the order handed to qualities().
        quality_rank = qualities(['web', 'vga', 'hd', '1080p', '4k', '8k'])
        scale_bitrate = functools.partial(int_or_none, scale=1000)

        def size_to_int(size):
            # Drop the last character of "size" before converting to int.
            # NOTE(review): presumably values look like "1080p"; if so this is
            # a height, not a width - verify against the API.
            return int(size[:-1])

        format_fields = {
            'url': 'src',
            'vbr': ('bitrate', {scale_bitrate}),
            'format_id': ('quality', {str_or_none}),
            'quality': ('quality', {quality_rank}),
            'width': ('size', {size_to_int}),
        }
        # Only manifest entries with a truthy "src" become formats.
        info['formats'] = traverse_obj(
            stream_data, ('qualities', lambda _, fmt: fmt['src'], format_fields))

        return info
113 return metadata