]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/redtube.py
[cleanup] Upgrade syntax
[yt-dlp.git] / yt_dlp / extractor / redtube.py
1 import re
2
3 from .common import InfoExtractor
4 from ..utils import (
5 determine_ext,
6 ExtractorError,
7 int_or_none,
8 merge_dicts,
9 str_to_int,
10 unified_strdate,
11 url_or_none,
12 )
13
14
class RedTubeIE(InfoExtractor):
    """Extractor for RedTube watch pages and ``embed.redtube.com`` player URLs."""

    _VALID_URL = r'https?://(?:(?:\w+\.)?redtube\.com/|embed\.redtube\.com/\?.*?\bid=)(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'https://www.redtube.com/38864951',
        'md5': '4fba70cbca3aefd25767ab4b523c9878',
        'info_dict': {
            'id': '38864951',
            'ext': 'mp4',
            'title': 'Public Sex on the Balcony in Freezing Paris! Amateur Couple LeoLulu',
            'description': 'Watch video Public Sex on the Balcony in Freezing Paris! Amateur Couple LeoLulu on Redtube, home of free Blowjob porn videos and Blonde sex movies online. Video length: (10:46) - Uploaded by leolulu - Verified User - Starring Pornstar: Leolulu',
            'upload_date': '20210111',
            'timestamp': 1610343109,
            'duration': 646,
            'view_count': int,
            'age_limit': 18,
            'thumbnail': r're:https://\wi-ph\.rdtcdn\.com/videos/.+/.+\.jpg',
        },
    }, {
        'url': 'http://embed.redtube.com/?bgcolor=000000&id=1443286',
        'only_matching': True,
    }, {
        'url': 'http://it.redtube.com/66418',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_urls(webpage):
        """Return the ``src`` URLs of all RedTube embed iframes found in *webpage*.

        The result is a (possibly empty) list of strings; scheme-relative
        URLs (``//embed.redtube.com/...``) are returned as found.
        """
        return re.findall(
            r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//embed\.redtube\.com/\?.*?\bid=\d+)',
            webpage)

    def _collect_formats(self, webpage, video_id):
        """Build the list of format dicts advertised by *webpage*.

        Three places are consulted, in order:

        1. the ``sources`` JSON object (format id -> URL);
        2. the ``mediaDefinition`` JSON array, whose entries either describe
           a format directly or point to a further JSON list / HLS manifest;
        3. if nothing was found so far, a plain ``<source ... type="video/mp4">``
           tag (this last lookup is fatal when the tag is missing).
        """
        formats = []

        sources = self._parse_json(
            self._search_regex(
                r'sources\s*:\s*({.+?})', webpage, 'source', default='{}'),
            video_id, fatal=False)
        if sources and isinstance(sources, dict):
            for format_id, format_url in sources.items():
                if format_url:
                    formats.append({
                        'url': format_url,
                        'format_id': format_id,
                        'height': int_or_none(format_id),
                    })

        # The '{}' default parses to a dict, which the isinstance check below
        # turns into "no media definitions".
        medias = self._parse_json(
            self._search_regex(
                r'mediaDefinition["\']?\s*:\s*(\[.+?}\s*\])', webpage,
                'media definitions', default='{}'),
            video_id, fatal=False)
        for media in medias if isinstance(medias, list) else []:
            # Skip malformed entries instead of failing on .get()
            if not isinstance(media, dict):
                continue
            format_url = url_or_none(media.get('videoUrl'))
            if not format_url:
                continue
            format_id = media.get('format')
            quality = media.get('quality')
            if format_id == 'hls' or (format_id == 'mp4' and not quality):
                # The URL is fetched as JSON and expected to hold a list of
                # further media entries; any other result is ignored below.
                nested_medias = self._download_json(format_url, video_id, fatal=False)
            else:
                nested_medias = [media]
            for entry in nested_medias if isinstance(nested_medias, list) else []:
                if not isinstance(entry, dict):
                    continue
                entry_url = url_or_none(entry.get('videoUrl'))
                if not entry_url:
                    continue
                entry_format = entry.get('format')
                if entry_format == 'hls' or determine_ext(entry_url) == 'm3u8':
                    formats.extend(self._extract_m3u8_formats(
                        entry_url, video_id, 'mp4',
                        entry_protocol='m3u8_native', m3u8_id=entry_format or 'hls',
                        fatal=False))
                    continue
                # For progressive entries the quality value doubles as format id
                entry_quality = entry.get('quality')
                formats.append({
                    'url': entry_url,
                    'ext': 'mp4',
                    'format_id': entry_quality,
                    'height': int_or_none(entry_quality),
                })

        if not formats:
            video_url = self._html_search_regex(
                r'<source src="(.+?)" type="video/mp4">', webpage, 'video URL')
            formats.append({'url': video_url, 'ext': 'mp4'})
        return formats

    def _real_extract(self, url):
        """Download the watch page for *url* and return its info dict.

        Raises ExtractorError (expected) when the page contains one of the
        known "removed" or "private" markers.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(
            'https://www.redtube.com/%s' % video_id, video_id)

        # (markers to look for in the page, message suffix for the error)
        ERRORS = (
            (('video-deleted-info', '>This video has been removed'), 'has been removed'),
            (('private_video_text', '>This video is private', '>Send a friend request to its owner to be able to view it'), 'is private'),
        )

        for patterns, message in ERRORS:
            if any(p in webpage for p in patterns):
                raise ExtractorError(
                    'Video %s %s' % (video_id, message), expected=True)

        info = self._search_json_ld(webpage, video_id, default={})

        if not info.get('title'):
            # In the first pattern the title runs up to the matching closing
            # heading tag; excluding only the heading digit (\1) would reject
            # any title that happens to contain that digit.
            info['title'] = self._html_search_regex(
                (r'<h(\d)[^>]+class="(?:video_title_text|videoTitle|video_title)[^"]*">(?P<title>(?:(?!</h\1>).)+)</h\1>',
                 r'(?:videoTitle|title)\s*:\s*(["\'])(?P<title>(?:(?!\1).)+)\1',),
                webpage, 'title', group='title',
                default=None) or self._og_search_title(webpage)

        formats = self._collect_formats(webpage, video_id)
        self._sort_formats(formats)

        thumbnail = self._og_search_thumbnail(webpage)
        upload_date = unified_strdate(self._search_regex(
            r'<span[^>]+>(?:ADDED|Published on) ([^<]+)<',
            webpage, 'upload date', default=None))
        duration = int_or_none(self._og_search_property(
            'video:duration', webpage, default=None) or self._search_regex(
                r'videoDuration\s*:\s*(\d+)', webpage, 'duration', default=None))
        view_count = str_to_int(self._search_regex(
            (r'<div[^>]*>Views</div>\s*<div[^>]*>\s*([\d,.]+)',
             r'<span[^>]*>VIEWS</span>\s*</td>\s*<td>\s*([\d,.]+)',
             r'<span[^>]+\bclass=["\']video_view_count[^>]*>\s*([\d,.]+)'),
            webpage, 'view count', default=None))

        # No self-labeling, but they describe themselves as
        # "Home of Videos Porno"
        age_limit = 18

        # `info` (JSON-LD data plus title) is passed first to merge_dicts,
        # ahead of the values scraped above.
        return merge_dicts(info, {
            'id': video_id,
            'ext': 'mp4',
            'thumbnail': thumbnail,
            'upload_date': upload_date,
            'duration': duration,
            'view_count': view_count,
            'age_limit': age_limit,
            'formats': formats,
        })