]>
Commit | Line | Data |
---|---|---|
032b3df5 PH |
1 | from __future__ import unicode_literals |
2 | ||
e28ed498 S |
3 | import re |
4 | ||
9f5daf00 | 5 | from .common import InfoExtractor |
ac12e888 S |
6 | from ..utils import ( |
7 | ExtractorError, | |
8 | int_or_none, | |
560d3b7d | 9 | merge_dicts, |
ac12e888 S |
10 | str_to_int, |
11 | unified_strdate, | |
3052a30d | 12 | url_or_none, |
ac12e888 | 13 | ) |
9f5daf00 PH |
14 | |
15 | ||
class RedTubeIE(InfoExtractor):
    """Information extractor for RedTube video pages and embed player URLs.

    Both ``redtube.com/<id>`` pages and ``embed.redtube.com/?...id=<id>``
    URLs are matched by ``_VALID_URL``; extraction itself always downloads
    the ``http://www.redtube.com/<id>`` page.
    """

    _VALID_URL = r'https?://(?:(?:www\.)?redtube\.com/|embed\.redtube\.com/\?.*?\bid=)(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'http://www.redtube.com/66418',
        'md5': 'fc08071233725f26b8f014dba9590005',
        'info_dict': {
            'id': '66418',
            'ext': 'mp4',
            'title': 'Sucked on a toilet',
            'upload_date': '20110811',
            'duration': 596,
            'view_count': int,
            'age_limit': 18,
        }
    }, {
        'url': 'http://embed.redtube.com/?bgcolor=000000&id=1443286',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_urls(webpage):
        """Return the ``src`` URLs of all RedTube embed iframes in *webpage*.

        URLs are returned exactly as written in the markup, so they may be
        protocol-relative (starting with ``//``).
        """
        return re.findall(
            r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//embed\.redtube\.com/\?.*?\bid=\d+)',
            webpage)

    def _real_extract(self, url):
        """Build the info dict for the video referenced by *url*.

        Raises:
            ExtractorError: (with ``expected=True``) when the page contains
                one of the known "removed" or "private" markers.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(
            'http://www.redtube.com/%s' % video_id, video_id)

        # Each entry pairs a tuple of page markers with the message to
        # report when any one of those markers occurs in the page.
        ERRORS = (
            (('video-deleted-info', '>This video has been removed'), 'has been removed'),
            (('private_video_text', '>This video is private', '>Send a friend request to its owner to be able to view it'), 'is private'),
        )

        for patterns, message in ERRORS:
            if any(p in webpage for p in patterns):
                raise ExtractorError(
                    'Video %s %s' % (video_id, message), expected=True)

        # Base metadata; the values scraped below are merged into it at the
        # end. NOTE(review): presumably parsed from JSON-LD markup - confirm
        # against InfoExtractor._search_json_ld.
        info = self._search_json_ld(webpage, video_id, default={})

        if not info.get('title'):
            # The heading pattern stops at the matching closing tag
            # (</hN>), so titles that merely contain the heading-level
            # digit are still captured. The second pattern reads a quoted
            # JavaScript value and stops at the matching quote character.
            info['title'] = self._html_search_regex(
                (r'<h(\d)[^>]+class="(?:video_title_text|videoTitle|video_title)[^"]*">(?P<title>(?:(?!</h\1>).)+)</h\1>',
                 r'(?:videoTitle|title)\s*:\s*(["\'])(?P<title>(?:(?!\1).)+)\1',),
                webpage, 'title', group='title',
                default=None) or self._og_search_title(webpage)

        formats = []

        # First source of formats: a "sources" object whose keys are used
        # both as format id and (when numeric) as height.
        sources = self._parse_json(
            self._search_regex(
                r'sources\s*:\s*({.+?})', webpage, 'source', default='{}'),
            video_id, fatal=False)
        if sources and isinstance(sources, dict):
            for format_id, format_url in sources.items():
                if format_url:
                    formats.append({
                        'url': format_url,
                        'format_id': format_id,
                        'height': int_or_none(format_id),
                    })

        # Second source of formats: a "mediaDefinition" array. The '{}'
        # fallback parses to an empty dict, which fails the list check
        # below and is therefore skipped.
        medias = self._parse_json(
            self._search_regex(
                r'mediaDefinition\s*:\s*(\[.+?\])', webpage,
                'media definitions', default='{}'),
            video_id, fatal=False)
        if medias and isinstance(medias, list):
            for media in medias:
                # Entries that are not JSON objects have no .get(); skip
                # them instead of failing the whole extraction.
                if not isinstance(media, dict):
                    continue
                format_url = url_or_none(media.get('videoUrl'))
                if not format_url:
                    continue
                format_id = media.get('quality')
                formats.append({
                    'url': format_url,
                    'format_id': format_id,
                    'height': int_or_none(format_id),
                })

        # Last resort: a plain HTML5 <source> tag. No default is passed to
        # the search here, unlike the optional lookups above.
        if not formats:
            video_url = self._html_search_regex(
                r'<source src="(.+?)" type="video/mp4">', webpage, 'video URL')
            formats.append({'url': video_url})
        self._sort_formats(formats)

        thumbnail = self._og_search_thumbnail(webpage)
        upload_date = unified_strdate(self._search_regex(
            r'<span[^>]+>(?:ADDED|Published on) ([^<]+)<',
            webpage, 'upload date', default=None))
        # Prefer the Open Graph duration, falling back to the
        # "videoDuration" value embedded in the page.
        duration = int_or_none(self._og_search_property(
            'video:duration', webpage, default=None) or self._search_regex(
                r'videoDuration\s*:\s*(\d+)', webpage, 'duration', default=None))
        view_count = str_to_int(self._search_regex(
            (r'<div[^>]*>Views</div>\s*<div[^>]*>\s*([\d,.]+)',
             r'<span[^>]*>VIEWS</span>\s*</td>\s*<td>\s*([\d,.]+)',
             r'<span[^>]+\bclass=["\']video_view_count[^>]*>\s*([\d,.]+)'),
            webpage, 'view count', default=None))

        # No self-labeling, but they describe themselves as
        # "Home of Videos Porno"
        age_limit = 18

        return merge_dicts(info, {
            'id': video_id,
            'ext': 'mp4',
            'thumbnail': thumbnail,
            'upload_date': upload_date,
            'duration': duration,
            'view_count': view_count,
            'age_limit': age_limit,
            'formats': formats,
        })