]>
Commit | Line | Data |
---|---|---|
f24e9833 PH |
1 | from __future__ import unicode_literals |
2 | ||
3 | ||
0143dc02 | 4 | import json |
0143dc02 PH |
5 | import re |
6 | import sys | |
7 | ||
8 | from .common import InfoExtractor | |
9 | from ..utils import ( | |
10 | compat_urllib_parse_urlparse, | |
11 | compat_urllib_request, | |
12 | ||
13 | ExtractorError, | |
14 | unescapeHTML, | |
15 | unified_strdate, | |
16 | ) | |
97b3656c | 17 | from ..aes import ( |
18 | aes_decrypt_text | |
19 | ) | |
0143dc02 | 20 | |
bfe9de85 | 21 | |
class YouPornIE(InfoExtractor):
    """Extractor for youporn.com ``/watch/<id>/<title>`` pages."""

    # Captures the protocol, the canonical host-less URL, the numeric video id
    # and the title slug; _real_extract re-assembles the URL from these groups.
    _VALID_URL = r'^(?P<proto>https?://)(?:www\.)?(?P<url>youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+))'
    _TEST = {
        'url': 'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
        'md5': '71ec5fcfddacf80f495efa8b6a8d9a89',
        'info_dict': {
            'id': '505835',
            'ext': 'mp4',
            'upload_date': '20101221',
            'description': 'Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?',
            'uploader': 'Ask Dan And Jennifer',
            'title': 'Sex Ed: Is It Safe To Masturbate Daily?',
            'age_limit': 18,
        }
    }

    def _real_extract(self, url):
        """Download the watch page for *url* and build the video info dict.

        Returns a dict with the keys ``id``, ``uploader``, ``upload_date``,
        ``title``, ``thumbnail``, ``description``, ``age_limit`` and
        ``formats``.

        Raises ExtractorError when the embedded JSON cannot be parsed, when an
        expected JSON field is missing, or when no format could be collected.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('videoid')
        # Normalise the URL so the request always goes to the www. host.
        url = mobj.group('proto') + 'www.' + mobj.group('url')

        req = compat_urllib_request.Request(url)
        # NOTE(review): presumably this cookie skips the site's age gate.
        req.add_header('Cookie', 'age_verified=1')
        webpage = self._download_webpage(req, video_id)
        age_limit = self._rta_search(webpage)

        # Get JSON parameters
        json_params = self._search_regex(
            r'var currentVideo = new Video\((.*)\);', webpage, 'JSON parameters')
        try:
            params = json.loads(json_params)
        except ValueError:
            # json.loads signals malformed input with ValueError; a bare
            # except would also swallow KeyboardInterrupt/SystemExit.
            raise ExtractorError('Invalid JSON')

        self.report_extraction(video_id)
        try:
            video_title = params['title']
            upload_date = unified_strdate(params['release_date_f'])
            video_description = params['description']
            video_uploader = params['submitted_by']
            thumbnail = params['thumbnails'][0]['image']
        except (KeyError, IndexError) as err:
            # Format the exception instead of concatenating it to a string,
            # which would itself fail with TypeError. IndexError covers an
            # empty 'thumbnails' list.
            raise ExtractorError('Missing JSON parameter: %s' % err)

        # Get all of the links from the page
        DOWNLOAD_LIST_RE = r'(?s)<ul class="downloadList">(?P<download_list>.*?)</ul>'
        download_list_html = self._search_regex(
            DOWNLOAD_LIST_RE, webpage, 'download list').strip()
        LINK_RE = r'<a href="([^"]+)">'
        links = re.findall(LINK_RE, download_list_html)

        # Get all encrypted links; they are decrypted with the video title
        # passed as the password argument.
        encrypted_links = re.findall(
            r'var encryptedQuality[0-9]{3}URL = \'([a-zA-Z0-9+/]+={0,2})\';', webpage)
        for encrypted_link in encrypted_links:
            link = aes_decrypt_text(encrypted_link, video_title, 32).decode('utf-8')
            links.append(link)

        formats = []
        for link in links:
            # A link looks like this:
            # http://cdn1.download.youporn.phncdn.com/201210/31/8004515/480p_370k_8004515/YouPorn%20-%20Nubile%20Films%20The%20Pillow%20Fight.mp4?nvb=20121113051249&nva=20121114051249&ir=1200&sr=1200&hash=014b882080310e95fb6a0
            # A path looks like this:
            # /201210/31/8004515/480p_370k_8004515/YouPorn%20-%20Nubile%20Films%20The%20Pillow%20Fight.mp4
            video_url = unescapeHTML(link)
            # Parse once and reuse for both the path and the host name.
            parsed_url = compat_urllib_parse_urlparse(video_url)
            # Fifth path component is '<height>p_<bitrate>k_<id>'; keep the
            # first two underscore-separated parts.
            format_parts = parsed_url.path.split('/')[4].split('_')[:2]

            # First label of the host name, e.g. 'cdn1'.
            dn = parsed_url.netloc.partition('.')[0]

            resolution = format_parts[0]
            height = int(resolution[:-len('p')])
            bitrate = int(format_parts[1][:-len('k')])
            format_id = '-'.join(format_parts) + '-' + dn

            formats.append({
                'url': video_url,
                'format': format_id,
                'format_id': format_id,
                'height': height,
                'tbr': bitrate,
                'resolution': resolution,
            })

        # Checked before sorting so this message is raised no matter how
        # _sort_formats treats an empty list.
        if not formats:
            raise ExtractorError('ERROR: no known formats available for video')

        self._sort_formats(formats)

        return {
            'id': video_id,
            'uploader': video_uploader,
            'upload_date': upload_date,
            'title': video_title,
            'thumbnail': thumbnail,
            'description': video_description,
            'age_limit': age_limit,
            'formats': formats,
        }