]>
Commit | Line | Data |
---|---|---|
f24e9833 PH |
1 | from __future__ import unicode_literals |
2 | ||
3 | ||
0143dc02 | 4 | import json |
0143dc02 PH |
5 | import re |
6 | import sys | |
7 | ||
8 | from .common import InfoExtractor | |
1cc79574 | 9 | from ..compat import ( |
0143dc02 PH |
10 | compat_urllib_parse_urlparse, |
11 | compat_urllib_request, | |
1cc79574 PH |
12 | ) |
13 | from ..utils import ( | |
0143dc02 PH |
14 | ExtractorError, |
15 | unescapeHTML, | |
16 | unified_strdate, | |
17 | ) | |
97b3656c | 18 | from ..aes import ( |
19 | aes_decrypt_text | |
20 | ) | |
0143dc02 | 21 | |
bfe9de85 | 22 | |
class YouPornIE(InfoExtractor):
    """Extractor for youporn.com ``/watch/<videoid>/<title>`` pages.

    Metadata is read from a JSON blob embedded in the watch page. Media URLs
    are gathered from the page's ``downloadList`` block plus any
    ``encryptedQualityNNNURL`` script variables found in the page.
    """

    _VALID_URL = r'^(?P<proto>https?://)(?:www\.)?(?P<url>youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+))'
    _TEST = {
        'url': 'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
        'info_dict': {
            'id': '505835',
            'ext': 'mp4',
            'upload_date': '20101221',
            'description': 'Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?',
            'uploader': 'Ask Dan And Jennifer',
            'title': 'Sex Ed: Is It Safe To Masturbate Daily?',
            'age_limit': 18,
        }
    }

    def _real_extract(self, url):
        """Download the watch page for *url* and build its info dict.

        Returns a dict with the keys ``id``, ``uploader``, ``upload_date``,
        ``title``, ``thumbnail``, ``description``, ``age_limit`` and
        ``formats``.

        Raises ExtractorError when the embedded JSON cannot be parsed, when a
        required JSON field is missing, or when no format could be collected.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('videoid')
        # Rebuild the URL so that it always carries the "www." host prefix,
        # whether or not the caller supplied it.
        url = mobj.group('proto') + 'www.' + mobj.group('url')

        req = compat_urllib_request.Request(url)
        # NOTE(review): presumably this cookie skips the site's age gate --
        # confirm against the live site.
        req.add_header('Cookie', 'age_verified=1')
        webpage = self._download_webpage(req, video_id)
        age_limit = self._rta_search(webpage)

        # Get JSON parameters
        json_params = self._search_regex(
            [r'videoJa?son\s*=\s*({.+})',
             r'var\s+currentVideo\s*=\s*new\s+Video\((.+?)\)[,;]'],
            webpage, 'JSON parameters')
        try:
            params = json.loads(json_params)
        except ValueError:
            raise ExtractorError('Invalid JSON')

        self.report_extraction(video_id)
        try:
            video_title = params['title']
            upload_date = unified_strdate(params['release_date_f'])
            video_description = params['description']
            video_uploader = params['submitted_by']
            thumbnail = params['thumbnails'][0]['image']
        except (KeyError, IndexError) as err:
            # Format the exception instead of concatenating it to a string:
            # str + exception raises TypeError and would hide the real error.
            # IndexError covers an empty 'thumbnails' list.
            raise ExtractorError('Missing JSON parameter: %s' % err)

        # Get all of the links from the page
        DOWNLOAD_LIST_RE = r'(?s)<ul class="downloadList">(?P<download_list>.*?)</ul>'
        download_list_html = self._search_regex(
            DOWNLOAD_LIST_RE, webpage, 'download list').strip()
        LINK_RE = r'<a href="([^"]+)">'
        links = re.findall(LINK_RE, download_list_html)

        # Get all encrypted links
        encrypted_links = re.findall(
            r'var encryptedQuality[0-9]{3}URL = \'([a-zA-Z0-9+/]+={0,2})\';',
            webpage)
        for encrypted_link in encrypted_links:
            # NOTE(review): the video title is passed as the second argument,
            # presumably as the decryption password with a 32-byte key size --
            # confirm against aes_decrypt_text.
            link = aes_decrypt_text(encrypted_link, video_title, 32).decode('utf-8')
            links.append(link)

        formats = []
        for link in links:
            # A link looks like this:
            # http://cdn1.download.youporn.phncdn.com/201210/31/8004515/480p_370k_8004515/YouPorn%20-%20Nubile%20Films%20The%20Pillow%20Fight.mp4?nvb=20121113051249&nva=20121114051249&ir=1200&sr=1200&hash=014b882080310e95fb6a0
            # A path looks like this:
            # /201210/31/8004515/480p_370k_8004515/YouPorn%20-%20Nubile%20Films%20The%20Pillow%20Fight.mp4
            video_url = unescapeHTML(link)
            # Parse once; both the path and the host are needed below.
            parsed_url = compat_urllib_parse_urlparse(video_url)
            # Fifth path segment is "<height>p_<bitrate>k_<id>"; keep the
            # first two parts.
            format_parts = parsed_url.path.split('/')[4].split('_')[:2]

            # First label of the host name (e.g. "cdn1").
            dn = parsed_url.netloc.partition('.')[0]

            resolution = format_parts[0]
            height = int(resolution[:-len('p')])
            bitrate = int(format_parts[1][:-len('k')])
            format_id = '-'.join(format_parts) + '-' + dn

            formats.append({
                'url': video_url,
                'format': format_id,
                'format_id': format_id,
                'height': height,
                'tbr': bitrate,
                'resolution': resolution,
            })

        # Check for emptiness before sorting so that this message is the one
        # reported, however _sort_formats treats an empty list.
        if not formats:
            raise ExtractorError('ERROR: no known formats available for video')

        self._sort_formats(formats)

        return {
            'id': video_id,
            'uploader': video_uploader,
            'upload_date': upload_date,
            'title': video_title,
            'thumbnail': thumbnail,
            'description': video_description,
            'age_limit': age_limit,
            'formats': formats,
        }