3 from .common
import InfoExtractor
13 class YouPornIE(InfoExtractor
):
14 _VALID_URL
= r
'https?://(?:www\.)?youporn\.com/(?:watch|embed)/(?P<id>\d+)(?:/(?P<display_id>[^/?#&]+))?'
15 _EMBED_REGEX
= [r
'<iframe[^>]+\bsrc=["\'](?P
<url
>(?
:https?
:)?
//(?
:www\
.)?youporn\
.com
/embed
/\d
+)']
17 'url
': 'http
://www
.youporn
.com
/watch
/505835/sex
-ed
-is-it
-safe
-to
-masturbate
-daily
/',
18 'md5
': '3744d24c50438cf5b6f6d59feb5055c2
',
21 'display_id
': 'sex
-ed
-is-it
-safe
-to
-masturbate
-daily
',
23 'title
': 'Sex Ed
: Is It Safe To Masturbate Daily?
',
24 'description
': 'Love
& Sex Answers
: http
://bit
.ly
/DanAndJenn
-- Is It Unhealthy To Masturbate Daily?
',
25 'thumbnail
': r're
:^https?
://.*\
.jpg$
',
27 'uploader
': 'Ask Dan And Jennifer
',
28 'upload_date
': '20101217',
29 'average_rating
': int,
35 'skip
': 'This video has been disabled
',
38 'url
': 'http
://www
.youporn
.com
/watch
/561726/big
-tits
-awesome
-brunette
-on
-amazing
-webcam
-show
/?
from=related3
&al
=2&from_id
=561726&pos
=4',
41 'display_id
': 'big
-tits
-awesome
-brunette
-on
-amazing
-webcam
-show
',
43 'title
': 'Big Tits Awesome Brunette On amazing webcam show
',
44 'description
': 'http
://sweetlivegirls
.com Big Tits Awesome Brunette On amazing webcam show
.mp4
',
45 'thumbnail
': r're
:^https?
://.*\
.jpg$
',
46 'uploader
': 'Unknown
',
47 'upload_date
': '20110418',
48 'average_rating
': int,
55 'skip_download
': True,
59 'url
': 'https
://www
.youporn
.com
/embed
/505835/sex
-ed
-is-it
-safe
-to
-masturbate
-daily
/',
60 'only_matching
': True,
62 'url
': 'http
://www
.youporn
.com
/watch
/505835',
63 'only_matching
': True,
65 'url
': 'https
://www
.youporn
.com
/watch
/13922959/femdom
-principal
/',
66 'only_matching
': True,
69 def _real_extract(self, url):
70 mobj = self._match_valid_url(url)
71 video_id = mobj.group('id')
72 display_id = mobj.group('display_id
') or video_id
74 definitions = self._download_json(
75 'https
://www
.youporn
.com
/api
/video
/media_definitions
/%s/' % video_id,
79 for definition in definitions:
80 if not isinstance(definition, dict):
82 video_url = url_or_none(definition.get('videoUrl
'))
87 'filesize
': int_or_none(definition.get('videoSize
')),
89 height = int_or_none(definition.get('quality
'))
90 # Video URL's path looks like this:
91 # /201012/17/505835/720p_1500k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
92 # /201012/17/505835/vl_240p_240k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
93 # /videos/201703/11/109285532/1080P_4000K_109285532.mp4
94 # We will benefit from it by extracting some metadata
95 mobj
= re
.search(r
'(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+', video_url
)
98 height
= int(mobj
.group('height'))
99 bitrate
= int(mobj
.group('bitrate'))
101 'format_id': '%dp-%dk' % (height
, bitrate
),
106 self
._sort
_formats
(formats
)
108 webpage
= self
._download
_webpage
(
109 'http://www.youporn.com/watch/%s' % video_id
, display_id
,
110 headers
={'Cookie': 'age_verified=1'}
)
112 title
= self
._html
_search
_regex
(
113 r
'(?s)<div[^>]+class=["\']watchVideoTitle
[^
>]+>(.+?
)</div
>',
114 webpage, 'title
', default=None) or self._og_search_title(
115 webpage, default=None) or self._html_search_meta(
116 'title
', webpage, fatal=True)
118 description = self._html_search_regex(
119 r'(?s
)<div
[^
>]+\bid
=["\']description["\'][^
>]*>(.+?
)</div
>',
120 webpage, 'description
',
121 default=None) or self._og_search_description(
122 webpage, default=None)
123 thumbnail = self._search_regex(
124 r'(?
:imageurl\s
*=|poster\s
*:)\s
*(["\'])(?P<thumbnail>.+?)\1',
125 webpage, 'thumbnail', fatal=False, group='thumbnail')
126 duration = int_or_none(self._html_search_meta(
127 'video:duration', webpage, 'duration', fatal=False))
129 uploader = self._html_search_regex(
130 r'(?s)<div[^>]+class=["\']submitByLink
["\'][^>]*>(.+?)</div>',
131 webpage, 'uploader', fatal=False)
132 upload_date = unified_strdate(self._html_search_regex(
133 (r'UPLOADED:\s*<span>([^<]+)',
134 r'Date\s+[Aa]dded:\s*<span>([^<]+)',
135 r'''(?s)<div[^>]+class=["']videoInfo(?:Date|Time)\b[^>]*>(.+?)</div>''',
136 r'(?s
)<label
\b[^
>]*>Uploaded
[^
<]*</label
>\s
*<span
\b[^
>]*>(.+?
)</span
>'),
137 webpage, 'upload date
', fatal=False))
139 age_limit = self._rta_search(webpage)
142 views = self._search_regex(
143 r'(<div
[^
>]+\bclass
=["\']js_videoInfoViews["\']>)', webpage,
144 'views
', default=None)
146 view_count = str_to_int(extract_attributes(views).get('data
-value
'))
147 comment_count = str_to_int(self._search_regex(
148 r'>All
[Cc
]omments? \
(([\d
,.]+)\
)',
149 webpage, 'comment count
', default=None))
151 def extract_tag_box(regex, title):
152 tag_box = self._search_regex(regex, webpage, title, default=None)
155 return re.findall(r'<a
[^
>]+href
=[^
>]+>([^
<]+)', tag_box)
157 categories = extract_tag_box(
158 r'(?s
)Categories
:.*?
</[^
>]+>(.+?
)</div
>', 'categories
')
159 tags = extract_tag_box(
160 r'(?s
)Tags
:.*?
</div
>\s
*<div
[^
>]+class=["\']tagBoxContent["\'][^
>]*>(.+?
)</div
>',
165 'display_id
': display_id,
167 'description
': description,
168 'thumbnail
': thumbnail,
169 'duration
': duration,
170 'uploader
': uploader,
171 'upload_date
': upload_date,
172 'view_count
': view_count,
173 'comment_count
': comment_count,
174 'categories
': categories,
176 'age_limit
': age_limit,