3 from .common
import InfoExtractor
14 class YouPornIE(InfoExtractor
):
15 _VALID_URL
= r
'https?://(?:www\.)?youporn\.com/(?:watch|embed)/(?P<id>\d+)(?:/(?P<display_id>[^/?#&]+))?'
16 _EMBED_REGEX
= [r
'<iframe[^>]+\bsrc=["\'](?P
<url
>(?
:https?
:)?
//(?
:www\
.)?youporn\
.com
/embed
/\d
+)']
18 'url
': 'http
://www
.youporn
.com
/watch
/505835/sex
-ed
-is-it
-safe
-to
-masturbate
-daily
/',
19 'md5
': '3744d24c50438cf5b6f6d59feb5055c2
',
22 'display_id
': 'sex
-ed
-is-it
-safe
-to
-masturbate
-daily
',
24 'title
': 'Sex Ed
: Is It Safe To Masturbate Daily?
',
25 'description
': 'Love
& Sex Answers
: http
://bit
.ly
/DanAndJenn
-- Is It Unhealthy To Masturbate Daily?
',
26 'thumbnail
': r're
:^https?
://.*\
.jpg$
',
28 'uploader
': 'Ask Dan And Jennifer
',
29 'upload_date
': '20101217',
30 'average_rating
': int,
36 'skip
': 'This video has been disabled
',
39 'url
': 'http
://www
.youporn
.com
/watch
/561726/big
-tits
-awesome
-brunette
-on
-amazing
-webcam
-show
/?
from=related3
&al
=2&from_id
=561726&pos
=4',
42 'display_id
': 'big
-tits
-awesome
-brunette
-on
-amazing
-webcam
-show
',
44 'title
': 'Big Tits Awesome Brunette On amazing webcam show
',
45 'description
': 'http
://sweetlivegirls
.com Big Tits Awesome Brunette On amazing webcam show
.mp4
',
46 'thumbnail
': r're
:^https?
://.*\
.jpg$
',
47 'uploader
': 'Unknown
',
48 'upload_date
': '20110418',
49 'average_rating
': int,
56 'skip_download
': True,
60 'url
': 'https
://www
.youporn
.com
/embed
/505835/sex
-ed
-is-it
-safe
-to
-masturbate
-daily
/',
61 'only_matching
': True,
63 'url
': 'http
://www
.youporn
.com
/watch
/505835',
64 'only_matching
': True,
66 'url
': 'https
://www
.youporn
.com
/watch
/13922959/femdom
-principal
/',
67 'only_matching
': True,
69 'url
': 'https
://www
.youporn
.com
/watch
/16290308/tinderspecial
-trailer1
/',
74 'description
': 'md5
:00ea70f642f431c379763c17c2f396bc
',
75 'display_id
': 'tinderspecial
-trailer1
',
78 'upload_date
': '20201123',
79 'uploader
': 'Ersties
',
81 'thumbnail
': 'https
://fi1
.ypncdn
.com
/202011/23/16290308/original
/8/tinderspecial
-trailer1
-8(m
=eaAaaEPbaaaa
).jpg
',
82 'timestamp
': 1606089600,
83 'title
': 'Tinder In Real Life
',
88 def _real_extract(self, url):
89 mobj = self._match_valid_url(url)
90 video_id = mobj.group('id')
91 display_id = mobj.group('display_id
') or video_id
93 definitions = self._download_json(
94 'https
://www
.youporn
.com
/api
/video
/media_definitions
/%s/' % video_id,
98 for definition in definitions:
99 if not isinstance(definition, dict):
101 video_url = url_or_none(definition.get('videoUrl
'))
106 'filesize
': int_or_none(definition.get('videoSize
')),
108 height = int_or_none(definition.get('quality
'))
# The video URL's path looks like one of these:
# /201012/17/505835/720p_1500k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
# /201012/17/505835/vl_240p_240k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
# /videos/201703/11/109285532/1080P_4000K_109285532.mp4
# The height and bitrate embedded in the path are extracted below as extra metadata.
114 mobj
= re
.search(r
'(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+', video_url
)
117 height
= int(mobj
.group('height'))
118 bitrate
= int(mobj
.group('bitrate'))
120 'format_id': '%dp-%dk' % (height
, bitrate
),
126 webpage
= self
._download
_webpage
(
127 'http://www.youporn.com/watch/%s' % video_id
, display_id
,
128 headers
={'Cookie': 'age_verified=1'}
)
130 title
= self
._html
_search
_regex
(
131 r
'(?s)<div[^>]+class=["\']watchVideoTitle
[^
>]+>(.+?
)</div
>',
132 webpage, 'title
', default=None) or self._og_search_title(
133 webpage, default=None) or self._html_search_meta(
134 'title
', webpage, fatal=True)
136 description = self._html_search_regex(
137 r'(?s
)<div
[^
>]+\bid
=["\']description["\'][^
>]*>(.+?
)</div
>',
138 webpage, 'description
',
139 default=None) or self._og_search_description(
140 webpage, default=None)
141 thumbnail = self._search_regex(
142 r'(?
:imageurl\s
*=|poster\s
*:)\s
*(["\'])(?P<thumbnail>.+?)\1',
143 webpage, 'thumbnail', fatal=False, group='thumbnail')
144 duration = int_or_none(self._html_search_meta(
145 'video:duration', webpage, 'duration', fatal=False))
147 uploader = self._html_search_regex(
148 r'(?s)<div[^>]+class=["\']submitByLink
["\'][^>]*>(.+?)</div>',
149 webpage, 'uploader', fatal=False)
150 upload_date = unified_strdate(self._html_search_regex(
151 (r'UPLOADED:\s*<span>([^<]+)',
152 r'Date\s+[Aa]dded:\s*<span>([^<]+)',
153 r'''(?s)<div[^>]+class=["']videoInfo(?:Date|Time)\b[^>]*>(.+?)</div>''',
154 r'(?s
)<label
\b[^
>]*>Uploaded
[^
<]*</label
>\s
*<span
\b[^
>]*>(.+?
)</span
>'),
155 webpage, 'upload date
', fatal=False))
157 age_limit = self._rta_search(webpage)
160 views = self._search_regex(
161 r'(<div
[^
>]+\bclass
=["\']js_videoInfoViews["\']>)', webpage,
162 'views
', default=None)
164 view_count = str_to_int(extract_attributes(views).get('data
-value
'))
165 comment_count = str_to_int(self._search_regex(
166 r'>All
[Cc
]omments? \
(([\d
,.]+)\
)',
167 webpage, 'comment count
', default=None))
def extract_tag_box(regex, title):
    """Return the link texts found inside one tag box of the watch page.

    ``regex`` is searched in the enclosing ``webpage`` via
    ``self._search_regex`` and is expected to capture the HTML of the box;
    ``title`` is the field name passed along to that lookup.

    Returns a list with the text of every ``<a ... href=...>`` element in
    the captured HTML, or an empty list when the box is not found.
    """
    tag_box = self._search_regex(regex, webpage, title, default=None)
    # The lookup is made with default=None, so a page without this box
    # gives None here; re.findall() would raise TypeError on it.
    if not tag_box:
        return []
    return re.findall(r'<a[^>]+href=[^>]+>([^<]+)', tag_box)
175 categories = extract_tag_box(
176 r'(?s
)Categories
:.*?
</[^
>]+>(.+?
)</div
>', 'categories
')
177 tags = extract_tag_box(
178 r'(?s
)Tags
:.*?
</div
>\s
*<div
[^
>]+class=["\']tagBoxContent["\'][^
>]*>(.+?
)</div
>',
181 data = self._search_json_ld(webpage, video_id, expected_type='VideoObject
', fatal=False)
182 return merge_dicts(data, {
184 'display_id
': display_id,
186 'description
': description,
187 'thumbnail
': thumbnail,
188 'duration
': duration,
189 'uploader
': uploader,
190 'upload_date
': upload_date,
191 'view_count
': view_count,
192 'comment_count
': comment_count,
193 'categories
': categories,
195 'age_limit
': age_limit,