]>
jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/manyvids.py
3 from .common
import InfoExtractor
14 class ManyVidsIE(InfoExtractor
):
15 _VALID_URL
= r
'(?i)https?://(?:www\.)?manyvids\.com/video/(?P<id>\d+)'
18 'url': 'https://www.manyvids.com/Video/133957/everthing-about-me/',
19 'md5': '03f11bb21c52dd12a05be21a5c7dcc97',
23 'title': 'everthing about me (Preview)',
24 'uploader': 'ellyxxix',
30 'url': 'https://www.manyvids.com/Video/935718/MY-FACE-REVEAL/',
31 'md5': 'bb47bab0e0802c2a60c24ef079dfe60f',
35 'title': 'MY FACE REVEAL',
36 'description': 'md5:ec5901d41808b3746fed90face161612',
37 'uploader': 'Sarah Calanthe',
43 def _real_extract(self
, url
):
44 video_id
= self
._match
_id
(url
)
46 real_url
= 'https://www.manyvids.com/video/%s/gtm.js' % (video_id
, )
48 webpage
= self
._download
_webpage
(real_url
, video_id
)
50 # probably useless fallback
51 webpage
= self
._download
_webpage
(url
, video_id
)
53 info
= self
._search
_regex
(
54 r
'''(<div\b[^>]*\bid\s*=\s*(['"])pageMetaDetails\2[^>]*>)''',
55 webpage
, 'meta details', default
='')
56 info
= extract_attributes(info
)
58 player
= self
._search
_regex
(
59 r
'''(<div\b[^>]*\bid\s*=\s*(['"])rmpPlayerStream\2[^>]*>)''',
60 webpage
, 'player details', default
='')
61 player
= extract_attributes(player
)
63 video_urls_and_ids
= (
64 (info
.get('data-meta-video'), 'video'),
65 (player
.get('data-video-transcoded'), 'transcoded'),
66 (player
.get('data-video-filepath'), 'filepath'),
67 (self
._og
_search
_video
_url
(webpage
, secure
=False, default
=None), 'og_video'),
def txt_or_none(s, default=None):
    """Return *s* with surrounding whitespace stripped, or *default*.

    *default* is returned when *s* is not a ``str`` (e.g. a missing
    attribute that came back as ``None``) or when nothing is left after
    stripping, so callers never receive an empty string.
    """
    if not isinstance(s, str):
        return default
    # An all-whitespace value strips to '' which is falsy -> fall back.
    return s.strip() or default
73 uploader
= txt_or_none(info
.get('data-meta-author'))
77 s
= re
.sub(r
'^\s*%s\s+[|-]' % (re
.escape(uploader
), ), '', s
)
81 mung_title(info
.get('data-meta-title'))
82 or self
._html
_search
_regex
(
83 (r
'<span[^>]+class=["\']item
-title
[^
>]+>([^
<]+)',
84 r'<h2
[^
>]+class=["\']h2 m-0["\'][^
>]*>([^
<]+)'),
85 webpage, 'title
', default=None)
86 or self._html_search_meta(
87 'twitter
:title
', webpage, 'title
', fatal=True))
89 title = re.sub(r'\s
*[|
-]\s
+ManyVids\s
*$
', '', title) or title
91 if any(p in webpage for p in ('preview_videos
', '_preview
.mp4
')):
94 mv_token = self._search_regex(
95 r'data
-mvtoken
=(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
96 'mv token', default=None, group='value')
100 self._download_webpage(
101 'https://www.manyvids.com/includes/ajax_repository/you_had_me_at_hello.php',
102 video_id, note='Setting format cookies', fatal=False,
103 data=urlencode_postdata({
108 'X-Requested-With': 'XMLHttpRequest'
112 for v_url, fmt in video_urls_and_ids:
113 v_url = url_or_none(v_url)
116 if determine_ext(v_url) == 'm3u8':
117 formats.extend(self._extract_m3u8_formats(
118 v_url, video_id, 'mp4', entry_protocol='m3u8_native',
126 self._remove_duplicate_formats(formats)
129 if f.get('height') is None:
130 f['height'] = int_or_none(
131 self._search_regex(r'_(\d{2,3}[02468])_', f['url'], 'video height', default=None))
132 if '/preview/' in f['url']:
133 f['format_id'] = '_'.join(filter(None, (f.get('format_id'), 'preview')))
134 f['preference'] = -10
135 if 'transcoded' in f['format_id']:
136 f['preference'] = f.get('preference', -1) - 1
139 likes = self._search_regex(
140 r'''(<a\b[^>]*\bdata-id\s*=\s*(['"])%s\
2[^
>]*>)''' % (video_id, ),
141 webpage, 'likes', default='')
142 likes = extract_attributes(likes)
143 return int_or_none(likes.get('data-likes'))
146 return str_to_int(self._html_search_regex(
147 r'''(?s
)<span
\b[^
>]*\bclass\s
*=["']views-wrapper\b[^>]+>.+?<span\b[^>]+>\s*(\d[\d,.]*)\s*</span>''',
148 webpage, 'view count', default=None))
154 'description': txt_or_none(info.get('data-meta-description')),
155 'uploader': txt_or_none(info.get('data-meta-author')),
157 url_or_none(info.get('data-meta-image'))
158 or url_or_none(player.get('data-video-screenshot'))),
159 'view_count': get_views(),
160 'like_count': get_likes(),