]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/playvid.py
[extractor] Deprecate `_sort_formats`
[yt-dlp.git] / yt_dlp / extractor / playvid.py
CommitLineData
4ea3137e 1import re
ac668111 2import urllib.parse
4ea3137e
M
3
4from .common import InfoExtractor
ac668111 5from ..compat import compat_urllib_parse_unquote
6from ..utils import ExtractorError, clean_html
4ea3137e 7
4ea3137e 8
class PlayvidIE(InfoExtractor):
    # Extractor for playvid.com watch pages. The video id is taken from the
    # URL (either ``/watch/<id>`` or ``/watch?v=<id>``) and the metadata is
    # read from the ``flashvars`` attribute embedded in the page.
    _VALID_URL = r'https?://(?:www\.)?playvid\.com/watch(\?v=|/)(?P<id>.+?)(?:#|$)'
    _TESTS = [{
        'url': 'http://www.playvid.com/watch/RnmBNgtrrJu',
        'md5': 'ffa2f6b2119af359f544388d8c01eb6c',
        'info_dict': {
            'id': 'RnmBNgtrrJu',
            'ext': 'mp4',
            'title': 'md5:9256d01c6317e3f703848b5906880dc8',
            'duration': 82,
            'age_limit': 18,
        },
        'skip': 'Video removed due to ToS',
    }, {
        'url': 'http://www.playvid.com/watch/hwb0GpNkzgH',
        'md5': '39d49df503ad7b8f23a4432cbf046477',
        'info_dict': {
            'id': 'hwb0GpNkzgH',
            'ext': 'mp4',
            'title': 'Ellen Euro Cutie Blond Takes a Sexy Survey Get Facial in The Park',
            'age_limit': 18,
            'thumbnail': r're:^https?://.*\.jpg$',
        },
    }]

    def _real_extract(self, url):
        # Returns an info dict with id, formats, title, thumbnail, duration,
        # description (always None) and a fixed age_limit of 18.
        # Raises ExtractorError (expected=True) when the page contains a
        # "block-error" div; its heading text becomes the error message.
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        error_block = re.search(
            r'<div class="block-error">\s*<div class="heading">\s*<div>(?P<msg>.+?)</div>\s*</div>', webpage)
        if error_block is not None:
            raise ExtractorError(clean_html(error_block.group('msg')), expected=True)

        # most of the information is stored in the flashvars
        flashvars = self._html_search_regex(
            r'flashvars="(.+?)"', webpage, 'flashvars')

        title = None
        length = None
        thumbnail = None
        formats = []

        # The flashvars string is URL-unquoted as a whole and then split
        # into ``name=value`` pairs on '&'.
        for pair in compat_urllib_parse_unquote(flashvars).split('&'):
            entry = re.match(r'^video_vars\[(.+?)\]=(.+?)$', pair)
            if entry is None:
                # Not a video_vars[...] entry - nothing to read from it
                continue
            name, value = entry.groups()

            if name == 'title':
                # The title gets one more unquote pass ('+' becomes a space)
                title = urllib.parse.unquote_plus(value)
            elif name == 'duration':
                try:
                    length = int(value)
                except ValueError:
                    # Non-numeric duration: leave it unset
                    pass
            elif name == 'big_thumb':
                thumbnail = value
            else:
                # Names starting with "video_urls][<digits>p" describe one
                # format: the digits are the height, the value is the URL.
                url_entry = re.match(
                    r'^video_urls\]\[(?P<resolution>[0-9]+)p', name)
                if url_entry is not None:
                    formats.append({
                        'height': int(url_entry.group('resolution')),
                        'url': value,
                    })

        # Extract title - should be in the flashvars; if not, look elsewhere
        if title is None:
            title = self._html_extract_title(webpage)

        return {
            'id': video_id,
            'formats': formats,
            'title': title,
            'thumbnail': thumbnail,
            'duration': length,
            'description': None,
            'age_limit': 18
        }