jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/extremetube.py
[vimeo:watchlater] Fix extraction (Closes #3886)
[yt-dlp.git] / youtube_dl / extractor / extremetube.py
CommitLineData
3c50b99a
PH
1from __future__ import unicode_literals
2
32a35e44 3import re
4
5from .common import InfoExtractor
1cc79574 6from ..compat import (
c2ebea65 7 compat_parse_qs,
32a35e44 8 compat_urllib_request,
1cc79574
PH
9)
10from ..utils import (
c2ebea65 11 qualities,
2f9e8776 12 str_to_int,
32a35e44 13)
14
3c50b99a 15
class ExtremeTubeIE(InfoExtractor):
    """Information extractor for video pages on extremetube.com."""

    # The 'url' group captures the scheme-less page address; the 'id' group
    # captures the trailing run of digits used as the video id.
    _VALID_URL = r'https?://(?:www\.)?(?P<url>extremetube\.com/.*?video/.+?(?P<id>[0-9]+))(?:[/?&]|$)'
    _TESTS = [{
        'url': 'http://www.extremetube.com/video/music-video-14-british-euro-brit-european-cumshots-swallow-652431',
        'md5': '344d0c6d50e2f16b06e49ca011d8ac69',
        'info_dict': {
            'id': '652431',
            'ext': 'mp4',
            'title': 'Music Video 14 british euro brit european cumshots swallow',
            'uploader': 'unknown',
            'view_count': int,
            'age_limit': 18,
        }
    }, {
        'url': 'http://www.extremetube.com/gay/video/abcde-1234',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the video page at *url* and build its info dict.

        Returns a dict with the keys 'id', 'title', 'formats', 'uploader',
        'view_count' and 'age_limit'.  The uploader and view-count lookups
        are made with fatal=False; the title and flashvars lookups are not.
        """
        url_match = re.match(self._VALID_URL, url)
        video_id = url_match.group('id')
        # Always request the page through the http://www. form of the address.
        page_url = 'http://www.' + url_match.group('url')

        # The request carries an age_verified cookie (presumably so the site
        # serves the page without an age prompt -- not confirmed here).
        request = compat_urllib_request.Request(page_url)
        request.add_header('Cookie', 'age_verified=1')
        webpage = self._download_webpage(request, video_id)

        video_title = self._html_search_regex(
            r'<h1 [^>]*?title="([^"]+)"[^>]*>', webpage, 'title')
        uploader = self._html_search_regex(
            r'Uploaded by:\s*</strong>\s*(.+?)\s*</div>',
            webpage, 'uploader', fatal=False)
        raw_view_count = self._html_search_regex(
            r'Views:\s*</strong>\s*<span>([\d,\.]+)</span>',
            webpage, 'view count', fatal=False)
        view_count = str_to_int(raw_view_count)

        # The player's flashvars <param> value is parsed as a query string.
        raw_flash_vars = self._search_regex(
            r'<param[^>]+?name="flashvars"[^>]+?value="([^"]+)"', webpage, 'flash vars')
        flash_vars = compat_parse_qs(raw_flash_vars)

        quality = qualities(['180p', '240p', '360p', '480p', '720p', '1080p'])
        formats = []
        for key, values in flash_vars.items():
            # Only keys of the form quality_<N>p describe a media URL.
            quality_match = re.match(r'quality_(?P<quality>[0-9]+p)$', key)
            if quality_match is None:
                continue
            label = quality_match.group('quality')
            formats.append({
                'format_id': label,
                'quality': quality(label),
                'url': values[0],
            })

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': video_title,
            'formats': formats,
            'uploader': uploader,
            'view_count': view_count,
            'age_limit': 18,
        }