]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/fivemin.py
[youtube] Relax URL expansion in description
[yt-dlp.git] / youtube_dl / extractor / fivemin.py
CommitLineData
933a5b37
JMF
1from __future__ import unicode_literals
2
6d6536ac 3import re
4
933a5b37 5from .common import InfoExtractor
5e1912cf 6from ..compat import (
26669ea3 7 compat_parse_qs,
15707c7e 8 compat_urllib_parse_urlencode,
26669ea3 9 compat_urllib_parse_urlparse,
10 compat_urlparse,
5e1912cf
PH
11)
12from ..utils import (
57b8d84c 13 ExtractorError,
26669ea3 14 parse_duration,
15 replace_extension,
933a5b37
JMF
16)
17
18
class FiveMinIE(InfoExtractor):
    """Extractor for 5min videos.

    Accepts either the internal ``5min:<id>[:<sid>]`` form or a
    ``PlayerSeed.js`` script URL whose query string carries ``playList``
    (the video id) and optionally ``sid``.
    """

    IE_NAME = '5min'
    _VALID_URL = r'(?:5min:(?P<id>\d+)(?::(?P<sid>\d+))?|https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js\?(?P<query>.*))'

    _TESTS = [
        {
            # From http://www.engadget.com/2013/11/15/ipad-mini-retina-display-review/
            'url': 'http://pshared.5min.com/Scripts/PlayerSeed.js?sid=281&width=560&height=345&playList=518013791',
            'md5': '4f7b0b79bf1a470e5004f7112385941d',
            'info_dict': {
                'id': '518013791',
                'ext': 'mp4',
                'title': 'iPad Mini with Retina Display Review',
                'duration': 177,
            },
        },
        {
            # From http://on.aol.com/video/how-to-make-a-next-level-fruit-salad-518086247
            'url': '5min:518086247',
            'md5': 'e539a9dd682c288ef5a498898009f69e',
            'info_dict': {
                'id': '518086247',
                'ext': 'mp4',
                'title': 'How to Make a Next-Level Fruit Salad',
                'duration': 184,
            },
            'skip': 'no longer available',
        },
    ]

    # Human-readable text for the 'errorMessage' codes returned by the
    # SenseHandler endpoint; unknown codes are reported verbatim.
    _ERRORS = {
        'ErrorVideoNotExist': 'We\'re sorry, but the video you are trying to watch does not exist.',
        'ErrorVideoNoLongerAvailable': 'We\'re sorry, but the video you are trying to watch is no longer available.',
        'ErrorVideoRejected': 'We\'re sorry, but the video you are trying to watch has been removed.',
        'ErrorVideoUserNotGeo': 'We\'re sorry, but the video you are trying to watch cannot be viewed from your current location.',
        'ErrorVideoLibraryRestriction': 'We\'re sorry, but the video you are trying to watch is currently unavailable for viewing at this domain.',
        'ErrorExposurePermission': 'We\'re sorry, but the video you are trying to watch is currently unavailable for viewing at this domain.',
    }

    # Frame dimensions keyed by rendition 'ID'; renditions whose ID is not
    # listed here get no width/height.
    _QUALITIES = {
        1: {
            'width': 640,
            'height': 360,
        },
        2: {
            'width': 854,
            'height': 480,
        },
        4: {
            'width': 1280,
            'height': 720,
        },
        8: {
            'width': 1920,
            'height': 1080,
        },
        16: {
            'width': 640,
            'height': 360,
        },
        32: {
            'width': 854,
            'height': 480,
        },
        64: {
            'width': 1280,
            'height': 720,
        },
        128: {
            'width': 640,
            'height': 360,
        },
    }

    def _real_extract(self, url):
        """Build the info dict for the video referenced by *url*.

        Resolves the video id and ``sid`` (from the URL itself, or from the
        embed page when the URL carries none), queries the SenseHandler
        endpoint and derives one format per rendition.

        Raises ExtractorError when a PlayerSeed URL has no ``playList``
        parameter or when the endpoint reports ``success`` as false.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        sid = mobj.group('sid')

        # PlayerSeed.js form: id and sid live in the query string instead.
        query = mobj.group('query')
        if query:
            qs = compat_parse_qs(query)
            if not qs.get('playList'):
                raise ExtractorError('Invalid URL', expected=True)
            video_id = qs['playList'][0]
            if qs.get('sid'):
                sid = qs['sid'][0]

        embed_url = 'https://embed.5min.com/playerseed/?playList=%s' % video_id
        if not sid:
            # No sid supplied by the caller: scrape it from the embed page.
            embed_page = self._download_webpage(
                embed_url, video_id, 'Downloading embed page')
            sid = self._search_regex(r'sid=(\d+)', embed_page, 'sid')

        response = self._download_json(
            'https://syn.5min.com/handlers/SenseHandler.ashx?' +
            compat_urllib_parse_urlencode({
                'func': 'GetResults',
                'playlist': video_id,
                'sid': sid,
                'isPlayerSeed': 'true',
                'url': embed_url,
            }),
            video_id)
        if not response['success']:
            error_code = response['errorMessage']
            raise ExtractorError(
                '%s said: %s' % (
                    self.IE_NAME,
                    self._ERRORS.get(error_code, error_code)),
                expected=True)
        info = response['binding'][0]

        # The base media URL is passed as the 'videoUrl' query parameter of
        # the embed URL; each rendition URL is derived from it below.
        embed_query = compat_parse_qs(
            compat_urllib_parse_urlparse(info['EmbededURL']).query)
        parsed_video_url = compat_urllib_parse_urlparse(
            embed_query['videoUrl'][0])

        formats = []
        for rendition in info['Renditions']:
            rendition_type = rendition['RenditionType']
            if rendition_type in ('aac', 'm3u8'):
                continue
            rendition_id = rendition['ID']
            # Put the rendition ID where the base path has '//' and switch
            # the file extension to the rendition type.
            rendition_path = replace_extension(
                parsed_video_url.path.replace('//', '/%s/' % rendition_id),
                rendition_type)
            rendition_url = compat_urlparse.urlunparse(
                parsed_video_url._replace(path=rendition_path))
            quality = self._QUALITIES.get(rendition_id, {})
            formats.append({
                'format_id': '%s-%d' % (rendition_type, rendition_id),
                'url': rendition_url,
                'width': quality.get('width'),
                'height': quality.get('height'),
            })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': info['Title'],
            'thumbnail': info.get('ThumbURL'),
            'duration': parse_duration(info.get('Duration')),
            'formats': formats,
        }