]>
Commit | Line | Data |
---|---|---|
219b8130 | 1 | import re |
b27c856f | 2 | import json |
a3c736de | 3 | import itertools |
219b8130 PH |
4 | |
5 | from .common import InfoExtractor | |
d82134c3 | 6 | from .subtitles import SubtitlesInfoExtractor |
953e32b2 | 7 | |
219b8130 PH |
8 | from ..utils import ( |
9 | compat_urllib_request, | |
953e32b2 IM |
10 | compat_str, |
11 | get_element_by_attribute, | |
12 | get_element_by_id, | |
c3fef636 | 13 | orderedSet, |
f53c966a | 14 | str_to_int, |
553f6e46 | 15 | int_or_none, |
219b8130 PH |
16 | |
17 | ExtractorError, | |
219b8130 PH |
18 | ) |
19 | ||
70922df8 JMF |
class DailymotionBaseInfoExtractor(InfoExtractor):
    """Common base class for the Dailymotion extractors in this module."""

    @staticmethod
    def _build_request(url):
        """Build a request for *url* with the family filter disabled.

        Both cookie spellings ('family_filter' and 'ff') are sent in a single
        Cookie header.  Calling add_header('Cookie', ...) twice would not
        send two cookies: assuming compat_urllib_request is the standard
        urllib request module, a Request keeps one value per header name,
        so the second call silently replaces the first.
        """
        request = compat_urllib_request.Request(url)
        request.add_header('Cookie', 'family_filter=off; ff=off')
        return request
953e32b2 | 28 | |
class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
    """Information Extractor for Dailymotion"""

    _VALID_URL = r'(?i)(?:https?://)?(?:(www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(embed|#)/)?video/(?P<id>[^/?_]+)'
    IE_NAME = u'dailymotion'

    # (key in the embed page's "info" object, format_id) pairs.  Formats are
    # emitted in this order (presumably lowest to highest quality).
    _FORMATS = [
        (u'stream_h264_ld_url', u'ld'),
        (u'stream_h264_url', u'standard'),
        (u'stream_h264_hq_url', u'hq'),
        (u'stream_h264_hd_url', u'hd'),
        # Was misspelled 'hd180'; the key is the 1080p stream.
        (u'stream_h264_hd1080_url', u'hd1080'),
    ]

    _TESTS = [
        {
            u'url': u'http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech',
            u'file': u'x33vw9.mp4',
            u'md5': u'392c4b85a60a90dc4792da41ce3144eb',
            u'info_dict': {
                u"uploader": u"Amphora Alex and Van .",
                u"title": u"Tutoriel de Youtubeur\"DL DES VIDEO DE YOUTUBE\""
            }
        },
        # Vevo video
        {
            u'url': u'http://www.dailymotion.com/video/x149uew_katy-perry-roar-official_musi',
            u'file': u'USUV71301934.mp4',
            u'info_dict': {
                u'title': u'Roar (Official)',
                u'uploader': u'Katy Perry',
                u'upload_date': u'20130905',
            },
            u'params': {
                u'skip_download': True,
            },
            u'skip': u'VEVO is only available in some countries',
        },
        # age-restricted video
        {
            u'url': u'http://www.dailymotion.com/video/xyh2zz_leanna-decker-cyber-girl-of-the-year-desires-nude-playboy-plus_redband',
            u'file': u'xyh2zz.mp4',
            u'md5': u'0d667a7b9cebecc3c89ee93099c4159d',
            u'info_dict': {
                u'title': 'Leanna Decker - Cyber Girl Of The Year Desires Nude [Playboy Plus]',
                u'uploader': 'HotWaves1012',
                u'age_limit': 18,
            }
        }
    ]

    def _formats_from_info(self, info):
        """Return the list of format dicts for every _FORMATS key set in *info*.

        Width and height are parsed from an 'H264-<w>x<h>' fragment of the
        stream URL when present; otherwise both are None.
        """
        formats = []
        for (key, format_id) in self._FORMATS:
            video_url = info.get(key)
            if video_url is None:
                continue
            m_size = re.search(r'H264-(\d+)x(\d+)', video_url)
            if m_size is not None:
                width, height = map(int_or_none, (m_size.group(1), m_size.group(2)))
            else:
                width, height = None, None
            formats.append({
                'url': video_url,
                'ext': 'mp4',
                'format_id': format_id,
                'width': width,
                'height': height,
            })
        return formats

    def _real_extract(self, url):
        """Extract the info dict for a single Dailymotion video URL.

        Returns a url_result pointing at the Vevo extractor when the page
        merely embeds a Vevo video, None when only the subtitle listing was
        requested, and the video info dict otherwise.

        Raises ExtractorError when the embed page reports an error or when
        no known stream URL is present.
        """
        # Extract the video id from the URL
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        # Always fetch the plain watch page, whatever URL variant was given
        url = 'http://www.dailymotion.com/video/%s' % video_id

        # Retrieve video webpage to extract further information
        request = self._build_request(url)
        webpage = self._download_webpage(request, video_id)

        # Extract URL, uploader and title from webpage
        self.report_extraction(video_id)

        # It may just embed a vevo video:
        m_vevo = re.search(
            r'<link rel="video_src" href="[^"]*?vevo\.com[^"]*?videoId=(?P<id>[\w]*)',
            webpage)
        if m_vevo is not None:
            vevo_id = m_vevo.group('id')
            self.to_screen(u'Vevo video detected: %s' % vevo_id)
            return self.url_result(u'vevo:%s' % vevo_id, ie='Vevo')

        age_limit = self._rta_search(webpage)

        # The page shows the date as DD-MM-YYYY; reorder it to YYYYMMDD
        video_upload_date = None
        mobj = re.search(r'<div class="[^"]*uploaded_cont[^"]*" title="[^"]*">([0-9]{2})-([0-9]{2})-([0-9]{4})</div>', webpage)
        if mobj is not None:
            video_upload_date = mobj.group(3) + mobj.group(2) + mobj.group(1)

        # The stream URLs live in a JSON object on the embed page
        embed_url = 'http://www.dailymotion.com/embed/video/%s' % video_id
        embed_page = self._download_webpage(embed_url, video_id,
                                            u'Downloading embed page')
        info = self._search_regex(r'var info = ({.*?}),$', embed_page,
                                  'video info', flags=re.MULTILINE)
        info = json.loads(info)
        if info.get('error') is not None:
            msg = 'Couldn\'t get video, Dailymotion says: %s' % info['error']['title']
            raise ExtractorError(msg, expected=True)

        formats = self._formats_from_info(info)
        if not formats:
            raise ExtractorError(u'Unable to extract video URL')

        # subtitles
        video_subtitles = self.extract_subtitles(video_id, webpage)
        if self._downloader.params.get('listsubtitles', False):
            self._list_available_subtitles(video_id, webpage)
            return

        view_count = self._search_regex(
            r'video_views_count[^>]+>\s+([\d\.,]+)', webpage, u'view count', fatal=False)
        if view_count is not None:
            view_count = str_to_int(view_count)

        return {
            'id': video_id,
            'formats': formats,
            # Optional metadata: a missing key must not abort an extraction
            # that already found playable formats.
            'uploader': info.get('owner_screenname'),
            'upload_date': video_upload_date,
            'title': self._og_search_title(webpage),
            'subtitles': video_subtitles,
            'thumbnail': info.get('thumbnail_url'),
            'age_limit': age_limit,
            'view_count': view_count,
        }

    def _get_available_subtitles(self, video_id, webpage):
        """Return a dict mapping subtitle language to subtitle URL.

        The list is fetched from the Dailymotion API; *webpage* is not used.
        An empty dict is returned, after a warning, when the download fails
        or the video has no subtitles.
        """
        try:
            sub_list = self._download_webpage(
                'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id,
                video_id, note=False)
        except ExtractorError as err:
            self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
            return {}
        info = json.loads(sub_list)
        if (info['total'] > 0):
            sub_lang_list = dict((sub['language'], sub['url']) for sub in info['list'])
            return sub_lang_list
        self._downloader.report_warning(u'video doesn\'t have subtitles')
        return {}
178 | ||
a3c736de | 179 | |
class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
    """Extractor for Dailymotion playlist pages."""

    IE_NAME = u'dailymotion:playlist'
    _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>.+?)/'
    # Matched against each listing page; when absent, that page was the last.
    _MORE_PAGES_INDICATOR = r'(?s)<div class="pages[^"]*">.*?<a\s+class="[^"]*?icon-arrow_right[^"]*?"'
    # Filled with (list id, page number)
    _PAGE_TEMPLATE = 'https://www.dailymotion.com/playlist/%s/%s'

    def _extract_entries(self, id):
        """Walk the paginated listing for *id* and return url_result entries.

        Pages are requested from 1 upwards until one no longer matches
        _MORE_PAGES_INDICATOR.  Video ids are de-duplicated with orderedSet
        before being turned into entries for the 'Dailymotion' extractor.
        """
        collected_ids = []
        page_number = 1
        while True:
            page_request = self._build_request(self._PAGE_TEMPLATE % (id, page_number))
            page_html = self._download_webpage(
                page_request, id, u'Downloading page %s' % page_number)

            collected_ids += re.findall(r'data-id="(.+?)"', page_html)

            if not re.search(self._MORE_PAGES_INDICATOR, page_html):
                break
            page_number += 1

        entries = []
        for video_id in orderedSet(collected_ids):
            entries.append(
                self.url_result('http://www.dailymotion.com/video/%s' % video_id, 'Dailymotion'))
        return entries

    def _real_extract(self, url):
        """Return a playlist result for the playlist at *url*."""
        playlist_id = re.match(self._VALID_URL, url).group('id')
        page_html = self._download_webpage(url, playlist_id)

        return {
            '_type': 'playlist',
            'id': playlist_id,
            'title': get_element_by_id(u'playlist_name', page_html),
            'entries': self._extract_entries(playlist_id),
        }
39baacc4 JMF |
210 | |
211 | ||
class DailymotionUserIE(DailymotionPlaylistIE):
    """Extractor for the videos listed on a Dailymotion user page."""

    IE_NAME = u'dailymotion:user'
    _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/user/(?P<user>[^/]+)'
    # Filled with (user, page number) by the inherited _extract_entries
    _PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s'

    def _real_extract(self, url):
        """Return a playlist result holding the user's videos.

        The playlist title is the text of the user's label link on the
        page at *url*; the id is the user name taken from the URL.
        """
        user = re.match(self._VALID_URL, url).group('user')
        profile_page = self._download_webpage(url, user)

        label_pattern = r'<a class="label" href="/%s".*?>(.*?)</' % re.escape(user)
        full_user = self._html_search_regex(
            label_pattern, profile_page, u'user', flags=re.DOTALL)

        return {
            '_type': 'playlist',
            'id': user,
            'title': full_user,
            'entries': self._extract_entries(user),
        }