]>
Commit | Line | Data |
---|---|---|
5f6a1245 | 1 | # coding: utf-8 |
22a6f150 PH |
2 | from __future__ import unicode_literals |
3 | ||
219b8130 | 4 | import re |
b27c856f | 5 | import json |
a3c736de | 6 | import itertools |
219b8130 PH |
7 | |
8 | from .common import InfoExtractor | |
953e32b2 | 9 | |
5c2266df | 10 | from ..compat import compat_str |
1cc79574 PH |
11 | from ..utils import ( |
12 | ExtractorError, | |
d3f007af | 13 | determine_ext, |
1cc79574 | 14 | int_or_none, |
d3f007af | 15 | parse_iso8601, |
5c2266df | 16 | sanitized_Request, |
f53c966a | 17 | str_to_int, |
4b10aadf | 18 | unescapeHTML, |
219b8130 PH |
19 | ) |
20 | ||
5f6a1245 | 21 | |
70922df8 JMF |
class DailymotionBaseInfoExtractor(InfoExtractor):
    """Common base for the Dailymotion extractors in this module.

    Provides download helpers whose requests carry cookies that turn the
    family filter off.
    """

    @staticmethod
    def _build_request(url):
        """Build a request with the family filter disabled"""
        req = sanitized_Request(url)
        req.add_header('Cookie', 'family_filter=off; ff=off')
        return req

    def _download_webpage_handle_no_ff(self, url, *args, **kwargs):
        """Call ``_download_webpage_handle`` with the family filter disabled.

        Extra positional and keyword arguments are forwarded unchanged.
        """
        return self._download_webpage_handle(
            self._build_request(url), *args, **kwargs)

    def _download_webpage_no_ff(self, url, *args, **kwargs):
        """Call ``_download_webpage`` with the family filter disabled.

        Extra positional and keyword arguments are forwarded unchanged.
        """
        return self._download_webpage(
            self._build_request(url), *args, **kwargs)
5f6a1245 | 37 | |
219b8130 | 38 | |
class DailymotionIE(DailymotionBaseInfoExtractor):
    """Extractor for a single Dailymotion video page.

    Extraction first looks for the "player v5" JSON embedded in the video
    page. When it is absent, it checks for a Vevo embed and finally falls
    back to the old embed page.
    """

    _VALID_URL = r'(?i)(?:https?://)?(?:(www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(embed|#)/)?video/(?P<id>[^/?_]+)'
    IE_NAME = 'dailymotion'

    # (key in the old player's ``info`` JSON, format_id) pairs.
    _FORMATS = [
        ('stream_h264_ld_url', 'ld'),
        ('stream_h264_url', 'standard'),
        ('stream_h264_hq_url', 'hq'),
        ('stream_h264_hd_url', 'hd'),
        # Was 'hd180', a typo for the 1080p stream key it is paired with.
        ('stream_h264_hd1080_url', 'hd1080'),
    ]

    _TESTS = [
        {
            'url': 'https://www.dailymotion.com/video/x2iuewm_steam-machine-models-pricing-listed-on-steam-store-ign-news_videogames',
            'md5': '2137c41a8e78554bb09225b8eb322406',
            'info_dict': {
                'id': 'x2iuewm',
                'ext': 'mp4',
                'title': 'Steam Machine Models, Pricing Listed on Steam Store - IGN News',
                'description': 'Several come bundled with the Steam Controller.',
                # Raw string: '\.' is not a valid escape in a normal literal.
                'thumbnail': r're:^https?:.*\.(?:jpg|png)$',
                'duration': 74,
                'timestamp': 1425657362,
                'upload_date': '20150306',
                'uploader': 'IGN',
                'uploader_id': 'xijv66',
                'age_limit': 0,
                'view_count': int,
                'comment_count': int,
            }
        },
        # Vevo video
        {
            'url': 'http://www.dailymotion.com/video/x149uew_katy-perry-roar-official_musi',
            'info_dict': {
                'title': 'Roar (Official)',
                'id': 'USUV71301934',
                'ext': 'mp4',
                'uploader': 'Katy Perry',
                'upload_date': '20130905',
            },
            'params': {
                'skip_download': True,
            },
            'skip': 'VEVO is only available in some countries',
        },
        # age-restricted video
        {
            'url': 'http://www.dailymotion.com/video/xyh2zz_leanna-decker-cyber-girl-of-the-year-desires-nude-playboy-plus_redband',
            'md5': '0d667a7b9cebecc3c89ee93099c4159d',
            'info_dict': {
                'id': 'xyh2zz',
                'ext': 'mp4',
                'title': 'Leanna Decker - Cyber Girl Of The Year Desires Nude [Playboy Plus]',
                'uploader': 'HotWaves1012',
                'age_limit': 18,
            }
        },
        # geo-restricted, player v5
        {
            'url': 'http://www.dailymotion.com/video/xhza0o',
            'only_matching': True,
        }
    ]

    def _real_extract(self, url):
        """Return the info dict (or a Vevo url_result) for ``url``.

        Raises ExtractorError (via ``_check_error``) when the player
        metadata carries an ``error`` entry.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage_no_ff(
            'https://www.dailymotion.com/video/%s' % video_id, video_id)

        age_limit = self._rta_search(webpage)

        description = self._og_search_description(webpage) or self._html_search_meta(
            'description', webpage, 'description')

        view_count = str_to_int(self._search_regex(
            [r'<meta[^>]+itemprop="interactionCount"[^>]+content="UserPlays:(\d+)"',
             r'video_views_count[^>]+>\s+([\d\.,]+)'],
            webpage, 'view count', fatal=False))
        comment_count = int_or_none(self._search_regex(
            r'<meta[^>]+itemprop="interactionCount"[^>]+content="UserComments:(\d+)"',
            webpage, 'comment count', fatal=False))

        player_v5 = self._search_regex(
            [r'buildPlayer\(({.+?})\);\n',  # See https://github.com/rg3/youtube-dl/issues/7826
             r'playerV5\s*=\s*dmp\.create\([^,]+?,\s*({.+?})\);',
             r'buildPlayer\(({.+?})\);'],
            webpage, 'player v5', default=None)
        if player_v5:
            player = self._parse_json(player_v5, video_id)
            metadata = player['metadata']

            self._check_error(metadata)

            formats = []
            for quality, media_list in metadata['qualities'].items():
                for media in media_list:
                    media_url = media.get('url')
                    if not media_url:
                        continue
                    type_ = media.get('type')
                    if type_ == 'application/vnd.lumberjack.manifest':
                        continue
                    ext = determine_ext(media_url)
                    if type_ == 'application/x-mpegURL' or ext == 'm3u8':
                        m3u8_formats = self._extract_m3u8_formats(
                            media_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
                        if m3u8_formats:
                            formats.extend(m3u8_formats)
                    elif type_ == 'application/f4m' or ext == 'f4m':
                        f4m_formats = self._extract_f4m_formats(
                            media_url, video_id, preference=-1, f4m_id='hds', fatal=False)
                        if f4m_formats:
                            formats.extend(f4m_formats)
                    else:
                        f = {
                            'url': media_url,
                            'format_id': quality,
                        }
                        # Direct URLs may embed the resolution, e.g. "H264-1280x720".
                        m = re.search(r'H264-(?P<width>\d+)x(?P<height>\d+)', media_url)
                        if m:
                            f.update({
                                'width': int(m.group('width')),
                                'height': int(m.group('height')),
                            })
                        formats.append(f)
            self._sort_formats(formats)

            title = metadata['title']
            duration = int_or_none(metadata.get('duration'))
            timestamp = int_or_none(metadata.get('created_time'))
            thumbnail = metadata.get('poster_url')
            # 'owner' may be present but null; dict.get's default does not
            # cover that case, so normalise to an empty dict explicitly.
            owner = metadata.get('owner')
            if not isinstance(owner, dict):
                owner = {}
            uploader = owner.get('screenname')
            uploader_id = owner.get('id')

            subtitles = {}
            # Same guard for 'subtitles': only dig into it when it is a dict.
            subtitles_info = metadata.get('subtitles')
            subtitles_data = (
                subtitles_info.get('data', {})
                if isinstance(subtitles_info, dict) else None)
            if subtitles_data and isinstance(subtitles_data, dict):
                for subtitle_lang, subtitle in subtitles_data.items():
                    subtitles[subtitle_lang] = [{
                        'ext': determine_ext(subtitle_url),
                        'url': subtitle_url,
                    } for subtitle_url in subtitle.get('urls', [])]

            return {
                'id': video_id,
                'title': title,
                'description': description,
                'thumbnail': thumbnail,
                'duration': duration,
                'timestamp': timestamp,
                'uploader': uploader,
                'uploader_id': uploader_id,
                'age_limit': age_limit,
                'view_count': view_count,
                'comment_count': comment_count,
                'formats': formats,
                'subtitles': subtitles,
            }

        # vevo embed
        vevo_id = self._search_regex(
            r'<link rel="video_src" href="[^"]*?vevo\.com[^"]*?video=(?P<id>[\w]*)',
            webpage, 'vevo embed', default=None)
        if vevo_id:
            return self.url_result('vevo:%s' % vevo_id, 'Vevo')

        # fallback old player
        embed_page = self._download_webpage_no_ff(
            'https://www.dailymotion.com/embed/video/%s' % video_id,
            video_id, 'Downloading embed page')

        timestamp = parse_iso8601(self._html_search_meta(
            'video:release_date', webpage, 'upload date'))

        info = self._parse_json(
            self._search_regex(
                r'var info = ({.*?}),$', embed_page,
                'video info', flags=re.MULTILINE),
            video_id)

        self._check_error(info)

        formats = []
        for (key, format_id) in self._FORMATS:
            video_url = info.get(key)
            if video_url is not None:
                m_size = re.search(r'H264-(\d+)x(\d+)', video_url)
                if m_size is not None:
                    width, height = map(int_or_none, (m_size.group(1), m_size.group(2)))
                else:
                    width, height = None, None
                formats.append({
                    'url': video_url,
                    'ext': 'mp4',
                    'format_id': format_id,
                    'width': width,
                    'height': height,
                })
        self._sort_formats(formats)

        # subtitles
        video_subtitles = self.extract_subtitles(video_id, webpage)

        title = self._og_search_title(webpage, default=None)
        if title is None:
            title = self._html_search_regex(
                r'(?s)<span\s+id="video_title"[^>]*>(.*?)</span>', webpage,
                'title')

        return {
            'id': video_id,
            'formats': formats,
            # Optional metadata: a missing key must not abort an otherwise
            # successful extraction.
            'uploader': info.get('owner.screenname'),
            'timestamp': timestamp,
            'title': title,
            'description': description,
            'subtitles': video_subtitles,
            'thumbnail': info.get('thumbnail_url'),
            'age_limit': age_limit,
            'view_count': view_count,
            # Already computed above; returned here as in the v5 branch.
            'comment_count': comment_count,
            'duration': int_or_none(info.get('duration')),
        }

    def _check_error(self, info):
        """Raise an expected ExtractorError if ``info`` has an 'error' entry."""
        if info.get('error') is not None:
            raise ExtractorError(
                '%s said: %s' % (self.IE_NAME, info['error']['title']), expected=True)

    def _get_subtitles(self, video_id, webpage):
        """Return a ``{language: [{'url', 'ext'}]}`` dict from the subtitles API.

        Returns an empty dict (after a warning) when the download fails or
        the API reports no subtitles. ``webpage`` is not used.
        """
        try:
            sub_list = self._download_webpage(
                'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id,
                video_id, note=False)
        except ExtractorError as err:
            self._downloader.report_warning('unable to download video subtitles: %s' % compat_str(err))
            return {}
        info = json.loads(sub_list)
        if info['total'] > 0:
            # dict() over a generator rather than a dict comprehension, to
            # stay within the syntax the rest of the file uses.
            return dict(
                (sub['language'], [{'url': sub['url'], 'ext': 'srt'}])
                for sub in info['list'])
        self._downloader.report_warning('video doesn\'t have subtitles')
        return {}
284 | ||
a3c736de | 285 | |
class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
    """Extractor for Dailymotion playlists; walks the paginated listing."""

    IE_NAME = 'dailymotion:playlist'
    # The id ends at the first '/', '?', '#' or '&'. A trailing slash is no
    # longer required, so bare ".../playlist/<id>" URLs match as well.
    _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>[^/?#&]+)'
    _MORE_PAGES_INDICATOR = r'(?s)<div class="pages[^"]*">.*?<a\s+class="[^"]*?icon-arrow_right[^"]*?"'
    _PAGE_TEMPLATE = 'https://www.dailymotion.com/playlist/%s/%s'
    _TESTS = [{
        'url': 'http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q',
        'info_dict': {
            'title': 'SPORT',
            'id': 'xv4bw_nqtv_sport',
        },
        'playlist_mincount': 20,
    }]

    def _extract_entries(self, id):
        """Yield one url_result per distinct video across the listing pages.

        Pages are built from ``_PAGE_TEMPLATE`` starting at 1. Iteration
        stops when a page resolves to an already-visited final URL or when
        the page has no "next page" indicator.
        """
        seen_video_ids = set()
        processed_urls = set()
        for pagenum in itertools.count(1):
            page_url = self._PAGE_TEMPLATE % (id, pagenum)
            webpage, urlh = self._download_webpage_handle_no_ff(
                page_url, id, 'Downloading page %s' % pagenum)
            # The final URL (after any redirect) identifies the page.
            final_url = urlh.geturl()
            if final_url in processed_urls:
                self.report_warning('Stopped at duplicated page %s, which is the same as %s' % (
                    page_url, final_url), id)
                break

            processed_urls.add(final_url)

            for video_id in re.findall(r'data-xid="(.+?)"', webpage):
                if video_id in seen_video_ids:
                    continue
                seen_video_ids.add(video_id)
                yield self.url_result('http://www.dailymotion.com/video/%s' % video_id, 'Dailymotion')

            if re.search(self._MORE_PAGES_INDICATOR, webpage) is None:
                break

    def _real_extract(self, url):
        """Return a playlist result whose entries are produced lazily."""
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)

        return {
            '_type': 'playlist',
            'id': playlist_id,
            'title': self._og_search_title(webpage),
            'entries': self._extract_entries(playlist_id),
        }
39baacc4 JMF |
333 | |
334 | ||
class DailymotionUserIE(DailymotionPlaylistIE):
    """Extractor for a Dailymotion user's videos.

    Reuses the paginated entry extraction inherited from the playlist
    extractor, with a user-specific page template.
    """

    IE_NAME = 'dailymotion:user'
    _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|#|video|playlist)/)(?:(?:old/)?user/)?(?P<user>[^/]+)'
    _PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s'
    _TESTS = [{
        'url': 'https://www.dailymotion.com/user/nqtv',
        'info_dict': {
            'id': 'nqtv',
            'title': 'Rémi Gaillard',
        },
        'playlist_mincount': 100,
    }, {
        'url': 'http://www.dailymotion.com/user/UnderProject',
        'info_dict': {
            'id': 'UnderProject',
            'title': 'UnderProject',
        },
        'playlist_mincount': 1800,
        'expected_warnings': [
            'Stopped at duplicated page',
        ],
        'skip': 'Takes too long time',
    }]

    def _real_extract(self, url):
        """Return a playlist result titled with the user's display name."""
        user = re.match(self._VALID_URL, url).group('user')

        profile_url = 'https://www.dailymotion.com/user/%s' % user
        webpage = self._download_webpage(profile_url, user)

        # The display name is the title attribute of the profile's nav link.
        name_pattern = r'<a class="nav-image" title="([^"]+)" href="/%s">' % re.escape(user)
        raw_name = self._html_search_regex(name_pattern, webpage, 'user')
        full_user = unescapeHTML(raw_name)

        result = {
            '_type': 'playlist',
            'id': user,
            'title': full_user,
            'entries': self._extract_entries(user),
        }
        return result
756f574e YCH |
374 | |
375 | ||
class DailymotionCloudIE(DailymotionBaseInfoExtractor):
    """Extractor for api.dmcloud.net embed players."""

    # Accept both http and https embeds; the original pattern was http-only.
    _VALID_URL_PREFIX = r'https?://api\.dmcloud\.net/(?:player/)?embed/'
    _VALID_URL = r'%s[^/]+/(?P<id>[^/?]+)' % _VALID_URL_PREFIX
    # Looser variant used to locate an embed URL inside a quoted attribute.
    _VALID_EMBED_URL = r'%s[^/]+/[^\'"]+' % _VALID_URL_PREFIX

    _TESTS = [{
        # From http://www.francetvinfo.fr/economie/entreprises/les-entreprises-familiales-le-secret-de-la-reussite_933271.html
        # Tested at FranceTvInfo_2
        'url': 'http://api.dmcloud.net/embed/4e7343f894a6f677b10006b4/556e03339473995ee145930c?auth=1464865870-0-jyhsm84b-ead4c701fb750cf9367bf4447167a3db&autoplay=1',
        'only_matching': True,
    }, {
        # http://www.francetvinfo.fr/societe/larguez-les-amarres-le-cobaturage-se-developpe_980101.html
        'url': 'http://api.dmcloud.net/player/embed/4e7343f894a6f677b10006b4/559545469473996d31429f06?auth=1467430263-0-90tglw2l-a3a4b64ed41efe48d7fccad85b8b8fda&autoplay=1',
        'only_matching': True,
    }]

    @classmethod
    def _extract_dmcloud_url(cls, webpage):
        """Return the first dmcloud embed URL found in ``webpage``, or None.

        Looks at ``<iframe src=...>`` first, then at the ``value`` of the
        ``<input id="dmcloudUrlEmissionSelect">`` element.
        """
        mobj = re.search(r'<iframe[^>]+src=[\'"](%s)[\'"]' % cls._VALID_EMBED_URL, webpage)
        if mobj:
            return mobj.group(1)

        mobj = re.search(
            r'<input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=[\'"](%s)[\'"]' % cls._VALID_EMBED_URL,
            webpage)
        if mobj:
            return mobj.group(1)

        # Explicit: no embed found.
        return None

    def _real_extract(self, url):
        """Return the info dict built from the embed page's ``info`` JSON."""
        video_id = self._match_id(url)

        webpage = self._download_webpage_no_ff(url, video_id)

        title = self._html_search_regex(r'<title>([^>]+)</title>', webpage, 'title')

        video_info = self._parse_json(self._search_regex(
            r'var\s+info\s*=\s*([^;]+);', webpage, 'video info'), video_id)

        # TODO: parse ios_url, which is in fact a manifest
        video_url = video_info['mp4_url']

        return {
            'id': video_id,
            'url': video_url,
            'title': title,
            'thumbnail': video_info.get('thumbnail_url'),
        }