]>
Commit | Line | Data |
---|---|---|
219b8130 | 1 | import re |
b27c856f | 2 | import json |
a3c736de | 3 | import itertools |
219b8130 PH |
4 | |
5 | from .common import InfoExtractor | |
d82134c3 | 6 | from .subtitles import SubtitlesInfoExtractor |
953e32b2 | 7 | |
219b8130 PH |
8 | from ..utils import ( |
9 | compat_urllib_request, | |
953e32b2 IM |
10 | compat_str, |
11 | get_element_by_attribute, | |
12 | get_element_by_id, | |
219b8130 PH |
13 | |
14 | ExtractorError, | |
219b8130 PH |
15 | ) |
16 | ||
953e32b2 | 17 | |
class DailymotionIE(SubtitlesInfoExtractor):
    """Information extractor for single Dailymotion videos."""

    _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/(?:embed/)?video/([^/]+)'
    IE_NAME = u'dailymotion'
    _TEST = {
        u'url': u'http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech',
        u'file': u'x33vw9.mp4',
        u'md5': u'392c4b85a60a90dc4792da41ce3144eb',
        u'info_dict': {
            u"uploader": u"Amphora Alex and Van .",
            u"title": u"Tutoriel de Youtubeur\"DL DES VIDEO DE YOUTUBE\""
        }
    }

    def _real_extract(self, url):
        """Collect the metadata and stream URL for the video behind *url*.

        Downloads the video page (uploader, upload date, title) and the
        embed page (stream URLs, thumbnail).

        Returns a one-element list holding the info dictionary, or None
        when the 'listsubtitles' downloader option is set (the available
        subtitles are listed instead).

        Raises ExtractorError when none of the known stream keys carries
        a value in the embed page's info object.
        """
        # The id is whatever precedes the first '_' or '?' in the last
        # URL component captured by _VALID_URL.
        url_match = re.match(self._VALID_URL, url)
        video_id = url_match.group(1).split('_')[0].split('?')[0]

        # Fetch the video page with the family filter cookie set to off.
        page_url = 'http://www.dailymotion.com/video/%s' % video_id
        page_request = compat_urllib_request.Request(page_url)
        page_request.add_header('Cookie', 'family_filter=off')
        webpage = self._download_webpage(page_request, video_id)

        self.report_extraction(video_id)

        uploader_patterns = [
            r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a>',
            # Looking for official user
            r'<(?:span|a) .*?rel="author".*?>([^<]+?)</',
        ]
        video_uploader = self._search_regex(
            uploader_patterns, webpage, 'video uploader')

        # The page shows the date as NN-NN-NNNN (presumably DD-MM-YYYY);
        # the three parts are emitted in reverse order, without separators.
        date_match = re.search(r'<div class="[^"]*uploaded_cont[^"]*" title="[^"]*">([0-9]{2})-([0-9]{2})-([0-9]{4})</div>', webpage)
        if date_match is None:
            video_upload_date = None
        else:
            first, second, third = date_match.groups()
            video_upload_date = third + second + first

        # The embed page carries a JSON object with the stream URLs.
        embed_url = 'http://www.dailymotion.com/embed/video/%s' % video_id
        embed_page = self._download_webpage(
            embed_url, video_id, u'Downloading embed page')
        raw_info = self._search_regex(
            r'var info = ({.*?}),$', embed_page, 'video info',
            flags=re.MULTILINE)
        info = json.loads(raw_info)

        # TODO: support choosing qualities

        # Keys are tried in this order; the first one with a non-empty
        # value wins.
        stream_keys = (
            'stream_h264_hd1080_url',
            'stream_h264_hd_url',
            'stream_h264_hq_url',
            'stream_h264_url',
            'stream_h264_ld_url',
        )
        video_url = None
        for stream_key in stream_keys:
            stream_url = info.get(stream_key)
            if stream_url:
                self.to_screen(u'Using %s' % stream_key)
                video_url = stream_url
                break
        if video_url is None:
            raise ExtractorError(u'Unable to extract video URL')

        # subtitles
        video_subtitles = self.extract_subtitles(video_id)
        if self._downloader.params.get('listsubtitles', False):
            self._list_available_subtitles(video_id)
            return

        video_info = {
            'id': video_id,
            'url': video_url,
            'uploader': video_uploader,
            'upload_date': video_upload_date,
            'title': self._og_search_title(webpage),
            'ext': 'mp4',
            'subtitles': video_subtitles,
            'thumbnail': info['thumbnail_url']
        }
        return [video_info]

    def _get_available_subtitles(self, video_id):
        """Return a dict mapping subtitle language to subtitle URL.

        The list is requested from the Dailymotion API.  An empty dict is
        returned, after a warning, when the request fails with an
        ExtractorError or when the API reports no subtitles.
        """
        api_url = 'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id
        try:
            api_response = self._download_webpage(
                api_url, video_id, note=False)
        except ExtractorError as err:
            self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
            return {}

        info = json.loads(api_response)
        if not info['total'] > 0:
            self._downloader.report_warning(u'video doesn\'t have subtitles')
            return {}
        return dict(
            (entry['language'], entry['url']) for entry in info['list'])
111 | ||
a3c736de JMF |
112 | |
class DailymotionPlaylistIE(InfoExtractor):
    """Information extractor for Dailymotion playlists."""

    _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>.+?)/'
    # Matched against a playlist page; a hit means another page follows.
    _MORE_PAGES_INDICATOR = r'<div class="next">.*?<a.*?href="/playlist/.+?".*?>.*?</a>.*?</div>'

    def _real_extract(self, url):
        """Walk every page of the playlist and collect its video ids.

        Pages are downloaded one after another, starting at 1, until a
        page no longer matches _MORE_PAGES_INDICATOR.

        Returns a playlist result dictionary whose entries are URL
        results pointing at the 'Dailymotion' extractor.

        Raises ExtractorError when a page contains no element with the
        'video_list' class.
        """
        mobj = re.match(self._VALID_URL, url)
        playlist_id = mobj.group('id')
        video_ids = []

        for pagenum in itertools.count(1):
            webpage = self._download_webpage(
                'https://www.dailymotion.com/playlist/%s/%s' % (playlist_id, pagenum),
                playlist_id, u'Downloading page %s' % pagenum)

            playlist_el = get_element_by_attribute(u'class', u'video_list', webpage)
            # NOTE(review): assumes the helper yields None when nothing
            # matches; handing None to re.findall would raise TypeError,
            # so fail with a clear extractor error instead.
            if playlist_el is None:
                raise ExtractorError(
                    u'Unable to find the video list on page %s' % pagenum)
            video_ids.extend(re.findall(r'data-id="(.+?)" data-ext-id', playlist_el))

            if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
                break

        entries = [self.url_result('http://www.dailymotion.com/video/%s' % video_id, 'Dailymotion')
                   for video_id in video_ids]
        return {
            '_type': 'playlist',
            'id': playlist_id,
            # The title is read from the last page that was downloaded.
            'title': get_element_by_id(u'playlist_name', webpage),
            'entries': entries,
        }