]>
Commit | Line | Data |
---|---|---|
219b8130 | 1 | import re |
b27c856f | 2 | import json |
a3c736de | 3 | import itertools |
219b8130 PH |
4 | |
5 | from .common import InfoExtractor | |
d82134c3 | 6 | from .subtitles import SubtitlesInfoExtractor |
953e32b2 | 7 | |
219b8130 PH |
8 | from ..utils import ( |
9 | compat_urllib_request, | |
953e32b2 | 10 | compat_str, |
c3fef636 | 11 | orderedSet, |
f53c966a | 12 | str_to_int, |
553f6e46 | 13 | int_or_none, |
219b8130 | 14 | ExtractorError, |
4b10aadf | 15 | unescapeHTML, |
219b8130 PH |
16 | ) |
17 | ||
70922df8 JMF |
class DailymotionBaseInfoExtractor(InfoExtractor):
    """Common base class for the Dailymotion extractors in this module."""

    @staticmethod
    def _build_request(url):
        """Build a request with the family filter disabled.

        Returns a ``compat_urllib_request.Request`` for ``url`` that carries
        both the ``family_filter=off`` and the ``ff=off`` cookies.
        """
        request = compat_urllib_request.Request(url)
        # Request.add_header() replaces an existing header of the same name,
        # so calling it once per cookie would only send the last one.  Both
        # cookies therefore go into a single Cookie header.
        request.add_header('Cookie', 'family_filter=off; ff=off')
        return request
953e32b2 | 26 | |
class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
    """Information Extractor for Dailymotion"""

    _VALID_URL = r'(?i)(?:https?://)?(?:(www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(embed|#)/)?video/(?P<id>[^/?_]+)'
    IE_NAME = u'dailymotion'

    # Pairs of (key looked up in the embed page's "info" object, format_id
    # reported for that stream), in the order the formats are appended.
    # NOTE(review): u'hd180' looks like a typo for 'hd1080'; it is left
    # unchanged because format ids are user-visible -- confirm before renaming.
    _FORMATS = [
        (u'stream_h264_ld_url', u'ld'),
        (u'stream_h264_url', u'standard'),
        (u'stream_h264_hq_url', u'hq'),
        (u'stream_h264_hd_url', u'hd'),
        (u'stream_h264_hd1080_url', u'hd180'),
    ]

    _TESTS = [
        {
            u'url': u'http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech',
            u'file': u'x33vw9.mp4',
            u'md5': u'392c4b85a60a90dc4792da41ce3144eb',
            u'info_dict': {
                u"uploader": u"Amphora Alex and Van .",
                u"title": u"Tutoriel de Youtubeur\"DL DES VIDEO DE YOUTUBE\""
            }
        },
        # Vevo video
        {
            u'url': u'http://www.dailymotion.com/video/x149uew_katy-perry-roar-official_musi',
            u'file': u'USUV71301934.mp4',
            u'info_dict': {
                u'title': u'Roar (Official)',
                u'uploader': u'Katy Perry',
                u'upload_date': u'20130905',
            },
            u'params': {
                u'skip_download': True,
            },
            u'skip': u'VEVO is only available in some countries',
        },
        # age-restricted video
        {
            u'url': u'http://www.dailymotion.com/video/xyh2zz_leanna-decker-cyber-girl-of-the-year-desires-nude-playboy-plus_redband',
            u'file': u'xyh2zz.mp4',
            u'md5': u'0d667a7b9cebecc3c89ee93099c4159d',
            u'info_dict': {
                u'title': 'Leanna Decker - Cyber Girl Of The Year Desires Nude [Playboy Plus]',
                u'uploader': 'HotWaves1012',
                u'age_limit': 18,
            }

        }
    ]

    def _real_extract(self, url):
        """Extract the information for a single Dailymotion video.

        Downloads the canonical video page and the embed page, then builds
        the format list from the embed page's ``var info = {...}`` object.

        Returns:
            * a ``url_result`` pointing at the Vevo extractor when the page
              embeds a Vevo video;
            * ``None`` after listing the subtitles when the ``listsubtitles``
              downloader parameter is set;
            * otherwise a dict with the keys ``id``, ``formats``,
              ``uploader``, ``upload_date``, ``title``, ``subtitles``,
              ``thumbnail``, ``age_limit`` and ``view_count``.

        Raises:
            ExtractorError: if the embed page's info object reports an error
                or if none of the ``_FORMATS`` keys yields a URL.
        """
        # Extract id and simplified title from URL
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        # Normalise every accepted URL form to the canonical video page.
        url = 'http://www.dailymotion.com/video/%s' % video_id

        # Retrieve video webpage to extract further information
        request = self._build_request(url)
        webpage = self._download_webpage(request, video_id)

        # Extract URL, uploader and title from webpage
        self.report_extraction(video_id)

        # It may just embed a vevo video:
        m_vevo = re.search(
            r'<link rel="video_src" href="[^"]*?vevo\.com[^"]*?videoId=(?P<id>[\w]*)',
            webpage)
        if m_vevo is not None:
            vevo_id = m_vevo.group('id')
            self.to_screen(u'Vevo video detected: %s' % vevo_id)
            return self.url_result(u'vevo:%s' % vevo_id, ie='Vevo')

        age_limit = self._rta_search(webpage)

        # The page shows the date as DD-MM-YYYY; reorder it to YYYYMMDD.
        video_upload_date = None
        mobj = re.search(r'<div class="[^"]*uploaded_cont[^"]*" title="[^"]*">([0-9]{2})-([0-9]{2})-([0-9]{4})</div>', webpage)
        if mobj is not None:
            video_upload_date = mobj.group(3) + mobj.group(2) + mobj.group(1)

        # Fetch the embed page through _build_request() as well, so that the
        # family filter is disabled for it just like for the main page.
        embed_url = 'http://www.dailymotion.com/embed/video/%s' % video_id
        embed_page = self._download_webpage(
            self._build_request(embed_url), video_id,
            u'Downloading embed page')
        info = self._search_regex(r'var info = ({.*?}),$', embed_page,
                                  'video info', flags=re.MULTILINE)
        info = json.loads(info)
        if info.get('error') is not None:
            msg = 'Couldn\'t get video, Dailymotion says: %s' % info['error']['title']
            raise ExtractorError(msg, expected=True)

        formats = []
        for (key, format_id) in self._FORMATS:
            video_url = info.get(key)
            if video_url is not None:
                # The dimensions are read from the URL itself when it
                # contains an "H264-<width>x<height>" part.
                m_size = re.search(r'H264-(\d+)x(\d+)', video_url)
                if m_size is not None:
                    width, height = map(int_or_none, (m_size.group(1), m_size.group(2)))
                else:
                    width, height = None, None
                formats.append({
                    'url': video_url,
                    'ext': 'mp4',
                    'format_id': format_id,
                    'width': width,
                    'height': height,
                })
        if not formats:
            raise ExtractorError(u'Unable to extract video URL')

        # subtitles
        video_subtitles = self.extract_subtitles(video_id, webpage)
        if self._downloader.params.get('listsubtitles', False):
            self._list_available_subtitles(video_id, webpage)
            return

        view_count = self._search_regex(
            r'video_views_count[^>]+>\s+([\d\.,]+)', webpage, u'view count', fatal=False)
        if view_count is not None:
            view_count = str_to_int(view_count)

        return {
            'id': video_id,
            'formats': formats,
            # Uploader and thumbnail are optional metadata: a missing key
            # must not abort an extraction that already found formats.
            'uploader': info.get('owner.screenname'),
            'upload_date': video_upload_date,
            'title': self._og_search_title(webpage),
            'subtitles': video_subtitles,
            'thumbnail': info.get('thumbnail_url'),
            'age_limit': age_limit,
            'view_count': view_count,
        }

    def _get_available_subtitles(self, video_id, webpage):
        """Return a dict mapping subtitle language to subtitle URL.

        The list is requested from the Dailymotion API; ``webpage`` is not
        used here.  An empty dict is returned, after a warning, when the
        download fails or when the API reports no subtitles.
        """
        try:
            sub_list = self._download_webpage(
                'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id,
                video_id, note=False)
        except ExtractorError as err:
            self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
            return {}
        info = json.loads(sub_list)
        if (info['total'] > 0):
            sub_lang_list = dict((sub['language'], sub['url']) for sub in info['list'])
            return sub_lang_list
        self._downloader.report_warning(u'video doesn\'t have subtitles')
        return {}
176 | ||
a3c736de | 177 | |
class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
    """Extractor for Dailymotion playlists.

    Walks the numbered playlist pages and returns every video found on them
    as a playlist result.
    """

    IE_NAME = u'dailymotion:playlist'
    _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>.+?)/'
    # Matched against each page; when it no longer matches, paging stops.
    _MORE_PAGES_INDICATOR = r'(?s)<div class="pages[^"]*">.*?<a\s+class="[^"]*?icon-arrow_right[^"]*?"'
    # Filled with (list id, page number).
    _PAGE_TEMPLATE = 'https://www.dailymotion.com/playlist/%s/%s'

    def _extract_entries(self, id):
        """Return one ``url_result`` per distinct video of the list ``id``.

        Pages are downloaded starting at page 1, with the family filter
        disabled, until a page does not match ``_MORE_PAGES_INDICATOR``.
        Every ``data-xid`` attribute value on a page counts as a video id;
        duplicates are removed with ``orderedSet``.
        """
        collected_xids = []
        page_number = 1
        while True:
            page_url = self._PAGE_TEMPLATE % (id, page_number)
            page_request = self._build_request(page_url)
            page = self._download_webpage(
                page_request, id, u'Downloading page %s' % page_number)

            collected_xids += re.findall(r'data-xid="(.+?)"', page)

            has_next_page = re.search(self._MORE_PAGES_INDICATOR, page) is not None
            if not has_next_page:
                break
            page_number += 1

        entries = []
        for xid in orderedSet(collected_xids):
            video_url = 'http://www.dailymotion.com/video/%s' % xid
            entries.append(self.url_result(video_url, 'Dailymotion'))
        return entries

    def _real_extract(self, url):
        """Return a playlist dict (``_type``, ``id``, ``title``, ``entries``)."""
        playlist_id = re.match(self._VALID_URL, url).group('id')
        landing_page = self._download_webpage(url, playlist_id)

        # Title first, entries second: same evaluation order as the dict
        # literal this was written from.
        playlist_title = self._og_search_title(landing_page)
        playlist_entries = self._extract_entries(playlist_id)

        return {
            '_type': 'playlist',
            'id': playlist_id,
            'title': playlist_title,
            'entries': playlist_entries,
        }
39baacc4 JMF |
209 | |
210 | ||
class DailymotionUserIE(DailymotionPlaylistIE):
    """Extractor for the videos of a Dailymotion user.

    Reuses the paging logic of ``DailymotionPlaylistIE`` with a user page
    template, and returns the user's videos as a playlist.
    """

    IE_NAME = u'dailymotion:user'
    _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/user/(?P<user>[^/]+)'
    # Filled with (user name, page number).
    _PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s'

    def _real_extract(self, url):
        """Return a playlist dict whose id is the user name from the URL.

        The playlist title is taken from the ``title`` attribute of the
        ``nav-image`` link that points at ``/<user>`` on the user's page.
        """
        user = re.match(self._VALID_URL, url).group('user')
        webpage = self._download_webpage(url, user)

        nav_link_pattern = (
            r'<a class="nav-image" title="([^"]+)" href="/%s">' % re.escape(user))
        raw_name = self._html_search_regex(
            nav_link_pattern, webpage, u'user', flags=re.DOTALL)
        full_user = unescapeHTML(raw_name)

        user_entries = self._extract_entries(user)
        return {
            '_type': 'playlist',
            'id': user,
            'title': full_user,
            'entries': user_entries,
        }