]>
Commit | Line | Data |
---|---|---|
219b8130 | 1 | import re |
b27c856f | 2 | import json |
a3c736de | 3 | import itertools |
219b8130 PH |
4 | |
5 | from .common import InfoExtractor | |
d82134c3 | 6 | from .subtitles import SubtitlesInfoExtractor |
953e32b2 | 7 | |
219b8130 PH |
8 | from ..utils import ( |
9 | compat_urllib_request, | |
953e32b2 IM |
10 | compat_str, |
11 | get_element_by_attribute, | |
12 | get_element_by_id, | |
c3fef636 | 13 | orderedSet, |
f53c966a | 14 | str_to_int, |
219b8130 PH |
15 | |
16 | ExtractorError, | |
219b8130 PH |
17 | ) |
18 | ||
70922df8 JMF |
class DailymotionBaseInfoExtractor(InfoExtractor):
    """Common base for the Dailymotion extractors in this module."""

    @staticmethod
    def _build_request(url):
        """Build a request with the family filter disabled.

        Returns a ``compat_urllib_request.Request`` for *url* carrying the
        cookies that switch the family filter off.
        """
        request = compat_urllib_request.Request(url)
        # Request.add_header() keeps a single value per header name, so a
        # second add_header('Cookie', ...) call would replace the first one.
        # Both cookies therefore have to travel in one Cookie header.
        request.add_header('Cookie', 'family_filter=off; ff=off')
        return request
953e32b2 | 27 | |
class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
    """Information Extractor for Dailymotion"""

    _VALID_URL = r'(?i)(?:https?://)?(?:(www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(embed|#)/)?video/(?P<id>[^/?_]+)'
    IE_NAME = u'dailymotion'

    # (key in the embed page's "info" object, format_id), in the order the
    # formats are appended to the result.
    # NOTE(review): u'hd180' looks like a typo for u'hd1080'; it is left
    # untouched because format ids are visible to users -- confirm before
    # renaming.
    _FORMATS = [
        (u'stream_h264_ld_url', u'ld'),
        (u'stream_h264_url', u'standard'),
        (u'stream_h264_hq_url', u'hq'),
        (u'stream_h264_hd_url', u'hd'),
        (u'stream_h264_hd1080_url', u'hd180'),
    ]

    _TESTS = [
        {
            u'url': u'http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech',
            u'file': u'x33vw9.mp4',
            u'md5': u'392c4b85a60a90dc4792da41ce3144eb',
            u'info_dict': {
                u"uploader": u"Amphora Alex and Van .",
                u"title": u"Tutoriel de Youtubeur\"DL DES VIDEO DE YOUTUBE\""
            }
        },
        # Vevo video
        {
            u'url': u'http://www.dailymotion.com/video/x149uew_katy-perry-roar-official_musi',
            u'file': u'USUV71301934.mp4',
            u'info_dict': {
                u'title': u'Roar (Official)',
                u'uploader': u'Katy Perry',
                u'upload_date': u'20130905',
            },
            u'params': {
                u'skip_download': True,
            },
            u'skip': u'VEVO is only available in some countries',
        },
        # age-restricted video
        {
            u'url': u'http://www.dailymotion.com/video/xyh2zz_leanna-decker-cyber-girl-of-the-year-desires-nude-playboy-plus_redband',
            u'file': u'xyh2zz.mp4',
            u'md5': u'0d667a7b9cebecc3c89ee93099c4159d',
            u'info_dict': {
                u'title': 'Leanna Decker - Cyber Girl Of The Year Desires Nude [Playboy Plus]',
                u'uploader': 'HotWaves1012',
                u'age_limit': 18,
            }

        }
    ]

    def _real_extract(self, url):
        """Extract the information of a single Dailymotion video.

        Returns the info dictionary of the video, the ``url_result`` pointing
        to the Vevo extractor when the page embeds a Vevo video, or ``None``
        when the ``listsubtitles`` parameter is set (the subtitles are only
        listed in that case).

        Raises ExtractorError when the embed page reports an error or when
        none of the known stream keys is present.
        """
        # Extract id and simplified title from URL
        mobj = re.match(self._VALID_URL, url)

        video_id = mobj.group('id')

        # Always work on the canonical video URL, whatever form was given
        url = 'http://www.dailymotion.com/video/%s' % video_id

        # Retrieve video webpage to extract further information
        request = self._build_request(url)
        webpage = self._download_webpage(request, video_id)

        # Extract URL, uploader and title from webpage
        self.report_extraction(video_id)

        # It may just embed a vevo video:
        m_vevo = re.search(
            r'<link rel="video_src" href="[^"]*?vevo.com[^"]*?videoId=(?P<id>[\w]*)',
            webpage)
        if m_vevo is not None:
            vevo_id = m_vevo.group('id')
            self.to_screen(u'Vevo video detected: %s' % vevo_id)
            return self.url_result(u'vevo:%s' % vevo_id, ie='Vevo')

        age_limit = self._rta_search(webpage)

        # The page shows the date as DD-MM-YYYY; reorder it to YYYYMMDD
        video_upload_date = None
        mobj = re.search(r'<div class="[^"]*uploaded_cont[^"]*" title="[^"]*">([0-9]{2})-([0-9]{2})-([0-9]{4})</div>', webpage)
        if mobj is not None:
            video_upload_date = mobj.group(3) + mobj.group(2) + mobj.group(1)

        # The stream URLs and owner data live in a JSON object assigned to
        # "var info" in the embed page
        embed_url = 'http://www.dailymotion.com/embed/video/%s' % video_id
        embed_page = self._download_webpage(embed_url, video_id,
                                            u'Downloading embed page')
        info = self._search_regex(r'var info = ({.*?}),$', embed_page,
                                  'video info', flags=re.MULTILINE)
        info = json.loads(info)
        if info.get('error') is not None:
            msg = 'Couldn\'t get video, Dailymotion says: %s' % info['error']['title']
            raise ExtractorError(msg, expected=True)

        formats = []
        for (key, format_id) in self._FORMATS:
            video_url = info.get(key)
            if video_url is not None:
                m_size = re.search(r'H264-(\d+)x(\d+)', video_url)
                if m_size is not None:
                    # Keep the dimensions as integers, not as the matched
                    # strings, so that they compare numerically
                    width, height = int(m_size.group(1)), int(m_size.group(2))
                else:
                    width, height = None, None
                formats.append({
                    'url': video_url,
                    'ext': 'mp4',
                    'format_id': format_id,
                    'width': width,
                    'height': height,
                })
        if not formats:
            raise ExtractorError(u'Unable to extract video URL')

        # subtitles
        video_subtitles = self.extract_subtitles(video_id, webpage)
        if self._downloader.params.get('listsubtitles', False):
            self._list_available_subtitles(video_id, webpage)
            return

        view_count = self._search_regex(
            r'video_views_count[^>]+>\s+([\d\.,]+)', webpage, u'view count', fatal=False)
        if view_count is not None:
            view_count = str_to_int(view_count)

        return {
            'id': video_id,
            'formats': formats,
            'uploader': info['owner_screenname'],
            'upload_date': video_upload_date,
            'title': self._og_search_title(webpage),
            'subtitles': video_subtitles,
            'thumbnail': info['thumbnail_url'],
            'age_limit': age_limit,
            'view_count': view_count,
        }

    def _get_available_subtitles(self, video_id, webpage):
        """Return a dict mapping subtitle language to subtitle URL.

        The list is fetched from the Dailymotion API; *webpage* is not used
        here. An empty dict is returned, after a warning, when the download
        fails or when the API reports no subtitles.
        """
        try:
            sub_list = self._download_webpage(
                'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id,
                video_id, note=False)
        except ExtractorError as err:
            self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
            return {}
        info = json.loads(sub_list)
        if (info['total'] > 0):
            sub_lang_list = dict((l['language'], l['url']) for l in info['list'])
            return sub_lang_list
        self._downloader.report_warning(u'video doesn\'t have subtitles')
        return {}
177 | ||
a3c736de | 178 | |
class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
    """Extractor for Dailymotion playlist pages."""

    IE_NAME = u'dailymotion:playlist'
    _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>.+?)/'
    _MORE_PAGES_INDICATOR = r'<div class="next">.*?<a.*?href="/playlist/.+?".*?>.*?</a>.*?</div>'
    _PAGE_TEMPLATE = 'https://www.dailymotion.com/playlist/%s/%s'

    def _extract_entries(self, id):
        """Collect the videos of every page of the list identified by *id*.

        Pages are requested one after another, starting at 1, until a page
        no longer matches ``_MORE_PAGES_INDICATOR``. Returns one
        ``url_result`` per distinct video id, in order of first appearance.
        """
        collected_ids = []
        page_number = 1
        while True:
            page_request = self._build_request(
                self._PAGE_TEMPLATE % (id, page_number))
            page = self._download_webpage(
                page_request, id, u'Downloading page %s' % page_number)

            # NOTE(review): get_element_by_attribute presumably returns None
            # when no element matches, in which case re.findall would raise
            # -- confirm.
            listing = get_element_by_attribute(u'class', u'row video_list', page)
            collected_ids.extend(re.findall(r'data-id="(.+?)"', listing))

            next_link = re.search(self._MORE_PAGES_INDICATOR, page, re.DOTALL)
            if next_link is None:
                break
            page_number += 1

        entries = []
        for video_id in orderedSet(collected_ids):
            video_url = 'http://www.dailymotion.com/video/%s' % video_id
            entries.append(self.url_result(video_url, 'Dailymotion'))
        return entries

    def _real_extract(self, url):
        """Return the playlist result (id, title and entries) for *url*."""
        playlist_id = re.match(self._VALID_URL, url).group('id')
        webpage = self._download_webpage(url, playlist_id)

        # The title is read from the element whose id is "playlist_name"
        playlist_title = get_element_by_id(u'playlist_name', webpage)
        playlist_entries = self._extract_entries(playlist_id)

        return {
            '_type': 'playlist',
            'id': playlist_id,
            'title': playlist_title,
            'entries': playlist_entries,
        }
39baacc4 JMF |
210 | |
211 | ||
class DailymotionUserIE(DailymotionPlaylistIE):
    """Extractor for the videos listed on a Dailymotion user page."""

    IE_NAME = u'dailymotion:user'
    _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/user/(?P<user>[^/]+)'
    _MORE_PAGES_INDICATOR = r'<div class="next">.*?<a.*?href="/user/.+?".*?>.*?</a>.*?</div>'
    _PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s'

    def _real_extract(self, url):
        """Return a playlist result holding every video of the user in *url*."""
        user = re.match(self._VALID_URL, url).group('user')
        profile_page = self._download_webpage(url, user)

        # The displayed name is the text of the "label" link that points to
        # the user's own page
        label_pattern = r'<a class="label" href="/%s".*?>(.*?)</' % re.escape(user)
        full_user = self._html_search_regex(
            label_pattern, profile_page, u'user', flags=re.DOTALL)

        user_entries = self._extract_entries(user)
        return {
            '_type': 'playlist',
            'id': user,
            'title': full_user,
            'entries': user_entries,
        }