]>
Commit | Line | Data |
---|---|---|
d77c3dfd FV |
1 | #!/usr/bin/env python |
2 | # -*- coding: utf-8 -*- | |
3 | ||
9e8056d5 PH |
4 | from __future__ import absolute_import |
5 | ||
4fcca4bb | 6 | import base64 |
d77c3dfd | 7 | import datetime |
ccf65f9d | 8 | import itertools |
d77c3dfd FV |
9 | import netrc |
10 | import os | |
11 | import re | |
12 | import socket | |
13 | import time | |
d77c3dfd | 14 | import email.utils |
921a1455 | 15 | import xml.etree.ElementTree |
302efc19 | 16 | import random |
17 | import math | |
6324fd1d | 18 | import operator |
de5d66d4 | 19 | import hashlib |
20 | import binascii | |
21 | import urllib | |
d77c3dfd | 22 | |
9e8056d5 | 23 | from .utils import * |
d6983cb4 | 24 | from .extractor.common import InfoExtractor, SearchInfoExtractor |
d5822b96 PH |
25 | |
26 | from .extractor.ard import ARDIE | |
27 | from .extractor.arte import ArteTvIE | |
219b8130 | 28 | from .extractor.dailymotion import DailymotionIE |
38cbc40a PH |
29 | from .extractor.metacafe import MetacafeIE |
30 | from .extractor.statigram import StatigramIE | |
97d6faac | 31 | from .extractor.photobucket import PhotobucketIE |
b3d14cbf | 32 | from .extractor.vimeo import VimeoIE |
d6039175 | 33 | from .extractor.yahoo import YahooIE |
b05654f0 | 34 | from .extractor.youtube import YoutubeIE, YoutubePlaylistIE, YoutubeSearchIE, YoutubeUserIE, YoutubeChannelIE |
d5822b96 | 35 | from .extractor.zdf import ZDFIE |
e30e9318 | 36 | |
d830b7c2 | 37 | |
d77c3dfd | 38 | |
d77c3dfd FV |
39 | |
40 | ||
d77c3dfd FV |
41 | |
42 | ||
d77c3dfd FV |
43 | |
44 | ||
f2ad10a9 CA |
45 | |
46 | ||
class GenericIE(InfoExtractor):
    """Generic last-resort information extractor.

    Its _VALID_URL matches any string. Extraction first follows HTTP
    redirects (e.g. URL shorteners) and then scans the downloaded page
    with a fixed sequence of regular expressions looking for a media URL.
    """

    # Matches every URL.
    _VALID_URL = r'.*'
    IE_NAME = u'generic'

    def report_download_webpage(self, video_id):
        """Report webpage download.

        Outside of test mode a warning about the generic fallback is
        emitted before delegating to the parent implementation.
        """
        if not self._downloader.params.get('test', False):
            self._downloader.report_warning(u'Falling back on generic information extractor.')
        super(GenericIE, self).report_download_webpage(video_id)

    def report_following_redirect(self, new_url):
        """Report that a redirect to new_url is being followed."""
        self._downloader.to_screen(u'[redirect] Following redirect to %s' % new_url)

    def _test_redirect(self, url):
        """Check if it is a redirect, like url shorteners, in case return the new url.

        Issues a HEAD request for url through a purpose-built opener.
        Returns False when the final URL equals url, otherwise the final
        URL. Raises ExtractorError when the opener returns no response.
        """
        class HeadRequest(compat_urllib_request.Request):
            # A Request whose HTTP method is always HEAD.
            def get_method(self):
                return "HEAD"

        class HEADRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
            """
            Subclass the HTTPRedirectHandler to make it use our
            HeadRequest also on the redirected URL
            """
            def redirect_request(self, req, fp, code, msg, headers, newurl):
                if code in (301, 302, 303, 307):
                    # Spaces are not valid in a URL; percent-encode them.
                    newurl = newurl.replace(' ', '%20')
                    # Drop the body-describing headers from the copied set.
                    newheaders = dict((k,v) for k,v in req.headers.items()
                                      if k.lower() not in ("content-length", "content-type"))
                    return HeadRequest(newurl,
                                       headers=newheaders,
                                       origin_req_host=req.get_origin_req_host(),
                                       unverifiable=True)
                else:
                    raise compat_urllib_error.HTTPError(req.get_full_url(), code, msg, headers, fp)

        class HTTPMethodFallback(compat_urllib_request.BaseHandler):
            """
            Fallback to GET if HEAD is not allowed (405 HTTP error)
            """
            def http_error_405(self, req, fp, code, msg, headers):
                # Consume and close the error response before retrying.
                fp.read()
                fp.close()

                newheaders = dict((k,v) for k,v in req.headers.items()
                                  if k.lower() not in ("content-length", "content-type"))
                # A plain Request (not HeadRequest) is used for the retry.
                return self.parent.open(compat_urllib_request.Request(req.get_full_url(),
                                                 headers=newheaders,
                                                 origin_req_host=req.get_origin_req_host(),
                                                 unverifiable=True))

        # Build our opener
        opener = compat_urllib_request.OpenerDirector()
        for handler in [compat_urllib_request.HTTPHandler, compat_urllib_request.HTTPDefaultErrorHandler,
                        HTTPMethodFallback, HEADRedirectHandler,
                        compat_urllib_request.HTTPErrorProcessor, compat_urllib_request.HTTPSHandler]:
            opener.add_handler(handler())

        response = opener.open(HeadRequest(url))
        if response is None:
            raise ExtractorError(u'Invalid URL protocol')
        new_url = response.geturl()

        if url == new_url:
            return False

        self.report_following_redirect(new_url)
        return new_url

    def _real_extract(self, url):
        """Extract a single video from an arbitrary web page.

        Returns a one-element list: either a url_result for the redirect
        target, or an info dict built from the first media URL found in
        the page. Raises ExtractorError when the page cannot be
        downloaded as a URL or no media URL is found.
        """
        new_url = self._test_redirect(url)
        if new_url: return [self.url_result(new_url)]

        # Provisional id (last path component), used for progress messages.
        video_id = url.split('/')[-1]
        try:
            webpage = self._download_webpage(url, video_id)
        except ValueError as err:
            # since this is the last-resort InfoExtractor, if
            # this error is thrown, it'll be thrown here
            raise ExtractorError(u'Invalid URL: %s' % url)

        self.report_extraction(video_id)
        # Start with something easy: JW Player in SWFObject
        mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
        if mobj is None:
            # Broaden the search a little bit
            mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
        if mobj is None:
            # Broaden the search a little bit: JWPlayer JS loader
            mobj = re.search(r'[^A-Za-z0-9]?file:\s*["\'](http[^\'"&]*)', webpage)
        if mobj is None:
            # Try to find twitter cards info
            mobj = re.search(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage)
        if mobj is None:
            # We look for Open Graph info:
            # We have to match any number spaces between elements, some sites try to align them (eg.: statigr.am)
            m_video_type = re.search(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
            # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
            if m_video_type is not None:
                mobj = re.search(r'<meta.*?property="og:video".*?content="(.*?)"', webpage)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        # It's possible that one of the regexes
        # matched, but returned an empty group:
        if mobj.group(1) is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        video_url = compat_urllib_parse.unquote(mobj.group(1))
        # From here on the id is derived from the media URL, not the page URL.
        video_id = os.path.basename(video_url)

        # here's a fun little line of code for you:
        # (extension without its leading dot, then the basename without extension)
        video_extension = os.path.splitext(video_id)[1][1:]
        video_id = os.path.splitext(video_id)[0]

        # it's tempting to parse this further, but you would
        # have to take into account all the variations like
        #   Video Title - Site Name
        #   Site Name | Video Title
        #   Video Title - Tagline | Site Name
        # and so on and so forth; it's just not practical
        video_title = self._html_search_regex(r'<title>(.*)</title>',
            webpage, u'video title')

        # video uploader is domain name
        video_uploader = self._search_regex(r'(?:https?://)?([^/]*)/.*',
            url, u'video uploader')

        return [{
            'id':       video_id,
            'url':      video_url,
            'uploader': video_uploader,
            'upload_date':  None,
            'title':    video_title,
            'ext':      video_extension,
        }]
d77c3dfd FV |
186 | |
187 | ||
d77c3dfd | 188 | |
class GoogleSearchIE(SearchInfoExtractor):
    """Information Extractor for Google Video search queries."""
    # Marker of the "next page" link in a result page.
    _MORE_PAGES_INDICATOR = r'id="pnnext" class="pn"'
    _MAX_RESULTS = 1000
    IE_NAME = u'video.google:search'
    _SEARCH_KEY = 'gvsearch'

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query.

        Result pages are fetched ten results at a time, beginning with the
        first page (start=0), until at least n entries have been collected
        or the page carries no "next page" marker.

        Returns a playlist dict whose 'entries' list holds at most n
        url-type entries.
        """
        entries = []
        res = {
            '_type': 'playlist',
            'id': query,
            'entries': entries,
        }

        # Count from zero: the 'start' parameter is a result offset, so the
        # first page is start=0. Counting from one skipped the top results.
        for pagenum in itertools.count():
            result_url = u'http://www.google.com/search?tbm=vid&q=%s&start=%s&hl=en' % (compat_urllib_parse.quote_plus(query), pagenum*10)
            webpage = self._download_webpage(result_url, u'gvsearch:' + query,
                                             note='Downloading result page ' + str(pagenum + 1))

            for mobj in re.finditer(r'<h3 class="r"><a href="([^"]+)"', webpage):
                entries.append({
                    '_type': 'url',
                    'url': mobj.group(1),
                })

            # Stop on the number of entries actually collected, not on the
            # page index, and never hand back more than was asked for.
            if len(entries) >= n or not re.search(self._MORE_PAGES_INDICATOR, webpage):
                res['entries'] = entries[:n]
                return res
d77c3dfd | 219 | |
class YahooSearchIE(SearchInfoExtractor):
    """Information Extractor for Yahoo! Video search queries."""

    _MAX_RESULTS = 1000
    IE_NAME = u'screen.yahoo:search'
    _SEARCH_KEY = 'yvsearch'

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query.

        Pages of the JSON search endpoint are requested with an offset of
        30 per page until n entries have been collected, a page comes back
        without results, or the paging metadata says the last result has
        been reached.

        Returns a playlist dict whose 'entries' list holds at most n
        url results handled by the 'Yahoo' extractor.
        """
        entries = []
        res = {
            '_type': 'playlist',
            'id': query,
            'entries': entries,
        }
        for pagenum in itertools.count(0):
            result_url = u'http://video.search.yahoo.com/search/?p=%s&fr=screen&o=js&gs=0&b=%d' % (compat_urllib_parse.quote_plus(query), pagenum * 30)
            webpage = self._download_webpage(result_url, query,
                                             note='Downloading results page '+str(pagenum+1))
            info = json.loads(webpage)
            m = info[u'm']
            results = info[u'results']

            # An empty page cannot make progress; stop instead of relying on
            # a loop index that was never assigned.
            if not results:
                break

            for r in results:
                if len(entries) >= n:
                    break
                mobj = re.search(r'(?P<url>screen\.yahoo\.com/.*?-\d*?\.html)"', r)
                entries.append(self.url_result('http://' + mobj.group('url'), 'Yahoo'))

            # Decide on the number of collected entries so that no extra
            # page is requested once n has been reached.
            if len(entries) >= n or m[u'last'] >= (m[u'total'] - 1):
                break

        return res
d77c3dfd FV |
253 | |
254 | ||
class BlipTVUserIE(InfoExtractor):
    """Information Extractor for blip.tv users."""

    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?blip\.tv/)|bliptvuser:)([^/]+)/*$'
    _PAGE_SIZE = 12
    IE_NAME = u'blip.tv:user'

    def _real_extract(self, url):
        """Return a one-element list holding a playlist of the user's videos.

        The user page is downloaded to find the numeric users id, then the
        episode list is fetched page by page. Raises ExtractorError when
        the URL does not match or the users id is not present in the page.
        """
        # Extract username
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        username = mobj.group(1)

        page_base = 'http://m.blip.tv/pr/show_get_full_episode_list?users_id=%s&lite=0&esi=1'

        page = self._download_webpage(url, username, u'Downloading user page')
        mobj = re.search(r'data-users-id="([^"]+)"', page)
        if mobj is None:
            # Report a proper extraction failure rather than failing with an
            # AttributeError on None.
            raise ExtractorError(u'Unable to extract users id from: %s' % url)
        page_base = page_base % mobj.group(1)

        # Download video ids using BlipTV Ajax calls. Result size per
        # query is limited (currently to 12 videos) so we need to query
        # page by page until there are no video ids - it means we got
        # all of them.

        video_ids = []

        for pagenum in itertools.count(1):
            page_url = page_base + "&page=" + str(pagenum)
            page = self._download_webpage(page_url, username,
                                          u'Downloading video ids from page %d' % pagenum)

            # Extract video identifiers
            ids_in_page = []

            for mobj in re.finditer(r'href="/([^"]+)"', page):
                # Compare the unescaped form, which is what the list stores;
                # otherwise ids containing HTML entities are never deduplicated.
                video_id = unescapeHTML(mobj.group(1))
                if video_id not in ids_in_page:
                    ids_in_page.append(video_id)

            video_ids.extend(ids_in_page)

            # A little optimization - if current page is not
            # "full", ie. does not contain PAGE_SIZE video ids then
            # we can assume that this page is the last one - there
            # are no more ids on further pages - no need to query
            # again.

            if len(ids_in_page) < self._PAGE_SIZE:
                break

        url_entries = [self.url_result(u'http://blip.tv/%s' % video_id, 'BlipTV')
                       for video_id in video_ids]
        return [self.playlist_result(url_entries, playlist_title = username)]
eeeb4daa JCGS |
313 | |
314 | ||
class DepositFilesIE(InfoExtractor):
    """Information extractor for depositfiles.com"""

    _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles\.com/(?:../(?#locale))?files/(.+)'

    def _real_extract(self, url):
        """Return a one-element list with the info dict of the hosted file.

        Raises ExtractorError when the page cannot be retrieved or no
        download URL can be found in it; if the page carries an
        'Attention' notice, its text is used as the error message.
        """
        file_id = url.split('/')[-1]
        # Always work on the English-locale page.
        url = 'http://depositfiles.com/en/files/' + file_id

        # POSTing this field simulates pressing the 'Free download' button.
        form_data = compat_urllib_parse.urlencode({ 'gateway_result' : '1' })
        request = compat_urllib_request.Request(url, form_data)
        try:
            self.report_download_webpage(file_id)
            webpage = compat_urllib_request.urlopen(request).read()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to retrieve file webpage: %s' % compat_str(err))

        # Locate the real file URL.
        form_match = re.search(r'<form action="(http://fileshare.+?)"', webpage)
        if form_match is None or form_match.group(1) is None:
            # No download form: look for the reason given on the page.
            notice = re.search(r'<strong>(Attention.*?)</strong>', webpage, re.DOTALL)
            if notice is None or notice.group(1) is None:
                raise ExtractorError(u'Unable to extract download URL from: %s' % url)
            restriction_message = re.sub(r'\s+', ' ', notice.group(1)).strip()
            raise ExtractorError(u'%s' % restriction_message)

        file_url = form_match.group(1)
        _, dotted_ext = os.path.splitext(file_url)
        file_extension = dotted_ext[1:]

        # The file title lives in the title attribute of a <b> element.
        file_title = self._search_regex(r'<b title="(.*?)">', webpage, u'title')

        info = {
            'id':       file_id.decode('utf-8'),
            'url':      file_url.decode('utf-8'),
            'uploader': None,
            'upload_date':  None,
            'title':    file_title,
            'ext':      file_extension.decode('utf-8'),
        }
        return [info]
d77c3dfd FV |
359 | |
360 | ||
class FacebookIE(InfoExtractor):
    """Information Extractor for Facebook"""

    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook\.com/(?:video/video|photo)\.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)'
    _LOGIN_URL = 'https://login.facebook.com/login.php?m&next=http%3A%2F%2Fm.facebook.com%2Fhome.php&'
    _NETRC_MACHINE = 'facebook'
    IE_NAME = u'facebook'

    def report_login(self):
        """Announce that a login is being attempted."""
        self.to_screen(u'Logging in')

    def _real_initialize(self):
        """Log in when credentials are available.

        Credentials come from the downloader's 'username'/'password'
        parameters or, when 'usenetrc' is set, from the .netrc entry for
        _NETRC_MACHINE. Every failure is reported as a warning and the
        method returns without raising.
        """
        if self._downloader is None:
            return

        params = self._downloader.params
        useremail = None
        password = None

        # Explicit parameters take precedence over .netrc data.
        if params.get('username', None) is not None:
            useremail = params['username']
            password = params['password']
        elif params.get('usenetrc', False):
            try:
                auth = netrc.netrc().authenticators(self._NETRC_MACHINE)
                if auth is None:
                    raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
                useremail = auth[0]
                password = auth[2]
            except (IOError, netrc.NetrcParseError) as err:
                self._downloader.report_warning(u'parsing .netrc: %s' % compat_str(err))
                return

        # Nothing to log in with.
        if useremail is None:
            return

        # Submit the login form.
        login_form = {
            'email': useremail,
            'pass': password,
            'login': 'Log+In'
        }
        post_body = compat_urllib_parse.urlencode(login_form)
        request = compat_urllib_request.Request(self._LOGIN_URL, post_body)
        try:
            self.report_login()
            login_results = compat_urllib_request.urlopen(request).read()
            # Being served the login form again means the login failed.
            still_on_login_form = re.search(r'<form(.*)name="login"(.*)</form>', login_results)
            if still_on_login_form is not None:
                self._downloader.report_warning(u'unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
                return
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
            return

    def _real_extract(self, url):
        """Return a one-element list with the info dict of the video.

        Raises ExtractorError when the URL does not match, the embedded
        player data cannot be located, or no video URL is present.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = mobj.group('ID')

        # Normalise to the canonical video page before downloading.
        url = 'https://www.facebook.com/video/video.php?v=%s' % video_id
        webpage = self._download_webpage(url, video_id)

        # The player variables are a JSON literal sitting between these
        # two fixed JavaScript fragments.
        BEFORE = '{swf.addParam(param[0], param[1]);});\n'
        AFTER = '.forEach(function(variable) {swf.addVariable(variable[0], variable[1]);});'
        pattern = re.escape(BEFORE) + '(.*?)' + re.escape(AFTER)
        m = re.search(pattern, webpage)
        if not m:
            raise ExtractorError(u'Cannot parse data')

        swf_vars = dict(json.loads(m.group(1)))
        # 'params' is itself URL-quoted JSON.
        params = json.loads(compat_urllib_parse.unquote(swf_vars['params']))
        video_data = params['video_data'][0]

        # Prefer the HD source, fall back to SD.
        video_url = video_data.get('hd_src') or video_data['sd_src']
        if not video_url:
            raise ExtractorError(u'Cannot find video URL')

        video_duration = int(video_data['video_duration'])
        thumbnail = video_data['thumbnail_src']

        video_title = self._html_search_regex('<h2 class="uiHeaderTitle">([^<]+)</h2>',
            webpage, u'title')

        return [{
            'id': video_id,
            'title': video_title,
            'url': video_url,
            'ext': 'mp4',
            'duration': video_duration,
            'thumbnail': thumbnail,
        }]
59ae15a5 | 455 | |
d77c3dfd FV |
456 | |
class BlipTVIE(InfoExtractor):
    """Information extractor for blip.tv"""

    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?blip\.tv/((.+/)|(play/)|(api\.swf#))(.+)$'
    # Captures the filename extension at the end of a media URL.
    _URL_EXT = r'^.*\.([a-z0-9]+)$'
    IE_NAME = u'blip.tv'

    def report_direct_download(self, title):
        """Report that the URL points directly at a video file."""
        self.to_screen(u'%s: Direct download detected' % title)

    def _real_extract(self, url):
        """Return a one-element list with the info dict of the video.

        '/play/' and 'api.swf#' URLs are first resolved to a
        'http://blip.tv/a/a-<file id>' URL and extraction restarts on that.
        Otherwise the JSON description of the page is requested; if the
        server answers with a video Content-Type the URL is treated as a
        direct download.

        Raises ExtractorError on an invalid URL, on download/read errors
        and when the JSON data cannot be parsed.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        # See https://github.com/rg3/youtube-dl/issues/857
        api_mobj = re.match(r'http://a\.blip\.tv/api\.swf#(?P<video_id>[\d\w]+)', url)
        if api_mobj is not None:
            url = 'http://blip.tv/play/g_%s' % api_mobj.group('video_id')
        urlp = compat_urllib_parse_urlparse(url)
        if urlp.path.startswith('/play/'):
            # Follow the redirect; the file id is read from the 'file'
            # parameter in the fragment of the final URL.
            request = compat_urllib_request.Request(url)
            response = compat_urllib_request.urlopen(request)
            redirecturl = response.geturl()
            rurlp = compat_urllib_parse_urlparse(redirecturl)
            file_id = compat_parse_qs(rurlp.fragment)['file'][0].rpartition('/')[2]
            url = 'http://blip.tv/a/a-' + file_id
            return self._real_extract(url)


        # Append the JSON query with the right separator.
        if '?' in url:
            cchar = '&'
        else:
            cchar = '?'
        json_url = url + cchar + 'skin=json&version=2&no_wrap=1'
        request = compat_urllib_request.Request(json_url)
        request.add_header('User-Agent', 'iTunes/10.6.1')
        self.report_extraction(mobj.group(1))
        info = None
        try:
            urlh = compat_urllib_request.urlopen(request)
            if urlh.headers.get('Content-Type', '').startswith('video/'): # Direct download
                basename = url.split('/')[-1]
                title,ext = os.path.splitext(basename)
                title = title.decode('UTF-8')
                ext = ext.replace('.', '')
                self.report_direct_download(title)
                # The open handle is passed along under 'urlhandle'.
                info = {
                    'id': title,
                    'url': url,
                    'uploader': None,
                    'upload_date': None,
                    'title': title,
                    'ext': ext,
                    'urlhandle': urlh
                }
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'ERROR: unable to download video info webpage: %s' % compat_str(err))
        if info is None: # Regular URL
            # Not a direct download: the response opened above is the JSON
            # description, read it from the same handle.
            try:
                json_code_bytes = urlh.read()
                json_code = json_code_bytes.decode('utf-8')
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                raise ExtractorError(u'Unable to read video info webpage: %s' % compat_str(err))

            try:
                json_data = json.loads(json_code)
                # The payload is either wrapped in a 'Post' key or bare.
                if 'Post' in json_data:
                    data = json_data['Post']
                else:
                    data = json_data

                # Only the date part survives the reformatting to YYYYMMDD.
                upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d')
                video_url = data['media']['url']
                umobj = re.match(self._URL_EXT, video_url)
                if umobj is None:
                    raise ValueError('Can not determine filename extension')
                ext = umobj.group(1)

                info = {
                    'id': data['item_id'],
                    'url': video_url,
                    'uploader': data['display_name'],
                    'upload_date': upload_date,
                    'title': data['title'],
                    'ext': ext,
                    'format': data['media']['mimeType'],
                    'thumbnail': data['thumbnailUrl'],
                    'description': data['description'],
                    'player_url': data['embedUrl'],
                    'user_agent': 'iTunes/10.6.1',
                }
            except (ValueError,KeyError) as err:
                raise ExtractorError(u'Unable to parse video information: %s' % repr(err))

        return [info]
d77c3dfd FV |
554 | |
555 | ||
class MyVideoIE(InfoExtractor):
    """Information Extractor for myvideo.de."""

    _VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/watch/([0-9]+)/([^?/]+).*'
    IE_NAME = u'myvideo'

    # Original Code from: https://github.com/dersphere/plugin.video.myvideo_de.git
    # Released into the Public Domain by Tristan Fischer on 2013-05-19
    # https://github.com/rg3/youtube-dl/pull/842
    def __rc4crypt(self,data, key):
        """Apply the RC4 stream cipher with key to data; return a str."""
        # Key scheduling.
        x = 0
        box = list(range(256))
        for i in range(256):
            x = (x + box[i] + compat_ord(key[i % len(key)])) % 256
            box[i], box[x] = box[x], box[i]
        # Keystream generation, XORed onto the input. Characters are
        # collected in a list and joined once instead of being appended
        # to a growing string.
        x = 0
        y = 0
        out = []
        for char in data:
            x = (x + 1) % 256
            y = (y + box[x]) % 256
            box[x], box[y] = box[y], box[x]
            out.append(chr(compat_ord(char) ^ box[(box[x] + box[y]) % 256]))
        return ''.join(out)

    def __md5(self,s):
        """Return the hexadecimal MD5 digest of s, encoded to bytes."""
        return hashlib.md5(s).hexdigest().encode()

    def _real_extract(self,url):
        """Return a one-element list with the info dict of the video.

        A plain '<source src=...>' element in the page is used when
        present; otherwise the encrypted player XML is downloaded,
        decrypted and searched for the stream location.

        Raises ExtractorError when the URL does not match or the required
        data cannot be found.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'invalid URL: %s' % url)

        video_id = mobj.group(1)

        # Doubly base64-encoded key material for the player XML.
        GK = (
          b'WXpnME1EZGhNRGhpTTJNM01XVmhOREU0WldNNVpHTTJOakpt'
          b'TW1FMU5tVTBNR05pWkRaa05XRXhNVFJoWVRVd1ptSXhaVEV3'
          b'TnpsbA0KTVRkbU1tSTRNdz09'
        )

        # Get video webpage
        webpage_url = 'http://www.myvideo.de/watch/%s' % video_id
        webpage = self._download_webpage(webpage_url, video_id)

        mobj = re.search('source src=\'(.+?)[.]([^.]+)\'', webpage)
        if mobj is not None:
            self.report_extraction(video_id)
            # The advertised extension is replaced by '.flv'.
            video_url = mobj.group(1) + '.flv'

            video_title = self._html_search_regex('<title>([^<]+)</title>',
                webpage, u'title')

            return [{
                'id':       video_id,
                'url':      video_url,
                'uploader': None,
                'upload_date':  None,
                'title':    video_title,
                'ext':      u'flv',
            }]

        # try encxml
        mobj = re.search('var flashvars={(.+?)}', webpage)
        if mobj is None:
            raise ExtractorError(u'Unable to extract video')

        params = {}
        encxml = ''
        sec = mobj.group(1)
        for (a, b) in re.findall('(.+?):\'(.+?)\',?', sec):
            if not a == '_encxml':
                params[a] = b
            else:
                encxml = compat_urllib_parse.unquote(b)
        if not params.get('domain'):
            params['domain'] = 'www.myvideo.de'
        xmldata_url = '%s?%s' % (encxml, compat_urllib_parse.urlencode(params))
        if 'flash_playertype=MTV' in xmldata_url:
            self._downloader.report_warning(u'avoiding MTV player')
            xmldata_url = (
                'http://www.myvideo.de/dynamic/get_player_video_xml.php'
                '?flash_playertype=D&ID=%s&_countlimit=4&autorun=yes'
            ) % video_id

        # get enc data
        enc_data = self._download_webpage(xmldata_url, video_id).split('=')[1]
        enc_data_b = binascii.unhexlify(enc_data)
        # Key: md5(decoded GK + md5(video id)).
        sk = self.__md5(
            base64.b64decode(base64.b64decode(GK)) +
            self.__md5(
                str(video_id).encode('utf-8')
            )
        )
        dec_data = self.__rc4crypt(enc_data_b, sk)

        # extracting infos
        self.report_extraction(video_id)

        video_url = None
        mobj = re.search('connectionurl=\'(.*?)\'', dec_data)
        if mobj:
            video_url = compat_urllib_parse.unquote(mobj.group(1))
            if 'myvideo2flash' in video_url:
                self._downloader.report_warning(u'forcing RTMPT ...')
                video_url = video_url.replace('rtmpe://', 'rtmpt://')

        if not video_url:
            # extract non rtmp videos
            mobj = re.search('path=\'(http.*?)\' source=\'(.*?)\'', dec_data)
            if mobj is None:
                raise ExtractorError(u'unable to extract url')
            video_url = compat_urllib_parse.unquote(mobj.group(1)) + compat_urllib_parse.unquote(mobj.group(2))

        video_file = self._search_regex('source=\'(.*?)\'', dec_data, u'video file')
        video_file = compat_urllib_parse.unquote(video_file)

        if not video_file.endswith('f4m'):
            ppath, prefix = video_file.split('.')
            video_playpath = '%s:%s' % (prefix, ppath)
            video_hls_playlist = ''
        else:
            video_playpath = ''
            # The playlist URL is <path><file> with the extension swapped.
            # The path was previously read from a name that was never
            # assigned; take it from the decrypted data's path attribute.
            video_filepath = self._search_regex('path=\'(.*?)\'', dec_data, u'video file path')
            video_filepath = compat_urllib_parse.unquote(video_filepath)
            video_hls_playlist = (
                video_filepath + video_file
            ).replace('.f4m', '.m3u8')

        video_swfobj = self._search_regex('swfobject.embedSWF\\(\'(.+?)\'', webpage, u'swfobj')
        video_swfobj = compat_urllib_parse.unquote(video_swfobj)

        video_title = self._html_search_regex("<h1(?: class='globalHd')?>(.*?)</h1>",
            webpage, u'title')

        return [{
            'id':                 video_id,
            'url':                video_url,
            'tc_url':             video_url,
            'uploader':           None,
            'upload_date':        None,
            'title':              video_title,
            'ext':                u'flv',
            'play_path':          video_playpath,
            'video_file':         video_file,
            'video_hls_playlist': video_hls_playlist,
            'player_url':         video_swfobj,
        }]
d77c3dfd | 704 | |
ac3e9394 | 705 | |
class ComedyCentralIE(InfoExtractor):
    """Information extractor for The Daily Show and Colbert Report.

    Accepts either a shortcut (":tds", ":colbert", ...) that resolves to the
    newest full episode, a full-episode URL, or a clip URL.  Every <item> of
    the episode's MRSS index becomes one entry in the returned list.
    """

    # urls can be abbreviations like :thedailyshow or :colbert
    # urls for episodes like: http://www.thedailyshow.com/full-episodes/<episode>
    # or urls for clips like: http://www.thedailyshow.com/watch/mon-december-10-2012/any-given-gun-day
    #                     or: http://www.colbertnation.com/the-colbert-report-videos/421667/november-29-2012/moon-shattering-news
    #                     or: http://www.colbertnation.com/the-colbert-report-collections/422008/festival-of-lights/79524
    # NOTE: this pattern is only valid when compiled with re.VERBOSE.
    _VALID_URL = r"""^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport)
                      |(https?://)?(www\.)?
                          (?P<showname>thedailyshow|colbertnation)\.com/
                         (full-episodes/(?P<episode>.*)|
                          (?P<clip>
                              (the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
                              |(watch/(?P<date>[^/]*)/(?P<tdstitle>.*)))))
                     $"""

    _available_formats = ['3500', '2200', '1700', '1200', '750', '400']

    _video_extensions = {
        '3500': 'mp4',
        '2200': 'mp4',
        '1700': 'mp4',
        '1200': 'mp4',
        '750': 'mp4',
        '400': 'mp4',
    }
    _video_dimensions = {
        '3500': '1280x720',
        '2200': '960x540',
        '1700': '768x432',
        '1200': '640x360',
        '750': '512x288',
        '400': '384x216',
    }

    @classmethod
    def suitable(cls, url):
        """Receives a URL and returns True if suitable for this IE."""
        # Overridden because _VALID_URL needs the re.VERBOSE flag.
        return re.match(cls._VALID_URL, url, re.VERBOSE) is not None

    def _print_formats(self, formats):
        """Print one line per format id with its extension and dimensions."""
        print('Available formats:')
        for x in formats:
            print('%s\t:\t%s\t[%s]' %(x, self._video_extensions.get(x, 'mp4'), self._video_dimensions.get(x, '???')))

    def _real_extract(self, url):
        """Return a list of info dicts, one per part of the episode/clip.

        Returns None after printing the format table when the 'listformats'
        option is set.  Raises ExtractorError when the URL is invalid, the
        player reference cannot be found or the RTMP URL cannot be mapped
        to an HTTP URL.
        """
        mobj = re.match(self._VALID_URL, url, re.VERBOSE)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        if mobj.group('shortname'):
            # Expand the shortcut to the show's full-episodes landing page.
            if mobj.group('shortname') in ('tds', 'thedailyshow'):
                url = u'http://www.thedailyshow.com/full-episodes/'
            else:
                url = u'http://www.colbertnation.com/full-episodes/'
            mobj = re.match(self._VALID_URL, url, re.VERBOSE)
            assert mobj is not None

        if mobj.group('clip'):
            if mobj.group('showname') == 'thedailyshow':
                epTitle = mobj.group('tdstitle')
            else:
                epTitle = mobj.group('cntitle')
            dlNewest = False
        else:
            # No episode in the URL: follow the redirect to the newest one.
            dlNewest = not mobj.group('episode')
            if dlNewest:
                epTitle = mobj.group('showname')
            else:
                epTitle = mobj.group('episode')

        self.report_extraction(epTitle)
        webpage, htmlHandle = self._download_webpage_handle(url, epTitle)
        if dlNewest:
            url = htmlHandle.geturl()
            mobj = re.match(self._VALID_URL, url, re.VERBOSE)
            if mobj is None:
                raise ExtractorError(u'Invalid redirected URL: ' + url)
            # The group is None (not '') when the redirect lands on a clip
            # URL, so test for falsiness rather than for the empty string.
            if not mobj.group('episode'):
                raise ExtractorError(u'Redirected URL is still not specific: ' + url)
            epTitle = mobj.group('episode')

        mMovieParams = re.findall('(?:<param name="movie" value="|var url = ")(http://media.mtvnservices.com/([^"]*(?:episode|video).*?:.*?))"', webpage)

        if not mMovieParams:
            # The Colbert Report embeds the information in a data-mgid
            # attribute without a URL prefix; so extract the alternate
            # reference and then add the URL prefix manually.
            altMovieParams = re.findall('data-mgid="([^"]*(?:episode|video).*?:.*?)"', webpage)
            if not altMovieParams:
                raise ExtractorError(u'unable to find Flash URL in webpage ' + url)
            mMovieParams = [("http://media.mtvnservices.com/" + altMovieParams[0], altMovieParams[0])]

        uri = mMovieParams[0][1]
        indexUrl = 'http://shadow.comedycentral.com/feeds/video_player/mrss/?' + compat_urllib_parse.urlencode({'uri': uri})
        indexXml = self._download_webpage(indexUrl, epTitle,
                                          u'Downloading show index',
                                          u'unable to download episode index')

        results = []

        idoc = xml.etree.ElementTree.fromstring(indexXml)
        itemEls = idoc.findall('.//item')
        for partNum, itemEl in enumerate(itemEls):
            mediaId = itemEl.findall('./guid')[0].text
            shortMediaId = mediaId.split(':')[-1]
            showId = mediaId.split(':')[-2].replace('.com', '')
            officialTitle = itemEl.findall('./title')[0].text
            officialDate = unified_strdate(itemEl.findall('./pubDate')[0].text)

            configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' +
                         compat_urllib_parse.urlencode({'uri': mediaId}))
            configXml = self._download_webpage(configUrl, epTitle,
                                               u'Downloading configuration for %s' % shortMediaId)

            cdoc = xml.etree.ElementTree.fromstring(configXml)
            # (bitrate, rtmp url) pairs in document order
            turls = [
                (rendition.attrib['bitrate'], rendition.findall('./src')[0].text)
                for rendition in cdoc.findall('.//rendition')]

            if not turls:
                self._downloader.report_error(u'unable to download ' + mediaId + ': No videos found')
                continue

            if self._downloader.params.get('listformats', None):
                self._print_formats([i[0] for i in turls])
                return

            # For now, just pick the highest bitrate
            # (assumes the feed lists renditions in ascending bitrate order)
            video_format, rtmp_video_url = turls[-1]

            # Get the format arg from the arg stream
            req_format = self._downloader.params.get('format', None)

            # Select format if we can find one
            for f, v in turls:
                if f == req_format:
                    video_format, rtmp_video_url = f, v
                    break

            # The RTMP path is re-rooted onto an HTTP host.
            m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp.comedystor/.*)$', rtmp_video_url)
            if not m:
                raise ExtractorError(u'Cannot transform RTMP url')
            base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/'
            video_url = base + m.group('finalid')

            effTitle = showId + u'-' + epTitle + u' part ' + compat_str(partNum+1)
            info = {
                'id': shortMediaId,
                'url': video_url,
                'uploader': showId,
                'upload_date': officialDate,
                'title': effTitle,
                'ext': 'mp4',
                'format': video_format,
                'thumbnail': None,
                'description': officialTitle,
            }
            results.append(info)

        return results
d77c3dfd FV |
872 | |
873 | ||
class EscapistIE(InfoExtractor):
    """Information extractor for The Escapist.

    Reads the metadata from the page's <meta> tags, then downloads the
    player configuration referenced by the og:video URL to find the media
    URL.
    """

    _VALID_URL = r'^(https?://)?(www\.)?escapistmagazine\.com/videos/view/(?P<showname>[^/]+)/(?P<episode>[^/?]+)[/?]?.*$'
    IE_NAME = u'escapist'

    def _real_extract(self, url):
        """Return a one-element list with the info dict for the episode.

        Raises ExtractorError when the URL is invalid, a mandatory page
        field is missing, or the player configuration cannot be parsed.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        showName = mobj.group('showname')
        videoId = mobj.group('episode')

        self.report_extraction(videoId)
        webpage = self._download_webpage(url, videoId)

        videoDesc = self._html_search_regex('<meta name="description" content="([^"]*)"',
            webpage, u'description', fatal=False)

        imgUrl = self._html_search_regex('<meta property="og:image" content="([^"]*)"',
            webpage, u'thumbnail', fatal=False)

        playerUrl = self._html_search_regex('<meta property="og:video" content="([^"]*)"',
            webpage, u'player url')

        # The meta title looks like "<prefix> : <episode title>"; keep the
        # last component.  (The label used to be u'player url' by mistake,
        # which produced a misleading error when the title was missing.)
        title = self._html_search_regex('<meta name="title" content="([^"]*)"',
            webpage, u'title').split(' : ')[-1]

        configUrl = self._search_regex('config=(.*)$', playerUrl, u'config url')
        configUrl = compat_urllib_parse.unquote(configUrl)

        configJSON = self._download_webpage(configUrl, videoId,
                                            u'Downloading configuration',
                                            u'unable to download configuration')

        # Technically, it's JavaScript, not JSON
        configJSON = configJSON.replace("'", '"')

        try:
            config = json.loads(configJSON)
        except ValueError as err:
            raise ExtractorError(u'Invalid JSON in configuration file: ' + compat_str(err))

        # The media URL is taken from the second playlist entry.
        try:
            videoUrl = config['playlist'][1]['url']
        except (KeyError, IndexError, TypeError):
            raise ExtractorError(u'Unable to find video URL in configuration file')

        info = {
            'id': videoId,
            'url': videoUrl,
            'uploader': showName,
            'upload_date': None,
            'title': title,
            'ext': 'mp4',
            'thumbnail': imgUrl,
            'description': videoDesc,
            'player_url': playerUrl,
        }

        return [info]
d77c3dfd | 933 | |
class CollegeHumorIE(InfoExtractor):
    """Information extractor for collegehumor.com

    Reads the moogaloop metadata XML for the video, then the f4m manifest
    it points to, and builds the URL of the first segment/fragment.
    """

    _WORKING = False
    _VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/video/(?P<videoid>[0-9]+)/(?P<shorttitle>.*)$'
    IE_NAME = u'collegehumor'

    def report_manifest(self, video_id):
        """Report XML manifest download."""
        self.to_screen(u'%s: Downloading XML manifest' % video_id)

    def _real_extract(self, url):
        """Return a one-element list with the info dict for the video.

        Raises ExtractorError when the URL is invalid, a download fails, or
        the metadata/manifest XML lacks an expected node or attribute.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = mobj.group('videoid')

        info = {
            'id': video_id,
            'uploader': None,
            'upload_date': None,
        }

        self.report_extraction(video_id)
        xmlUrl = 'http://www.collegehumor.com/moogaloop/video/' + video_id
        try:
            metaXml = compat_urllib_request.urlopen(xmlUrl).read()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to download video info XML: %s' % compat_str(err))

        mdoc = xml.etree.ElementTree.fromstring(metaXml)
        try:
            videoNode = mdoc.findall('./video')[0]
            info['description'] = videoNode.findall('./description')[0].text
            info['title'] = videoNode.findall('./caption')[0].text
            info['thumbnail'] = videoNode.findall('./thumbnail')[0].text
            manifest_url = videoNode.findall('./file')[0].text
        except IndexError:
            raise ExtractorError(u'Invalid metadata XML file')

        manifest_url += '?hdcore=2.10.3'
        self.report_manifest(video_id)
        try:
            manifestXml = compat_urllib_request.urlopen(manifest_url).read()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to download XML manifest: %s' % compat_str(err))

        adoc = xml.etree.ElementTree.fromstring(manifestXml)
        try:
            media_node = adoc.findall('./{http://ns.adobe.com/f4m/1.0}media')[0]
            node_id = media_node.attrib['url']
            manifest_id = adoc.findall('./{http://ns.adobe.com/f4m/1.0}id')[0].text
        except (IndexError, KeyError):
            # IndexError: node missing; KeyError: <media> without a url attribute
            raise ExtractorError(u'Invalid manifest file')

        # The fragment lives on the manifest's host under /z<id minus its
        # last two characters>/<media url>Seg1-Frag1.
        url_pr = compat_urllib_parse_urlparse(manifest_url)
        url = url_pr.scheme + '://' + url_pr.netloc + '/z' + manifest_id[:-2] + '/' + node_id + 'Seg1-Frag1'

        info['url'] = url
        info['ext'] = 'f4f'
        return [info]
d77c3dfd FV |
995 | |
996 | ||
class XVideosIE(InfoExtractor):
    """Information extractor for xvideos.com"""

    _VALID_URL = r'^(?:https?://)?(?:www\.)?xvideos\.com/video([0-9]+)(?:.*)'
    IE_NAME = u'xvideos'

    def _real_extract(self, url):
        """Return a one-element list with the info dict for the video.

        Raises ExtractorError when the URL is invalid or the video URL or
        title cannot be found in the page; a missing thumbnail is not fatal.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = mobj.group(1)

        webpage = self._download_webpage(url, video_id)

        self.report_extraction(video_id)

        # Extract video URL
        video_url = compat_urllib_parse.unquote(self._search_regex(r'flv_url=(.+?)&',
            webpage, u'video URL'))

        # Extract title
        video_title = self._html_search_regex(r'<title>(.*?)\s+-\s+XVID',
            webpage, u'title')

        # Extract video thumbnail
        # The whole URL is the capturing group: _search_regex hands back the
        # captured group, and capturing only the file name (as before)
        # yielded a bare "xxxx.jpg" instead of a usable thumbnail URL.
        video_thumbnail = self._search_regex(r'(http://(?:img.*?\.)xvideos.com/videos/thumbs/[a-fA-F0-9]+/[a-fA-F0-9]+/[a-fA-F0-9]+/[a-fA-F0-9]+/[a-fA-F0-9.]+jpg)',
            webpage, u'thumbnail', fatal=False)

        info = {
            'id': video_id,
            'url': video_url,
            'uploader': None,
            'upload_date': None,
            'title': video_title,
            'ext': 'flv',
            'thumbnail': video_thumbnail,
            'description': None,
        }

        return [info]
d77c3dfd FV |
1037 | |
1038 | ||
class SoundcloudIE(InfoExtractor):
    """Information extractor for soundcloud.com

    The track page URL is resolved to its metadata through the
    api.soundcloud.com resolve.json endpoint; the stream definitions are
    then fetched from api.sndcdn.com and the 'http_mp3_128_url' stream is
    used as the media URL.
    """

    _VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)'
    IE_NAME = u'soundcloud'

    # API key sent with every request made by this extractor.
    _CLIENT_ID = 'b45b1aa10f1ac2941910a7f0d10f8e28'

    def report_resolve(self, video_id):
        """Report that the track URL is being resolved to an id."""
        self.to_screen(u'%s: Resolving id' % video_id)

    def _real_extract(self, url):
        """Return a one-element list with the info dict for the track.

        Raises ExtractorError when the URL is invalid or a download fails.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        # extract uploader (which is in the url)
        uploader = mobj.group(1)
        # extract the slug of the song title
        slug_title = mobj.group(2)
        full_title = '%s/%s' % (uploader, slug_title)

        self.report_resolve(full_title)

        # Rebuild a canonical URL so that optional scheme/www parts of the
        # input do not matter.
        url = 'http://soundcloud.com/%s/%s' % (uploader, slug_title)
        resolv_url = 'http://api.soundcloud.com/resolve.json?url=' + url + '&client_id=' + self._CLIENT_ID
        info_json = self._download_webpage(resolv_url, full_title, u'Downloading info JSON')

        info = json.loads(info_json)
        video_id = info['id']
        self.report_extraction(full_title)

        streams_url = 'https://api.sndcdn.com/i1/tracks/' + str(video_id) + '/streams?client_id=' + self._CLIENT_ID
        stream_json = self._download_webpage(streams_url, full_title,
                                             u'Downloading stream definitions',
                                             u'unable to download stream definitions')

        streams = json.loads(stream_json)
        mediaURL = streams['http_mp3_128_url']
        upload_date = unified_strdate(info['created_at'])

        return [{
            'id': info['id'],
            'url': mediaURL,
            'uploader': info['user']['username'],
            'upload_date': upload_date,
            'title': info['title'],
            'ext': u'mp3',
            'description': info['description'],
        }]
d77c3dfd | 1095 | |
class SoundcloudSetIE(InfoExtractor):
    """Information extractor for soundcloud.com sets

    The set page URL is resolved to its metadata (including the track
    list) through the api.soundcloud.com resolve.json endpoint; for every
    track the stream definitions are fetched from api.sndcdn.com and the
    'http_mp3_128_url' stream is used as the media URL.
    """

    _VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)'
    IE_NAME = u'soundcloud:set'

    # API key sent with every request made by this extractor.
    _CLIENT_ID = 'b45b1aa10f1ac2941910a7f0d10f8e28'

    def report_resolve(self, video_id):
        """Report that the set URL is being resolved to an id."""
        self.to_screen(u'%s: Resolving id' % video_id)

    def _real_extract(self, url):
        """Return a list of info dicts, one per track of the set.

        Returns None (after reporting each message through the downloader)
        when the API answers with an 'errors' entry.  Raises ExtractorError
        when the URL is invalid or a download fails.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        # extract uploader (which is in the url)
        uploader = mobj.group(1)
        # extract the slug of the set title
        slug_title = mobj.group(2)
        full_title = '%s/sets/%s' % (uploader, slug_title)

        self.report_resolve(full_title)

        # Rebuild a canonical URL so that optional scheme/www parts of the
        # input do not matter.
        url = 'http://soundcloud.com/%s/sets/%s' % (uploader, slug_title)
        resolv_url = 'http://api.soundcloud.com/resolve.json?url=' + url + '&client_id=' + self._CLIENT_ID
        info_json = self._download_webpage(resolv_url, full_title)

        videos = []
        info = json.loads(info_json)
        if 'errors' in info:
            for err in info['errors']:
                self._downloader.report_error(u'unable to download video webpage: %s' % compat_str(err['error_message']))
            return

        self.report_extraction(full_title)
        for track in info['tracks']:
            video_id = track['id']

            streams_url = 'https://api.sndcdn.com/i1/tracks/' + str(video_id) + '/streams?client_id=' + self._CLIENT_ID
            stream_json = self._download_webpage(streams_url, video_id, u'Downloading track info JSON')

            self.report_extraction(video_id)
            streams = json.loads(stream_json)
            mediaURL = streams['http_mp3_128_url']

            videos.append({
                'id': video_id,
                'url': mediaURL,
                'uploader': track['user']['username'],
                'upload_date': unified_strdate(track['created_at']),
                'title': track['title'],
                'ext': u'mp3',
                'description': track['description'],
            })
        return videos
1158 | ||
d77c3dfd FV |
1159 | |
class InfoQIE(InfoExtractor):
    """Information extractor for infoq.com"""
    _VALID_URL = r'^(?:https?://)?(?:www\.)?infoq\.com/[^/]+/[^/]+$'

    def _real_extract(self, url):
        """Return a one-element list with the info dict for the talk.

        Raises ExtractorError when the URL is invalid or the media
        reference or title cannot be found; a missing description is not
        fatal.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        webpage = self._download_webpage(url, video_id=url)
        self.report_extraction(url)

        # Extract video URL
        mobj = re.search(r"jsclassref ?= ?'([^']*)'", webpage)
        if mobj is None:
            raise ExtractorError(u'Unable to extract video url')
        # jsclassref holds the base64-encoded, URL-quoted media path.
        real_id = compat_urllib_parse.unquote(base64.b64decode(mobj.group(1).encode('ascii')).decode('utf-8'))
        video_url = 'rtmpe://video.infoq.com/cfx/st/' + real_id

        # Extract title
        video_title = self._search_regex(r'contentTitle = "(.*?)";',
            webpage, u'title')

        # Extract description
        video_description = self._html_search_regex(r'<meta name="description" content="(.*)"(?:\s*/)?>',
            webpage, u'description', fatal=False)

        video_filename = video_url.split('/')[-1]
        # Split on the LAST dot only: a plain split('.') raised ValueError
        # for names with zero or several dots.
        video_id, dot, extension = video_filename.rpartition('.')
        if not dot:
            # No extension at all.  NOTE(review): falling back to mp4 follows
            # the "always(?) mp4" assumption noted below -- confirm.
            video_id, extension = video_filename, 'mp4'

        info = {
            'id': video_id,
            'url': video_url,
            'uploader': None,
            'upload_date': None,
            'title': video_title,
            'ext': extension, # Extension is always(?) mp4, but seems to be flv
            'thumbnail': None,
            'description': video_description,
        }

        return [info]
d77c3dfd FV |
1202 | |
class MixcloudIE(InfoExtractor):
    """Information extractor for www.mixcloud.com

    Downloads the cloudcast JSON description, then probes the URLs listed
    in its 'audio_formats' section until one answers.
    """

    _WORKING = False # New API, but it seems good http://www.mixcloud.com/developers/documentation/
    _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([\w\d-]+)/([\w\d-]+)'
    IE_NAME = u'mixcloud'

    def report_download_json(self, file_id):
        """Report JSON download."""
        self.to_screen(u'Downloading json')

    def get_urls(self, jsonData, fmt, bitrate='best'):
        """Get urls from 'audio_formats' section in json

        jsonData[fmt] is either a mapping bitrate -> url list, or directly
        a url list when no bitrate information is available.  An unknown,
        None or 'best' bitrate selects the highest one.
        """
        try:
            bitrate_list = jsonData[fmt]
            if bitrate is None or bitrate == 'best' or bitrate not in bitrate_list:
                bitrate = max(bitrate_list) # select highest

            url_list = jsonData[fmt][bitrate]
        except TypeError: # we have no bitrate info.
            url_list = jsonData[fmt]
        return url_list

    def check_urls(self, url_list):
        """Returns 1st active url from list, or None when none answers"""
        for url in url_list:
            try:
                handle = compat_urllib_request.urlopen(url)
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error):
                continue
            # Only reachability matters; do not leave the connection open.
            handle.close()
            return url

        return None

    def _print_formats(self, formats):
        """Print one line per format/bitrate with the file extension."""
        print('Available formats:')
        for fmt in formats.keys():
            for b in formats[fmt]:
                try:
                    ext = formats[fmt][b][0]
                    print('%s\t%s\t[%s]' % (fmt, b, ext.split('.')[-1]))
                except TypeError: # we have no bitrate info
                    ext = formats[fmt][0]
                    print('%s\t%s\t[%s]' % (fmt, '??', ext.split('.')[-1]))
                    break

    def _real_extract(self, url):
        """Return a one-element list with the info dict for the cloudcast.

        Returns None after printing the format table when the 'listformats'
        option is set.  Raises ExtractorError when the URL is invalid, the
        JSON cannot be retrieved, the requested format does not exist or no
        listed media URL is reachable.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        # extract uploader & filename from url
        # (regex groups are already text; no .decode(), which breaks on Python 3)
        uploader = mobj.group(1)
        slug = mobj.group(2)
        file_id = uploader + "-" + slug

        # construct API request
        # Built from the regex groups rather than by slicing the URL, which
        # only worked when the URL ended with a slash.
        api_url = 'http://www.mixcloud.com/api/1/cloudcast/' + uploader + '/' + slug + '.json'
        # retrieve .json file with links to files
        request = compat_urllib_request.Request(api_url)
        try:
            self.report_download_json(api_url)
            jsonData = compat_urllib_request.urlopen(request).read()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to retrieve file: %s' % compat_str(err))

        # parse JSON (read() yields bytes; decode before parsing)
        json_data = json.loads(jsonData.decode('utf-8'))
        player_url = json_data['player_swf_url']
        formats = dict(json_data['audio_formats'])

        req_format = self._downloader.params.get('format', None)

        if self._downloader.params.get('listformats', None):
            self._print_formats(formats)
            return

        file_url = None
        format_param = None
        if req_format is None or req_format == 'best':
            for format_param in formats.keys():
                url_list = self.get_urls(formats, format_param)
                # check urls
                file_url = self.check_urls(url_list)
                if file_url is not None:
                    break # got it!
        else:
            if req_format not in formats:
                raise ExtractorError(u'Format is not available')

            url_list = self.get_urls(formats, req_format)
            file_url = self.check_urls(url_list)
            format_param = req_format

        if file_url is None:
            raise ExtractorError(u'Unable to find a working media URL')

        return [{
            'id': file_id,
            'url': file_url,
            'uploader': uploader,
            'upload_date': None,
            'title': json_data['name'],
            'ext': file_url.split('.')[-1],
            'format': (u'NA' if format_param is None else format_param),
            'thumbnail': json_data['thumbnail_url'],
            'description': json_data['description'],
            'player_url': player_url,
        }]
d77c3dfd FV |
1307 | |
class StanfordOpenClassroomIE(InfoExtractor):
    """Information extractor for Stanford's Open ClassRoom

    Handles three kinds of URLs: a single video page, a course page (all
    videos linked from it are extracted) and the site root (all courses
    linked from the home page are extracted).
    """

    _VALID_URL = r'^(?:https?://)?openclassroom.stanford.edu(?P<path>/?|(/MainFolder/(?:HomePage|CoursePage|VideoPage)\.php([?]course=(?P<course>[^&]+)(&video=(?P<video>[^&]+))?(&.*)?)?))$'
    IE_NAME = u'stanfordoc'

    def _extract_linked_pages(self, page_links):
        """Extract every page in page_links (relative to MainFolder/) and
        return the concatenated results."""
        results = []
        for page in page_links:
            results += self.extract('http://openclassroom.stanford.edu/MainFolder/' + unescapeHTML(page))
        return results

    def _real_extract(self, url):
        """Return a list of info dicts for the video(s) behind url.

        Raises ExtractorError when the URL is invalid, a download fails or
        the video metadata XML lacks the title or file name.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        if mobj.group('course') and mobj.group('video'): # A specific video
            course = mobj.group('course')
            video = mobj.group('video')
            info = {
                'id': course + '_' + video,
                'uploader': None,
                'upload_date': None,
            }

            self.report_extraction(info['id'])
            baseUrl = 'http://openclassroom.stanford.edu/MainFolder/courses/' + course + '/videos/'
            xmlUrl = baseUrl + video + '.xml'
            try:
                metaXml = compat_urllib_request.urlopen(xmlUrl).read()
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                raise ExtractorError(u'Unable to download video info XML: %s' % compat_str(err))
            mdoc = xml.etree.ElementTree.fromstring(metaXml)
            try:
                info['title'] = mdoc.findall('./title')[0].text
                info['url'] = baseUrl + mdoc.findall('./videoFile')[0].text
            except IndexError:
                raise ExtractorError(u'Invalid metadata XML file')
            info['ext'] = info['url'].rpartition('.')[2]
            return [info]
        elif mobj.group('course'): # A course page
            course = mobj.group('course')
            # NOTE: the playlist metadata is gathered but only the flattened
            # list of video results is returned.
            info = {
                'id': course,
                'type': 'playlist',
                'uploader': None,
                'upload_date': None,
            }

            coursepage = self._download_webpage(url, info['id'],
                                        note='Downloading course info page',
                                        errnote='Unable to download course info page')

            info['title'] = self._html_search_regex('<h1>([^<]+)</h1>', coursepage, 'title', default=info['id'])

            info['description'] = self._html_search_regex('<description>([^<]+)</description>',
                coursepage, u'description', fatal=False)

            links = orderedSet(re.findall(r'<a href="(VideoPage\.php\?[^"]+)">', coursepage))
            return self._extract_linked_pages(links)
        else: # Root page
            info = {
                'id': 'Stanford OpenClassroom',
                'type': 'playlist',
                'uploader': None,
                'upload_date': None,
            }

            rootURL = 'http://openclassroom.stanford.edu/MainFolder/HomePage.php'
            # Use the shared helper (as the course branch does) so that the
            # page is decoded text; the raw bytes returned by urlopen().read()
            # cannot be searched with a text pattern on Python 3.
            rootpage = self._download_webpage(rootURL, info['id'],
                                        errnote=u'Unable to download course info page')

            info['title'] = info['id']

            links = orderedSet(re.findall(r'<a href="(CoursePage\.php\?[^"]+)">', rootpage))
            return self._extract_linked_pages(links)
d77c3dfd FV |
1403 | |
class MTVIE(InfoExtractor):
    """Information extractor for MTV.com

    Reads the identifiers from the video page's <meta> tags and player
    script, queries the mediaGen service and uses the last rendition it
    lists.
    """

    _VALID_URL = r'^(?P<proto>https?://)?(?:www\.)?mtv\.com/videos/[^/]+/(?P<videoid>[0-9]+)/[^/]+$'
    IE_NAME = u'mtv'

    def _real_extract(self, url):
        """Return a one-element list with the info dict for the video.

        Raises ExtractorError when the URL is invalid, a mandatory page
        field is missing, the metadata cannot be downloaded or it contains
        no usable rendition.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        if not mobj.group('proto'):
            url = 'http://' + url
        video_id = mobj.group('videoid')

        webpage = self._download_webpage(url, video_id)

        # NOTE(review): mtv_vt / mtv_an are presumed to hold the video title
        # and the artist name respectively -- confirm against a live page.
        song_name = self._html_search_regex(r'<meta name="mtv_vt" content="([^"]+)"/>',
            webpage, u'song name', fatal=False)

        # 'performer' was referenced below without ever being assigned, so
        # every extraction ended in a NameError.
        performer = self._html_search_regex(r'<meta name="mtv_an" content="([^"]+)"/>',
            webpage, u'performer')

        if song_name:
            video_title = performer + u' - ' + song_name
        else:
            video_title = performer

        # Both values are concatenated into the mediaGen URL, so a miss must
        # be fatal instead of surfacing later as a TypeError on None.
        mtvn_uri = self._html_search_regex(r'<meta name="mtvn_uri" content="([^"]+)"/>',
            webpage, u'mtvn_uri')

        content_id = self._search_regex(r'MTVN.Player.defaultPlaylistId = ([0-9]+);',
            webpage, u'content id')

        videogen_url = 'http://www.mtv.com/player/includes/mediaGen.jhtml?uri=' + mtvn_uri + '&id=' + content_id + '&vid=' + video_id + '&ref=www.mtvn.com&viewUri=' + mtvn_uri
        self.report_extraction(video_id)
        request = compat_urllib_request.Request(videogen_url)
        try:
            metadataXml = compat_urllib_request.urlopen(request).read()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to download video metadata: %s' % compat_str(err))

        mdoc = xml.etree.ElementTree.fromstring(metadataXml)
        renditions = mdoc.findall('.//rendition')
        if not renditions:
            raise ExtractorError(u'No renditions found in video metadata')

        # For now, always pick the highest quality.
        # (assumes the service lists renditions in ascending quality order)
        rendition = renditions[-1]

        try:
            _, _, ext = rendition.attrib['type'].partition('/')
            video_format = ext + '-' + rendition.attrib['width'] + 'x' + rendition.attrib['height'] + '_' + rendition.attrib['bitrate']
        except KeyError:
            raise ExtractorError('Invalid rendition field.')

        src = rendition.find('./src')
        if src is None:
            raise ExtractorError('Invalid rendition field.')
        video_url = src.text

        info = {
            'id': video_id,
            'url': video_url,
            'uploader': performer,
            'upload_date': None,
            'title': video_title,
            'ext': ext,
            'format': video_format,
        }

        return [info]
6de7ef9b | 1464 | |
302efc19 | 1465 | |
class YoukuIE(InfoExtractor):
    """Information extractor for v.youku.com"""

    _VALID_URL = r'(?:http://)?v\.youku\.com/v_show/id_(?P<ID>[A-Za-z0-9]+)\.html'

    def _gen_sid(self):
        """Return a session id built from the current time (ms) and two random integers."""
        nowTime = int(time.time() * 1000)
        random1 = random.randint(1000, 1998)
        random2 = random.randint(1000, 9999)

        return "%d%d%d" % (nowTime, random1, random2)

    def _get_file_ID_mix_string(self, seed):
        """Return the alphabet as a list of characters, shuffled deterministically from seed."""
        mixed = []
        # The backslash is part of the alphabet; it is escaped explicitly so
        # the literal does not rely on an unrecognised "\:" escape sequence.
        source = list("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/\\:._-1234567890")
        seed = float(seed)
        while source:
            # Linear congruential step; seed stays in [0, 65536), so index < len(source).
            seed = (seed * 211 + 30031) % 65536
            index = int(math.floor(seed / 65536 * len(source)))
            # Every character is unique, so popping by index removes exactly that one.
            mixed.append(source.pop(index))
        return mixed

    def _get_file_id(self, fileId, seed):
        """Decode a '*'-separated list of indices into the real file id string."""
        mixed = self._get_file_ID_mix_string(seed)
        return ''.join(mixed[int(ch)] for ch in fileId.split('*') if ch)

    def _real_extract(self, url):
        """Return one info dictionary per video segment.

        Raises ExtractorError when the URL does not match or when the
        playlist JSON cannot be parsed or lacks the expected entries.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = mobj.group('ID')

        info_url = 'http://v.youku.com/player/getPlayList/VideoIDS/' + video_id

        jsondata = self._download_webpage(info_url, video_id)

        self.report_extraction(video_id)
        try:
            config = json.loads(jsondata)

            video_title = config['data'][0]['title']
            seed = config['data'][0]['seed']

            format = self._downloader.params.get('format', None)
            supported_format = list(config['data'][0]['streamfileids'].keys())

            if format is None or format == 'best':
                if 'hd2' in supported_format:
                    format = 'hd2'
                else:
                    format = 'flv'
                ext = u'flv'
            elif format == 'worst':
                format = 'mp4'
                ext = u'mp4'
            else:
                format = 'flv'
                ext = u'flv'

            fileid = config['data'][0]['streamfileids'][format]
            keys = [s['k'] for s in config['data'][0]['segs'][format]]
        except (UnicodeDecodeError, ValueError, KeyError, IndexError, TypeError):
            # IndexError/TypeError cover an empty or malformed 'data' entry.
            raise ExtractorError(u'Unable to extract info section')

        files_info = []
        sid = self._gen_sid()
        fileid = self._get_file_id(fileid, seed)

        # Characters 8 and 9 (0-based) of fileid hold the segment number,
        # so fileid[8:10] is replaced with the hex index of each segment.
        for index, key in enumerate(keys):
            temp_fileid = '%s%02X%s' % (fileid[0:8], index, fileid[10:])
            download_url = 'http://f.youku.com/player/getFlvPath/sid/%s_%02X/st/flv/fileid/%s?k=%s' % (sid, index, temp_fileid, key)

            info = {
                'id': '%s_part%02d' % (video_id, index),
                'url': download_url,
                'uploader': None,
                'upload_date': None,
                'title': video_title,
                'ext': ext,
            }
            files_info.append(info)

        return files_info
5dc846fa FV |
1558 | |
1559 | ||
class XNXXIE(InfoExtractor):
    """Information extractor for xnxx.com"""

    _VALID_URL = r'^(?:https?://)?video\.xnxx\.com/video([0-9]+)/(.*)'
    IE_NAME = u'xnxx'
    VIDEO_URL_RE = r'flv_url=(.*?)&'
    VIDEO_TITLE_RE = r'<title>(.*?)\s+-\s+XNXX.COM'
    VIDEO_THUMB_RE = r'url_bigthumb=(.*?)&'

    def _real_extract(self, url):
        """Return a one-element list with the info dictionary for the video page at url."""
        url_match = re.match(self._VALID_URL, url)
        if url_match is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = url_match.group(1)

        page = self._download_webpage(url, video_id)

        # The media URL is percent-encoded inside the page.
        flv_url = compat_urllib_parse.unquote(
            self._search_regex(self.VIDEO_URL_RE, page, u'video URL'))
        title = self._html_search_regex(self.VIDEO_TITLE_RE, page, u'title')
        # The thumbnail is optional, so a missing one is not fatal.
        thumbnail = self._search_regex(self.VIDEO_THUMB_RE, page, u'thumbnail', fatal=False)

        info = {
            'id': video_id,
            'url': flv_url,
            'uploader': None,
            'upload_date': None,
            'title': title,
            'ext': 'flv',
            'thumbnail': thumbnail,
            'description': None,
        }
        return [info]
fd873c69 FV |
1598 | |
1599 | ||
class GooglePlusIE(InfoExtractor):
    """Information extractor for plus.google.com."""

    _VALID_URL = r'(?:https://)?plus\.google\.com/(?:[^/]+/)*?posts/(\w+)'
    IE_NAME = u'plus.google'

    def _real_extract(self, url):
        """Return a one-element list with the info dictionary for a post's video.

        Raises ExtractorError when the URL does not match or when no video
        links can be found on the video page.
        """
        # Extract id from URL
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        post_url = mobj.group(0)
        video_id = mobj.group(1)

        video_extension = 'flv'

        # Step 1, Retrieve post webpage to extract further information
        webpage = self._download_webpage(post_url, video_id, u'Downloading entry webpage')

        self.report_extraction(video_id)

        # Extract upload date (optional metadata)
        upload_date = self._html_search_regex('title="Timestamp">(.*?)</a>',
            webpage, u'upload date', fatal=False)
        if upload_date:
            try:
                # Convert timestring to a format suitable for filename
                upload_date = datetime.datetime.strptime(upload_date, "%Y-%m-%d").strftime('%Y%m%d')
            except ValueError:
                # The date is optional, so an unexpected format must not abort extraction.
                upload_date = None

        # Extract uploader
        uploader = self._html_search_regex(r'rel\="author".*?>(.*?)</a>',
            webpage, u'uploader', fatal=False)

        # Extract title
        # Get the first line for title
        video_title = self._html_search_regex(r'<meta name\=\"Description\" content\=\"(.*?)[\n<"]',
            webpage, 'title', default=u'NA')

        # Step 2, Simulate clicking the image box to launch video
        video_page = self._search_regex(r'"(https\://plus\.google\.com/photos/.*?)",,"image/jpeg","video"\]',
            webpage, u'video page URL')
        webpage = self._download_webpage(video_page, video_id, u'Downloading video page')

        # Extract video links of all sizes from the video page.
        # Each match is a (resolution, url) tuple of strings.
        pattern = r'\d+,\d+,(\d+),"(http\://redirector\.googlevideo\.com.*?)"'
        links = re.findall(pattern, webpage)
        if not links:
            raise ExtractorError(u'Unable to extract video links')

        # Pick the highest resolution. Compare numerically: as strings,
        # "720" would sort after "1080".
        _, video_url = max(links, key=lambda link: (int(link[0]), link[1]))

        # Treat escaped \u0026 style hex
        try:
            video_url = video_url.decode("unicode_escape")
        except AttributeError: # Python 3
            video_url = bytes(video_url, 'ascii').decode('unicode-escape')

        return [{
            'id': video_id,
            'url': video_url,
            'uploader': uploader,
            'upload_date': upload_date,
            'title': video_title,
            'ext': video_extension,
        }]
4cc3d074 PH |
1673 | |
class NBAIE(InfoExtractor):
    """Information extractor for nba.com videos"""

    _VALID_URL = r'^(?:https?://)?(?:watch\.|www\.)?nba\.com/(?:nba/)?video(/[^?]*?)(?:/index\.html)?(?:\?.*)?$'
    IE_NAME = u'nba'

    def _real_extract(self, url):
        """Return a one-element list with the info dictionary for the video at url."""
        url_match = re.match(self._VALID_URL, url)
        if url_match is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        # The captured path (with its leading slash) doubles as the download id.
        video_path = url_match.group(1)
        page = self._download_webpage(url, video_path)

        # Only the last path component is reported as the video id.
        short_id = video_path.rpartition('/')[2]

        raw_title = self._html_search_regex(r'<meta property="og:title" content="(.*?)"',
            page, 'title', default=short_id)
        title = raw_title.replace('NBA.com: ', '')

        # The upload date is not present in the HTML returned to us, so it is not extracted.

        description = self._html_search_regex(r'<meta name="description" (?:content|value)="(.*?)" />', page, 'description', fatal=False)

        return [{
            'id': short_id,
            'url': u'http://ht-mobile.cdn.turner.com/nba/big' + video_path + '_nba_1280x720.mp4',
            'ext': 'mp4',
            'title': title,
            'description': description,
        }]
0b40544f DV |
1707 | |
class JustinTVIE(InfoExtractor):
    """Information extractor for justin.tv and twitch.tv"""
    # TODO: One broadcast may be split into multiple videos. The key
    # 'broadcast_id' is the same for all parts, and 'broadcast_part'
    # starts at 1 and increases. Can we treat all parts as one video?

    _VALID_URL = r"""(?x)^(?:http://)?(?:www\.)?(?:twitch|justin)\.tv/
        (?:
            (?P<channelid>[^/]+)|
            (?:(?:[^/]+)/b/(?P<videoid>[^/]+))|
            (?:(?:[^/]+)/c/(?P<chapterid>[^/]+))
        )
        /?(?:\#.*)?$
        """
    _JUSTIN_PAGE_LIMIT = 100
    IE_NAME = u'justin.tv'

    def report_download_page(self, channel, offset):
        """Report attempt to download a single page of videos."""
        self.to_screen(u'%s: Downloading video information from %d to %d' %
                (channel, offset, offset + self._JUSTIN_PAGE_LIMIT))

    def _parse_page(self, url, video_id):
        """Download one API page and return (item count, list of *valid* items).

        The count covers every clip in the response; the list only holds
        clips that carry a video URL. Raises ExtractorError when the API
        answers with something other than a list.
        """
        webpage = self._download_webpage(url, video_id,
                                         u'Downloading video info JSON',
                                         u'unable to download video info JSON')

        response = json.loads(webpage)
        if not isinstance(response, list):
            # Error payloads are expected to be objects; anything else has no 'error' key.
            error_text = 'unknown error'
            if isinstance(response, dict):
                error_text = response.get('error', 'unknown error')
            raise ExtractorError(u'Justin.tv API: %s' % error_text)
        info = []
        for clip in response:
            video_url = clip['video_file_url']
            if video_url:
                video_extension = os.path.splitext(video_url)[1][1:]
                video_date = re.sub('-', '', clip['start_time'][:10])
                video_uploader_id = clip.get('user_id', clip.get('channel_id'))
                # A separate name keeps the video_id parameter intact.
                clip_id = clip['id']
                video_title = clip.get('title', clip_id)
                info.append({
                    'id': clip_id,
                    'url': video_url,
                    'title': video_title,
                    'uploader': clip.get('channel_name', video_uploader_id),
                    'uploader_id': video_uploader_id,
                    'upload_date': video_date,
                    'ext': video_extension,
                })
        return (len(response), info)

    def _extract_chapter(self, url, chapter_id):
        """Return a one-element info list for the chapter page at url."""
        api_base = 'http://api.justin.tv'

        webpage = self._download_webpage(url, chapter_id)
        m = re.search(r'PP\.archive_id = "([0-9]+)";', webpage)
        if not m:
            raise ExtractorError(u'Cannot find archive of a chapter')
        archive_id = m.group(1)

        api = api_base + '/broadcast/by_chapter/%s.xml' % chapter_id
        chapter_info_xml = self._download_webpage(api, chapter_id,
                                         note=u'Downloading chapter information',
                                         errnote=u'Chapter information download failed')
        doc = xml.etree.ElementTree.fromstring(chapter_info_xml)
        # Find the archive entry whose id matches the one embedded in the page.
        for a in doc.findall('.//archive'):
            if archive_id == a.find('./id').text:
                break
        else:
            raise ExtractorError(u'Could not find chapter in chapter information')

        video_url = a.find('./video_file_url').text
        video_ext = video_url.rpartition('.')[2] or u'flv'

        chapter_api_url = u'https://api.twitch.tv/kraken/videos/c' + chapter_id
        chapter_info_json = self._download_webpage(chapter_api_url, u'c' + chapter_id,
                                   note='Downloading chapter metadata',
                                   errnote='Download of chapter metadata failed')
        chapter_info = json.loads(chapter_info_json)

        bracket_start = int(doc.find('.//bracket_start').text)
        bracket_end = int(doc.find('.//bracket_end').text)

        # TODO determine start (and probably fix up file)
        #  youtube-dl -v http://www.twitch.tv/firmbelief/c/1757457
        #video_url += u'?start=' + TODO:start_timestamp
        # bracket_start is 13290, but we want 51670615
        self._downloader.report_warning(u'Chapter detected, but we can just download the whole file. '
                                        u'Chapter starts at %s and ends at %s' % (formatSeconds(bracket_start), formatSeconds(bracket_end)))

        info = {
            'id': u'c' + chapter_id,
            'url': video_url,
            'ext': video_ext,
            'title': chapter_info['title'],
            'thumbnail': chapter_info['preview'],
            'description': chapter_info['description'],
            'uploader': chapter_info['channel']['display_name'],
            'uploader_id': chapter_info['channel']['name'],
        }
        return [info]

    def _real_extract(self, url):
        """Return the list of info dictionaries for a channel, broadcast or chapter URL.

        Channel archives are fetched page by page; a broadcast needs a single
        request; chapters are handled by _extract_chapter.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'invalid URL: %s' % url)

        api_base = 'http://api.justin.tv'
        paged = False
        if mobj.group('channelid'):
            paged = True
            video_id = mobj.group('channelid')
            api = api_base + '/channel/archives/%s.json' % video_id
        elif mobj.group('chapterid'):
            return self._extract_chapter(url, mobj.group('chapterid'))
        else:
            video_id = mobj.group('videoid')
            api = api_base + '/broadcast/by_archive/%s.json' % video_id

        self.report_extraction(video_id)

        info = []
        offset = 0
        limit = self._JUSTIN_PAGE_LIMIT
        while True:
            if paged:
                self.report_download_page(video_id, offset)
            page_url = api + ('?offset=%d&limit=%d' % (offset, limit))
            page_count, page_info = self._parse_page(page_url, video_id)
            info.extend(page_info)
            # A short page means there is nothing left to fetch.
            if not paged or page_count != limit:
                break
            offset += limit
        return info
21a9c6aa PH |
1840 | |
class FunnyOrDieIE(InfoExtractor):
    """Information extractor for funnyordie.com videos"""

    _VALID_URL = r'^(?:https?://)?(?:www\.)?funnyordie\.com/videos/(?P<id>[0-9a-f]+)/.*$'

    def _real_extract(self, url):
        """Return a one-element list with the info dictionary for the video at url."""
        url_match = re.match(self._VALID_URL, url)
        if url_match is None:
            raise ExtractorError(u'invalid URL: %s' % url)

        video_id = url_match.group('id')
        page = self._download_webpage(url, video_id)

        # The pattern captures the src of the second <source> tag inside <video>.
        media_url = self._html_search_regex(r'<video[^>]*>\s*<source[^>]*>\s*<source src="(?P<url>[^"]+)"',
            page, u'video URL', flags=re.DOTALL)

        # Prefer the player heading; fall back to the page <title>.
        title_patterns = (r"<h1 class='player_page_h1'.*?>(?P<title>.*?)</h1>",
            r'<title>(?P<title>[^<]+?)</title>')
        title = self._html_search_regex(title_patterns, page, 'title', flags=re.DOTALL)

        description = self._html_search_regex(r'<meta property="og:description" content="(?P<desc>.*?)"',
            page, u'description', fatal=False, flags=re.DOTALL)

        return [{
            'id': video_id,
            'url': media_url,
            'ext': 'mp4',
            'title': title,
            'description': description,
        }]
d0d4f277 | 1869 | |
class SteamIE(InfoExtractor):
    """Information extractor for store.steampowered.com video and app pages"""

    _VALID_URL = r"""http://store\.steampowered\.com/
                (agecheck/)?
                (?P<urltype>video|app)/ #If the page is only for videos or for a game
                (?P<gameID>\d+)/?
                (?P<videoID>\d*)(?P<extra>\??) #For urltype == video we sometimes get the videoID
                """
    _VIDEO_PAGE_TEMPLATE = 'http://store.steampowered.com/video/%s/'
    _AGECHECK_TEMPLATE = 'http://store.steampowered.com/agecheck/video/%s/?snr=1_agecheck_agecheck__age-gate&ageDay=1&ageMonth=January&ageYear=1970'

    @classmethod
    def suitable(cls, url):
        """Receives a URL and returns True if suitable for this IE."""
        # _VALID_URL is written in verbose form, so it must be matched with re.VERBOSE.
        return re.match(cls._VALID_URL, url, re.VERBOSE) is not None

    def _real_extract(self, url):
        """Return a one-element list holding a playlist of all videos of the game."""
        url_match = re.match(self._VALID_URL, url, re.VERBOSE)
        gameID = url_match.group('gameID')

        page_url = self._VIDEO_PAGE_TEMPLATE % gameID
        webpage = self._download_webpage(page_url, gameID)

        # Age-gated pages are fetched again through the age-check URL.
        if re.search('<h2>Please enter your birth date to continue:</h2>', webpage) is not None:
            page_url = self._AGECHECK_TEMPLATE % gameID
            self.report_age_confirmation()
            webpage = self._download_webpage(page_url, gameID)

        self.report_extraction(gameID)
        game_title = self._html_search_regex(r'<h2 class="pageheader">(.*?)</h2>',
                                             webpage, 'game title')

        movie_re = r"'movie_(?P<videoID>\d+)': \{\s*FILENAME: \"(?P<videoURL>[\w:/\.\?=]+)\"(,\s*MOVIE_NAME: \"(?P<videoName>[\w:/\.\?=\+-]+)\")?\s*\},"
        title_re = r'<span class="title">(?P<videoName>.+?)</span>'
        thumb_re = r'<img class="movie_thumb" src="(?P<thumbnail>.+?)">'

        # Movies, titles and thumbnails are paired up in page order.
        matches = zip(re.finditer(movie_re, webpage),
                      re.finditer(title_re, webpage),
                      re.finditer(thumb_re, webpage))

        videos = []
        for movie, title_match, thumb_match in matches:
            video_id = movie.group('videoID')
            video_url = movie.group('videoURL')
            if not video_url:
                raise ExtractorError(u'Cannot find video url for %s' % video_id)
            videos.append({
                'id': video_id,
                'url': video_url,
                'ext': 'flv',
                'title': unescapeHTML(title_match.group('videoName')),
                'thumbnail': thumb_match.group('thumbnail'),
            })
        return [self.playlist_result(videos, gameID, game_title)]
ef0c8d5f | 1924 | |
class UstreamIE(InfoExtractor):
    """Information extractor for recorded videos on ustream.tv"""

    _VALID_URL = r'https?://www\.ustream\.tv/recorded/(?P<videoID>\d+)'
    IE_NAME = u'ustream'

    def _real_extract(self, url):
        """Return the info dictionary for the recorded video at url."""
        url_match = re.match(self._VALID_URL, url)
        video_id = url_match.group('videoID')

        # The media URL is derived from the id alone; the page only supplies metadata.
        media_url = u'http://tcdn.ustream.tv/video/%s' % video_id
        page = self._download_webpage(url, video_id)

        self.report_extraction(video_id)

        title = self._html_search_regex(r'data-title="(?P<title>.+)"',
            page, u'title')
        channel = self._html_search_regex(r'data-content-type="channel".*?>(?P<uploader>.*?)</a>',
            page, u'uploader', fatal=False, flags=re.DOTALL)
        thumb = self._html_search_regex(r'<link rel="image_src" href="(?P<thumb>.*?)"',
            page, u'thumbnail', fatal=False)

        return {
            'id': video_id,
            'url': media_url,
            'ext': 'flv',
            'title': title,
            'uploader': channel,
            'thumbnail': thumb,
        }
4aeae91f | 1956 | |
class WorldStarHipHopIE(InfoExtractor):
    """Information extractor for worldstarhiphop.com and worldstarcandy.com videos"""

    _VALID_URL = r'https?://(?:www|m)\.worldstar(?:candy|hiphop)\.com/videos/video\.php\?v=(?P<id>.*)'
    IE_NAME = u'WorldStarHipHop'

    def _real_extract(self, url):
        """Return a one-element list with the info dictionary for the video at url."""
        video_id = re.match(self._VALID_URL, url).group('id')

        page = self._download_webpage(url, video_id)

        media_url = self._search_regex(r'so\.addVariable\("file","(.*?)"\)',
            page, u'video URL')

        # The container is guessed from the media URL.
        ext = 'mp4' if 'mp4' in media_url else 'flv'

        title = self._html_search_regex(r"<title>(.*)</title>",
            page, u'title')

        thumbnail = self._html_search_regex(r'rel="image_src" href="(.*)" />',
            page, u'thumbnail', fatal=False)

        # Without a thumbnail, take the title from the "candytitles"
        # element instead (WSHH candy video).
        if not thumbnail:
            candy_title = re.search(r"""candytitles.*>(.*)</span>""", page)
            if candy_title is not None:
                title = candy_title.group(1)

        return [{
            'id': video_id,
            'url': media_url,
            'title': title,
            'thumbnail': thumbnail,
            'ext': ext,
        }]
1996 | ||
ca0a0bbe PH |
class RBMARadioIE(InfoExtractor):
    """Information extractor for rbmaradio.com shows"""

    _VALID_URL = r'https?://(?:www\.)?rbmaradio\.com/shows/(?P<videoID>[^/]+)$'

    def _real_extract(self, url):
        """Return a one-element list with the info dictionary for the show at url.

        Raises ExtractorError when the embedded show data is not valid JSON.
        """
        show_id = re.match(self._VALID_URL, url).group('videoID')

        page = self._download_webpage(url, show_id)

        # The show metadata is embedded in the page as a JSON assignment to gon.show.
        raw_json = self._search_regex(r'window\.gon.*?gon\.show=(.+?);$',
            page, u'json data', flags=re.MULTILINE)

        try:
            data = json.loads(raw_json)
        except ValueError as e:
            raise ExtractorError(u'Invalid JSON: ' + str(e))

        media_url = data['akamai_url'] + '&cbr=256'
        # The extension comes from the URL path, so the query string is ignored.
        media_path = compat_urllib_parse_urlparse(media_url).path
        host = data.get('host', {})

        return [{
            'id': show_id,
            'url': media_url,
            'ext': media_path.rpartition('.')[2],
            'title': data['title'],
            'description': data.get('teaser_text'),
            'location': data.get('country_of_origin'),
            'uploader': host.get('name'),
            'uploader_id': host.get('slug'),
            'thumbnail': data.get('image', {}).get('large_url_2x'),
            'duration': data.get('duration'),
        }]
4aeae91f | 2030 | |
991ba7fa JC |
2031 | |
class YouPornIE(InfoExtractor):
    """Information extractor for youporn.com."""
    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+)'

    def _print_formats(self, formats):
        """Print all available formats"""
        print(u'Available formats:')
        print(u'ext\t\tformat')
        print(u'---------------------------------')
        for format in formats:
            print(u'%s\t\t%s' % (format['ext'], format['format']))

    def _specific(self, req_format, formats):
        """Return the first entry whose 'format' equals req_format, or None."""
        for x in formats:
            if x["format"] == req_format:
                return x
        return None

    def _real_extract(self, url):
        """Return the info dictionaries for the requested format(s) of the video.

        Returns None after printing the format table when 'listformats' is
        set. Raises ExtractorError when the URL does not match, the embedded
        JSON is invalid or incomplete, no download links are found, or the
        requested format is not available.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = mobj.group('videoid')

        req = compat_urllib_request.Request(url)
        req.add_header('Cookie', 'age_verified=1')
        webpage = self._download_webpage(req, video_id)

        # Get JSON parameters
        json_params = self._search_regex(r'var currentVideo = new Video\((.*)\);', webpage, u'JSON parameters')
        try:
            params = json.loads(json_params)
        except ValueError:
            raise ExtractorError(u'Invalid JSON')

        self.report_extraction(video_id)
        try:
            video_title = params['title']
            upload_date = unified_strdate(params['release_date_f'])
            video_description = params['description']
            video_uploader = params['submitted_by']
            thumbnail = params['thumbnails'][0]['image']
        except (KeyError, IndexError) as err:
            # The exception must be converted to text before concatenation.
            raise ExtractorError(u'Missing JSON parameter: ' + compat_str(err))

        # Get all of the formats available
        DOWNLOAD_LIST_RE = r'(?s)<ul class="downloadList">(?P<download_list>.*?)</ul>'
        download_list_html = self._search_regex(DOWNLOAD_LIST_RE,
            webpage, u'download list').strip()

        # Get all of the links from the page
        LINK_RE = r'(?s)<a href="(?P<url>[^"]+)">'
        links = re.findall(LINK_RE, download_list_html)
        if not links:
            raise ExtractorError(u'ERROR: no known formats available for video')

        self.to_screen(u'Links found: %d' % len(links))

        formats = []
        for link in links:
            # A link looks like this:
            # http://cdn1.download.youporn.phncdn.com/201210/31/8004515/480p_370k_8004515/YouPorn%20-%20Nubile%20Films%20The%20Pillow%20Fight.mp4?nvb=20121113051249&nva=20121114051249&ir=1200&sr=1200&hash=014b882080310e95fb6a0
            # A path looks like this:
            # /201210/31/8004515/480p_370k_8004515/YouPorn%20-%20Nubile%20Films%20The%20Pillow%20Fight.mp4
            video_url = unescapeHTML(link)
            path = compat_urllib_parse_urlparse(video_url).path
            extension = os.path.splitext(path)[1][1:]
            # The fifth path component starts with "<size>_<bitrate>_";
            # the format name joins those two parts with a dash.
            video_format = "-".join(path.split('/')[4].split('_')[:2])

            formats.append({
                'id': video_id,
                'url': video_url,
                'uploader': video_uploader,
                'upload_date': upload_date,
                'title': video_title,
                'ext': extension,
                'format': video_format,
                'thumbnail': thumbnail,
                'description': video_description
            })

        if self._downloader.params.get('listformats', None):
            self._print_formats(formats)
            return

        req_format = self._downloader.params.get('format', None)
        self.to_screen(u'Format: %s' % req_format)

        # 'best' and 'worst' take the first and last link in page order.
        if req_format is None or req_format == 'best':
            return [formats[0]]
        elif req_format == 'worst':
            return [formats[-1]]
        elif req_format in ('-1', 'all'):
            return formats
        else:
            selected = self._specific(req_format, formats)
            if selected is None:
                raise ExtractorError(u'Requested format not available')
            return [selected]
2136 | ||
6324fd1d | 2137 | |
991ba7fa JC |
2138 | |
class PornotubeIE(InfoExtractor):
    """Information extractor for pornotube.com."""
    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?pornotube\.com(/c/(?P<channel>[0-9]+))?(/m/(?P<videoid>[0-9]+))(/(?P<title>.+))$'

    def _real_extract(self, url):
        """Return a one-element list with the info dictionary for the video at url."""
        url_match = re.match(self._VALID_URL, url)
        if url_match is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        # Both the id and the title are taken from the URL itself.
        video_id = url_match.group('videoid')
        title = url_match.group('title')

        page = self._download_webpage(url, video_id)

        # The media URL is percent-encoded inside the page.
        VIDEO_URL_RE = r'url: "(?P<url>http://video[0-9].pornotube.com/.+\.flv)",'
        media_url = compat_urllib_parse.unquote(
            self._search_regex(VIDEO_URL_RE, page, u'video url'))

        # The upload date is optional; normalise it only when it was found.
        VIDEO_UPLOADED_RE = r'<div class="video_added_by">Added (?P<date>[0-9\/]+) by'
        upload_date = self._html_search_regex(VIDEO_UPLOADED_RE, page, u'upload date', fatal=False)
        if upload_date:
            upload_date = unified_strdate(upload_date)

        return [{
            'id': video_id,
            'url': media_url,
            'uploader': None,
            'upload_date': upload_date,
            'title': title,
            'ext': 'flv',
            'format': 'flv',
        }]
2173 | ||
991ba7fa JC |
class YouJizzIE(InfoExtractor):
    """Information extractor for youjizz.com."""
    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?youjizz\.com/videos/(?P<videoid>[^.]+).html$'

    def _real_extract(self, url):
        """Return a one-element list with the info dict for the video at url.

        The title is read from the video page; the media URL is read from
        the embed page that the video page links to.

        Raises ExtractorError when url does not match _VALID_URL or when no
        embed page link is found.
        """
        url_match = re.match(self._VALID_URL, url)
        if url_match is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        page = self._download_webpage(url, url_match.group('videoid'))

        video_title = self._html_search_regex(
            r'<title>(?P<title>.*)</title>', page, u'title').strip()

        # Locate the embed page; its numeric id replaces the id from the URL.
        embed_match = re.search(
            r'https?://www.youjizz.com/videos/embed/(?P<videoid>[0-9]+)', page)
        if embed_match is None:
            raise ExtractorError(u'ERROR: unable to extract embed page')

        embed_page_url = embed_match.group(0).strip()
        video_id = embed_match.group('videoid')

        embed_page = self._download_webpage(embed_page_url, video_id)

        video_url = self._search_regex(
            r'so.addVariable\("file",encodeURIComponent\("(?P<source>[^"]+)"\)\);',
            embed_page, u'video URL')

        return [{
            'id': video_id,
            'url': video_url,
            'title': video_title,
            'ext': 'flv',
            'format': 'flv',
            'player_url': embed_page_url,
        }]
2214 | ||
ccf65f9d PH |
class EightTracksIE(InfoExtractor):
    """Information extractor for 8tracks.com mixes."""
    IE_NAME = '8tracks'
    _VALID_URL = r'https?://8tracks.com/(?P<user>[^/]+)/(?P<id>[^/#]+)(?:#.*)?$'

    def _real_extract(self, url):
        """Return a list with one info dict per track of the mix at url.

        Raises ExtractorError when url does not match _VALID_URL.
        """
        url_match = re.match(self._VALID_URL, url)
        if url_match is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        playlist_id = url_match.group('id')

        page = self._download_webpage(url, playlist_id)

        # The mix description is embedded in the page as a JSON literal.
        mix_json = self._search_regex(
            r"PAGE.mix = (.*?);\n", page, u'trax information', flags=re.DOTALL)
        data = json.loads(mix_json)

        # A random number serves as the play-session token in the API URLs.
        session = str(random.randint(0, 1000000000))
        mix_id = data['id']
        track_count = data['tracks_count']

        tracks = []
        api_url = 'http://8tracks.com/sets/%s/play?player=sm&mix_id=%s&format=jsonh' % (session, mix_id)
        # Tracks are fetched one at a time: each answer describes a single
        # track and says whether it is the last one of the mix.
        for track_number in itertools.count(1):
            api_data = json.loads(self._download_webpage(
                api_url, playlist_id,
                note=u'Downloading song information %s/%s' % (str(track_number), track_count),
                errnote=u'Failed to download song information'))
            track = api_data[u'set']['track']
            tracks.append({
                'id': track['id'],
                'url': track['track_file_stream_url'],
                'title': track['performer'] + u' - ' + track['name'],
                'raw_title': track['name'],
                'uploader_id': data['user']['login'],
                'ext': 'm4a',
            })
            if api_data['set']['at_last_track']:
                break
            api_url = 'http://8tracks.com/sets/%s/next?player=sm&mix_id=%s&format=jsonh&track_id=%s' % (session, mix_id, track['id'])
        return tracks
991ba7fa | 2255 | |
da06e2da OK |
class KeekIE(InfoExtractor):
    """Information extractor for keek.com."""
    _VALID_URL = r'http://(?:www\.)?keek\.com/(?:!|\w+/keeks/)(?P<videoID>\w+)'
    IE_NAME = u'keek'

    def _real_extract(self, url):
        """Return a one-element list with the info dict for the keek at url."""
        video_id = re.match(self._VALID_URL, url).group('videoID')

        # Media and thumbnail URLs are built from the id alone; the page is
        # only downloaded for the title and the (optional) uploader.
        info = {
            'id': video_id,
            'url': u'http://cdn.keek.com/keek/video/%s' % video_id,
            'ext': 'mp4',
            'thumbnail': u'http://cdn.keek.com/keek/thumbnail/%s/w100/h75' % video_id,
        }

        page = self._download_webpage(url, video_id)

        info['title'] = self._html_search_regex(
            r'<meta property="og:title" content="(?P<title>.*?)"',
            page, u'title')
        info['uploader'] = self._html_search_regex(
            r'<div class="user-name-and-bio">[\S\s]+?<h2>(?P<uploader>.+?)</h2>',
            page, u'uploader', fatal=False)

        return [info]
2283 | ||
class TEDIE(InfoExtractor):
    """Information extractor for ted.com talks and playlists."""
    # Verbose pattern: always match it with re.VERBOSE.
    _VALID_URL = r'''http://www\.ted\.com/
                   (
                        ((?P<type_playlist>playlists)/(?P<playlist_id>\d+)) # We have a playlist
                        |
                        ((?P<type_talk>talks)) # We have a simple talk
                   )
                   (/lang/(.*?))? # The url may contain the language
                   /(?P<name>\w+) # Here goes the name and then ".html"
                   '''

    @classmethod
    def suitable(cls, url):
        """Tell whether url matches the (verbose) _VALID_URL pattern."""
        return re.match(cls._VALID_URL, url, re.VERBOSE) is not None

    def _real_extract(self, url):
        """Dispatch to the talk or the playlist handler depending on the URL."""
        url_match = re.match(self._VALID_URL, url, re.VERBOSE)
        if url_match.group('type_talk'):
            return [self._talk_info(url)]

        playlist_id = url_match.group('playlist_id')
        name = url_match.group('name')
        self.to_screen(u'Getting info of playlist %s: "%s"' % (playlist_id, name))
        return [self._playlist_videos_info(url, name, playlist_id)]

    def _playlist_videos_info(self, url, name, playlist_id=0):
        """Return a playlist result referencing every talk of the playlist."""
        item_re = r'''
                     <li\ id="talk_(\d+)"([.\s]*?)data-id="(?P<video_id>\d+)"
                     ([.\s]*?)data-playlist_item_id="(\d+)"
                     ([.\s]*?)data-mediaslug="(?P<mediaSlug>.+?)"
                     '''
        link_re = r'<p\ class="talk-title"><a href="(?P<talk_url>/talks/(.+).html)">(?P<fullname>.+?)</a></p>'

        page = self._download_webpage(url, playlist_id, 'Downloading playlist webpage')
        item_matches = re.finditer(item_re, page, re.VERBOSE)
        link_matches = re.finditer(link_re, page)

        playlist_title = self._html_search_regex(
            r'div class="headline">\s*?<h1>\s*?<span>(.*?)</span>',
            page, 'playlist title')

        # Items and title links are paired up in page order; zip() stops at
        # the shorter of the two sequences. Only the link is used per entry.
        entries = [
            self.url_result('http://www.ted.com%s' % link.group('talk_url'), 'TED')
            for _item, link in zip(item_matches, link_matches)
        ]
        return self.playlist_result(entries, playlist_id=playlist_id, playlist_title=playlist_title)

    def _talk_info(self, url, video_id=0):
        """Return the info dict for the talk found at url."""
        video_name = re.match(self._VALID_URL, url, re.VERBOSE).group('name')
        page = self._download_webpage(url, video_id, 'Downloading \"%s\" page' % video_name)
        self.report_extraction(video_name)

        # If the url includes the language we get the title translated
        title = self._html_search_regex(
            r'<span id="altHeadline" >(?P<title>.*)</span>',
            page, 'title')
        details_json = self._search_regex(
            r'<script.*?>var talkDetails = ({.*?})</script>',
            page, 'json data')
        details = json.loads(details_json)
        description = self._html_search_regex(
            r'<div class="talk-intro">.*?<p.*?>(.*?)</p>',
            page, 'description', flags=re.DOTALL)
        thumbnail = self._search_regex(
            r'</span>[\s.]*</div>[\s.]*<img src="(.*?)"',
            page, 'thumbnail')

        return {
            'id': details['id'],
            # The last entry of the stream list is the one that gets used.
            'url': details['htmlStreams'][-1]['file'],
            'ext': 'mp4',
            'title': title,
            'thumbnail': thumbnail,
            'description': description,
        }
da06e2da | 2358 | |
class MySpassIE(InfoExtractor):
    """Information extractor for myspass.de, based on the site's XML metadata."""
    _VALID_URL = r'http://www.myspass.de/.*'

    def _real_extract(self, url):
        """Return a one-element list with the info dict for the video at url.

        The video id is taken from the URL path and used to download an XML
        metadata document, from which all other fields are read.

        Raises ExtractorError when the metadata has no <url_flv> or no
        <title> element.
        """
        META_DATA_URL_TEMPLATE = 'http://www.myspass.de/myspass/includes/apps/video/getvideometadataxml.php?id=%s'

        # video id is the last path element of the URL
        # usually there is a trailing slash, so also try the second but last
        url_path = compat_urllib_parse_urlparse(url).path
        url_parent_path, video_id = os.path.split(url_path)
        if not video_id:
            _, video_id = os.path.split(url_parent_path)

        # get metadata
        metadata_url = META_DATA_URL_TEMPLATE % video_id
        metadata_text = self._download_webpage(metadata_url, video_id)
        metadata = xml.etree.ElementTree.fromstring(metadata_text.encode('utf-8'))

        def optional_text(tag):
            """Return the text of the first <tag> child, or None if absent."""
            element = metadata.find(tag)
            return None if element is None else element.text

        # Mandatory fields: download URL and title.
        url_flv_el = metadata.find('url_flv')
        if url_flv_el is None:
            raise ExtractorError(u'Unable to extract download url')
        video_url = url_flv_el.text
        extension = os.path.splitext(video_url)[1][1:]

        title_el = metadata.find('title')
        if title_el is None:
            raise ExtractorError(u'Unable to extract title')
        title = title_el.text

        # Fall back to the file extension when no <format_id> is given.
        # (The original code referenced an undefined name `ext` here and
        # raised NameError on that path.)
        format_id_el = metadata.find('format_id')
        if format_id_el is None:
            video_format = extension
        else:
            video_format = format_id_el.text

        return [{
            'id': video_id,
            'url': video_url,
            'title': title,
            'ext': extension,
            'format': video_format,
            'thumbnail': optional_text('imagePreview'),
            'description': optional_text('description'),
        }]
2412 | ||
class SpiegelIE(InfoExtractor):
    """Information extractor for spiegel.de videos."""
    _VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<videoID>[0-9]+)(?:\.html)?(?:#.*)?$'

    def _real_extract(self, url):
        """Return a one-element list with the info dict for the video at url."""
        video_id = re.match(self._VALID_URL, url).group('videoID')

        page = self._download_webpage(url, video_id)
        video_title = self._html_search_regex(
            r'<div class="module-title">(.*?)</div>', page, u'title')

        # File name and duration come from a separate per-video XML document.
        xml_url = u'http://video2.spiegel.de/flash/' + video_id + u'.xml'
        xml_code = self._download_webpage(
            xml_url, video_id,
            note=u'Downloading XML', errnote=u'Failed to download XML')
        variants = xml.etree.ElementTree.fromstring(xml_code)

        # The last child element of the document is the variant that is used.
        chosen = variants[-1]
        filename = chosen.findall('./filename')[0].text
        duration = float(chosen.findall('./duration')[0].text)

        return [{
            'id': video_id,
            'url': 'http://video2.spiegel.de/flash/' + filename,
            'ext': filename.rpartition('.')[2],
            'title': video_title,
            'duration': duration,
        }]
2444 | ||
class LiveLeakIE(InfoExtractor):
    """Information extractor for liveleak.com."""

    # The scheme is optional and may be http or https. (The pattern used to
    # read `http?://`, which made the "p" optional and rejected https URLs.)
    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?liveleak\.com/view\?(?:.*?)i=(?P<video_id>[\w_]+)(?:.*)'
    IE_NAME = u'liveleak'

    def _real_extract(self, url):
        """Return a one-element list with the info dict for the video at url.

        Raises ExtractorError when url does not match _VALID_URL.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        video_id = mobj.group('video_id')

        webpage = self._download_webpage(url, video_id)

        video_url = self._search_regex(r'file: "(.*?)",',
            webpage, u'video URL')

        # Drop the site prefix from the og:title value.
        video_title = self._html_search_regex(r'<meta property="og:title" content="(?P<title>.*?)"',
            webpage, u'title').replace('LiveLeak.com -', '').strip()

        # Description and uploader are optional (fatal=False).
        video_description = self._html_search_regex(r'<meta property="og:description" content="(?P<desc>.*?)"',
            webpage, u'description', fatal=False)

        video_uploader = self._html_search_regex(r'By:.*?(\w+)</a>',
            webpage, u'uploader', fatal=False)

        info = {
            'id': video_id,
            'url': video_url,
            'ext': 'mp4',
            'title': video_title,
            'description': video_description,
            'uploader': video_uploader
        }

        return [info]
2481 | ||
f2cd958c | 2482 | |
f2cd958c | 2483 | |
class TumblrIE(InfoExtractor):
    """Information extractor for videos posted on tumblr.com blogs."""
    _VALID_URL = r'http://(?P<blog_name>.*?)\.tumblr\.com/((post)|(video))/(?P<id>\d*)/(.*?)'

    def _real_extract(self, url):
        """Return a one-element list with the info dict for the post at url.

        Raises ExtractorError when no video source is found in the page.
        """
        url_match = re.match(self._VALID_URL, url)
        video_id = url_match.group('id')
        blog = url_match.group('blog_name')

        # Always fetch the /post/ form of the URL, whatever form was given.
        post_url = 'http://%s.tumblr.com/post/%s/' % (blog, video_id)
        page = self._download_webpage(post_url, video_id)

        # In the page source the quotes around attributes appear as a
        # literal backslash followed by x22, hence the \\x22 in the patterns.
        source_re = r'src=\\x22(?P<video_url>http://%s\.tumblr\.com/video_file/%s/(.*?))\\x22 type=\\x22video/(?P<ext>.*?)\\x22' % (blog, video_id)
        source_match = re.search(source_re, page)
        if source_match is None:
            raise ExtractorError(u'Unable to extract video')

        # We pick the first poster
        video_thumbnail = self._search_regex(
            r'posters(.*?)\[\\x22(?P<thumb>.*?)\\x22',
            page, u'thumbnail', fatal=False)
        if video_thumbnail:
            video_thumbnail = video_thumbnail.replace('\\', '')

        # The only place where you can get a title, it's not complete,
        # but searching in other places doesn't work for all videos
        video_title = self._html_search_regex(
            r'<title>(?P<title>.*?)</title>',
            page, u'title', flags=re.DOTALL)

        return [{
            'id': video_id,
            'url': source_match.group('video_url'),
            'title': video_title,
            'thumbnail': video_thumbnail,
            'ext': source_match.group('ext'),
        }]
2517 | ||
class BandcampIE(InfoExtractor):
    """Information extractor for free tracks on bandcamp.com."""
    _VALID_URL = r'http://.*?\.bandcamp\.com/track/(?P<title>.*)'

    def _real_extract(self, url):
        """Return a one-element list with the info dict for the track at url.

        Only tracks that offer a free download page are supported, and the
        mp3-320 variant is always selected.

        Raises ExtractorError when the track has no free download or when
        any of the expected pieces cannot be located in the downloaded pages
        (these used to surface as AttributeError on a None match).
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        title = mobj.group('title')
        webpage = self._download_webpage(url, title)

        # We get the link to the free download page
        m_download = re.search(r'freeDownloadPage: "(.*?)"', webpage)
        if m_download is None:
            raise ExtractorError(u'No free songs found')
        download_link = m_download.group(1)

        m_id = re.search(r'var TralbumData = {(.*?)id: (?P<id>\d*?)$',
                         webpage, re.MULTILINE|re.DOTALL)
        if m_id is None:
            raise ExtractorError(u'Unable to extract track id')
        track_id = m_id.group('id')

        download_webpage = self._download_webpage(download_link, track_id,
                                                  'Downloading free downloads page')
        # We get the dictionary of the track from some javascript code
        m_items = re.search(r'items: (.*?),$', download_webpage, re.MULTILINE)
        if m_items is None:
            raise ExtractorError(u'Unable to extract track information')
        info = json.loads(m_items.group(1))[0]

        # We pick mp3-320 for now, until format selection can be easily implemented.
        mp3_info = info[u'downloads'][u'mp3-320']
        # If we try to use this url it says the link has expired
        initial_url = mp3_info[u'url']
        re_url = r'(?P<server>http://(.*?)\.bandcamp\.com)/download/track\?enc=mp3-320&fsig=(?P<fsig>.*?)&id=(?P<id>.*?)&ts=(?P<ts>.*)$'
        m_url = re.match(re_url, initial_url)
        if m_url is None:
            raise ExtractorError(u'Unable to parse download url')

        # We build the url we will use to get the final track url
        # This url is built in Bandcamp in the script download_bundle_*.js
        request_url = '%s/statdownload/track?enc=mp3-320&fsig=%s&id=%s&ts=%s&.rand=665028774616&.vrs=1' % (m_url.group('server'), m_url.group('fsig'), track_id, m_url.group('ts'))
        final_url_webpage = self._download_webpage(request_url, track_id, 'Requesting download url')
        # If we could correctly generate the .rand field the url would be
        # in the "download_url" key
        m_final = re.search(r'"retry_url":"(.*?)"', final_url_webpage)
        if m_final is None:
            raise ExtractorError(u'Unable to extract final download url')
        final_url = m_final.group(1)

        track_info = {
            'id': track_id,
            'title': info[u'title'],
            'ext': 'mp3',
            'url': final_url,
            'thumbnail': info[u'thumb_url'],
            'uploader': info[u'artist'],
        }

        return [track_info]
2563 | ||
class RedTubeIE(InfoExtractor):
    """Information Extractor for redtube"""
    _VALID_URL = r'(?:http://)?(?:www\.)?redtube\.com/(?P<id>[0-9]+)'

    def _real_extract(self, url):
        """Return a one-element list with the info dict for the video at url.

        Raises ExtractorError when url does not match _VALID_URL.
        """
        url_match = re.match(self._VALID_URL, url)
        if url_match is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = url_match.group('id')

        page = self._download_webpage(url, video_id)
        self.report_extraction(video_id)

        # Both fields are mandatory: a failed search is fatal by default.
        video_url = self._html_search_regex(
            r'<source src="(.+?)" type="video/mp4">',
            page, u'video URL')
        video_title = self._html_search_regex(
            '<h1 class="videoTitle slidePanelMovable">(.+?)</h1>',
            page, u'title')

        info = {
            'id': video_id,
            'url': video_url,
            'ext': 'mp4',
            'title': video_title,
        }
        return [info]
7f5bd09b | 2591 | |
class InaIE(InfoExtractor):
    """Information Extractor for Ina.fr"""
    _VALID_URL = r'(?:http://)?(?:www\.)?ina\.fr/video/(?P<id>I[0-9]+)/.*'

    def _real_extract(self, url):
        """Return a one-element list with the info dict for the video at url."""
        video_id = re.match(self._VALID_URL, url).group('id')

        # Everything is read from the player's MRSS document, not from the
        # page the user pointed at.
        mrss_url = 'http://player.ina.fr/notices/%s.mrss' % video_id
        mrss = self._download_webpage(mrss_url, video_id)

        self.report_extraction(video_id)

        video_url = self._html_search_regex(
            r'<media:player url="(?P<mp4url>http://mp4.ina.fr/[^"]+\.mp4)',
            mrss, u'video URL')
        video_title = self._search_regex(
            r'<title><!\[CDATA\[(?P<titre>.*?)]]></title>',
            mrss, u'title')

        info = {
            'id': video_id,
            'url': video_url,
            'ext': 'mp4',
            'title': video_title,
        }
        return [info]
e32b06e9 | 2618 | |
class HowcastIE(InfoExtractor):
    """Information Extractor for Howcast.com"""
    _VALID_URL = r'(?:https?://)?(?:www\.)?howcast\.com/videos/(?P<id>\d+)'

    def _real_extract(self, url):
        """Return a one-element list with the info dict for the video at url."""
        video_id = re.match(self._VALID_URL, url).group('id')

        # Fetch the canonical page for the id rather than the URL as given.
        page = self._download_webpage(
            'http://www.howcast.com/videos/' + video_id, video_id)

        self.report_extraction(video_id)

        info = {'id': video_id, 'ext': 'mp4'}

        info['url'] = self._search_regex(
            r'\'?file\'?: "(http://mobile-media\.howcast\.com/[0-9]+\.mp4)',
            page, u'video URL')
        info['title'] = self._html_search_regex(
            r'<meta content=(?:"([^"]+)"|\'([^\']+)\') property=\'og:title\'',
            page, u'title')

        # Description and thumbnail are optional (fatal=False).
        info['description'] = self._html_search_regex(
            r'<meta content=(?:"([^"]+)"|\'([^\']+)\') name=\'description\'',
            page, u'description', fatal=False)
        info['thumbnail'] = self._html_search_regex(
            r'<meta content=\'(.+?)\' property=\'og:image\'',
            page, u'thumbnail', fatal=False)

        return [info]
2652 | ||
5b0d3cc0 AB |
class VineIE(InfoExtractor):
    """Information Extractor for Vine.co"""
    _VALID_URL = r'(?:https?://)?(?:www\.)?vine\.co/v/(?P<id>\w+)'

    def _real_extract(self, url):
        """Return a one-element list with the info dict for the vine at url."""
        video_id = re.match(self._VALID_URL, url).group('id')

        # Fetch the canonical https page for the id.
        page = self._download_webpage('https://vine.co/v/' + video_id, video_id)

        self.report_extraction(video_id)

        info = {'id': video_id, 'ext': 'mp4'}

        info['url'] = self._html_search_regex(
            r'<meta property="twitter:player:stream" content="(.+?)"',
            page, u'video URL')
        info['title'] = self._html_search_regex(
            r'<meta property="og:title" content="(.+?)"',
            page, u'title')

        # Thumbnail and uploader are optional (fatal=False).
        info['thumbnail'] = self._html_search_regex(
            r'<meta property="og:image" content="(.+?)(\?.*?)?"',
            page, u'thumbnail', fatal=False)
        info['uploader'] = self._html_search_regex(
            r'<div class="user">.*?<h2>(.+?)</h2>',
            page, u'uploader', fatal=False, flags=re.DOTALL)

        return [info]
2686 | ||
afef36c9 AB |
class FlickrIE(InfoExtractor):
    """Information Extractor for Flickr videos"""
    _VALID_URL = r'(?:https?://)?(?:www\.)?flickr\.com/photos/(?P<uploader_id>[\w\-_@]+)/(?P<id>\d+).*'

    def _real_extract(self, url):
        """Return a one-element list with the info dict for the video at url.

        Three downloads are made: the photo page (secret and metadata), a
        first XML document (node id) and a second one (stream location).

        Raises ExtractorError when the stream location cannot be found.
        """
        url_match = re.match(self._VALID_URL, url)
        video_id = url_match.group('id')
        video_uploader_id = url_match.group('uploader_id')

        page_url = 'http://www.flickr.com/photos/' + video_uploader_id + '/' + video_id
        page = self._download_webpage(page_url, video_id)

        # The secret found in the page is required by both XML requests.
        secret = self._search_regex(r"photo_secret: '(\w+)'", page, u'secret')

        first_url = ('https://secure.flickr.com/apps/video/video_mtl_xml.gne?v=x&photo_id='
                     + video_id + '&secret=' + secret + '&bitrate=700&target=_self')
        first_xml = self._download_webpage(first_url, video_id, 'Downloading first data webpage')

        node_id = self._html_search_regex(
            r'<Item id="id">(\d+-\d+)</Item>', first_xml, u'node_id')

        second_url = ('https://secure.flickr.com/video_playlist.gne?node_id=' + node_id
                      + '&tech=flash&mode=playlist&bitrate=700&secret=' + secret
                      + '&rd=video.yahoo.com&noad=1')
        second_xml = self._download_webpage(second_url, video_id, 'Downloading second data webpage')

        self.report_extraction(video_id)

        stream = re.search(r'<STREAM APP="(.+?)" FULLPATH="(.+?)"', second_xml)
        if stream is None:
            raise ExtractorError(u'Unable to extract video url')
        # The media URL is the APP value followed by the HTML-unescaped FULLPATH.
        video_url = stream.group(1) + unescapeHTML(stream.group(2))

        video_title = self._html_search_regex(
            r'<meta property="og:title" content=(?:"([^"]+)"|\'([^\']+)\')',
            page, u'video title')

        # Description and thumbnail are optional (fatal=False).
        video_description = self._html_search_regex(
            r'<meta property="og:description" content=(?:"([^"]+)"|\'([^\']+)\')',
            page, u'description', fatal=False)
        thumbnail = self._html_search_regex(
            r'<meta property="og:image" content=(?:"([^"]+)"|\'([^\']+)\')',
            page, u'thumbnail', fatal=False)

        info = {
            'id': video_id,
            'url': video_url,
            'ext': 'mp4',
            'title': video_title,
            'description': video_description,
            'thumbnail': thumbnail,
            'uploader_id': video_uploader_id,
        }
        return [info]
2735 | ||
45014296 JMF |
class TeamcocoIE(InfoExtractor):
    """Information extractor for teamcoco.com videos."""
    _VALID_URL = r'http://teamcoco\.com/video/(?P<url_title>.*)'

    def _real_extract(self, url):
        """Return a one-element list with the info dict for the video at url.

        Raises ExtractorError when url does not match _VALID_URL.
        """
        url_match = re.match(self._VALID_URL, url)
        if url_match is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        # The URL only holds a slug; the numeric id is read from the page.
        page = self._download_webpage(url, url_match.group('url_title'))
        video_id = self._html_search_regex(
            r'<article class="video" data-id="(\d+?)"',
            page, u'video id')

        self.report_extraction(video_id)

        video_title = self._html_search_regex(
            r'<meta property="og:title" content="(.+?)"',
            page, u'title')

        # Thumbnail and description are optional (fatal=False).
        thumbnail = self._html_search_regex(
            r'<meta property="og:image" content="(.+?)"',
            page, u'thumbnail', fatal=False)
        video_description = self._html_search_regex(
            r'<meta property="og:description" content="(.*?)"',
            page, u'description', fatal=False)

        # The media URL is read from a per-video XML document.
        data = self._download_webpage(
            'http://teamcoco.com/cvp/2.0/%s.xml' % video_id,
            video_id, 'Downloading data webpage')
        video_url = self._html_search_regex(
            r'<file type="high".*?>(.*?)</file>',
            data, u'video URL')

        info = {
            'id': video_id,
            'url': video_url,
            'ext': 'mp4',
            'title': video_title,
            'thumbnail': thumbnail,
            'description': video_description,
        }
        return [info]
84095012 | 2774 | |
71e458d4 YUK |
class XHamsterIE(InfoExtractor):
    """Information Extractor for xHamster"""
    _VALID_URL = r'(?:http://)?(?:www.)?xhamster\.com/movies/(?P<id>[0-9]+)/.*\.html'

    def _real_extract(self, url):
        """Return a one-element list with the info dict for the video at url.

        Raises ExtractorError when the media location cannot be found in the
        page. A missing upload date only produces a warning.
        """
        video_id = re.match(self._VALID_URL, url).group('id')

        # The page is requested by id only, with an empty title slug.
        page = self._download_webpage(
            'http://xhamster.com/movies/%s/.html' % video_id, video_id)

        media = re.search(
            r'\'srv\': \'(?P<server>[^\']*)\',\s*\'file\': \'(?P<file>[^\']+)\',',
            page)
        if media is None:
            raise ExtractorError(u'Unable to extract media URL')
        server = media.group('server')
        if server:
            video_url = server + '/key=' + media.group('file')
        else:
            # With an empty server, 'file' is itself the percent-encoded URL.
            video_url = compat_urllib_parse.unquote(media.group('file'))
        video_extension = video_url.split('.')[-1]

        video_title = self._html_search_regex(
            r'<title>(?P<title>.+?) - xHamster\.com</title>',
            page, u'title')

        # No description is extracted: per the original author's note it
        # can't be seen anywhere in the UI.

        date_match = re.search(
            r'hint=\'(?P<upload_date_Y>[0-9]{4})-(?P<upload_date_m>[0-9]{2})-(?P<upload_date_d>[0-9]{2}) [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{3,4}\'',
            page)
        if date_match is None:
            video_upload_date = None
            self._downloader.report_warning(u'Unable to extract upload date')
        else:
            # Concatenate year, month and day into YYYYMMDD.
            video_upload_date = (date_match.group('upload_date_Y')
                                 + date_match.group('upload_date_m')
                                 + date_match.group('upload_date_d'))

        video_uploader_id = self._html_search_regex(
            r'<a href=\'/user/[^>]+>(?P<uploader_id>[^<]+)',
            page, u'uploader id', default=u'anonymous')

        video_thumbnail = self._search_regex(
            r'\'image\':\'(?P<thumbnail>[^\']+)\'',
            page, u'thumbnail', fatal=False)

        info = {
            'id': video_id,
            'url': video_url,
            'ext': video_extension,
            'title': video_title,
            'upload_date': video_upload_date,
            'uploader_id': video_uploader_id,
            'thumbnail': video_thumbnail,
        }
        return [info]
afef36c9 | 2826 | |
157b864a YK |
class HypemIE(InfoExtractor):
    """Information Extractor for hypem"""
    _VALID_URL = r'(?:http://)?(?:www\.)?hypem\.com/track/([^/]+)/([^/]+)'

    def _real_extract(self, url):
        """Extract the info dictionary for a single hypem track page.

        The track page is requested with extra 'ax' and 'ts' query
        parameters, the JSON embedded in its "displayList-data" script tag is
        parsed, and the first listed track's id and key are used to query the
        serve/source endpoint for the final media URL. The Set-Cookie header
        of the first response is sent back as the cookie of the second request.

        Returns a one-element list holding a dict with the keys 'id', 'url',
        'ext', 'title' and 'artist'.

        Raises ExtractorError if the URL does not match _VALID_URL, if either
        response is not valid JSON, or if the page lists no track.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        track_id = mobj.group(1)

        data = {'ax': 1, 'ts': time.time()}
        data_encoded = compat_urllib_parse.urlencode(data)
        complete_url = url + "?" + data_encoded
        request = compat_urllib_request.Request(complete_url)
        response, urlh = self._download_webpage_handle(request, track_id, u'Downloading webpage with the url')
        cookie = urlh.headers.get('Set-Cookie', '')

        self.report_extraction(track_id)

        html_tracks = self._html_search_regex(r'<script type="application/json" id="displayList-data">(.*?)</script>',
            response, u'tracks', flags=re.MULTILINE|re.DOTALL).strip()
        try:
            track_list = json.loads(html_tracks)
        except ValueError:
            raise ExtractorError(u'Hypemachine contained invalid JSON.')
        try:
            track = track_list[u'tracks'][0]
        except (KeyError, IndexError, TypeError):
            # Valid JSON without a usable track list would otherwise escape
            # as a raw lookup error instead of an extractor error.
            raise ExtractorError(u'Unable to find any track in the Hypemachine data.')

        key = track[u"key"]
        # From here on the id reported by the site replaces the one parsed from the URL.
        track_id = track[u"id"]
        artist = track[u"artist"]
        title = track[u"song"]

        serve_url = "http://hypem.com/serve/source/%s/%s" % (compat_str(track_id), compat_str(key))
        # The empty body keeps this a POST request; it has to be a bytes
        # literal because Python 3's urllib refuses text (str) POST data.
        request = compat_urllib_request.Request(serve_url, b"", {'Content-Type': 'application/json'})
        request.add_header('cookie', cookie)
        song_data_json = self._download_webpage(request, track_id, u'Downloading metadata')
        try:
            song_data = json.loads(song_data_json)
        except ValueError:
            raise ExtractorError(u'Hypemachine contained invalid JSON.')
        final_url = song_data[u"url"]

        return [{
            'id': track_id,
            'url': final_url,
            'ext': "mp3",
            'title': title,
            'artist': artist,
        }]
2876 | ||
ecb3e676 YK |
class Vbox7IE(InfoExtractor):
    """Information Extractor for Vbox7"""
    _VALID_URL = r'(?:http://)?(?:www\.)?vbox7\.com/play:([^/]+)'

    def _real_extract(self, url):
        """Extract the info dictionary for a single Vbox7 video page.

        The first page fetched only contains a JavaScript redirect
        ("window.location = '...'"); its target is appended to the URL that
        was actually retrieved and downloaded to read the title. The media
        and thumbnail URLs come from a form-encoded POST to magare.do.

        Returns a one-element list holding a dict with the keys 'id', 'url',
        'ext', 'title' and 'thumbnail'.

        Raises ExtractorError if the URL does not match _VALID_URL or if the
        info request yields no response.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = mobj.group(1)

        redirect_page, urlh = self._download_webpage_handle(url, video_id)
        new_location = self._search_regex(r'window\.location = \'(.*)\';', redirect_page, u'redirect location')
        redirect_url = urlh.geturl() + new_location
        webpage = self._download_webpage(redirect_url, video_id, u'Downloading redirect page')

        # Only the part of the <title> before the first '/' is used as the title.
        title = self._html_search_regex(r'<title>(.*)</title>',
            webpage, u'title').split('/')[0].strip()

        ext = "flv"
        info_url = "http://vbox7.com/play/magare.do"
        # urlencode() returns text on Python 3, but POST data must be bytes
        # there; its output is plain ASCII, so encoding it is lossless.
        data = compat_urllib_parse.urlencode({'as3': '1', 'vid': video_id}).encode('ascii')
        info_request = compat_urllib_request.Request(info_url, data)
        info_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
        info_response = self._download_webpage(info_request, video_id, u'Downloading info webpage')
        if info_response is None:
            raise ExtractorError(u'Unable to extract the media url')
        # The response is treated as two '&'-separated key=value pairs: the
        # media URL first, then the thumbnail URL. Splitting on the first '='
        # only keeps values that themselves contain '=' intact.
        (final_url, thumbnail_url) = [pair.split('=', 1)[1] for pair in info_response.split('&')]

        return [{
            'id': video_id,
            'url': final_url,
            'ext': ext,
            'title': title,
            'thumbnail': thumbnail_url,
        }]
157b864a | 2912 | |
32aa88bc JMF |
class GametrailersIE(InfoExtractor):
    """Information Extractor for gametrailers.com videos, reviews and full episodes"""
    # The dots of the host name are escaped so that they only match a literal '.'.
    _VALID_URL = r'http://www\.gametrailers\.com/(?P<type>videos|reviews|full-episodes)/(?P<id>.*?)/(?P<title>.*)'

    def _real_extract(self, url):
        """Extract the info dictionary for a single gametrailers.com page.

        The page is searched for an "mgid:" identifier (the attribute that
        holds it depends on the page type), which is then passed to the mrss
        feed for title, description and thumbnail and to the mediagen feed
        for the media URLs.

        Returns a dict with the keys 'url', 'id', 'title', 'ext', 'thumbnail'
        and 'description'.

        Raises ExtractorError if the URL does not match _VALID_URL, if the
        video info cannot be parsed from the mrss feed, or if the mediagen
        feed contains no <src> entry.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = mobj.group('id')
        video_type = mobj.group('type')
        webpage = self._download_webpage(url, video_id)
        if video_type == 'full-episodes':
            mgid_re = r'data-video="(?P<mgid>mgid:.*?)"'
        else:
            mgid_re = r'data-contentId=\'(?P<mgid>mgid:.*?)\''
        mgid = self._search_regex(mgid_re, webpage, u'mgid')
        data = compat_urllib_parse.urlencode({'uri': mgid, 'acceptMethods': 'fms'})

        info_page = self._download_webpage('http://www.gametrailers.com/feeds/mrss?' + data,
                                           video_id, u'Downloading video info')
        links_webpage = self._download_webpage('http://www.gametrailers.com/feeds/mediagen/?' + data,
                                               video_id, u'Downloading video urls info')

        self.report_extraction(video_id)
        # Compiled with re.VERBOSE, so the line breaks and indentation inside
        # the pattern are not part of what is matched.
        info_re = r'''<title><!\[CDATA\[(?P<title>.*?)\]\]></title>.*
                      <description><!\[CDATA\[(?P<description>.*?)\]\]></description>.*
                      <image>.*
                        <url>(?P<thumb>.*?)</url>.*
                      </image>'''

        m_info = re.search(info_re, info_page, re.VERBOSE|re.DOTALL)
        if m_info is None:
            raise ExtractorError(u'Unable to extract video info')
        video_title = m_info.group('title')
        video_description = m_info.group('description')
        video_thumb = m_info.group('thumb')

        m_urls = list(re.finditer(r'<src>(?P<url>.*)</src>', links_webpage))
        if not m_urls:
            # This previously raised the undefined name ExtractError, which
            # surfaced as a NameError instead of an extractor error.
            raise ExtractorError(u'Unable to extract video url')
        # They are sorted from worst to best quality
        video_url = m_urls[-1].group('url')

        return {'url': video_url,
                'id': video_id,
                'title': video_title,
                # Videos are actually flv not mp4
                'ext': 'flv',
                'thumbnail': video_thumb,
                'description': video_description,
                }
2963 | ||
4aeae91f PH |
def gen_extractors():
    """Build one fresh instance of every supported extractor.

    The returned list is ordered: the first extractor that matches a URL is
    the one that handles it, so the position of each class below matters.
    """
    extractor_classes = (
        YoutubePlaylistIE,
        YoutubeChannelIE,
        YoutubeUserIE,
        YoutubeSearchIE,
        YoutubeIE,
        MetacafeIE,
        DailymotionIE,
        GoogleSearchIE,
        PhotobucketIE,
        YahooIE,
        YahooSearchIE,
        DepositFilesIE,
        FacebookIE,
        BlipTVIE,
        BlipTVUserIE,
        VimeoIE,
        MyVideoIE,
        ComedyCentralIE,
        EscapistIE,
        CollegeHumorIE,
        XVideosIE,
        SoundcloudSetIE,
        SoundcloudIE,
        InfoQIE,
        MixcloudIE,
        StanfordOpenClassroomIE,
        MTVIE,
        YoukuIE,
        XNXXIE,
        YouJizzIE,
        PornotubeIE,
        YouPornIE,
        GooglePlusIE,
        ArteTvIE,
        NBAIE,
        WorldStarHipHopIE,
        JustinTVIE,
        FunnyOrDieIE,
        SteamIE,
        UstreamIE,
        RBMARadioIE,
        EightTracksIE,
        KeekIE,
        TEDIE,
        MySpassIE,
        SpiegelIE,
        LiveLeakIE,
        ARDIE,
        ZDFIE,
        TumblrIE,
        BandcampIE,
        RedTubeIE,
        InaIE,
        HowcastIE,
        VineIE,
        FlickrIE,
        TeamcocoIE,
        XHamsterIE,
        HypemIE,
        Vbox7IE,
        GametrailersIE,
        StatigramIE,
        # Listed last: its _VALID_URL accepts any URL.
        GenericIE,
    )
    # Instantiate in listing order so the result keeps the matching priority.
    return [extractor_class() for extractor_class in extractor_classes]
93412126 JMF |
3033 | |
def get_info_extractor(ie_name):
    """Return the info extractor class registered under *ie_name*.

    The class is looked up among this module's global names as
    ``ie_name + 'IE'`` (e.g. 'Youtube' -> YoutubeIE); a KeyError is raised
    when the module defines no such name.
    """
    class_name = ie_name + 'IE'
    module_namespace = globals()
    return module_namespace[class_name]