]> jfr.im git - yt-dlp.git/blame - youtube_dl/InfoExtractors.py
Move YoutubeSearchIE to the other youtube IEs
[yt-dlp.git] / youtube_dl / InfoExtractors.py
CommitLineData
d77c3dfd
FV
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3
9e8056d5
PH
4from __future__ import absolute_import
5
4fcca4bb 6import base64
d77c3dfd 7import datetime
ccf65f9d 8import itertools
d77c3dfd
FV
9import netrc
10import os
11import re
12import socket
13import time
d77c3dfd 14import email.utils
921a1455 15import xml.etree.ElementTree
302efc19 16import random
17import math
6324fd1d 18import operator
de5d66d4 19import hashlib
20import binascii
21import urllib
d77c3dfd 22
9e8056d5 23from .utils import *
d6983cb4 24from .extractor.common import InfoExtractor, SearchInfoExtractor
d5822b96
PH
25
26from .extractor.ard import ARDIE
27from .extractor.arte import ArteTvIE
219b8130 28from .extractor.dailymotion import DailymotionIE
38cbc40a
PH
29from .extractor.metacafe import MetacafeIE
30from .extractor.statigram import StatigramIE
97d6faac 31from .extractor.photobucket import PhotobucketIE
b3d14cbf 32from .extractor.vimeo import VimeoIE
d6039175 33from .extractor.yahoo import YahooIE
b05654f0 34from .extractor.youtube import YoutubeIE, YoutubePlaylistIE, YoutubeSearchIE, YoutubeUserIE, YoutubeChannelIE
d5822b96 35from .extractor.zdf import ZDFIE
e30e9318 36
d830b7c2 37
d77c3dfd 38
d77c3dfd
FV
39
40
d77c3dfd
FV
41
42
d77c3dfd
FV
43
44
f2ad10a9
CA
45
46
class GenericIE(InfoExtractor):
    """Generic last-resort information extractor.

    Accepts any URL.  Redirects (e.g. URL shorteners) are resolved with a
    HEAD request and handed back as a URL result; otherwise the page is
    scanned for a few common embedding patterns that expose a media URL.
    """

    _VALID_URL = r'.*'
    IE_NAME = u'generic'

    def report_download_webpage(self, video_id):
        """Report webpage download."""
        # Outside of test runs, warn that no dedicated extractor is in use.
        if not self._downloader.params.get('test', False):
            self._downloader.report_warning(u'Falling back on generic information extractor.')
        super(GenericIE, self).report_download_webpage(video_id)

    def report_following_redirect(self, new_url):
        """Report that a redirect to new_url is being followed."""
        self._downloader.to_screen(u'[redirect] Following redirect to %s' % new_url)

    def _test_redirect(self, url):
        """Check if it is a redirect, like url shorteners, in case return the new url.

        Returns the final URL when it differs from url, False otherwise.
        Raises ExtractorError when opening the URL yields no response.
        """
        def without_body_headers(req):
            # The follow-up request carries no body, so drop the headers
            # that describe one.
            return dict((k, v) for k, v in req.headers.items()
                        if k.lower() not in ("content-length", "content-type"))

        class HeadRequest(compat_urllib_request.Request):
            def get_method(self):
                return "HEAD"

        class HEADRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
            """
            Subclass the HTTPRedirectHandler to make it use our
            HeadRequest also on the redirected URL
            """
            def redirect_request(self, req, fp, code, msg, headers, newurl):
                if code not in (301, 302, 303, 307):
                    raise compat_urllib_error.HTTPError(req.get_full_url(), code, msg, headers, fp)
                newurl = newurl.replace(' ', '%20')
                return HeadRequest(newurl,
                                   headers=without_body_headers(req),
                                   origin_req_host=req.get_origin_req_host(),
                                   unverifiable=True)

        class HTTPMethodFallback(compat_urllib_request.BaseHandler):
            """
            Fallback to GET if HEAD is not allowed (405 HTTP error)
            """
            def http_error_405(self, req, fp, code, msg, headers):
                # Drain and release the failed response before retrying.
                fp.read()
                fp.close()

                return self.parent.open(compat_urllib_request.Request(
                    req.get_full_url(),
                    headers=without_body_headers(req),
                    origin_req_host=req.get_origin_req_host(),
                    unverifiable=True))

        # Build our opener
        opener = compat_urllib_request.OpenerDirector()
        for handler in [compat_urllib_request.HTTPHandler, compat_urllib_request.HTTPDefaultErrorHandler,
                        HTTPMethodFallback, HEADRedirectHandler,
                        compat_urllib_request.HTTPErrorProcessor, compat_urllib_request.HTTPSHandler]:
            opener.add_handler(handler())

        response = opener.open(HeadRequest(url))
        if response is None:
            raise ExtractorError(u'Invalid URL protocol')
        try:
            new_url = response.geturl()
        finally:
            # Only the final URL is needed; release the connection.
            response.close()

        if url == new_url:
            return False

        self.report_following_redirect(new_url)
        return new_url

    def _real_extract(self, url):
        """Extract a single video from an arbitrary web page.

        Returns a one-element list holding either a URL result (when the
        URL redirects) or an info dictionary.  Raises ExtractorError when
        no media URL can be located in the page.
        """
        new_url = self._test_redirect(url)
        if new_url:
            return [self.url_result(new_url)]

        video_id = url.split('/')[-1]
        try:
            webpage = self._download_webpage(url, video_id)
        except ValueError:
            # since this is the last-resort InfoExtractor, if
            # this error is thrown, it'll be thrown here
            raise ExtractorError(u'Invalid URL: %s' % url)

        self.report_extraction(video_id)
        # Start with something easy: JW Player in SWFObject
        mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
        if mobj is None:
            # Broaden the search a little bit
            mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
        if mobj is None:
            # Broaden the search a little bit: JWPlayer JS loader
            mobj = re.search(r'[^A-Za-z0-9]?file:\s*["\'](http[^\'"&]*)', webpage)
        if mobj is None:
            # Try to find twitter cards info
            mobj = re.search(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage)
        if mobj is None:
            # We look for Open Graph info:
            # We have to match any number spaces between elements, some sites try to align them (eg.: statigr.am)
            m_video_type = re.search(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
            # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
            if m_video_type is not None:
                mobj = re.search(r'<meta.*?property="og:video".*?content="(.*?)"', webpage)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        # It's possible that one of the regexes matched, but returned an
        # empty group (e.g. og:video with content=""); group(1) is never
        # None for these patterns, so test for emptiness instead.
        if not mobj.group(1):
            raise ExtractorError(u'Invalid URL: %s' % url)

        video_url = compat_urllib_parse.unquote(mobj.group(1))

        # The media file name provides both the id and the extension.
        video_id, dotted_extension = os.path.splitext(os.path.basename(video_url))
        video_extension = dotted_extension[1:]

        # it's tempting to parse this further, but you would
        # have to take into account all the variations like
        #   Video Title - Site Name
        #   Site Name | Video Title
        #   Video Title - Tagline | Site Name
        # and so on and so forth; it's just not practical
        video_title = self._html_search_regex(r'<title>(.*)</title>',
            webpage, u'video title')

        # video uploader is domain name
        video_uploader = self._search_regex(r'(?:https?://)?([^/]*)/.*',
            url, u'video uploader')

        return [{
            'id':       video_id,
            'url':      video_url,
            'uploader': video_uploader,
            'upload_date':  None,
            'title':    video_title,
            'ext':      video_extension,
        }]
d77c3dfd
FV
186
187
d77c3dfd 188
class GoogleSearchIE(SearchInfoExtractor):
    """Information Extractor for Google Video search queries."""
    _MORE_PAGES_INDICATOR = r'id="pnnext" class="pn"'
    _MAX_RESULTS = 1000
    IE_NAME = u'video.google:search'
    _SEARCH_KEY = 'gvsearch'

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query.

        Result pages are downloaded ten results at a time until n entries
        have been collected or the page no longer advertises a next page.
        Returns a playlist dictionary with at most n URL entries.
        """
        entries = []
        res = {
            '_type': 'playlist',
            'id': query,
            'entries': entries,
        }

        # The ``start`` offset is zero-based: counting pages from 0 makes
        # sure the first ten results are not skipped.
        for pagenum in itertools.count(0):
            result_url = u'http://www.google.com/search?tbm=vid&q=%s&start=%s&hl=en' % (compat_urllib_parse.quote_plus(query), pagenum * 10)
            webpage = self._download_webpage(result_url, u'gvsearch:' + query,
                                             note='Downloading result page ' + str(pagenum + 1))

            for mobj in re.finditer(r'<h3 class="r"><a href="([^"]+)"', webpage):
                entries.append({
                    '_type': 'url',
                    'url': mobj.group(1),
                })

            # Stop on the number of entries actually collected and never
            # hand back more than the caller asked for.
            if len(entries) >= n or not re.search(self._MORE_PAGES_INDICATOR, webpage):
                res['entries'] = entries[:n]
                return res
d77c3dfd 219
class YahooSearchIE(SearchInfoExtractor):
    """Information Extractor for Yahoo! Video search queries."""

    _MAX_RESULTS = 1000
    IE_NAME = u'screen.yahoo:search'
    _SEARCH_KEY = 'yvsearch'

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query.

        Pages of 30 results are requested as JSON until n entries have been
        collected or the service reports that the last result was reached.
        Returns a playlist dictionary with at most n entries.
        """
        entries = []
        res = {
            '_type': 'playlist',
            'id': query,
            'entries': entries,
        }
        for pagenum in itertools.count(0):
            result_url = u'http://video.search.yahoo.com/search/?p=%s&fr=screen&o=js&gs=0&b=%d' % (compat_urllib_parse.quote_plus(query), pagenum * 30)
            webpage = self._download_webpage(result_url, query,
                                             note='Downloading results page '+str(pagenum+1))
            info = json.loads(webpage)
            # NOTE(review): 'm' looks like paging metadata ('last' shown,
            # 'total' available) -- confirm against the service response.
            m = info[u'm']
            results = info[u'results']

            for r in results:
                if len(entries) >= n:
                    break
                mobj = re.search(r'(?P<url>screen\.yahoo\.com/.*?-\d*?\.html)"', r)
                if mobj is None:
                    # Result without a screen.yahoo.com link: nothing to queue.
                    continue
                entries.append(self.url_result('http://' + mobj.group('url'), 'Yahoo'))

            # Decide on the number of collected entries rather than on a
            # loop index, which is undefined when a page has no results.
            # An empty page also ends the search to avoid looping forever.
            if len(entries) >= n or not results or m[u'last'] >= (m[u'total'] - 1):
                break

        return res
d77c3dfd
FV
253
254
class BlipTVUserIE(InfoExtractor):
    """Information Extractor for blip.tv users."""

    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?blip\.tv/)|bliptvuser:)([^/]+)/*$'
    _PAGE_SIZE = 12
    IE_NAME = u'blip.tv:user'

    def _real_extract(self, url):
        """Return a playlist with every video published by a blip.tv user.

        Raises ExtractorError if the URL does not match or the numeric
        users id cannot be found in the user page.
        """
        # Extract username
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        username = mobj.group(1)

        page_base = 'http://m.blip.tv/pr/show_get_full_episode_list?users_id=%s&lite=0&esi=1'

        page = self._download_webpage(url, username, u'Downloading user page')
        # Fail with a descriptive error instead of an AttributeError on None
        # when the page does not carry the users id.
        users_id = self._search_regex(r'data-users-id="([^"]+)"', page, u'users id')
        page_base = page_base % users_id

        # Download video ids using BlipTV Ajax calls. Result size per
        # query is limited (currently to 12 videos) so we need to query
        # page by page until there are no video ids - it means we got
        # all of them.

        video_ids = []

        for pagenum in itertools.count(1):
            page_url = page_base + "&page=" + str(pagenum)
            page = self._download_webpage(page_url, username,
                                          u'Downloading video ids from page %d' % pagenum)

            # Extract video identifiers
            ids_in_page = []

            for mobj in re.finditer(r'href="/([^"]+)"', page):
                # Compare the unescaped form, which is what gets stored;
                # otherwise ids containing HTML entities are duplicated.
                video_id = unescapeHTML(mobj.group(1))
                if video_id not in ids_in_page:
                    ids_in_page.append(video_id)

            video_ids.extend(ids_in_page)

            # A little optimization - if current page is not
            # "full", ie. does not contain PAGE_SIZE video ids then
            # we can assume that this page is the last one - there
            # are no more ids on further pages - no need to query
            # again.

            if len(ids_in_page) < self._PAGE_SIZE:
                break

        url_entries = [self.url_result(u'http://blip.tv/%s' % video_id, 'BlipTV')
                       for video_id in video_ids]
        return [self.playlist_result(url_entries, playlist_title = username)]
eeeb4daa
JCGS
313
314
class DepositFilesIE(InfoExtractor):
    """Information extractor for depositfiles.com"""

    _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles\.com/(?:../(?#locale))?files/(.+)'

    def _real_extract(self, url):
        """Resolve a depositfiles.com link to the direct file URL.

        Returns a one-element list with the info dictionary.  Raises
        ExtractorError if the page cannot be retrieved or shows no
        download form (the site's "Attention" notice is reported when
        one is present).
        """
        file_id = url.split('/')[-1]
        # Always work against the English version of the page
        url = 'http://depositfiles.com/en/files/' + file_id

        # Posting gateway_result=1 mimics pressing the 'Free download' button
        post_data = compat_urllib_parse.urlencode({'gateway_result': '1'})
        request = compat_urllib_request.Request(url, post_data)
        try:
            self.report_download_webpage(file_id)
            webpage = compat_urllib_request.urlopen(request).read()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to retrieve file webpage: %s' % compat_str(err))

        # Locate the form that points at the real file
        form_match = re.search(r'<form action="(http://fileshare.+?)"', webpage)
        if form_match is None or form_match.group(1) is None:
            # No download form: look for the site's explanation
            notice = re.search(r'<strong>(Attention.*?)</strong>', webpage, re.DOTALL)
            if notice is None or notice.group(1) is None:
                raise ExtractorError(u'Unable to extract download URL from: %s' % url)
            restriction_message = re.sub('\s+', ' ', notice.group(1)).strip()
            raise ExtractorError(u'%s' % restriction_message)

        file_url = form_match.group(1)
        dotted_extension = os.path.splitext(file_url)[1]
        file_extension = dotted_extension[1:]

        # The file title is carried by a <b title="..."> element
        file_title = self._search_regex(r'<b title="(.*?)">', webpage, u'title')

        result = {
            'id':       file_id.decode('utf-8'),
            'url':      file_url.decode('utf-8'),
            'uploader': None,
            'upload_date':  None,
            'title':    file_title,
            'ext':      file_extension.decode('utf-8'),
        }
        return [result]
d77c3dfd
FV
359
360
class FacebookIE(InfoExtractor):
    """Information Extractor for Facebook"""

    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook\.com/(?:video/video|photo)\.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)'
    _LOGIN_URL = 'https://login.facebook.com/login.php?m&next=http%3A%2F%2Fm.facebook.com%2Fhome.php&'
    _NETRC_MACHINE = 'facebook'
    IE_NAME = u'facebook'

    def report_login(self):
        """Report attempt to log in."""
        self.to_screen(u'Logging in')

    def _real_initialize(self):
        """Log in with the configured credentials, if any.

        Credentials come from the 'username'/'password' downloader
        parameters or, when 'usenetrc' is set, from the .netrc entry for
        _NETRC_MACHINE.  Failures only produce a warning.
        """
        if self._downloader is None:
            return

        params = self._downloader.params
        useremail = None
        password = None

        # Explicit credentials win over .netrc data
        if params.get('username', None) is not None:
            useremail = params['username']
            password = params['password']
        elif params.get('usenetrc', False):
            try:
                auth = netrc.netrc().authenticators(self._NETRC_MACHINE)
                if auth is None:
                    raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
                useremail = auth[0]
                password = auth[2]
            except (IOError, netrc.NetrcParseError) as err:
                self._downloader.report_warning(u'parsing .netrc: %s' % compat_str(err))
                return

        # Nothing to do without credentials
        if useremail is None:
            return

        # Log in
        form_data = compat_urllib_parse.urlencode({
            'email': useremail,
            'pass': password,
            'login': 'Log+In'
        })
        request = compat_urllib_request.Request(self._LOGIN_URL, form_data)
        try:
            self.report_login()
            login_results = compat_urllib_request.urlopen(request).read()
            # Being served the login form again means the login was rejected
            still_login_form = re.search(r'<form(.*)name="login"(.*)</form>', login_results)
            if still_login_form is not None:
                self._downloader.report_warning(u'unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
                return
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
            return

    def _real_extract(self, url):
        """Extract the video described by a Facebook video/photo URL.

        Returns a one-element list with the info dictionary.  Raises
        ExtractorError if the URL is invalid, the embedded player data
        cannot be located, or no video URL is present in it.
        """
        url_match = re.match(self._VALID_URL, url)
        if url_match is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = url_match.group('ID')

        url = 'https://www.facebook.com/video/video.php?v=%s' % video_id
        webpage = self._download_webpage(url, video_id)

        # The player variables are a JSON literal sitting between these two
        # JavaScript fragments.
        BEFORE = '{swf.addParam(param[0], param[1]);});\n'
        AFTER = '.forEach(function(variable) {swf.addVariable(variable[0], variable[1]);});'
        variables_match = re.search(re.escape(BEFORE) + '(.*?)' + re.escape(AFTER), webpage)
        if not variables_match:
            raise ExtractorError(u'Cannot parse data')
        data = dict(json.loads(variables_match.group(1)))
        # 'params' is itself URL-quoted JSON
        params = json.loads(compat_urllib_parse.unquote(data['params']))
        video_data = params['video_data'][0]

        # Prefer the HD source and fall back to SD
        video_url = video_data.get('hd_src') or video_data['sd_src']
        if not video_url:
            raise ExtractorError(u'Cannot find video URL')
        video_duration = int(video_data['video_duration'])
        thumbnail = video_data['thumbnail_src']

        video_title = self._html_search_regex('<h2 class="uiHeaderTitle">([^<]+)</h2>',
            webpage, u'title')

        return [{
            'id': video_id,
            'title': video_title,
            'url': video_url,
            'ext': 'mp4',
            'duration': video_duration,
            'thumbnail': thumbnail,
        }]
59ae15a5 455
d77c3dfd
FV
456
class BlipTVIE(InfoExtractor):
    """Information extractor for blip.tv"""

    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?blip\.tv/((.+/)|(play/)|(api\.swf#))(.+)$'
    _URL_EXT = r'^.*\.([a-z0-9]+)$'
    IE_NAME = u'blip.tv'

    def report_direct_download(self, title):
        """Report that the URL points directly at a video file."""
        self.to_screen(u'%s: Direct download detected' % title)

    def _real_extract(self, url):
        """Extract a single blip.tv video.

        Embedded-player URLs (api.swf# and /play/) are first resolved to
        the canonical page.  The page is then requested in its JSON form;
        if the server answers with a video Content-Type the URL is treated
        as a direct download.  Returns a one-element list with the info
        dictionary and raises ExtractorError on failure.
        """
        url_match = re.match(self._VALID_URL, url)
        if url_match is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        # See https://github.com/rg3/youtube-dl/issues/857
        api_match = re.match(r'http://a\.blip\.tv/api\.swf#(?P<video_id>[\d\w]+)', url)
        if api_match is not None:
            url = 'http://blip.tv/play/g_%s' % api_match.group('video_id')

        if compat_urllib_parse_urlparse(url).path.startswith('/play/'):
            # Follow the redirect; the file id sits in the fragment of the
            # URL we end up at.
            response = compat_urllib_request.urlopen(compat_urllib_request.Request(url))
            redirect_parts = compat_urllib_parse_urlparse(response.geturl())
            file_param = compat_parse_qs(redirect_parts.fragment)['file'][0]
            file_id = file_param.rpartition('/')[2]
            return self._real_extract('http://blip.tv/a/a-' + file_id)

        separator = '&' if '?' in url else '?'
        json_url = url + separator + 'skin=json&version=2&no_wrap=1'
        request = compat_urllib_request.Request(json_url)
        request.add_header('User-Agent', 'iTunes/10.6.1')
        self.report_extraction(url_match.group(1))
        try:
            urlh = compat_urllib_request.urlopen(request)
            if urlh.headers.get('Content-Type', '').startswith('video/'): # Direct download
                title, ext = os.path.splitext(url.split('/')[-1])
                title = title.decode('UTF-8')
                ext = ext.replace('.', '')
                self.report_direct_download(title)
                return [{
                    'id': title,
                    'url': url,
                    'uploader': None,
                    'upload_date': None,
                    'title': title,
                    'ext': ext,
                    'urlhandle': urlh
                }]
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'ERROR: unable to download video info webpage: %s' % compat_str(err))

        # Regular URL: the response body is the JSON description
        try:
            json_code = urlh.read().decode('utf-8')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to read video info webpage: %s' % compat_str(err))

        try:
            json_data = json.loads(json_code)
            # The payload may or may not be wrapped in a 'Post' object
            data = json_data['Post'] if 'Post' in json_data else json_data

            upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d')
            video_url = data['media']['url']
            ext_match = re.match(self._URL_EXT, video_url)
            if ext_match is None:
                raise ValueError('Can not determine filename extension')

            info = {
                'id': data['item_id'],
                'url': video_url,
                'uploader': data['display_name'],
                'upload_date': upload_date,
                'title': data['title'],
                'ext': ext_match.group(1),
                'format': data['media']['mimeType'],
                'thumbnail': data['thumbnailUrl'],
                'description': data['description'],
                'player_url': data['embedUrl'],
                'user_agent': 'iTunes/10.6.1',
            }
        except (ValueError, KeyError) as err:
            raise ExtractorError(u'Unable to parse video information: %s' % repr(err))

        return [info]
d77c3dfd
FV
554
555
class MyVideoIE(InfoExtractor):
    """Information Extractor for myvideo.de."""

    _VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/watch/([0-9]+)/([^?/]+).*'
    IE_NAME = u'myvideo'

    # Original Code from: https://github.com/dersphere/plugin.video.myvideo_de.git
    # Released into the Public Domain by Tristan Fischer on 2013-05-19
    # https://github.com/rg3/youtube-dl/pull/842
    def __rc4crypt(self, data, key):
        """Run the RC4 cipher over data with key and return the result as a str."""
        # Key scheduling: shuffle the 256-entry state box according to the key.
        box = list(range(256))
        x = 0
        for i in range(256):
            x = (x + box[i] + compat_ord(key[i % len(key)])) % 256
            box[i], box[x] = box[x], box[i]
        # Keystream generation, XORed with the input one element at a time.
        x = 0
        y = 0
        out = []
        for char in data:
            x = (x + 1) % 256
            y = (y + box[x]) % 256
            box[x], box[y] = box[y], box[x]
            out.append(chr(compat_ord(char) ^ box[(box[x] + box[y]) % 256]))
        # Join once instead of growing a string character by character.
        return ''.join(out)

    def __md5(self, s):
        """Return the hexadecimal MD5 digest of s, encoded as bytes."""
        return hashlib.md5(s).hexdigest().encode()

    def _real_extract(self, url):
        """Extract a single myvideo.de video.

        Pages exposing a plain ``source src`` are handled directly.
        Otherwise the encrypted player XML is downloaded, decrypted and
        searched for the stream location.  Returns a one-element list with
        the info dictionary and raises ExtractorError on failure.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'invalid URL: %s' % url)

        video_id = mobj.group(1)

        # Doubly base64-encoded key material; it is combined with the MD5
        # of the video id below to derive the decryption key.
        GK = (
          b'WXpnME1EZGhNRGhpTTJNM01XVmhOREU0WldNNVpHTTJOakpt'
          b'TW1FMU5tVTBNR05pWkRaa05XRXhNVFJoWVRVd1ptSXhaVEV3'
          b'TnpsbA0KTVRkbU1tSTRNdz09'
        )

        # Get video webpage
        webpage_url = 'http://www.myvideo.de/watch/%s' % video_id
        webpage = self._download_webpage(webpage_url, video_id)

        mobj = re.search("source src='(.+?)[.]([^.]+)'", webpage)
        if mobj is not None:
            self.report_extraction(video_id)
            # Whatever extension the page names, the .flv variant is used.
            video_url = mobj.group(1) + '.flv'

            video_title = self._html_search_regex('<title>([^<]+)</title>',
                webpage, u'title')

            return [{
                'id':       video_id,
                'url':      video_url,
                'uploader': None,
                'upload_date':  None,
                'title':    video_title,
                'ext':      u'flv',
            }]

        # try encxml
        mobj = re.search('var flashvars={(.+?)}', webpage)
        if mobj is None:
            raise ExtractorError(u'Unable to extract video')

        # Split the flashvars into the XML endpoint (_encxml) and the query
        # parameters that have to be sent to it.
        params = {}
        encxml = ''
        sec = mobj.group(1)
        for (a, b) in re.findall("(.+?):'(.+?)',?", sec):
            if a == '_encxml':
                encxml = compat_urllib_parse.unquote(b)
            else:
                params[a] = b
        if not params.get('domain'):
            params['domain'] = 'www.myvideo.de'
        xmldata_url = '%s?%s' % (encxml, compat_urllib_parse.urlencode(params))
        if 'flash_playertype=MTV' in xmldata_url:
            self._downloader.report_warning(u'avoiding MTV player')
            xmldata_url = (
                'http://www.myvideo.de/dynamic/get_player_video_xml.php'
                '?flash_playertype=D&ID=%s&_countlimit=4&autorun=yes'
            ) % video_id

        # get enc data
        enc_data = self._download_webpage(xmldata_url, video_id).split('=')[1]
        enc_data_b = binascii.unhexlify(enc_data)
        sk = self.__md5(
            base64.b64decode(base64.b64decode(GK)) +
            self.__md5(
                str(video_id).encode('utf-8')
            )
        )
        dec_data = self.__rc4crypt(enc_data_b, sk)

        # extracting infos
        self.report_extraction(video_id)

        video_url = None
        mobj = re.search("connectionurl='(.*?)'", dec_data)
        if mobj:
            video_url = compat_urllib_parse.unquote(mobj.group(1))
            if 'myvideo2flash' in video_url:
                self._downloader.report_warning(u'forcing RTMPT ...')
                video_url = video_url.replace('rtmpe://', 'rtmpt://')

        if not video_url:
            # extract non rtmp videos
            mobj = re.search("path='(http.*?)' source='(.*?)'", dec_data)
            if mobj is None:
                raise ExtractorError(u'unable to extract url')
            video_url = compat_urllib_parse.unquote(mobj.group(1)) + compat_urllib_parse.unquote(mobj.group(2))

        video_file = self._search_regex("source='(.*?)'", dec_data, u'video file')
        video_file = compat_urllib_parse.unquote(video_file)

        if not video_file.endswith('f4m'):
            ppath, prefix = video_file.split('.')
            video_playpath = '%s:%s' % (prefix, ppath)
            video_hls_playlist = ''
        else:
            video_playpath = ''
            # The playlist sits next to the manifest, so the location taken
            # from the 'path' attribute (the one the non-RTMP branch above
            # reads as well) has to be prepended.  This name used to be
            # undefined here and raised a NameError.
            video_filepath = compat_urllib_parse.unquote(
                self._search_regex("path='(.*?)'", dec_data, u'video file path'))
            video_hls_playlist = (
                video_filepath + video_file
            ).replace('.f4m', '.m3u8')

        video_swfobj = self._search_regex(r"swfobject.embedSWF\('(.+?)'", webpage, u'swfobj')
        video_swfobj = compat_urllib_parse.unquote(video_swfobj)

        video_title = self._html_search_regex("<h1(?: class='globalHd')?>(.*?)</h1>",
            webpage, u'title')

        return [{
            'id':                 video_id,
            'url':                video_url,
            'tc_url':             video_url,
            'uploader':           None,
            'upload_date':        None,
            'title':              video_title,
            'ext':                u'flv',
            'play_path':          video_playpath,
            'video_file':         video_file,
            'video_hls_playlist': video_hls_playlist,
            'player_url':         video_swfobj,
        }]
d77c3dfd 704
ac3e9394 705
class ComedyCentralIE(InfoExtractor):
    """Information extractor for The Daily Show and Colbert Report """

    # urls can be abbreviations like :thedailyshow or :colbert
    # urls for episodes like:
    # or urls for clips like: http://www.thedailyshow.com/watch/mon-december-10-2012/any-given-gun-day
    #                     or: http://www.colbertnation.com/the-colbert-report-videos/421667/november-29-2012/moon-shattering-news
    #                     or: http://www.colbertnation.com/the-colbert-report-collections/422008/festival-of-lights/79524
    # NOTE: the pattern is written for re.VERBOSE and must be matched with
    # that flag (see suitable() and _real_extract()).
    _VALID_URL = r"""^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport)
                      |(https?://)?(www\.)?
                          (?P<showname>thedailyshow|colbertnation)\.com/
                         (full-episodes/(?P<episode>.*)|
                          (?P<clip>
                              (the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
                              |(watch/(?P<date>[^/]*)/(?P<tdstitle>.*)))))
                     $"""

    # Rendition bitrates known to this extractor
    _available_formats = ['3500', '2200', '1700', '1200', '750', '400']

    # Container extension per bitrate (used when listing formats)
    _video_extensions = {
        '3500': 'mp4',
        '2200': 'mp4',
        '1700': 'mp4',
        '1200': 'mp4',
        '750': 'mp4',
        '400': 'mp4',
    }
    # Frame size per bitrate (used when listing formats)
    _video_dimensions = {
        '3500': '1280x720',
        '2200': '960x540',
        '1700': '768x432',
        '1200': '640x360',
        '750': '512x288',
        '400': '384x216',
    }

    @classmethod
    def suitable(cls, url):
        """Receives a URL and returns True if suitable for this IE."""
        return re.match(cls._VALID_URL, url, re.VERBOSE) is not None

    def _print_formats(self, formats):
        """Print one line per format: bitrate, extension and dimensions."""
        print('Available formats:')
        for x in formats:
            print('%s\t:\t%s\t[%s]' %(x, self._video_extensions.get(x, 'mp4'), self._video_dimensions.get(x, '???')))


    def _real_extract(self, url):
        """Extract every part of an episode or clip.

        Returns a list with one info dictionary per item of the show's
        index feed.  Returns None after printing the available formats
        when the 'listformats' parameter is set.  Raises ExtractorError
        when the URL is invalid or the player reference cannot be found.
        """
        mobj = re.match(self._VALID_URL, url, re.VERBOSE)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        # Expand the ":tds" / ":colbert" style shortcuts to the show's
        # full-episodes page and match again to fill the named groups.
        if mobj.group('shortname'):
            if mobj.group('shortname') in ('tds', 'thedailyshow'):
                url = u'http://www.thedailyshow.com/full-episodes/'
            else:
                url = u'http://www.colbertnation.com/full-episodes/'
            mobj = re.match(self._VALID_URL, url, re.VERBOSE)
            assert mobj is not None

        if mobj.group('clip'):
            if mobj.group('showname') == 'thedailyshow':
                epTitle = mobj.group('tdstitle')
            else:
                epTitle = mobj.group('cntitle')
            dlNewest = False
        else:
            # A full-episodes URL without an episode part means "newest"
            dlNewest = not mobj.group('episode')
            if dlNewest:
                epTitle = mobj.group('showname')
            else:
                epTitle = mobj.group('episode')

        self.report_extraction(epTitle)
        webpage,htmlHandle = self._download_webpage_handle(url, epTitle)
        if dlNewest:
            # The final URL of the download is re-matched to learn which
            # episode the generic page resolved to.
            url = htmlHandle.geturl()
            mobj = re.match(self._VALID_URL, url, re.VERBOSE)
            if mobj is None:
                raise ExtractorError(u'Invalid redirected URL: ' + url)
            if mobj.group('episode') == '':
                raise ExtractorError(u'Redirected URL is still not specific: ' + url)
            epTitle = mobj.group('episode')

        # Each match is a (full player URL, media reference) pair
        mMovieParams = re.findall('(?:<param name="movie" value="|var url = ")(http://media.mtvnservices.com/([^"]*(?:episode|video).*?:.*?))"', webpage)

        if len(mMovieParams) == 0:
            # The Colbert Report embeds the information in a data-mgid
            # attribute without a URL prefix; so extract the alternate
            # reference and then add the URL prefix manually.

            altMovieParams = re.findall('data-mgid="([^"]*(?:episode|video).*?:.*?)"', webpage)
            if len(altMovieParams) == 0:
                raise ExtractorError(u'unable to find Flash URL in webpage ' + url)
            else:
                mMovieParams = [("http://media.mtvnservices.com/" + altMovieParams[0], altMovieParams[0])]

        uri = mMovieParams[0][1]
        indexUrl = 'http://shadow.comedycentral.com/feeds/video_player/mrss/?' + compat_urllib_parse.urlencode({'uri': uri})
        indexXml = self._download_webpage(indexUrl, epTitle,
                                          u'Downloading show index',
                                          u'unable to download episode index')

        results = []

        idoc = xml.etree.ElementTree.fromstring(indexXml)
        itemEls = idoc.findall('.//item')
        for partNum,itemEl in enumerate(itemEls):
            # The guid is colon-separated: the last field becomes the
            # media id, the one before it (minus '.com') the show id.
            mediaId = itemEl.findall('./guid')[0].text
            shortMediaId = mediaId.split(':')[-1]
            showId = mediaId.split(':')[-2].replace('.com', '')
            officialTitle = itemEl.findall('./title')[0].text
            officialDate = unified_strdate(itemEl.findall('./pubDate')[0].text)

            configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' +
                        compat_urllib_parse.urlencode({'uri': mediaId}))
            configXml = self._download_webpage(configUrl, epTitle,
                                               u'Downloading configuration for %s' % shortMediaId)

            cdoc = xml.etree.ElementTree.fromstring(configXml)
            # Collect (bitrate, source URL) for every rendition
            turls = []
            for rendition in cdoc.findall('.//rendition'):
                finfo = (rendition.attrib['bitrate'], rendition.findall('./src')[0].text)
                turls.append(finfo)

            if len(turls) == 0:
                self._downloader.report_error(u'unable to download ' + mediaId + ': No videos found')
                continue

            if self._downloader.params.get('listformats', None):
                self._print_formats([i[0] for i in turls])
                return

            # For now, just pick the highest bitrate
            # NOTE(review): this takes the last rendition listed, which
            # presumably is the highest bitrate -- confirm the feed order.
            format,rtmp_video_url = turls[-1]

            # Get the format arg from the arg stream
            req_format = self._downloader.params.get('format', None)

            # Select format if we can find one
            for f,v in turls:
                if f == req_format:
                    format, rtmp_video_url = f, v
                    break

            # Rebuild the RTMP URL as an HTTP URL: keep the path starting
            # at 'gsp.comedystor/' and append it to a fixed base.
            m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp.comedystor/.*)$', rtmp_video_url)
            if not m:
                raise ExtractorError(u'Cannot transform RTMP url')
            base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/'
            video_url = base + m.group('finalid')

            # Parts are numbered from 1 in the title
            effTitle = showId + u'-' + epTitle + u' part ' + compat_str(partNum+1)
            info = {
                'id': shortMediaId,
                'url': video_url,
                'uploader': showId,
                'upload_date': officialDate,
                'title': effTitle,
                'ext': 'mp4',
                'format': format,
                'thumbnail': None,
                'description': officialTitle,
            }
            results.append(info)

        return results
d77c3dfd
FV
872
873
class EscapistIE(InfoExtractor):
    """Information extractor for The Escapist

    The video page carries its metadata in <meta> tags.  The og:video
    player URL has a ``config=`` argument holding the (URL-quoted) address
    of a JavaScript configuration file, whose playlist contains the media
    URL.
    """

    _VALID_URL = r'^(https?://)?(www\.)?escapistmagazine\.com/videos/view/(?P<showname>[^/]+)/(?P<episode>[^/?]+)[/?]?.*$'
    IE_NAME = u'escapist'

    def _real_extract(self, url):
        """Return a one-element list with the info dictionary for url.

        Raises ExtractorError if the URL does not match _VALID_URL, if the
        configuration file is not valid JSON, or if it has no media URL.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        showName = mobj.group('showname')
        videoId = mobj.group('episode')

        self.report_extraction(videoId)
        webpage = self._download_webpage(url, videoId)

        # Description and thumbnail are optional
        videoDesc = self._html_search_regex(r'<meta name="description" content="([^"]*)"',
            webpage, u'description', fatal=False)

        imgUrl = self._html_search_regex(r'<meta property="og:image" content="([^"]*)"',
            webpage, u'thumbnail', fatal=False)

        playerUrl = self._html_search_regex(r'<meta property="og:video" content="([^"]*)"',
            webpage, u'player url')

        # Only the part after the last ' : ' separator is used as the title
        title = self._html_search_regex(r'<meta name="title" content="([^"]*)"',
            webpage, u'title').split(' : ')[-1]

        configUrl = self._search_regex(r'config=(.*)$', playerUrl, u'config url')
        configUrl = compat_urllib_parse.unquote(configUrl)

        configJSON = self._download_webpage(configUrl, videoId,
                                            u'Downloading configuration',
                                            u'unable to download configuration')

        # Technically, it's JavaScript, not JSON
        configJSON = configJSON.replace("'", '"')

        try:
            config = json.loads(configJSON)
        except ValueError as err:
            raise ExtractorError(u'Invalid JSON in configuration file: ' + compat_str(err))

        try:
            # The media URL is read from the second playlist entry
            videoUrl = config['playlist'][1]['url']
        except (KeyError, IndexError, TypeError):
            raise ExtractorError(u'Unable to find video URL in configuration file')

        info = {
            'id': videoId,
            'url': videoUrl,
            'uploader': showName,
            'upload_date': None,
            'title': title,
            'ext': 'mp4',
            'thumbnail': imgUrl,
            'description': videoDesc,
            'player_url': playerUrl,
        }

        return [info]
d77c3dfd 933
class CollegeHumorIE(InfoExtractor):
    """Information extractor for collegehumor.com

    Metadata comes from the moogaloop XML document for the video; the media
    URL is built from the f4m manifest that document points to.
    """

    _WORKING = False
    _VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/video/(?P<videoid>[0-9]+)/(?P<shorttitle>.*)$'
    IE_NAME = u'collegehumor'

    def report_manifest(self, video_id):
        """Report download of the XML manifest."""
        self.to_screen(u'%s: Downloading XML manifest' % video_id)

    def _real_extract(self, url):
        """Return a one-element list with the info dictionary for url.

        Raises ExtractorError if the URL does not match _VALID_URL, if
        either XML document cannot be downloaded, or if one of them lacks
        an expected element or attribute.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = mobj.group('videoid')

        info = {
            'id': video_id,
            'uploader': None,
            'upload_date': None,
        }

        self.report_extraction(video_id)
        xmlUrl = 'http://www.collegehumor.com/moogaloop/video/' + video_id
        try:
            metaXml = compat_urllib_request.urlopen(xmlUrl).read()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to download video info XML: %s' % compat_str(err))

        mdoc = xml.etree.ElementTree.fromstring(metaXml)
        try:
            videoNode = mdoc.findall('./video')[0]
            info['description'] = videoNode.findall('./description')[0].text
            info['title'] = videoNode.findall('./caption')[0].text
            info['thumbnail'] = videoNode.findall('./thumbnail')[0].text
            manifest_url = videoNode.findall('./file')[0].text
        except IndexError:
            raise ExtractorError(u'Invalid metadata XML file')

        manifest_url += '?hdcore=2.10.3'
        self.report_manifest(video_id)
        try:
            manifestXml = compat_urllib_request.urlopen(manifest_url).read()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to download XML manifest: %s' % compat_str(err))

        adoc = xml.etree.ElementTree.fromstring(manifestXml)
        try:
            media_node = adoc.findall('./{http://ns.adobe.com/f4m/1.0}media')[0]
            node_id = media_node.attrib['url']
            # From here on video_id is the manifest's id; info['id'] keeps
            # the id taken from the page URL.
            video_id = adoc.findall('./{http://ns.adobe.com/f4m/1.0}id')[0].text
        except (IndexError, KeyError):
            # KeyError: the media node has no 'url' attribute
            raise ExtractorError(u'Invalid manifest file')

        url_pr = compat_urllib_parse_urlparse(manifest_url)
        # The last two characters of the manifest id are dropped when
        # building the fragment URL.
        url = url_pr.scheme + '://' + url_pr.netloc + '/z' + video_id[:-2] + '/' + node_id + 'Seg1-Frag1'

        info['url'] = url
        info['ext'] = 'f4f'
        return [info]
d77c3dfd
FV
995
996
class XVideosIE(InfoExtractor):
    """Information extractor for xvideos.com"""

    _VALID_URL = r'^(?:https?://)?(?:www\.)?xvideos\.com/video([0-9]+)(?:.*)'
    IE_NAME = u'xvideos'

    def _real_extract(self, url):
        """Return a one-element list with the info dictionary for url.

        Raises ExtractorError if the URL does not match _VALID_URL.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = mobj.group(1)

        webpage = self._download_webpage(url, video_id)

        self.report_extraction(video_id)

        # Extract video URL (URL-quoted inside the page)
        video_url = compat_urllib_parse.unquote(self._search_regex(r'flv_url=(.+?)&',
            webpage, u'video URL'))

        # Extract title
        video_title = self._html_search_regex(r'<title>(.*?)\s+-\s+XVID',
            webpage, u'title')

        # Extract video thumbnail. The group spans the whole URL so that the
        # full thumbnail address is returned, not just the file name.
        video_thumbnail = self._search_regex(r'(http://(?:img.*?\.)xvideos.com/videos/thumbs/[a-fA-F0-9]+/[a-fA-F0-9]+/[a-fA-F0-9]+/[a-fA-F0-9]+/[a-fA-F0-9.]+jpg)',
            webpage, u'thumbnail', fatal=False)

        info = {
            'id': video_id,
            'url': video_url,
            'uploader': None,
            'upload_date': None,
            'title': video_title,
            'ext': 'flv',
            'thumbnail': video_thumbnail,
            'description': None,
        }

        return [info]
d77c3dfd
FV
1037
1038
class SoundcloudIE(InfoExtractor):
    """Information extractor for soundcloud.com

       The track page URL is resolved through the API's resolve.json
       endpoint to obtain the track metadata; the stream definitions of
       the resolved track id then give the 128 kbit/s MP3 URL.
    """

    _VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)'
    IE_NAME = u'soundcloud'

    def report_resolve(self, video_id):
        """Report that the id is being resolved."""
        self.to_screen(u'%s: Resolving id' % video_id)

    def _real_extract(self, url):
        """Return a one-element list with the info dictionary for url."""
        match = re.match(self._VALID_URL, url)
        if match is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        # Both the uploader and the slug of the song title are in the URL
        uploader, slug_title = match.group(1), match.group(2)
        full_title = '%s/%s' % (uploader, slug_title)

        self.report_resolve(full_title)

        track_page = 'http://soundcloud.com/%s/%s' % (uploader, slug_title)
        resolv_url = 'http://api.soundcloud.com/resolve.json?url=' + track_page + '&client_id=b45b1aa10f1ac2941910a7f0d10f8e28'
        info = json.loads(self._download_webpage(resolv_url, full_title, u'Downloading info JSON'))

        track_id = info['id']
        self.report_extraction(full_title)

        streams_url = 'https://api.sndcdn.com/i1/tracks/' + str(track_id) + '/streams?client_id=b45b1aa10f1ac2941910a7f0d10f8e28'
        streams = json.loads(self._download_webpage(streams_url, full_title,
                                                    u'Downloading stream definitions',
                                                    u'unable to download stream definitions'))

        media_url = streams['http_mp3_128_url']
        upload_date = unified_strdate(info['created_at'])

        result = {
            'id':          info['id'],
            'url':         media_url,
            'uploader':    info['user']['username'],
            'upload_date': upload_date,
            'title':       info['title'],
            'ext':         u'mp3',
            'description': info['description'],
        }
        return [result]
d77c3dfd 1095
class SoundcloudSetIE(InfoExtractor):
    """Information extractor for soundcloud.com sets

       The set page URL is resolved through the API's resolve.json
       endpoint; for every track of the resolved set the stream
       definitions are fetched to obtain the 128 kbit/s MP3 URL.
    """

    _VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)'
    IE_NAME = u'soundcloud:set'

    def report_resolve(self, video_id):
        """Report that the id is being resolved."""
        self.to_screen(u'%s: Resolving id' % video_id)

    def _real_extract(self, url):
        """Return one info dictionary per track of the set.

        Returns None after reporting the errors when the resolve answer
        contains an 'errors' entry.
        """
        match = re.match(self._VALID_URL, url)
        if match is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        # Both the uploader and the slug of the set title are in the URL
        uploader, slug_title = match.group(1), match.group(2)
        full_title = '%s/sets/%s' % (uploader, slug_title)

        self.report_resolve(full_title)

        set_page = 'http://soundcloud.com/%s/sets/%s' % (uploader, slug_title)
        resolv_url = 'http://api.soundcloud.com/resolve.json?url=' + set_page + '&client_id=b45b1aa10f1ac2941910a7f0d10f8e28'
        info = json.loads(self._download_webpage(resolv_url, full_title))

        if 'errors' in info:
            for failure in info['errors']:
                self._downloader.report_error(u'unable to download video webpage: %s' % compat_str(failure['error_message']))
            return

        self.report_extraction(full_title)

        videos = []
        for track in info['tracks']:
            track_id = track['id']

            streams_url = 'https://api.sndcdn.com/i1/tracks/' + str(track_id) + '/streams?client_id=b45b1aa10f1ac2941910a7f0d10f8e28'
            stream_json = self._download_webpage(streams_url, track_id, u'Downloading track info JSON')

            self.report_extraction(track_id)
            media_url = json.loads(stream_json)['http_mp3_128_url']

            entry = {
                'id':          track_id,
                'url':         media_url,
                'uploader':    track['user']['username'],
                'upload_date': unified_strdate(track['created_at']),
                'title':       track['title'],
                'ext':         u'mp3',
                'description': track['description'],
            }
            videos.append(entry)
        return videos
1158
d77c3dfd
FV
1159
class InfoQIE(InfoExtractor):
    """Information extractor for infoq.com"""
    _VALID_URL = r'^(?:https?://)?(?:www\.)?infoq\.com/[^/]+/[^/]+$'

    def _real_extract(self, url):
        """Return a one-element list with the info dictionary for url.

        Raises ExtractorError if the URL does not match _VALID_URL, if the
        page has no ``jsclassref`` value, or if the media file name has no
        extension.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        webpage = self._download_webpage(url, video_id=url)
        self.report_extraction(url)

        # Extract video URL: jsclassref holds the base64-encoded, URL-quoted
        # path of the media on the RTMPE server
        mobj = re.search(r"jsclassref ?= ?'([^']*)'", webpage)
        if mobj is None:
            raise ExtractorError(u'Unable to extract video url')
        real_id = compat_urllib_parse.unquote(base64.b64decode(mobj.group(1).encode('ascii')).decode('utf-8'))
        video_url = 'rtmpe://video.infoq.com/cfx/st/' + real_id

        # Extract title
        video_title = self._search_regex(r'contentTitle = "(.*?)";',
            webpage, u'title')

        # Extract description
        video_description = self._html_search_regex(r'<meta name="description" content="(.*)"(?:\s*/)?>',
            webpage, u'description', fatal=False)

        video_filename = video_url.split('/')[-1]
        # Split on the last dot only, so that names containing further dots
        # do not break the unpacking
        video_id, extension = os.path.splitext(video_filename)
        extension = extension[1:]
        if not extension:
            raise ExtractorError(u'Unable to determine video extension')

        info = {
            'id': video_id,
            'url': video_url,
            'uploader': None,
            'upload_date': None,
            'title': video_title,
            'ext': extension, # Extension is always(?) mp4, but seems to be flv
            'thumbnail': None,
            'description': video_description,
        }

        return [info]
d77c3dfd
FV
1202
class MixcloudIE(InfoExtractor):
    """Information extractor for www.mixcloud.com"""

    _WORKING = False # New API, but it seems good http://www.mixcloud.com/developers/documentation/
    _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([\w\d-]+)/([\w\d-]+)'
    IE_NAME = u'mixcloud'

    def report_download_json(self, file_id):
        """Report JSON download."""
        self.to_screen(u'Downloading json')

    def get_urls(self, jsonData, fmt, bitrate='best'):
        """Get urls from 'audio_formats' section in json

        When the entry for fmt is keyed by bitrate, the URLs of the
        requested bitrate are returned (the highest one if bitrate is None,
        'best' or unknown).  Otherwise the entry itself is returned.
        """
        try:
            bitrate_list = jsonData[fmt]
            if bitrate is None or bitrate == 'best' or bitrate not in bitrate_list:
                bitrate = max(bitrate_list) # select highest

            url_list = jsonData[fmt][bitrate]
        except TypeError: # we have no bitrate info.
            url_list = jsonData[fmt]
        return url_list

    def check_urls(self, url_list):
        """Returns 1st active url from list, or None if none can be opened"""
        for url in url_list:
            try:
                response = compat_urllib_request.urlopen(url)
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error):
                continue
            # Only reachability matters; do not leak the connection
            response.close()
            return url

        return None

    def _print_formats(self, formats):
        """Print the available formats, bitrates and file extensions."""
        print('Available formats:')
        for fmt in formats.keys():
            for b in formats[fmt]:
                try:
                    ext = formats[fmt][b][0]
                    print('%s\t%s\t[%s]' % (fmt, b, ext.split('.')[-1]))
                except TypeError: # we have no bitrate info
                    ext = formats[fmt][0]
                    print('%s\t%s\t[%s]' % (fmt, '??', ext.split('.')[-1]))
                    break

    def _real_extract(self, url):
        """Return a one-element list with the info dictionary for url.

        Returns None after printing the formats when 'listformats' is set.
        Raises ExtractorError if the URL does not match _VALID_URL, if the
        API answer cannot be retrieved, if the requested format is not
        offered, or if no media URL can be opened.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        # extract uploader & filename from url
        # NOTE(review): the .decode('utf-8') calls in this method only work
        # on Python 2 byte strings; str has no decode() on Python 3.
        uploader = mobj.group(1).decode('utf-8')
        file_id = uploader + "-" + mobj.group(2).decode('utf-8')

        # construct API request
        api_url = 'http://www.mixcloud.com/api/1/cloudcast/' + '/'.join(url.split('/')[-3:-1]) + '.json'
        # retrieve .json file with links to files
        request = compat_urllib_request.Request(api_url)
        try:
            self.report_download_json(api_url)
            jsonData = compat_urllib_request.urlopen(request).read()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to retrieve file: %s' % compat_str(err))

        # parse JSON
        json_data = json.loads(jsonData)
        player_url = json_data['player_swf_url']
        formats = dict(json_data['audio_formats'])

        req_format = self._downloader.params.get('format', None)

        if self._downloader.params.get('listformats', None):
            self._print_formats(formats)
            return

        # Kept separate from api_url so that an empty format table cannot
        # make the API address pass for a media URL
        file_url = None
        format_param = None
        if req_format is None or req_format == 'best':
            for format_param in formats.keys():
                url_list = self.get_urls(formats, format_param)
                # check urls
                file_url = self.check_urls(url_list)
                if file_url is not None:
                    break # got it!
        else:
            if req_format not in formats:
                raise ExtractorError(u'Format is not available')

            url_list = self.get_urls(formats, req_format)
            file_url = self.check_urls(url_list)
            format_param = req_format

        if file_url is None:
            raise ExtractorError(u'Unable to find a working media URL')

        return [{
            'id': file_id.decode('utf-8'),
            'url': file_url.decode('utf-8'),
            'uploader': uploader.decode('utf-8'),
            'upload_date': None,
            'title': json_data['name'],
            'ext': file_url.split('.')[-1].decode('utf-8'),
            'format': (u'NA' if format_param is None else format_param.decode('utf-8')),
            'thumbnail': json_data['thumbnail_url'],
            'description': json_data['description'],
            'player_url': player_url.decode('utf-8'),
        }]
d77c3dfd
FV
1307
class StanfordOpenClassroomIE(InfoExtractor):
    """Information extractor for Stanford's Open ClassRoom

    Three kinds of URLs are handled: a single video page, a course page
    (every linked video page is extracted) and the root page (every linked
    course page is extracted).
    """

    _VALID_URL = r'^(?:https?://)?openclassroom.stanford.edu(?P<path>/?|(/MainFolder/(?:HomePage|CoursePage|VideoPage)\.php([?]course=(?P<course>[^&]+)(&video=(?P<video>[^&]+))?(&.*)?)?))$'
    IE_NAME = u'stanfordoc'

    def _real_extract(self, url):
        """Return the list of info dictionaries reachable from url.

        Raises ExtractorError if the URL does not match _VALID_URL or if a
        required page or XML document cannot be downloaded or parsed.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        if mobj.group('course') and mobj.group('video'): # A specific video
            course = mobj.group('course')
            video = mobj.group('video')
            info = {
                'id': course + '_' + video,
                'uploader': None,
                'upload_date': None,
            }

            self.report_extraction(info['id'])
            baseUrl = 'http://openclassroom.stanford.edu/MainFolder/courses/' + course + '/videos/'
            xmlUrl = baseUrl + video + '.xml'
            try:
                metaXml = compat_urllib_request.urlopen(xmlUrl).read()
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                raise ExtractorError(u'Unable to download video info XML: %s' % compat_str(err))
            mdoc = xml.etree.ElementTree.fromstring(metaXml)
            try:
                info['title'] = mdoc.findall('./title')[0].text
                info['url'] = baseUrl + mdoc.findall('./videoFile')[0].text
            except IndexError:
                raise ExtractorError(u'Invalid metadata XML file')
            info['ext'] = info['url'].rpartition('.')[2]
            return [info]
        elif mobj.group('course'): # A course page
            course = mobj.group('course')
            info = {
                'id': course,
                'type': 'playlist',
                'uploader': None,
                'upload_date': None,
            }

            coursepage = self._download_webpage(url, info['id'],
                                        note='Downloading course info page',
                                        errnote='Unable to download course info page')

            info['title'] = self._html_search_regex('<h1>([^<]+)</h1>', coursepage, 'title', default=info['id'])

            info['description'] = self._html_search_regex('<description>([^<]+)</description>',
                coursepage, u'description', fatal=False)

            links = orderedSet(re.findall(r'<a href="(VideoPage.php\?[^"]+)">', coursepage))
            info['list'] = [
                {
                    'type': 'reference',
                    'url': 'http://openclassroom.stanford.edu/MainFolder/' + unescapeHTML(vpage),
                }
                    for vpage in links]
            results = []
            for entry in info['list']:
                assert entry['type'] == 'reference'
                results += self.extract(entry['url'])
            return results
        else: # Root page
            info = {
                'id': 'Stanford OpenClassroom',
                'type': 'playlist',
                'uploader': None,
                'upload_date': None,
            }

            rootURL = 'http://openclassroom.stanford.edu/MainFolder/HomePage.php'
            # Downloaded the same way as the course page above.  A raw
            # urlopen().read() yields bytes on Python 3, which cannot be
            # searched with the str pattern below.
            rootpage = self._download_webpage(rootURL, info['id'],
                                        errnote='Unable to download course info page')

            info['title'] = info['id']

            links = orderedSet(re.findall(r'<a href="(CoursePage.php\?[^"]+)">', rootpage))
            info['list'] = [
                {
                    'type': 'reference',
                    'url': 'http://openclassroom.stanford.edu/MainFolder/' + unescapeHTML(cpage),
                }
                    for cpage in links]

            results = []
            for entry in info['list']:
                assert entry['type'] == 'reference'
                results += self.extract(entry['url'])
            return results
d77c3dfd
FV
1403
class MTVIE(InfoExtractor):
    """Information extractor for MTV.com"""

    _VALID_URL = r'^(?P<proto>https?://)?(?:www\.)?mtv\.com/videos/[^/]+/(?P<videoid>[0-9]+)/[^/]+$'
    IE_NAME = u'mtv'

    def _real_extract(self, url):
        """Return a one-element list with the info dictionary for url.

        Raises ExtractorError if the URL does not match _VALID_URL, if the
        video metadata cannot be downloaded, or if it holds no usable
        rendition.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        if not mobj.group('proto'):
            url = 'http://' + url
        video_id = mobj.group('videoid')

        webpage = self._download_webpage(url, video_id)

        video_title = self._html_search_regex(r'<meta name="mtv_an" content="([^"]+)"/>',
            webpage, u'title')

        # Both values are concatenated into the mediaGen URL below, so they
        # are required rather than optional.
        mtvn_uri = self._html_search_regex(r'<meta name="mtvn_uri" content="([^"]+)"/>',
            webpage, u'mtvn_uri')

        content_id = self._search_regex(r'MTVN.Player.defaultPlaylistId = ([0-9]+);',
            webpage, u'content id')

        videogen_url = 'http://www.mtv.com/player/includes/mediaGen.jhtml?uri=' + mtvn_uri + '&id=' + content_id + '&vid=' + video_id + '&ref=www.mtvn.com&viewUri=' + mtvn_uri
        self.report_extraction(video_id)
        request = compat_urllib_request.Request(videogen_url)
        try:
            metadataXml = compat_urllib_request.urlopen(request).read()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to download video metadata: %s' % compat_str(err))

        mdoc = xml.etree.ElementTree.fromstring(metadataXml)
        renditions = mdoc.findall('.//rendition')
        if not renditions:
            raise ExtractorError(u'Unable to find any rendition in the video metadata')

        # For now, always pick the last rendition, taken to be the highest quality.
        rendition = renditions[-1]

        try:
            _,_,ext = rendition.attrib['type'].partition('/')
            format = ext + '-' + rendition.attrib['width'] + 'x' + rendition.attrib['height'] + '_' + rendition.attrib['bitrate']
            video_url = rendition.find('./src').text
        except (KeyError, AttributeError):
            # AttributeError: the rendition has no <src> child
            raise ExtractorError('Invalid rendition field.')

        info = {
            'id': video_id,
            'url': video_url,
            # No uploader is extracted from the page
            'uploader': None,
            'upload_date': None,
            'title': video_title,
            'ext': ext,
            'format': format,
        }

        return [info]
6de7ef9b 1464
302efc19 1465
class YoukuIE(InfoExtractor):
    """Information extractor for v.youku.com

    A video is served as several segments; one info dictionary is returned
    per segment.
    """
    _VALID_URL =  r'(?:http://)?v\.youku\.com/v_show/id_(?P<ID>[A-Za-z0-9]+)\.html'

    def _gen_sid(self):
        """Return a session id built from the time in ms and two random numbers."""
        nowTime = int(time.time() * 1000)
        random1 = random.randint(1000,1998)
        random2 = random.randint(1000,9999)

        return "%d%d%d" %(nowTime,random1,random2)

    def _get_file_ID_mix_string(self, seed):
        """Return the alphabet shuffled deterministically from seed, as a list."""
        mixed = []
        source = list("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/\\:._-1234567890")
        seed = float(seed)
        # One character is moved from source to mixed on every round
        for _ in range(len(source)):
            seed = (seed * 211 + 30031) % 65536
            index = int(math.floor(seed / 65536 * len(source)))
            mixed.append(source.pop(index))
        return mixed

    def _get_file_id(self, fileId, seed):
        """Decode a '*'-separated list of indexes into the shuffled alphabet."""
        mixed = self._get_file_ID_mix_string(seed)
        return ''.join(mixed[int(ch)] for ch in fileId.split('*') if ch)

    def _real_extract(self, url):
        """Return one info dictionary per segment of the video at url.

        Raises ExtractorError if the URL does not match _VALID_URL or if
        the play list information lacks the expected fields.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = mobj.group('ID')

        info_url = 'http://v.youku.com/player/getPlayList/VideoIDS/' + video_id

        jsondata = self._download_webpage(info_url, video_id)

        self.report_extraction(video_id)
        try:
            config = json.loads(jsondata)

            video_title =  config['data'][0]['title']
            seed = config['data'][0]['seed']

            format = self._downloader.params.get('format', None)
            supported_format = list(config['data'][0]['streamfileids'].keys())

            if format is None or format == 'best':
                format = 'hd2' if 'hd2' in supported_format else 'flv'
                ext = u'flv'
            elif format == 'worst':
                format = 'mp4'
                ext = u'mp4'
            else:
                format = 'flv'
                ext = u'flv'

            fileid = config['data'][0]['streamfileids'][format]
            keys = [s['k'] for s in config['data'][0]['segs'][format]]
        except (UnicodeDecodeError, ValueError, KeyError, IndexError, TypeError):
            # IndexError/TypeError: 'data' is empty or not of the expected shape
            raise ExtractorError(u'Unable to extract info section')

        files_info=[]
        sid = self._gen_sid()
        fileid = self._get_file_id(fileid, seed)

        # fileid[8:10] holds the segment number as two hexadecimal digits;
        # it is replaced with the index of each segment in turn
        for index, key in enumerate(keys):

            temp_fileid = '%s%02X%s' % (fileid[0:8], index, fileid[10:])
            download_url = 'http://f.youku.com/player/getFlvPath/sid/%s_%02X/st/flv/fileid/%s?k=%s' % (sid, index, temp_fileid, key)

            info = {
                'id': '%s_part%02d' % (video_id, index),
                'url': download_url,
                'uploader': None,
                'upload_date': None,
                'title': video_title,
                'ext': ext,
            }
            files_info.append(info)

        return files_info
5dc846fa
FV
1558
1559
class XNXXIE(InfoExtractor):
    """Information extractor for xnxx.com"""

    _VALID_URL = r'^(?:https?://)?video\.xnxx\.com/video([0-9]+)/(.*)'
    IE_NAME = u'xnxx'
    VIDEO_URL_RE = r'flv_url=(.*?)&amp;'
    VIDEO_TITLE_RE = r'<title>(.*?)\s+-\s+XNXX.COM'
    VIDEO_THUMB_RE = r'url_bigthumb=(.*?)&amp;'

    def _real_extract(self, url):
        """Return a one-element list with the info dictionary for url."""
        match = re.match(self._VALID_URL, url)
        if match is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = match.group(1)

        page = self._download_webpage(url, video_id)

        # The media URL is URL-quoted inside the page
        media_url = compat_urllib_parse.unquote(
            self._search_regex(self.VIDEO_URL_RE, page, u'video URL'))

        title = self._html_search_regex(self.VIDEO_TITLE_RE, page, u'title')

        # The thumbnail is optional
        thumbnail = self._search_regex(self.VIDEO_THUMB_RE, page,
                                       u'thumbnail', fatal=False)

        info = {
            'id': video_id,
            'url': media_url,
            'uploader': None,
            'upload_date': None,
            'title': title,
            'ext': 'flv',
            'thumbnail': thumbnail,
            'description': None,
        }
        return [info]
fd873c69
FV
1598
1599
class GooglePlusIE(InfoExtractor):
    """Information extractor for plus.google.com."""

    _VALID_URL = r'(?:https://)?plus\.google\.com/(?:[^/]+/)*?posts/(\w+)'
    IE_NAME = u'plus.google'

    def _real_extract(self, url):
        """Return a one-element list with the info dictionary for url.

        Raises ExtractorError if the URL does not match _VALID_URL or if no
        video link is found on the video page.
        """
        # Extract id from URL
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        post_url = mobj.group(0)
        video_id = mobj.group(1)

        video_extension = 'flv'

        # Step 1, Retrieve post webpage to extract further information
        webpage = self._download_webpage(post_url, video_id, u'Downloading entry webpage')

        self.report_extraction(video_id)

        # Extract update date
        upload_date = self._html_search_regex('title="Timestamp">(.*?)</a>',
            webpage, u'upload date', fatal=False)
        if upload_date:
            # Convert timestring to a format suitable for filename
            try:
                upload_date = datetime.datetime.strptime(upload_date, "%Y-%m-%d")
                upload_date = upload_date.strftime('%Y%m%d')
            except ValueError:
                # The date is optional; an unexpected format must not abort
                # the extraction
                upload_date = None

        # Extract uploader
        uploader = self._html_search_regex(r'rel\="author".*?>(.*?)</a>',
            webpage, u'uploader', fatal=False)

        # Extract title
        # Get the first line for title
        video_title = self._html_search_regex(r'<meta name\=\"Description\" content\=\"(.*?)[\n<"]',
            webpage, 'title', default=u'NA')

        # Step 2, Stimulate clicking the image box to launch video
        video_page = self._search_regex(r'"(https\://plus\.google\.com/photos/.*?)",,"image/jpeg","video"\]',
            webpage, u'video page URL')
        webpage = self._download_webpage(video_page, video_id, u'Downloading video page')

        # Extract video links of all sizes on the video page
        pattern = r'\d+,\d+,(\d+),"(http\://redirector\.googlevideo\.com.*?)"'
        mobj = re.findall(pattern, webpage)
        if len(mobj) == 0:
            raise ExtractorError(u'Unable to extract video links')

        # Sort by resolution. The resolution is compared as a number:
        # compared as strings, "1080" would sort before "720".
        links = sorted(mobj, key=lambda link: (int(link[0]), link[1]))

        # The last entry has the highest resolution; only its URL is needed
        video_url = links[-1][-1]
        # Treat escaped \u0026 style hex
        try:
            video_url = video_url.decode("unicode_escape")
        except AttributeError: # Python 3
            video_url = bytes(video_url, 'ascii').decode('unicode-escape')

        return [{
            'id':       video_id,
            'url':      video_url,
            'uploader': uploader,
            'upload_date':  upload_date,
            'title':    video_title,
            'ext':      video_extension,
        }]
4cc3d074
PH
1673
class NBAIE(InfoExtractor):
    """Information extractor for videos on nba.com."""

    _VALID_URL = r'^(?:https?://)?(?:watch\.|www\.)?nba\.com/(?:nba/)?video(/[^?]*?)(?:/index\.html)?(?:\?.*)?$'
    IE_NAME = u'nba'

    def _real_extract(self, url):
        """Return a one-element list holding the info dict for url."""
        url_match = re.match(self._VALID_URL, url)
        if url_match is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        # The captured path (leading slash included) is reused verbatim
        # when building the CDN location of the media file.
        video_path = url_match.group(1)
        page = self._download_webpage(url, video_path)

        media_url = u'http://ht-mobile.cdn.turner.com/nba/big' + video_path + '_nba_1280x720.mp4'

        # The last path component serves as the id and as the fallback title.
        short_id = video_path.rpartition('/')[2]
        og_title = self._html_search_regex(
            r'<meta property="og:title" content="(.*?)"',
            page, 'title', default=short_id)
        clean_title = og_title.replace('NBA.com: ', '')

        # The upload date is not present in the HTML that is returned to us,
        # so no attempt is made to extract it.
        summary = self._html_search_regex(
            r'<meta name="description" (?:content|value)="(.*?)" />',
            page, 'description', fatal=False)

        return [{
            'id': short_id,
            'url': media_url,
            'ext': 'mp4',
            'title': clean_title,
            'description': summary,
        }]
0b40544f
DV
1707
class JustinTVIE(InfoExtractor):
    """Information extractor for justin.tv and twitch.tv"""
    # TODO: One broadcast may be split into multiple videos. The key
    # 'broadcast_id' is the same for all parts, and 'broadcast_part'
    # starts at 1 and increases. Can we treat all parts as one video?

    _VALID_URL = r"""(?x)^(?:http://)?(?:www\.)?(?:twitch|justin)\.tv/
        (?:
            (?P<channelid>[^/]+)|
            (?:(?:[^/]+)/b/(?P<videoid>[^/]+))|
            (?:(?:[^/]+)/c/(?P<chapterid>[^/]+))
        )
        /?(?:\#.*)?$
        """
    _JUSTIN_PAGE_LIMIT = 100
    IE_NAME = u'justin.tv'

    def report_download_page(self, channel, offset):
        """Announce which slice of the archive listing is being fetched."""
        upper = offset + self._JUSTIN_PAGE_LIMIT
        self.to_screen(u'%s: Downloading video information from %d to %d' %
                       (channel, offset, upper))

    def _parse_page(self, url, video_id):
        """Download and decode one API page.

        Returns a tuple (count, entries): count is the number of items in
        the response, entries the info dicts of the items that actually
        carry a video URL.
        """
        raw_json = self._download_webpage(
            url, video_id,
            u'Downloading video info JSON',
            u'unable to download video info JSON')

        clips = json.loads(raw_json)
        if not isinstance(clips, list):
            # A non-list answer is treated as an API error object.
            reason = clips.get('error', 'unknown error')
            raise ExtractorError(u'Justin.tv API: %s' % reason)

        entries = []
        for clip in clips:
            file_url = clip['video_file_url']
            if not file_url:
                continue
            file_ext = os.path.splitext(file_url)[1][1:]
            # Keep only the date part and drop its dashes
            day = re.sub('-', '', clip['start_time'][:10])
            owner_id = clip.get('user_id', clip.get('channel_id'))
            clip_id = clip['id']
            clip_title = clip.get('title', clip_id)
            entries.append({
                'id': clip_id,
                'url': file_url,
                'title': clip_title,
                'uploader': clip.get('channel_name', owner_id),
                'uploader_id': owner_id,
                'upload_date': day,
                'ext': file_ext,
            })
        return (len(clips), entries)

    def _extract_chapter(self, url, chapter_id, api_base):
        """Return a one-element list describing the chapter at url."""
        page = self._download_webpage(url, chapter_id)
        archive_match = re.search(r'PP\.archive_id = "([0-9]+)";', page)
        if not archive_match:
            raise ExtractorError(u'Cannot find archive of a chapter')
        wanted_archive = archive_match.group(1)

        xml_api = api_base + '/broadcast/by_chapter/%s.xml' % chapter_id
        xml_text = self._download_webpage(
            xml_api, chapter_id,
            note=u'Downloading chapter information',
            errnote=u'Chapter information download failed')
        tree = xml.etree.ElementTree.fromstring(xml_text)

        # Pick the <archive> element whose id was announced on the page
        archive = None
        for candidate in tree.findall('.//archive'):
            if wanted_archive == candidate.find('./id').text:
                archive = candidate
                break
        if archive is None:
            raise ExtractorError(u'Could not find chapter in chapter information')

        media_url = archive.find('./video_file_url').text
        media_ext = media_url.rpartition('.')[2] or u'flv'

        meta_api = u'https://api.twitch.tv/kraken/videos/c' + chapter_id
        meta_json = self._download_webpage(
            meta_api, u'c' + chapter_id,
            note='Downloading chapter metadata',
            errnote='Download of chapter metadata failed')
        meta = json.loads(meta_json)

        start = int(tree.find('.//bracket_start').text)
        end = int(tree.find('.//bracket_end').text)

        # TODO determine start (and probably fix up file)
        # youtube-dl -v http://www.twitch.tv/firmbelief/c/1757457
        # video_url += u'?start=' + TODO:start_timestamp
        # bracket_start is 13290, but we want 51670615
        self._downloader.report_warning(
            u'Chapter detected, but we can just download the whole file. '
            u'Chapter starts at %s and ends at %s' % (formatSeconds(start), formatSeconds(end)))

        return [{
            'id': u'c' + chapter_id,
            'url': media_url,
            'ext': media_ext,
            'title': meta['title'],
            'thumbnail': meta['preview'],
            'description': meta['description'],
            'uploader': meta['channel']['display_name'],
            'uploader_id': meta['channel']['name'],
        }]

    def _real_extract(self, url):
        """Dispatch on the URL kind: channel archive, chapter or single broadcast."""
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'invalid URL: %s' % url)

        api_base = 'http://api.justin.tv'

        channel_id = mobj.group('channelid')
        chapter_id = mobj.group('chapterid')
        if not channel_id and chapter_id:
            return self._extract_chapter(url, chapter_id, api_base)

        # Only channel archives are listed page by page
        paged = bool(channel_id)
        if paged:
            video_id = channel_id
            api = api_base + '/channel/archives/%s.json' % video_id
        else:
            video_id = mobj.group('videoid')
            api = api_base + '/broadcast/by_archive/%s.json' % video_id

        self.report_extraction(video_id)

        collected = []
        page_size = self._JUSTIN_PAGE_LIMIT
        offset = 0
        while True:
            if paged:
                self.report_download_page(video_id, offset)
            page_url = api + ('?offset=%d&limit=%d' % (offset, page_size))
            item_count, page_entries = self._parse_page(page_url, video_id)
            collected.extend(page_entries)
            # A short page means the listing is exhausted
            if not paged or item_count != page_size:
                return collected
            offset += page_size
21a9c6aa
PH
1840
class FunnyOrDieIE(InfoExtractor):
    """Information extractor for funnyordie.com video pages."""

    _VALID_URL = r'^(?:https?://)?(?:www\.)?funnyordie\.com/videos/(?P<id>[0-9a-f]+)/.*$'

    def _real_extract(self, url):
        """Return a one-element list holding the info dict for url."""
        url_match = re.match(self._VALID_URL, url)
        if url_match is None:
            raise ExtractorError(u'invalid URL: %s' % url)

        video_id = url_match.group('id')
        page = self._download_webpage(url, video_id)

        # The regex takes the src of the second <source> inside <video>
        media_url = self._html_search_regex(
            r'<video[^>]*>\s*<source[^>]*>\s*<source src="(?P<url>[^"]+)"',
            page, u'video URL', flags=re.DOTALL)

        # Two candidate patterns are passed: the page heading and <title>
        title_patterns = (
            r"<h1 class='player_page_h1'.*?>(?P<title>.*?)</h1>",
            r'<title>(?P<title>[^<]+?)</title>',
        )
        page_title = self._html_search_regex(
            title_patterns, page, 'title', flags=re.DOTALL)

        summary = self._html_search_regex(
            r'<meta property="og:description" content="(?P<desc>.*?)"',
            page, u'description', fatal=False, flags=re.DOTALL)

        return [{
            'id': video_id,
            'url': media_url,
            'ext': 'mp4',
            'title': page_title,
            'description': summary,
        }]
d0d4f277 1869
class SteamIE(InfoExtractor):
    """Information extractor for the trailers of a game on the Steam store."""

    # Verbose pattern: it must always be compiled with re.VERBOSE.
    _VALID_URL = r"""http://store\.steampowered\.com/
                (agecheck/)?
                (?P<urltype>video|app)/ #If the page is only for videos or for a game
                (?P<gameID>\d+)/?
                (?P<videoID>\d*)(?P<extra>\??) #For urltype == video we sometimes get the videoID
                """
    _VIDEO_PAGE_TEMPLATE = 'http://store.steampowered.com/video/%s/'
    _AGECHECK_TEMPLATE = 'http://store.steampowered.com/agecheck/video/%s/?snr=1_agecheck_agecheck__age-gate&ageDay=1&ageMonth=January&ageYear=1970'

    @classmethod
    def suitable(cls, url):
        """Receives a URL and returns True if suitable for this IE."""
        matched = re.match(cls._VALID_URL, url, re.VERBOSE)
        return matched is not None

    def _real_extract(self, url):
        """Return a one-element list holding a playlist of the game's videos."""
        game_id = re.match(self._VALID_URL, url, re.VERBOSE).group('gameID')

        page = self._download_webpage(self._VIDEO_PAGE_TEMPLATE % game_id, game_id)

        age_gate = re.search('<h2>Please enter your birth date to continue:</h2>', page)
        if age_gate is not None:
            # Fetch the page again through the age-check URL, which carries a birth date
            self.report_age_confirmation()
            page = self._download_webpage(self._AGECHECK_TEMPLATE % game_id, game_id)

        self.report_extraction(game_id)
        game_title = self._html_search_regex(
            r'<h2 class="pageheader">(.*?)</h2>', page, 'game title')

        movie_re = r"'movie_(?P<videoID>\d+)': \{\s*FILENAME: \"(?P<videoURL>[\w:/\.\?=]+)\"(,\s*MOVIE_NAME: \"(?P<videoName>[\w:/\.\?=\+-]+)\")?\s*\},"
        name_re = r'<span class="title">(?P<videoName>.+?)</span>'
        thumb_re = r'<img class="movie_thumb" src="(?P<thumbnail>.+?)">'

        # The three match streams are paired up positionally
        triples = zip(re.finditer(movie_re, page),
                      re.finditer(name_re, page),
                      re.finditer(thumb_re, page))

        entries = []
        for movie, name, thumb in triples:
            movie_id = movie.group('videoID')
            movie_url = movie.group('videoURL')
            if not movie_url:
                raise ExtractorError(u'Cannot find video url for %s' % movie_id)
            entries.append({
                'id': movie_id,
                'url': movie_url,
                'ext': 'flv',
                'title': unescapeHTML(name.group('videoName')),
                'thumbnail': thumb.group('thumbnail'),
            })
        return [self.playlist_result(entries, game_id, game_title)]
ef0c8d5f 1924
class UstreamIE(InfoExtractor):
    """Information extractor for recorded videos on ustream.tv."""

    _VALID_URL = r'https?://www\.ustream\.tv/recorded/(?P<videoID>\d+)'
    IE_NAME = u'ustream'

    def _real_extract(self, url):
        """Return the info dict (not wrapped in a list) for url."""
        video_id = re.match(self._VALID_URL, url).group('videoID')

        # The media URL is built from the id alone; the page is only
        # downloaded for its metadata.
        media_url = u'http://tcdn.ustream.tv/video/%s' % video_id
        page = self._download_webpage(url, video_id)

        self.report_extraction(video_id)

        title = self._html_search_regex(
            r'data-title="(?P<title>.+)"', page, u'title')
        channel = self._html_search_regex(
            r'data-content-type="channel".*?>(?P<uploader>.*?)</a>',
            page, u'uploader', fatal=False, flags=re.DOTALL)
        image = self._html_search_regex(
            r'<link rel="image_src" href="(?P<thumb>.*?)"',
            page, u'thumbnail', fatal=False)

        return {
            'id': video_id,
            'url': media_url,
            'ext': 'flv',
            'title': title,
            'uploader': channel,
            'thumbnail': image,
        }
4aeae91f 1956
class WorldStarHipHopIE(InfoExtractor):
    """Information extractor for worldstarhiphop.com and worldstarcandy.com."""

    _VALID_URL = r'https?://(?:www|m)\.worldstar(?:candy|hiphop)\.com/videos/video\.php\?v=(?P<id>.*)'
    IE_NAME = u'WorldStarHipHop'

    def _real_extract(self, url):
        """Return a one-element list holding the info dict for url."""
        video_id = re.match(self._VALID_URL, url).group('id')
        page = self._download_webpage(url, video_id)

        media_url = self._search_regex(
            r'so\.addVariable\("file","(.*?)"\)', page, u'video URL')

        # Any occurrence of "mp4" in the URL selects the mp4 extension
        media_ext = 'mp4' if 'mp4' in media_url else 'flv'

        title = self._html_search_regex(
            r"<title>(.*)</title>", page, u'title')

        # Getting thumbnail and if not thumbnail sets correct title for WSHH candy video.
        image = self._html_search_regex(
            r'rel="image_src" href="(.*)" />', page, u'thumbnail', fatal=False)
        if not image:
            candy = re.search(r"""candytitles.*>(.*)</span>""", page)
            if candy is not None:
                title = candy.group(1)

        return [{
            'id': video_id,
            'url': media_url,
            'title': title,
            'thumbnail': image,
            'ext': media_ext,
        }]
1996
ca0a0bbe
PH
class RBMARadioIE(InfoExtractor):
    """Information extractor for shows on rbmaradio.com."""

    _VALID_URL = r'https?://(?:www\.)?rbmaradio\.com/shows/(?P<videoID>[^/]+)$'

    def _real_extract(self, url):
        """Return a one-element list holding the info dict for url."""
        show_id = re.match(self._VALID_URL, url).group('videoID')
        page = self._download_webpage(url, show_id)

        # The show description is embedded in the page as a JS assignment
        raw_show = self._search_regex(
            r'window\.gon.*?gon\.show=(.+?);$',
            page, u'json data', flags=re.MULTILINE)
        try:
            show = json.loads(raw_show)
        except ValueError as err:
            raise ExtractorError(u'Invalid JSON: ' + str(err))

        stream_url = show['akamai_url'] + '&cbr=256'
        # The extension is taken from the URL path only, ignoring the query
        stream_path = compat_urllib_parse_urlparse(stream_url).path
        stream_ext = stream_path.rpartition('.')[2]

        return [{
            'id': show_id,
            'url': stream_url,
            'ext': stream_ext,
            'title': show['title'],
            'description': show.get('teaser_text'),
            'location': show.get('country_of_origin'),
            'uploader': show.get('host', {}).get('name'),
            'uploader_id': show.get('host', {}).get('slug'),
            'thumbnail': show.get('image', {}).get('large_url_2x'),
            'duration': show.get('duration'),
        }]
4aeae91f 2030
991ba7fa
JC
2031
class YouPornIE(InfoExtractor):
    """Information extractor for youporn.com."""
    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+)'

    def _print_formats(self, formats):
        """Print all available formats"""
        print(u'Available formats:')
        print(u'ext\t\tformat')
        print(u'---------------------------------')
        for fmt in formats:
            print(u'%s\t\t%s' % (fmt['ext'], fmt['format']))

    def _specific(self, req_format, formats):
        """Return the first entry of formats whose 'format' is req_format, or None."""
        for fmt in formats:
            if fmt['format'] == req_format:
                return fmt
        return None

    def _real_extract(self, url):
        """Extract the downloadable formats of the video at url.

        Returns a list of info dicts chosen according to the 'format'
        downloader parameter, or None when only the format listing was
        requested ('listformats').

        Raises ExtractorError if the URL is invalid, the embedded JSON is
        malformed or incomplete, no download link is found, or the
        requested format does not exist.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = mobj.group('videoid')

        # The page is requested with the age-verification cookie set
        req = compat_urllib_request.Request(url)
        req.add_header('Cookie', 'age_verified=1')
        webpage = self._download_webpage(req, video_id)

        # Get JSON parameters
        json_params = self._search_regex(r'var currentVideo = new Video\((.*)\);', webpage, u'JSON parameters')
        try:
            params = json.loads(json_params)
        except ValueError:
            raise ExtractorError(u'Invalid JSON')

        self.report_extraction(video_id)
        try:
            video_title = params['title']
            upload_date = unified_strdate(params['release_date_f'])
            video_description = params['description']
            video_uploader = params['submitted_by']
            thumbnail = params['thumbnails'][0]['image']
        except (KeyError, IndexError) as err:
            # Report the missing field as an extractor error
            raise ExtractorError(u'Missing JSON parameter: ' + str(err))

        # Get all of the formats available
        DOWNLOAD_LIST_RE = r'(?s)<ul class="downloadList">(?P<download_list>.*?)</ul>'
        download_list_html = self._search_regex(DOWNLOAD_LIST_RE,
            webpage, u'download list').strip()

        # Get all of the links from the page
        LINK_RE = r'(?s)<a href="(?P<url>[^"]+)">'
        links = re.findall(LINK_RE, download_list_html)
        if not links:
            raise ExtractorError(u'ERROR: no known formats available for video')

        self.to_screen(u'Links found: %d' % len(links))

        formats = []
        for link in links:
            # A link looks like this:
            # http://cdn1.download.youporn.phncdn.com/201210/31/8004515/480p_370k_8004515/YouPorn%20-%20Nubile%20Films%20The%20Pillow%20Fight.mp4?nvb=20121113051249&nva=20121114051249&ir=1200&sr=1200&hash=014b882080310e95fb6a0
            # A path looks like this:
            # /201210/31/8004515/480p_370k_8004515/YouPorn%20-%20Nubile%20Films%20The%20Pillow%20Fight.mp4
            video_url = unescapeHTML(link)
            path = compat_urllib_parse_urlparse(video_url).path
            extension = os.path.splitext(path)[1][1:]
            # "480p_370k_8004515" -> "480p-370k" (size and bitrate)
            format_id = "-".join(path.split('/')[4].split('_')[:2])

            formats.append({
                'id': video_id,
                'url': video_url,
                'uploader': video_uploader,
                'upload_date': upload_date,
                'title': video_title,
                'ext': extension,
                'format': format_id,
                'thumbnail': thumbnail,
                'description': video_description,
            })

        if self._downloader.params.get('listformats', None):
            self._print_formats(formats)
            return

        req_format = self._downloader.params.get('format', None)
        self.to_screen(u'Format: %s' % req_format)

        if req_format is None or req_format == 'best':
            return [formats[0]]
        elif req_format == 'worst':
            return [formats[-1]]
        elif req_format in ('-1', 'all'):
            return formats
        else:
            selected = self._specific(req_format, formats)
            if selected is None:
                raise ExtractorError(u'Requested format not available')
            return [selected]
2136
6324fd1d 2137
991ba7fa
JC
2138
class PornotubeIE(InfoExtractor):
    """Information extractor for pornotube.com."""
    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?pornotube\.com(/c/(?P<channel>[0-9]+))?(/m/(?P<videoid>[0-9]+))(/(?P<title>.+))$'

    def _real_extract(self, url):
        """Return a one-element list holding the info dict for url."""
        url_match = re.match(self._VALID_URL, url)
        if url_match is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        # Both the id and the title come straight from the URL
        video_id = url_match.group('videoid')
        url_title = url_match.group('title')

        page = self._download_webpage(url, video_id)

        # Locate the media URL and undo its percent-encoding
        quoted_url = self._search_regex(
            r'url: "(?P<url>http://video[0-9].pornotube.com/.+\.flv)",',
            page, u'video url')
        media_url = compat_urllib_parse.unquote(quoted_url)

        # The upload date is optional; normalise it only when present
        added_on = self._html_search_regex(
            r'<div class="video_added_by">Added (?P<date>[0-9\/]+) by',
            page, u'upload date', fatal=False)
        if added_on:
            added_on = unified_strdate(added_on)

        return [{
            'id': video_id,
            'url': media_url,
            'uploader': None,
            'upload_date': added_on,
            'title': url_title,
            'ext': 'flv',
            'format': 'flv',
        }]
2173
991ba7fa
JC
class YouJizzIE(InfoExtractor):
    """Information extractor for youjizz.com."""
    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?youjizz\.com/videos/(?P<videoid>[^.]+).html$'

    def _real_extract(self, url):
        """Return a one-element list holding the info dict for url."""
        url_match = re.match(self._VALID_URL, url)
        if url_match is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        page_id = url_match.group('videoid')
        page = self._download_webpage(url, page_id)

        title = self._html_search_regex(
            r'<title>(?P<title>.*)</title>', page, u'title').strip()

        # The actual player lives on a separate embed page
        embed = re.search(r'https?://www.youjizz.com/videos/embed/(?P<videoid>[0-9]+)', page)
        if embed is None:
            raise ExtractorError(u'ERROR: unable to extract embed page')

        embed_url = embed.group(0).strip()
        # From here on the numeric id of the embed page is the video id
        video_id = embed.group('videoid')

        embed_page = self._download_webpage(embed_url, video_id)

        media_url = self._search_regex(
            r'so.addVariable\("file",encodeURIComponent\("(?P<source>[^"]+)"\)\);',
            embed_page, u'video URL')

        return [{
            'id': video_id,
            'url': media_url,
            'title': title,
            'ext': 'flv',
            'format': 'flv',
            'player_url': embed_url,
        }]
2214
ccf65f9d
PH
class EightTracksIE(InfoExtractor):
    """Information extractor for mixes on 8tracks.com."""

    IE_NAME = '8tracks'
    _VALID_URL = r'https?://8tracks.com/(?P<user>[^/]+)/(?P<id>[^/#]+)(?:#.*)?$'

    def _real_extract(self, url):
        """Return one info dict per track of the mix at url."""
        url_match = re.match(self._VALID_URL, url)
        if url_match is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        playlist_id = url_match.group('id')

        page = self._download_webpage(url, playlist_id)

        mix_json = self._search_regex(
            r"PAGE.mix = (.*?);\n", page, u'trax information', flags=re.DOTALL)
        mix = json.loads(mix_json)

        # A random number is used as the play-session token in the API URLs
        session = str(random.randint(0, 1000000000))
        mix_id = mix['id']
        total = mix['tracks_count']

        # Tracks are fetched one request at a time, starting with "play"
        api_url = 'http://8tracks.com/sets/%s/play?player=sm&mix_id=%s&format=jsonh' % (session, mix_id)
        tracks = []
        position = 1
        while True:
            reply_json = self._download_webpage(
                api_url, playlist_id,
                note=u'Downloading song information %s/%s' % (str(position), total),
                errnote=u'Failed to download song information')
            reply = json.loads(reply_json)
            track = reply[u'set']['track']
            tracks.append({
                'id': track['id'],
                'url': track['track_file_stream_url'],
                'title': track['performer'] + u' - ' + track['name'],
                'raw_title': track['name'],
                'uploader_id': mix['user']['login'],
                'ext': 'm4a',
            })
            if reply['set']['at_last_track']:
                return tracks
            api_url = 'http://8tracks.com/sets/%s/next?player=sm&mix_id=%s&format=jsonh&track_id=%s' % (session, mix_id, track['id'])
            position += 1
991ba7fa 2255
da06e2da
OK
class KeekIE(InfoExtractor):
    """Information extractor for keek.com."""

    _VALID_URL = r'http://(?:www\.)?keek\.com/(?:!|\w+/keeks/)(?P<videoID>\w+)'
    IE_NAME = u'keek'

    def _real_extract(self, url):
        """Return a one-element list holding the info dict for url."""
        video_id = re.match(self._VALID_URL, url).group('videoID')

        # Media and thumbnail URLs are built from the id alone
        media_url = u'http://cdn.keek.com/keek/video/%s' % video_id
        image_url = u'http://cdn.keek.com/keek/thumbnail/%s/w100/h75' % video_id

        page = self._download_webpage(url, video_id)

        title = self._html_search_regex(
            r'<meta property="og:title" content="(?P<title>.*?)"',
            page, u'title')
        author = self._html_search_regex(
            r'<div class="user-name-and-bio">[\S\s]+?<h2>(?P<uploader>.+?)</h2>',
            page, u'uploader', fatal=False)

        return [{
            'id': video_id,
            'url': media_url,
            'ext': 'mp4',
            'title': title,
            'thumbnail': image_url,
            'uploader': author,
        }]
2283
class TEDIE(InfoExtractor):
    """Information extractor for talks and playlists on ted.com."""

    # Verbose pattern: it must always be compiled with re.VERBOSE.
    _VALID_URL=r'''http://www\.ted\.com/
                   (
                        ((?P<type_playlist>playlists)/(?P<playlist_id>\d+)) # We have a playlist
                        |
                        ((?P<type_talk>talks)) # We have a simple talk
                   )
                   (/lang/(.*?))? # The url may contain the language
                   /(?P<name>\w+) # Here goes the name and then ".html"
                   '''

    @classmethod
    def suitable(cls, url):
        """Receives a URL and returns True if suitable for this IE."""
        matched = re.match(cls._VALID_URL, url, re.VERBOSE)
        return matched is not None

    def _real_extract(self, url):
        """Return a one-element list: a talk info dict or a playlist result."""
        url_match = re.match(self._VALID_URL, url, re.VERBOSE)
        if url_match.group('type_talk'):
            return [self._talk_info(url)]

        playlist_id = url_match.group('playlist_id')
        name = url_match.group('name')
        self.to_screen(u'Getting info of playlist %s: "%s"' % (playlist_id,name))
        return [self._playlist_videos_info(url, name, playlist_id)]

    def _playlist_videos_info(self,url,name,playlist_id=0):
        '''Returns the videos of the playlist'''
        item_re = r'''
                     <li\ id="talk_(\d+)"([.\s]*?)data-id="(?P<video_id>\d+)"
                     ([.\s]*?)data-playlist_item_id="(\d+)"
                     ([.\s]*?)data-mediaslug="(?P<mediaSlug>.+?)"
                     '''
        link_re = r'<p\ class="talk-title"><a href="(?P<talk_url>/talks/(.+).html)">(?P<fullname>.+?)</a></p>'

        page = self._download_webpage(url, playlist_id, 'Downloading playlist webpage')
        item_matches = re.finditer(item_re, page, re.VERBOSE)
        link_matches = re.finditer(link_re, page)

        playlist_title = self._html_search_regex(
            r'div class="headline">\s*?<h1>\s*?<span>(.*?)</span>',
            page, 'playlist title')

        # Items and links are paired positionally; only the link is used,
        # but pairing stops at the shorter of the two streams.
        entries = []
        for _item, link in zip(item_matches, link_matches):
            talk_url = 'http://www.ted.com%s' % link.group('talk_url')
            entries.append(self.url_result(talk_url, 'TED'))
        return self.playlist_result(entries, playlist_id = playlist_id, playlist_title = playlist_title)

    def _talk_info(self, url, video_id=0):
        """Return the video for the talk in the url"""
        talk_name = re.match(self._VALID_URL, url, re.VERBOSE).group('name')
        page = self._download_webpage(url, video_id, 'Downloading \"%s\" page' % talk_name)
        self.report_extraction(talk_name)

        # If the url includes the language we get the title translated
        title = self._html_search_regex(
            r'<span id="altHeadline" >(?P<title>.*)</span>', page, 'title')

        details_json = self._search_regex(
            r'<script.*?>var talkDetails = ({.*?})</script>', page, 'json data')
        details = json.loads(details_json)

        intro = self._html_search_regex(
            r'<div class="talk-intro">.*?<p.*?>(.*?)</p>',
            page, 'description', flags = re.DOTALL)
        image = self._search_regex(
            r'</span>[\s.]*</div>[\s.]*<img src="(.*?)"', page, 'thumbnail')

        return {
            'id': details['id'],
            # The last entry of htmlStreams is the one that gets downloaded
            'url': details['htmlStreams'][-1]['file'],
            'ext': 'mp4',
            'title': title,
            'thumbnail': image,
            'description': intro,
        }
da06e2da 2358
class MySpassIE(InfoExtractor):
    """Information extractor for myspass.de."""

    _VALID_URL = r'http://www.myspass.de/.*'

    def _real_extract(self, url):
        """Return a one-element list holding the info dict for url.

        Raises ExtractorError if the metadata XML lacks the download URL
        or the title.
        """
        META_DATA_URL_TEMPLATE = 'http://www.myspass.de/myspass/includes/apps/video/getvideometadataxml.php?id=%s'

        # video id is the last path element of the URL
        # usually there is a trailing slash, so also try the second but last
        url_path = compat_urllib_parse_urlparse(url).path
        url_parent_path, video_id = os.path.split(url_path)
        if not video_id:
            _, video_id = os.path.split(url_parent_path)

        # get metadata
        metadata_url = META_DATA_URL_TEMPLATE % video_id
        metadata_text = self._download_webpage(metadata_url, video_id)
        metadata = xml.etree.ElementTree.fromstring(metadata_text.encode('utf-8'))

        def optional_text(tag):
            """Return the text of the child element tag, or None if absent."""
            element = metadata.find(tag)
            return element.text if element is not None else None

        # extract values from metadata
        url_flv_el = metadata.find('url_flv')
        if url_flv_el is None:
            raise ExtractorError(u'Unable to extract download url')
        video_url = url_flv_el.text
        extension = os.path.splitext(video_url)[1][1:]

        title_el = metadata.find('title')
        if title_el is None:
            raise ExtractorError(u'Unable to extract title')
        title = title_el.text

        # Fall back to the file extension when no format id is given
        format_id_el = metadata.find('format_id')
        if format_id_el is None:
            video_format = extension
        else:
            video_format = format_id_el.text

        info = {
            'id': video_id,
            'url': video_url,
            'title': title,
            'ext': extension,
            'format': video_format,
            'thumbnail': optional_text('imagePreview'),
            'description': optional_text('description'),
        }
        return [info]
2412
class SpiegelIE(InfoExtractor):
    """Information extractor for videos on spiegel.de."""

    _VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<videoID>[0-9]+)(?:\.html)?(?:#.*)?$'

    def _real_extract(self, url):
        """Return a one-element list holding the info dict for url."""
        video_id = re.match(self._VALID_URL, url).group('videoID')

        page = self._download_webpage(url, video_id)
        title = self._html_search_regex(
            r'<div class="module-title">(.*?)</div>', page, u'title')

        # The available encodings are described in a per-video XML file
        xml_url = u'http://video2.spiegel.de/flash/' + video_id + u'.xml'
        xml_text = self._download_webpage(
            xml_url, video_id,
            note=u'Downloading XML', errnote=u'Failed to download XML')
        variants = xml.etree.ElementTree.fromstring(xml_text)

        # The last child of the document is the variant that gets used
        chosen = variants[-1]
        file_name = chosen.findall('./filename')[0].text
        length = float(chosen.findall('./duration')[0].text)

        return [{
            'id': video_id,
            'url': 'http://video2.spiegel.de/flash/' + file_name,
            'ext': file_name.rpartition('.')[2],
            'title': title,
            'duration': length,
        }]
2444
class LiveLeakIE(InfoExtractor):
    """Information extractor for liveleak.com."""

    # The scheme is optional; both http and https are accepted.
    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?liveleak\.com/view\?(?:.*?)i=(?P<video_id>[\w_]+)(?:.*)'
    IE_NAME = u'liveleak'

    def _real_extract(self, url):
        """Return a one-element list holding the info dict for url.

        Raises ExtractorError if url does not match _VALID_URL.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        video_id = mobj.group('video_id')

        webpage = self._download_webpage(url, video_id)

        video_url = self._search_regex(r'file: "(.*?)",',
            webpage, u'video URL')

        # Drop the site prefix that the og:title carries
        video_title = self._html_search_regex(r'<meta property="og:title" content="(?P<title>.*?)"',
            webpage, u'title').replace('LiveLeak.com -', '').strip()

        video_description = self._html_search_regex(r'<meta property="og:description" content="(?P<desc>.*?)"',
            webpage, u'description', fatal=False)

        video_uploader = self._html_search_regex(r'By:.*?(\w+)</a>',
            webpage, u'uploader', fatal=False)

        info = {
            'id': video_id,
            'url': video_url,
            'ext': 'mp4',
            'title': video_title,
            'description': video_description,
            'uploader': video_uploader,
        }

        return [info]
2481
f2cd958c 2482
f2cd958c 2483
class TumblrIE(InfoExtractor):
    """Information extractor for video posts on tumblr.com blogs."""

    _VALID_URL = r'http://(?P<blog_name>.*?)\.tumblr\.com/((post)|(video))/(?P<id>\d*)/(.*?)'

    def _real_extract(self, url):
        """Extract the metadata of a single Tumblr video post.

        Raises ExtractorError when *url* does not match ``_VALID_URL`` or
        when no video source can be found in the post page.
        Returns a one-element list containing the info dictionary.
        """
        m_url = re.match(self._VALID_URL, url)
        if m_url is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = m_url.group('id')
        blog = m_url.group('blog_name')

        # Always fetch the /post/ form of the page, whichever form was given.
        url = 'http://%s.tumblr.com/post/%s/' % (blog, video_id)
        webpage = self._download_webpage(url, video_id)

        # The blog name comes straight from the URL, so escape it before
        # embedding it in a pattern (video_id consists of digits only).
        re_video = r'src=\\x22(?P<video_url>http://%s\.tumblr\.com/video_file/%s/(.*?))\\x22 type=\\x22video/(?P<ext>.*?)\\x22' % (re.escape(blog), video_id)
        video = re.search(re_video, webpage)
        if video is None:
            raise ExtractorError(u'Unable to extract video')
        video_url = video.group('video_url')
        ext = video.group('ext')

        # We pick the first poster.  The text between "posters" and the
        # opening bracket must not be captured: a leading capturing group
        # could be handed back in place of the thumbnail group.
        # NOTE(review): assumes _search_regex returns a captured group of the
        # match - confirm against its implementation.
        video_thumbnail = self._search_regex(r'posters.*?\[\\x22(?P<thumb>.*?)\\x22',
            webpage, u'thumbnail', fatal=False)
        if video_thumbnail:
            # Strip the backslashes that appear in the embedded URL.
            video_thumbnail = video_thumbnail.replace('\\', '')

        # The only place where you can get a title, it's not complete,
        # but searching in other places doesn't work for all videos
        video_title = self._html_search_regex(r'<title>(?P<title>.*?)</title>',
            webpage, u'title', flags=re.DOTALL)

        return [{
            'id': video_id,
            'url': video_url,
            'title': video_title,
            'thumbnail': video_thumbnail,
            'ext': ext,
        }]
2517
class BandcampIE(InfoExtractor):
    """Information extractor for freely downloadable bandcamp.com tracks."""

    _VALID_URL = r'http://.*?\.bandcamp\.com/track/(?P<title>.*)'

    def _real_extract(self, url):
        """Resolve the mp3-320 download URL of a free Bandcamp track.

        Raises ExtractorError when the URL is invalid, when the track has no
        free download page, or when any of the expected page fragments
        cannot be located.
        Returns a one-element list containing the info dictionary.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        title = mobj.group('title')
        webpage = self._download_webpage(url, title)

        # We get the link to the free download page
        m_download = re.search(r'freeDownloadPage: "(.*?)"', webpage)
        if m_download is None:
            raise ExtractorError(u'No free songs found')
        download_link = m_download.group(1)

        m_id = re.search(r'var TralbumData = {(.*?)id: (?P<id>\d*?)$',
                         webpage, re.MULTILINE | re.DOTALL)
        if m_id is None:
            raise ExtractorError(u'Unable to extract track id')
        track_id = m_id.group('id')

        download_webpage = self._download_webpage(download_link, track_id,
                                                  'Downloading free downloads page')

        # We get the dictionary of the track from some javascript code
        m_items = re.search(r'items: (.*?),$', download_webpage, re.MULTILINE)
        if m_items is None:
            raise ExtractorError(u'Unable to extract track info')
        info = json.loads(m_items.group(1))[0]

        # We pick mp3-320 for now, until format selection can be easily implemented.
        mp3_info = info[u'downloads'][u'mp3-320']
        # If we try to use this url it says the link has expired
        initial_url = mp3_info[u'url']
        re_url = r'(?P<server>http://(.*?)\.bandcamp\.com)/download/track\?enc=mp3-320&fsig=(?P<fsig>.*?)&id=(?P<id>.*?)&ts=(?P<ts>.*)$'
        m_url = re.match(re_url, initial_url)
        if m_url is None:
            raise ExtractorError(u'Unable to parse download URL')

        # We build the url we will use to get the final track url
        # This url is built in Bandcamp in the script download_bunde_*.js
        request_url = '%s/statdownload/track?enc=mp3-320&fsig=%s&id=%s&ts=%s&.rand=665028774616&.vrs=1' % (
            m_url.group('server'), m_url.group('fsig'), track_id, m_url.group('ts'))
        final_url_webpage = self._download_webpage(request_url, track_id, 'Requesting download url')

        # If we could correctly generate the .rand field the url would be
        # in the "download_url" key
        m_final = re.search(r'"retry_url":"(.*?)"', final_url_webpage)
        if m_final is None:
            raise ExtractorError(u'Unable to extract final URL')
        final_url = m_final.group(1)

        track_info = {
            'id': track_id,
            'title': info[u'title'],
            'ext': 'mp3',
            'url': final_url,
            'thumbnail': info[u'thumb_url'],
            'uploader': info[u'artist'],
        }

        return [track_info]
2563
class RedTubeIE(InfoExtractor):
    """Extractor for videos hosted on redtube.com."""
    _VALID_URL = r'(?:http://)?(?:www\.)?redtube\.com/(?P<id>[0-9]+)'

    def _real_extract(self, url):
        """Scrape the mp4 source and the title from a RedTube video page.

        Raises ExtractorError when *url* does not match ``_VALID_URL``.
        Returns a one-element list containing the info dictionary.
        """
        match = re.match(self._VALID_URL, url)
        if match is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = match.group('id')

        page = self._download_webpage(url, video_id)
        self.report_extraction(video_id)

        media_url = self._html_search_regex(
            r'<source src="(.+?)" type="video/mp4">',
            page, u'video URL')
        title = self._html_search_regex(
            '<h1 class="videoTitle slidePanelMovable">(.+?)</h1>',
            page, u'title')

        result = {
            'id': video_id,
            'url': media_url,
            'ext': 'mp4',
            'title': title,
        }
        return [result]
7f5bd09b 2591
class InaIE(InfoExtractor):
    """Extractor for videos hosted on ina.fr."""
    _VALID_URL = r'(?:http://)?(?:www\.)?ina\.fr/video/(?P<id>I[0-9]+)/.*'

    def _real_extract(self, url):
        """Read the media URL and the title from the video's MRSS document.

        The HTML page itself is never downloaded; only the ``.mrss`` notice
        on player.ina.fr is fetched.
        Returns a one-element list containing the info dictionary.
        """
        video_id = re.match(self._VALID_URL, url).group('id')

        mrss_url = 'http://player.ina.fr/notices/%s.mrss' % video_id
        mrss = self._download_webpage(mrss_url, video_id)

        self.report_extraction(video_id)

        media_url = self._html_search_regex(
            r'<media:player url="(?P<mp4url>http://mp4.ina.fr/[^"]+\.mp4)',
            mrss, u'video URL')
        title = self._search_regex(
            r'<title><!\[CDATA\[(?P<titre>.*?)]]></title>',
            mrss, u'title')

        result = {
            'id': video_id,
            'url': media_url,
            'ext': 'mp4',
            'title': title,
        }
        return [result]
e32b06e9 2618
class HowcastIE(InfoExtractor):
    """Extractor for videos hosted on howcast.com."""
    _VALID_URL = r'(?:https?://)?(?:www\.)?howcast\.com/videos/(?P<id>\d+)'

    def _real_extract(self, url):
        """Scrape the media URL and metadata from a Howcast video page.

        The page is always requested through its
        ``http://www.howcast.com/videos/<id>`` address, regardless of the
        exact URL that was supplied.  Description and thumbnail are looked
        up non-fatally.
        Returns a one-element list containing the info dictionary.
        """
        video_id = re.match(self._VALID_URL, url).group('id')

        page_url = 'http://www.howcast.com/videos/' + video_id
        page = self._download_webpage(page_url, video_id)

        self.report_extraction(video_id)

        media_url = self._search_regex(
            r'\'?file\'?: "(http://mobile-media\.howcast\.com/[0-9]+\.mp4)',
            page, u'video URL')
        title = self._html_search_regex(
            r'<meta content=(?:"([^"]+)"|\'([^\']+)\') property=\'og:title\'',
            page, u'title')
        description = self._html_search_regex(
            r'<meta content=(?:"([^"]+)"|\'([^\']+)\') name=\'description\'',
            page, u'description', fatal=False)
        thumbnail = self._html_search_regex(
            r'<meta content=\'(.+?)\' property=\'og:image\'',
            page, u'thumbnail', fatal=False)

        result = {
            'id': video_id,
            'url': media_url,
            'ext': 'mp4',
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
        }
        return [result]
2652
5b0d3cc0
AB
class VineIE(InfoExtractor):
    """Extractor for videos hosted on vine.co."""
    _VALID_URL = r'(?:https?://)?(?:www\.)?vine\.co/v/(?P<id>\w+)'

    def _real_extract(self, url):
        """Scrape the stream URL and metadata from a Vine video page.

        The page is always requested as ``https://vine.co/v/<id>``.
        Thumbnail and uploader are looked up non-fatally.
        Returns a one-element list containing the info dictionary.
        """
        video_id = re.match(self._VALID_URL, url).group('id')

        page_url = 'https://vine.co/v/' + video_id
        page = self._download_webpage(page_url, video_id)

        self.report_extraction(video_id)

        stream_url = self._html_search_regex(
            r'<meta property="twitter:player:stream" content="(.+?)"',
            page, u'video URL')
        title = self._html_search_regex(
            r'<meta property="og:title" content="(.+?)"',
            page, u'title')
        # The second group of the pattern matches an optional query string
        # that follows the image URL.
        thumbnail = self._html_search_regex(
            r'<meta property="og:image" content="(.+?)(\?.*?)?"',
            page, u'thumbnail', fatal=False)
        uploader = self._html_search_regex(
            r'<div class="user">.*?<h2>(.+?)</h2>',
            page, u'uploader', fatal=False, flags=re.DOTALL)

        result = {
            'id': video_id,
            'url': stream_url,
            'ext': 'mp4',
            'title': title,
            'thumbnail': thumbnail,
            'uploader': uploader,
        }
        return [result]
2686
afef36c9
AB
class FlickrIE(InfoExtractor):
    """Extractor for videos hosted on flickr.com."""
    _VALID_URL = r'(?:https?://)?(?:www\.)?flickr\.com/photos/(?P<uploader_id>[\w\-_@]+)/(?P<id>\d+).*'

    def _real_extract(self, url):
        """Resolve the stream URL of a Flickr video.

        Three documents are downloaded: the photo page (for the secret and
        the Open Graph metadata), a first XML document (for the node id) and
        a playlist XML document (for the stream location).

        Raises ExtractorError when the playlist contains no STREAM entry.
        Returns a one-element list containing the info dictionary.
        """
        url_match = re.match(self._VALID_URL, url)
        video_id = url_match.group('id')
        uploader_id = url_match.group('uploader_id')

        page_url = 'http://www.flickr.com/photos/' + uploader_id + '/' + video_id
        page = self._download_webpage(page_url, video_id)

        secret = self._search_regex(r"photo_secret: '(\w+)'", page, u'secret')

        # Step 1: the XML document that carries the node id.
        first_url = 'https://secure.flickr.com/apps/video/video_mtl_xml.gne?v=x&photo_id=' + video_id + '&secret=' + secret + '&bitrate=700&target=_self'
        first_xml = self._download_webpage(first_url, video_id, 'Downloading first data webpage')
        node_id = self._html_search_regex(
            r'<Item id="id">(\d+-\d+)</Item>',
            first_xml, u'node_id')

        # Step 2: the playlist that names the actual stream.
        second_url = 'https://secure.flickr.com/video_playlist.gne?node_id=' + node_id + '&tech=flash&mode=playlist&bitrate=700&secret=' + secret + '&rd=video.yahoo.com&noad=1'
        second_xml = self._download_webpage(second_url, video_id, 'Downloading second data webpage')

        self.report_extraction(video_id)

        stream = re.search(r'<STREAM APP="(.+?)" FULLPATH="(.+?)"', second_xml)
        if stream is None:
            raise ExtractorError(u'Unable to extract video url')
        # APP is used verbatim; FULLPATH is HTML-unescaped before joining.
        media_url = stream.group(1) + unescapeHTML(stream.group(2))

        title = self._html_search_regex(
            r'<meta property="og:title" content=(?:"([^"]+)"|\'([^\']+)\')',
            page, u'video title')
        description = self._html_search_regex(
            r'<meta property="og:description" content=(?:"([^"]+)"|\'([^\']+)\')',
            page, u'description', fatal=False)
        thumbnail = self._html_search_regex(
            r'<meta property="og:image" content=(?:"([^"]+)"|\'([^\']+)\')',
            page, u'thumbnail', fatal=False)

        result = {
            'id': video_id,
            'url': media_url,
            'ext': 'mp4',
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'uploader_id': uploader_id,
        }
        return [result]
2735
45014296
JMF
class TeamcocoIE(InfoExtractor):
    """Extractor for videos hosted on teamcoco.com."""
    _VALID_URL = r'http://teamcoco\.com/video/(?P<url_title>.*)'

    def _real_extract(self, url):
        """Scrape metadata from the page and the media URL from its data XML.

        The numeric video id is not part of the URL; it is read from the
        downloaded page and then used to request
        ``http://teamcoco.com/cvp/2.0/<id>.xml``.

        Raises ExtractorError when *url* does not match ``_VALID_URL``.
        Returns a one-element list containing the info dictionary.
        """
        url_match = re.match(self._VALID_URL, url)
        if url_match is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        slug = url_match.group('url_title')
        page = self._download_webpage(url, slug)

        video_id = self._html_search_regex(
            r'<article class="video" data-id="(\d+?)"',
            page, u'video id')

        self.report_extraction(video_id)

        title = self._html_search_regex(
            r'<meta property="og:title" content="(.+?)"',
            page, u'title')
        thumbnail = self._html_search_regex(
            r'<meta property="og:image" content="(.+?)"',
            page, u'thumbnail', fatal=False)
        description = self._html_search_regex(
            r'<meta property="og:description" content="(.*?)"',
            page, u'description', fatal=False)

        data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
        data_xml = self._download_webpage(data_url, video_id, 'Downloading data webpage')

        # Only the <file type="high"> entry is considered.
        media_url = self._html_search_regex(
            r'<file type="high".*?>(.*?)</file>',
            data_xml, u'video URL')

        result = {
            'id': video_id,
            'url': media_url,
            'ext': 'mp4',
            'title': title,
            'thumbnail': thumbnail,
            'description': description,
        }
        return [result]
84095012 2774
71e458d4
YUK
class XHamsterIE(InfoExtractor):
    """Information Extractor for xHamster"""
    # The dot after "www" is escaped so that it only matches a literal dot.
    _VALID_URL = r'(?:http://)?(?:www\.)?xhamster\.com/movies/(?P<id>[0-9]+)/.*\.html'

    def _real_extract(self, url):
        """Extract the metadata of a single xHamster video.

        Raises ExtractorError when *url* does not match ``_VALID_URL`` or
        when the media location cannot be found in the page.  A missing
        upload date only produces a warning; a missing uploader falls back
        to ``u'anonymous'``.
        Returns a one-element list containing the info dictionary.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        video_id = mobj.group('id')
        mrss_url = 'http://xhamster.com/movies/%s/.html' % video_id
        webpage = self._download_webpage(mrss_url, video_id)

        mobj = re.search(r'\'srv\': \'(?P<server>[^\']*)\',\s*\'file\': \'(?P<file>[^\']+)\',', webpage)
        if mobj is None:
            raise ExtractorError(u'Unable to extract media URL')
        if not mobj.group('server'):
            # Without a server the 'file' field is itself a quoted URL.
            video_url = compat_urllib_parse.unquote(mobj.group('file'))
        else:
            video_url = mobj.group('server') + '/key=' + mobj.group('file')
        video_extension = video_url.split('.')[-1]

        video_title = self._html_search_regex(r'<title>(?P<title>.+?) - xHamster\.com</title>',
            webpage, u'title')

        # The description is deliberately not extracted: it cannot be seen
        # anywhere in the UI.

        mobj = re.search(r'hint=\'(?P<upload_date_Y>[0-9]{4})-(?P<upload_date_m>[0-9]{2})-(?P<upload_date_d>[0-9]{2}) [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{3,4}\'', webpage)
        if mobj:
            # Concatenate year, month and day into YYYYMMDD.
            video_upload_date = mobj.group('upload_date_Y') + mobj.group('upload_date_m') + mobj.group('upload_date_d')
        else:
            video_upload_date = None
            self._downloader.report_warning(u'Unable to extract upload date')

        video_uploader_id = self._html_search_regex(r'<a href=\'/user/[^>]+>(?P<uploader_id>[^<]+)',
            webpage, u'uploader id', default=u'anonymous')

        video_thumbnail = self._search_regex(r'\'image\':\'(?P<thumbnail>[^\']+)\'',
            webpage, u'thumbnail', fatal=False)

        return [{
            'id': video_id,
            'url': video_url,
            'ext': video_extension,
            'title': video_title,
            'upload_date': video_upload_date,
            'uploader_id': video_uploader_id,
            'thumbnail': video_thumbnail,
        }]
afef36c9 2826
157b864a
YK
class HypemIE(InfoExtractor):
    """Extractor for tracks hosted on hypem.com."""
    _VALID_URL = r'(?:http://)?(?:www\.)?hypem\.com/track/([^/]+)/([^/]+)'

    def _real_extract(self, url):
        """Resolve the mp3 URL of a Hype Machine track.

        The track page is requested first (its ``Set-Cookie`` header is kept),
        the first entry of the embedded JSON track list supplies id, key,
        artist and song, and a second request to ``/serve/source/`` returns
        the final media URL.

        Raises ExtractorError on an invalid URL or when either JSON payload
        cannot be decoded.
        Returns a one-element list containing the info dictionary.
        """
        url_match = re.match(self._VALID_URL, url)
        if url_match is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        track_id = url_match.group(1)

        query = compat_urllib_parse.urlencode({ 'ax': 1, 'ts': time.time() })
        page_request = compat_urllib_request.Request(url + "?" + query)
        page, handle = self._download_webpage_handle(
            page_request, track_id, u'Downloading webpage with the url')
        # The cookie is replayed on the metadata request below.
        cookie = handle.headers.get('Set-Cookie', '')

        self.report_extraction(track_id)

        tracks_json = self._html_search_regex(
            r'<script type="application/json" id="displayList-data">(.*?)</script>',
            page, u'tracks', flags=re.MULTILINE|re.DOTALL).strip()
        try:
            track = json.loads(tracks_json)[u'tracks'][0]
        except ValueError:
            raise ExtractorError(u'Hypemachine contained invalid JSON.')

        key = track[u"key"]
        # From here on the id reported by the site replaces the URL-derived one.
        track_id = track[u"id"]
        artist = track[u"artist"]
        title = track[u"song"]

        serve_url = "http://hypem.com/serve/source/%s/%s" % (compat_str(track_id), compat_str(key))
        serve_request = compat_urllib_request.Request(
            serve_url, "", {'Content-Type': 'application/json'})
        serve_request.add_header('cookie', cookie)
        metadata_json = self._download_webpage(serve_request, track_id, u'Downloading metadata')
        try:
            metadata = json.loads(metadata_json)
        except ValueError:
            raise ExtractorError(u'Hypemachine contained invalid JSON.')

        result = {
            'id': track_id,
            'url': metadata[u"url"],
            'ext': "mp3",
            'title': title,
            'artist': artist,
        }
        return [result]
2876
ecb3e676
YK
class Vbox7IE(InfoExtractor):
    """Information Extractor for Vbox7"""
    _VALID_URL = r'(?:http://)?(?:www\.)?vbox7\.com/play:([^/]+)'

    def _real_extract(self, url):
        """Extract the metadata of a single Vbox7 video.

        The initial page only contains a JavaScript redirect, which is
        followed manually; the media and thumbnail URLs are then obtained
        from a form-encoded POST to ``/play/magare.do``.

        Raises ExtractorError on an invalid URL or when the info response
        does not consist of exactly two ``key=value`` fields.
        Returns a one-element list containing the info dictionary.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = mobj.group(1)

        redirect_page, urlh = self._download_webpage_handle(url, video_id)
        new_location = self._search_regex(r'window\.location = \'(.*)\';', redirect_page, u'redirect location')
        redirect_url = urlh.geturl() + new_location
        webpage = self._download_webpage(redirect_url, video_id, u'Downloading redirect page')

        # Keep only the part of the <title> that precedes the first slash.
        title = self._html_search_regex(r'<title>(.*)</title>',
            webpage, u'title').split('/')[0].strip()

        ext = "flv"
        info_url = "http://vbox7.com/play/magare.do"
        data = compat_urllib_parse.urlencode({'as3': '1', 'vid': video_id})
        info_request = compat_urllib_request.Request(info_url, data)
        info_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
        info_response = self._download_webpage(info_request, video_id, u'Downloading info webpage')
        if info_response is None:
            raise ExtractorError(u'Unable to extract the media url')

        # The response holds two "key=value" fields joined by "&": first the
        # media URL, then the thumbnail URL.  Split on the first "=" only,
        # so that values which themselves contain "=" are not truncated.
        fields = [pair.split('=', 1) for pair in info_response.split('&')]
        if len(fields) != 2 or any(len(pair) != 2 for pair in fields):
            raise ExtractorError(u'Unable to extract the media url')
        final_url, thumbnail_url = fields[0][1], fields[1][1]

        return [{
            'id': video_id,
            'url': final_url,
            'ext': ext,
            'title': title,
            'thumbnail': thumbnail_url,
        }]
157b864a 2912
32aa88bc
JMF
class GametrailersIE(InfoExtractor):
    """Information extractor for gametrailers.com videos, reviews and episodes."""

    _VALID_URL = r'http://www\.gametrailers\.com/(?P<type>videos|reviews|full-episodes)/(?P<id>.*?)/(?P<title>.*)'

    def _real_extract(self, url):
        """Extract the metadata of a single Gametrailers video.

        The page supplies an ``mgid`` identifier, which is then used to
        query an MRSS feed (title, description, thumbnail) and a mediagen
        feed (media URLs).

        Raises ExtractorError on an invalid URL, when the MRSS feed cannot
        be parsed, or when the mediagen feed lists no media URL.
        Returns a single info dictionary (not wrapped in a list).
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = mobj.group('id')
        video_type = mobj.group('type')
        webpage = self._download_webpage(url, video_id)

        # Full episodes carry the mgid in a different attribute.
        if video_type == 'full-episodes':
            mgid_re = r'data-video="(?P<mgid>mgid:.*?)"'
        else:
            mgid_re = r'data-contentId=\'(?P<mgid>mgid:.*?)\''
        mgid = self._search_regex(mgid_re, webpage, u'mgid')
        data = compat_urllib_parse.urlencode({'uri': mgid, 'acceptMethods': 'fms'})

        info_page = self._download_webpage('http://www.gametrailers.com/feeds/mrss?' + data,
                                           video_id, u'Downloading video info')
        links_webpage = self._download_webpage('http://www.gametrailers.com/feeds/mediagen/?' + data,
                                               video_id, u'Downloading video urls info')

        self.report_extraction(video_id)
        # Compiled with re.VERBOSE, so the layout whitespace is ignored.
        info_re = r'''<title><!\[CDATA\[(?P<title>.*?)\]\]></title>.*
                      <description><!\[CDATA\[(?P<description>.*?)\]\]></description>.*
                      <image>.*
                        <url>(?P<thumb>.*?)</url>.*
                      </image>'''

        m_info = re.search(info_re, info_page, re.VERBOSE | re.DOTALL)
        if m_info is None:
            raise ExtractorError(u'Unable to extract video info')
        video_title = m_info.group('title')
        video_description = m_info.group('description')
        video_thumb = m_info.group('thumb')

        m_urls = list(re.finditer(r'<src>(?P<url>.*)</src>', links_webpage))
        if not m_urls:
            # This used to raise the undefined name ``ExtractError``, which
            # surfaced as a NameError instead of a proper extractor error.
            raise ExtractorError(u'Unable to extract video url')
        # They are sorted from worst to best quality
        video_url = m_urls[-1].group('url')

        return {'url': video_url,
                'id': video_id,
                'title': video_title,
                # Videos are actually flv not mp4
                'ext': 'flv',
                'thumbnail': video_thumb,
                'description': video_description,
                }
2963
4aeae91f
PH
def gen_extractors():
    """Return a list with one fresh instance of every supported extractor.

    The order matters: the first extractor whose pattern matches a URL is
    the one that handles it, so GenericIE has to stay last.
    """
    extractor_classes = (
        YoutubePlaylistIE,
        YoutubeChannelIE,
        YoutubeUserIE,
        YoutubeSearchIE,
        YoutubeIE,
        MetacafeIE,
        DailymotionIE,
        GoogleSearchIE,
        PhotobucketIE,
        YahooIE,
        YahooSearchIE,
        DepositFilesIE,
        FacebookIE,
        BlipTVIE,
        BlipTVUserIE,
        VimeoIE,
        MyVideoIE,
        ComedyCentralIE,
        EscapistIE,
        CollegeHumorIE,
        XVideosIE,
        SoundcloudSetIE,
        SoundcloudIE,
        InfoQIE,
        MixcloudIE,
        StanfordOpenClassroomIE,
        MTVIE,
        YoukuIE,
        XNXXIE,
        YouJizzIE,
        PornotubeIE,
        YouPornIE,
        GooglePlusIE,
        ArteTvIE,
        NBAIE,
        WorldStarHipHopIE,
        JustinTVIE,
        FunnyOrDieIE,
        SteamIE,
        UstreamIE,
        RBMARadioIE,
        EightTracksIE,
        KeekIE,
        TEDIE,
        MySpassIE,
        SpiegelIE,
        LiveLeakIE,
        ARDIE,
        ZDFIE,
        TumblrIE,
        BandcampIE,
        RedTubeIE,
        InaIE,
        HowcastIE,
        VineIE,
        FlickrIE,
        TeamcocoIE,
        XHamsterIE,
        HypemIE,
        Vbox7IE,
        GametrailersIE,
        StatigramIE,
        GenericIE,
    )
    # Instantiate in declaration order so that matching priority is kept.
    return [extractor_class() for extractor_class in extractor_classes]
93412126
JMF
3033
def get_info_extractor(ie_name):
    """Look up an info extractor class by its name without the 'IE' suffix.

    The class is fetched from this module's global namespace, so a name
    that has no matching ``<ie_name>IE`` global raises KeyError.
    """
    class_name = ie_name + 'IE'
    module_namespace = globals()
    return module_namespace[class_name]