#!/usr/bin/env python
# -*- coding: utf-8 -*-

from __future__ import absolute_import

import base64
import datetime
import itertools
import netrc
import os
import re
import socket
import time
import email.utils
import xml.etree.ElementTree
import random
import math
import operator

from .utils import *


class InfoExtractor(object):
    """Information Extractor class.

    Information extractors are the classes that, given a URL, extract
    information about the video (or videos) the URL refers to. This
    information includes the real video URL, the video title, author and
    others. The information is stored in a dictionary which is then
    passed to the FileDownloader. The FileDownloader processes this
    information possibly downloading the video to the file system, among
    other possible outcomes.

    The dictionaries must include the following fields:

    id:             Video identifier.
    url:            Final video URL.
    title:          Video title, unescaped.
    ext:            Video filename extension.

    The following fields are optional:

    format:         The video format, defaults to ext (used for --get-format)
    thumbnail:      Full URL to a video thumbnail image.
    description:    One-line video description.
    uploader:       Full name of the video uploader.
    upload_date:    Video upload date (YYYYMMDD).
    uploader_id:    Nickname or id of the video uploader.
    location:       Physical location of the video.
    player_url:     SWF Player URL (used for rtmpdump).
    subtitles:      The subtitle file contents.
    urlhandle:      [internal] The urlHandle to be used to download the file,
                    like returned by urllib.request.urlopen

    The fields should all be Unicode strings.

    Subclasses of this one should re-define the _real_initialize() and
    _real_extract() methods and define a _VALID_URL regexp.
    Probably, they should also be added to the list of extractors.

    _real_extract() must return a *list* of information dictionaries as
    described above.

    Finally, the _WORKING attribute should be set to False for broken IEs
    in order to warn the users and skip the tests.
    """

    _ready = False
    _downloader = None
    _WORKING = True

    def __init__(self, downloader=None):
        """Constructor. Receives an optional downloader."""
        self._ready = False
        self.set_downloader(downloader)

    @classmethod
    def suitable(cls, url):
        """Receives a URL and returns True if suitable for this IE."""
        return re.match(cls._VALID_URL, url) is not None

    @classmethod
    def working(cls):
        """Getter method for _WORKING."""
        return cls._WORKING

    def initialize(self):
        """Initializes an instance (authentication, etc)."""
        if not self._ready:
            self._real_initialize()
            self._ready = True

    def extract(self, url):
        """Extracts URL information and returns it in list of dicts."""
        self.initialize()
        return self._real_extract(url)

    def set_downloader(self, downloader):
        """Sets the downloader for this IE."""
        self._downloader = downloader

    def _real_initialize(self):
        """Real initialization process. Redefine in subclasses."""
        pass

    def _real_extract(self, url):
        """Real extraction process. Redefine in subclasses."""
        pass

    @property
    def IE_NAME(self):
        return type(self).__name__[:-2]

    def _request_webpage(self, url_or_request, video_id, note=None, errnote=None):
        """ Returns the response handle """
        if note is None:
            note = u'Downloading video webpage'
        self._downloader.to_screen(u'[%s] %s: %s' % (self.IE_NAME, video_id, note))
        try:
            return compat_urllib_request.urlopen(url_or_request)
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            if errnote is None:
                errnote = u'Unable to download webpage'
            raise ExtractorError(u'%s: %s' % (errnote, compat_str(err)), sys.exc_info()[2])

    def _download_webpage(self, url_or_request, video_id, note=None, errnote=None):
        """ Returns the data of the page as a string """
        urlh = self._request_webpage(url_or_request, video_id, note, errnote)
        content_type = urlh.headers.get('Content-Type', '')
        m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type)
        if m:
            encoding = m.group(1)
        else:
            encoding = 'utf-8'
        webpage_bytes = urlh.read()
        return webpage_bytes.decode(encoding, 'replace')

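# Illustrative sketch only, not an extractor shipped with this module: a minimal
# subclass following the contract documented on InfoExtractor above. The site,
# URL pattern and page layout assumed here are hypothetical placeholders.
class _ExampleIE(InfoExtractor):
    """Sketch of a minimal information extractor."""

    _VALID_URL = r'(?:https?://)?(?:www\.)?example\.com/videos/(?P<id>[0-9]+)'
    IE_NAME = u'example'

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            self._downloader.report_error(u'invalid URL: %s' % url)
            return
        video_id = mobj.group('id')
        # _download_webpage() handles the request, the error reporting and the
        # charset detection implemented above.
        webpage = self._download_webpage(url, video_id)
        mobj = re.search(r'<title>(.*?)</title>', webpage)
        video_title = mobj.group(1) if mobj else u'example-%s' % video_id
        # _real_extract() must return a *list* of information dictionaries.
        return [{
            'id': video_id,
            'url': u'http://media.example.com/%s.mp4' % video_id,  # hypothetical direct URL
            'uploader': None,
            'upload_date': None,
            'title': video_title,
            'ext': u'mp4',
        }]
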
class YoutubeIE(InfoExtractor):
    """Information extractor for youtube.com."""

    _VALID_URL = r"""^
                     (
                         (?:https?://)?                                       # http(s):// (optional)
                         (?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/|
                            tube\.majestyc\.net/)                             # the various hostnames, with wildcard subdomains
                         (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
                         (?:                                                  # the various things that can precede the ID:
                             (?:(?:v|embed|e)/)                               # v/ or embed/ or e/
                             |(?:                                             # or the v= param in all its forms
                                 (?:watch(?:_popup)?(?:\.php)?)?              # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                                 (?:\?|\#!?)                                  # the params delimiter ? or # or #!
                                 (?:.*?&)?                                    # any other preceding param (like /?s=tuff&v=xxxx)
                                 v=
                             )
                         )?                                                   # optional -> youtube.com/xxxx is OK
                     )?                                                       # all until now is optional -> you can pass the naked ID
                     ([0-9A-Za-z_-]+)                                         # here it is! the YouTube video ID
                     (?(1).+)?                                                # if we found the ID, everything can follow
                     $"""
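    # A few illustrative URL shapes accepted by the verbose pattern above
    # (illustrative video ID 'BaW_jenozKc'); in every case the ID ends up in
    # group 2, which is what _extract_id() below relies on:
    #   http://www.youtube.com/watch?v=BaW_jenozKc
    #   https://youtu.be/BaW_jenozKc
    #   http://www.youtube.com/embed/BaW_jenozKc
    #   BaW_jenozKc                      (the naked ID is accepted as well)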
    _LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
    _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
    _NETRC_MACHINE = 'youtube'
    # Listed in order of quality
    _available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '17', '13']
    _available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '17', '13']
    _video_extensions = {
        '13': '3gp',
        '17': 'mp4',
        '18': 'mp4',
        '22': 'mp4',
        '37': 'mp4',
        '38': 'video', # You actually don't know if this will be MOV, AVI or whatever
        '43': 'webm',
        '44': 'webm',
        '45': 'webm',
        '46': 'webm',
    }
    _video_dimensions = {
        '5': '240x400',
        '6': '???',
        '13': '???',
        '17': '144x176',
        '18': '360x640',
        '22': '720x1280',
        '34': '360x640',
        '35': '480x854',
        '37': '1080x1920',
        '38': '3072x4096',
        '43': '360x640',
        '44': '480x854',
        '45': '720x1280',
        '46': '1080x1920',
    }
    IE_NAME = u'youtube'

    @classmethod
    def suitable(cls, url):
        """Receives a URL and returns True if suitable for this IE."""
        if YoutubePlaylistIE.suitable(url): return False
        return re.match(cls._VALID_URL, url, re.VERBOSE) is not None

    def report_lang(self):
        """Report attempt to set language."""
        self._downloader.to_screen(u'[youtube] Setting language')

    def report_login(self):
        """Report attempt to log in."""
        self._downloader.to_screen(u'[youtube] Logging in')

    def report_age_confirmation(self):
        """Report attempt to confirm age."""
        self._downloader.to_screen(u'[youtube] Confirming age')

    def report_video_webpage_download(self, video_id):
        """Report attempt to download video webpage."""
        self._downloader.to_screen(u'[youtube] %s: Downloading video webpage' % video_id)

    def report_video_info_webpage_download(self, video_id):
        """Report attempt to download video info webpage."""
        self._downloader.to_screen(u'[youtube] %s: Downloading video info webpage' % video_id)

    def report_video_subtitles_download(self, video_id):
        """Report attempt to check for available subtitles."""
        self._downloader.to_screen(u'[youtube] %s: Checking available subtitles' % video_id)

    def report_video_subtitles_request(self, video_id, sub_lang, format):
        """Report attempt to download video subtitles."""
        self._downloader.to_screen(u'[youtube] %s: Downloading video subtitles for %s.%s' % (video_id, sub_lang, format))

    def report_video_subtitles_available(self, video_id, sub_lang_list):
        """Report available subtitles."""
        sub_lang = ",".join(list(sub_lang_list.keys()))
        self._downloader.to_screen(u'[youtube] %s: Available subtitles for video: %s' % (video_id, sub_lang))

    def report_information_extraction(self, video_id):
        """Report attempt to extract video information."""
        self._downloader.to_screen(u'[youtube] %s: Extracting video information' % video_id)

    def report_unavailable_format(self, video_id, format):
        """Report unavailable format."""
        self._downloader.to_screen(u'[youtube] %s: Format %s not available' % (video_id, format))

    def report_rtmp_download(self):
        """Indicate the download will use the RTMP protocol."""
        self._downloader.to_screen(u'[youtube] RTMP download detected')

ae608b80 250 def _get_available_subtitles(self, video_id):
056d8575
FV
251 self.report_video_subtitles_download(video_id)
252 request = compat_urllib_request.Request('http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id)
253 try:
553d0974 254 sub_list = compat_urllib_request.urlopen(request).read().decode('utf-8')
056d8575
FV
255 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
256 return (u'WARNING: unable to download video subtitles: %s' % compat_str(err), None)
553d0974
IM
257 sub_lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
258 sub_lang_list = dict((l[1], l[0]) for l in sub_lang_list)
259 if not sub_lang_list:
c0ba1046 260 return (u'WARNING: video doesn\'t have subtitles', None)
553d0974 261 return sub_lang_list
ae608b80 262
2a4093ea
IM
263 def _list_available_subtitles(self, video_id):
264 sub_lang_list = self._get_available_subtitles(video_id)
265 self.report_video_subtitles_available(video_id, sub_lang_list)
266
9e62bc44 267 def _request_subtitle(self, sub_lang, sub_name, video_id, format):
2a4093ea 268 self.report_video_subtitles_request(video_id, sub_lang, format)
fb778e66 269 params = compat_urllib_parse.urlencode({
553d0974
IM
270 'lang': sub_lang,
271 'name': sub_name,
fb778e66 272 'v': video_id,
ae608b80 273 'fmt': format,
fb778e66
PH
274 })
275 url = 'http://www.youtube.com/api/timedtext?' + params
056d8575 276 try:
553d0974 277 sub = compat_urllib_request.urlopen(url).read().decode('utf-8')
056d8575
FV
278 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
279 return (u'WARNING: unable to download video subtitles: %s' % compat_str(err), None)
553d0974 280 if not sub:
fb778e66 281 return (u'WARNING: Did not fetch video subtitles', None)
553d0974 282 return (None, sub_lang, sub)
ae608b80
IM
283
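    # Sketch of the two-step subtitle flow used below (URLs reconstructed from the
    # code above; the video id 'abc123DEF45' and the 'srt' format are illustrative):
    #   1. _get_available_subtitles() fetches
    #      http://video.google.com/timedtext?hl=en&type=list&v=abc123DEF45
    #      and scrapes the name/lang_code pairs into a {lang_code: name} dict.
    #   2. _request_subtitle() then fetches
    #      http://www.youtube.com/api/timedtext?lang=en&name=...&v=abc123DEF45&fmt=srt
    #      and returns an (error, lang, subtitle_data) tuple.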
284 def _extract_subtitle(self, video_id):
553d0974 285 sub_lang_list = self._get_available_subtitles(video_id)
9e62bc44 286 sub_format = self._downloader.params.get('subtitlesformat')
ae608b80 287 if self._downloader.params.get('subtitleslang', False):
553d0974
IM
288 sub_lang = self._downloader.params.get('subtitleslang')
289 elif 'en' in sub_lang_list:
290 sub_lang = 'en'
ae608b80 291 else:
553d0974
IM
292 sub_lang = list(sub_lang_list.keys())[0]
293 if not sub_lang in sub_lang_list:
294 return (u'WARNING: no closed captions found in the specified language "%s"' % sub_lang, None)
ae608b80 295
9e62bc44 296 subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id, sub_format)
553d0974 297 return [subtitle]
ae608b80
IM
298
299 def _extract_all_subtitles(self, video_id):
553d0974 300 sub_lang_list = self._get_available_subtitles(video_id)
9e62bc44 301 sub_format = self._downloader.params.get('subtitlesformat')
553d0974
IM
302 subtitles = []
303 for sub_lang in sub_lang_list:
9e62bc44 304 subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id, sub_format)
553d0974
IM
305 subtitles.append(subtitle)
306 return subtitles
056d8575 307
59ae15a5
PH
308 def _print_formats(self, formats):
309 print('Available formats:')
310 for x in formats:
311 print('%s\t:\t%s\t[%s]' %(x, self._video_extensions.get(x, 'flv'), self._video_dimensions.get(x, '???')))
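            # Sample line printed above (illustrative values, tabs shown as spaces):
            #   22    :    mp4    [720x1280]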
312
313 def _real_initialize(self):
314 if self._downloader is None:
315 return
316
317 username = None
318 password = None
319 downloader_params = self._downloader.params
320
321 # Attempt to use provided username and password or .netrc data
322 if downloader_params.get('username', None) is not None:
323 username = downloader_params['username']
324 password = downloader_params['password']
325 elif downloader_params.get('usenetrc', False):
326 try:
327 info = netrc.netrc().authenticators(self._NETRC_MACHINE)
328 if info is not None:
329 username = info[0]
330 password = info[2]
331 else:
332 raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
333 except (IOError, netrc.NetrcParseError) as err:
2e5457be 334 self._downloader.report_warning(u'parsing .netrc: %s' % compat_str(err))
59ae15a5
PH
335 return
336
337 # Set language
338 request = compat_urllib_request.Request(self._LANG_URL)
339 try:
340 self.report_lang()
341 compat_urllib_request.urlopen(request).read()
342 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2e5457be 343 self._downloader.report_warning(u'unable to set language: %s' % compat_str(err))
59ae15a5
PH
344 return
345
346 # No authentication to be performed
347 if username is None:
348 return
349
d3f5f9f6
PH
350 request = compat_urllib_request.Request(self._LOGIN_URL)
351 try:
352 login_page = compat_urllib_request.urlopen(request).read().decode('utf-8')
353 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2e5457be 354 self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err))
d3f5f9f6
PH
355 return
356
357 galx = None
358 dsh = None
359 match = re.search(re.compile(r'<input.+?name="GALX".+?value="(.+?)"', re.DOTALL), login_page)
360 if match:
361 galx = match.group(1)
362
363 match = re.search(re.compile(r'<input.+?name="dsh".+?value="(.+?)"', re.DOTALL), login_page)
364 if match:
365 dsh = match.group(1)
366
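        # The GALX and dsh values scraped above are hidden fields of Google's
        # login form; they are echoed back in the login POST data below.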
59ae15a5 367 # Log in
d3f5f9f6
PH
368 login_form_strs = {
369 u'continue': u'http://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
370 u'Email': username,
371 u'GALX': galx,
372 u'Passwd': password,
373 u'PersistentCookie': u'yes',
374 u'_utf8': u'霱',
375 u'bgresponse': u'js_disabled',
376 u'checkConnection': u'',
377 u'checkedDomains': u'youtube',
378 u'dnConn': u'',
379 u'dsh': dsh,
380 u'pstMsg': u'0',
381 u'rmShown': u'1',
382 u'secTok': u'',
383 u'signIn': u'Sign in',
384 u'timeStmp': u'',
385 u'service': u'youtube',
386 u'uilel': u'3',
387 u'hl': u'en_US',
388 }
389 # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
390 # chokes on unicode
391 login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
392 login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
393 request = compat_urllib_request.Request(self._LOGIN_URL, login_data)
59ae15a5
PH
394 try:
395 self.report_login()
80d3177e 396 login_results = compat_urllib_request.urlopen(request).read().decode('utf-8')
d3f5f9f6 397 if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
2e5457be 398 self._downloader.report_warning(u'unable to log in: bad username or password')
59ae15a5
PH
399 return
400 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2e5457be 401 self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
59ae15a5
PH
402 return
403
404 # Confirm age
405 age_form = {
406 'next_url': '/',
407 'action_confirm': 'Confirm',
408 }
409 request = compat_urllib_request.Request(self._AGE_URL, compat_urllib_parse.urlencode(age_form))
410 try:
411 self.report_age_confirmation()
80d3177e 412 age_results = compat_urllib_request.urlopen(request).read().decode('utf-8')
59ae15a5 413 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
e5f30ade 414 self._downloader.report_error(u'unable to confirm age: %s' % compat_str(err))
59ae15a5
PH
415 return
416
3bb61659 417 def _extract_id(self, url):
59ae15a5
PH
418 mobj = re.match(self._VALID_URL, url, re.VERBOSE)
419 if mobj is None:
e5f30ade 420 self._downloader.report_error(u'invalid URL: %s' % url)
59ae15a5
PH
421 return
422 video_id = mobj.group(2)
3bb61659
PH
423 return video_id
424
425 def _real_extract(self, url):
426 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
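        # e.g. http://www.youtube.com/verify_age?next_url=/watch%3Fv%3DdQw4w9WgXcQ
        # is rewritten to http://www.youtube.com/watch?v=dQw4w9WgXcQ before the
        # ID is extracted (illustrative URL).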
427 mobj = re.search(self._NEXT_URL_RE, url)
428 if mobj:
429 url = 'http://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
430 video_id = self._extract_id(url)
59ae15a5
PH
431
432 # Get video webpage
433 self.report_video_webpage_download(video_id)
3bb61659
PH
434 url = 'http://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
435 request = compat_urllib_request.Request(url)
59ae15a5
PH
436 try:
437 video_webpage_bytes = compat_urllib_request.urlopen(request).read()
438 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
e5f30ade 439 self._downloader.report_error(u'unable to download video webpage: %s' % compat_str(err))
59ae15a5
PH
440 return
441
442 video_webpage = video_webpage_bytes.decode('utf-8', 'ignore')
443
444 # Attempt to extract SWF player URL
445 mobj = re.search(r'swfConfig.*?"(http:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
446 if mobj is not None:
447 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
448 else:
449 player_url = None
450
451 # Get video info
452 self.report_video_info_webpage_download(video_id)
453 for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
454 video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
455 % (video_id, el_type))
456 request = compat_urllib_request.Request(video_info_url)
457 try:
458 video_info_webpage_bytes = compat_urllib_request.urlopen(request).read()
459 video_info_webpage = video_info_webpage_bytes.decode('utf-8', 'ignore')
460 video_info = compat_parse_qs(video_info_webpage)
461 if 'token' in video_info:
462 break
463 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
e5f30ade 464 self._downloader.report_error(u'unable to download video info webpage: %s' % compat_str(err))
59ae15a5
PH
465 return
466 if 'token' not in video_info:
467 if 'reason' in video_info:
e5f30ade 468 self._downloader.report_error(u'YouTube said: %s' % video_info['reason'][0])
59ae15a5 469 else:
e5f30ade 470 self._downloader.report_error(u'"token" parameter not in video info for unknown reason')
59ae15a5
PH
471 return
472
473 # Check for "rental" videos
474 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
e5f30ade 475 self._downloader.report_error(u'"rental" videos not supported')
59ae15a5
PH
476 return
477
478 # Start extracting information
479 self.report_information_extraction(video_id)
480
481 # uploader
482 if 'author' not in video_info:
e5f30ade 483 self._downloader.report_error(u'unable to extract uploader name')
59ae15a5
PH
484 return
485 video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])
486
77c4beab
FV
487 # uploader_id
488 video_uploader_id = None
26cf0408 489 mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage)
77c4beab
FV
490 if mobj is not None:
491 video_uploader_id = mobj.group(1)
492 else:
c9fa1cba 493 self._downloader.report_warning(u'unable to extract uploader nickname')
77c4beab 494
59ae15a5
PH
495 # title
496 if 'title' not in video_info:
e5f30ade 497 self._downloader.report_error(u'unable to extract video title')
59ae15a5
PH
498 return
499 video_title = compat_urllib_parse.unquote_plus(video_info['title'][0])
500
501 # thumbnail image
502 if 'thumbnail_url' not in video_info:
c9fa1cba 503 self._downloader.report_warning(u'unable to extract video thumbnail')
59ae15a5
PH
504 video_thumbnail = ''
505 else: # don't panic if we can't find it
506 video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])
507
508 # upload date
509 upload_date = None
510 mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
511 if mobj is not None:
512 upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
513 format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y']
514 for expression in format_expressions:
515 try:
516 upload_date = datetime.datetime.strptime(upload_date, expression).strftime('%Y%m%d')
517 except:
518 pass
519
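        # For example, an "eow-date" value such as "Jun 21, 2012" is first
        # normalised to "Jun 21 2012" by the substitution above and then matched
        # by the '%b %d %Y' expression, yielding upload_date '20120621'.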
520 # description
521 video_description = get_element_by_id("eow-description", video_webpage)
522 if video_description:
523 video_description = clean_html(video_description)
524 else:
525 video_description = ''
526
9e62bc44 527 # subtitles
59ae15a5 528 video_subtitles = None
ae608b80 529
59ae15a5 530 if self._downloader.params.get('writesubtitles', False):
ae608b80
IM
531 video_subtitles = self._extract_subtitle(video_id)
532 if video_subtitles:
553d0974
IM
533 (sub_error, sub_lang, sub) = video_subtitles[0]
534 if sub_error:
535 self._downloader.trouble(sub_error)
ae608b80
IM
536
537 if self._downloader.params.get('allsubtitles', False):
538 video_subtitles = self._extract_all_subtitles(video_id)
539 for video_subtitle in video_subtitles:
553d0974
IM
540 (sub_error, sub_lang, sub) = video_subtitle
541 if sub_error:
542 self._downloader.trouble(sub_error)
59ae15a5 543
2a4093ea
IM
544 if self._downloader.params.get('listsubtitles', False):
545 sub_lang_list = self._list_available_subtitles(video_id)
546 return
59ae15a5
PH
547
548 if 'length_seconds' not in video_info:
c9fa1cba 549 self._downloader.report_warning(u'unable to extract video duration')
59ae15a5
PH
550 video_duration = ''
551 else:
552 video_duration = compat_urllib_parse.unquote_plus(video_info['length_seconds'][0])
553
554 # token
555 video_token = compat_urllib_parse.unquote_plus(video_info['token'][0])
556
557 # Decide which formats to download
558 req_format = self._downloader.params.get('format', None)
559
560 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
561 self.report_rtmp_download()
562 video_url_list = [(None, video_info['conn'][0])]
563 elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1:
564 url_data_strs = video_info['url_encoded_fmt_stream_map'][0].split(',')
565 url_data = [compat_parse_qs(uds) for uds in url_data_strs]
1a2c3c0f 566 url_data = [ud for ud in url_data if 'itag' in ud and 'url' in ud]
59ae15a5
PH
567 url_map = dict((ud['itag'][0], ud['url'][0] + '&signature=' + ud['sig'][0]) for ud in url_data)
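            # url_map now maps itag codes to playable URLs with the signature
            # appended, e.g. (illustrative values only):
            #   {'22': 'http://...&itag=22&...&signature=ABCDEF',
            #    '34': 'http://...&itag=34&...&signature=012345'}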
568
569 format_limit = self._downloader.params.get('format_limit', None)
570 available_formats = self._available_formats_prefer_free if self._downloader.params.get('prefer_free_formats', False) else self._available_formats
571 if format_limit is not None and format_limit in available_formats:
572 format_list = available_formats[available_formats.index(format_limit):]
573 else:
574 format_list = available_formats
575 existing_formats = [x for x in format_list if x in url_map]
576 if len(existing_formats) == 0:
e5f30ade 577 self._downloader.report_error(u'no known formats available for video')
59ae15a5
PH
578 return
579 if self._downloader.params.get('listformats', None):
580 self._print_formats(existing_formats)
581 return
582 if req_format is None or req_format == 'best':
583 video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
584 elif req_format == 'worst':
585 video_url_list = [(existing_formats[len(existing_formats)-1], url_map[existing_formats[len(existing_formats)-1]])] # worst quality
586 elif req_format in ('-1', 'all'):
587 video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
588 else:
                # Specific formats. We pick the first in a slash-delimited sequence.
590 # For example, if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
591 req_formats = req_format.split('/')
592 video_url_list = None
593 for rf in req_formats:
594 if rf in url_map:
595 video_url_list = [(rf, url_map[rf])]
596 break
597 if video_url_list is None:
e5f30ade 598 self._downloader.report_error(u'requested format not available')
59ae15a5
PH
599 return
600 else:
e5f30ade 601 self._downloader.report_error(u'no conn or url_encoded_fmt_stream_map information found in video info')
59ae15a5
PH
602 return
603
604 results = []
605 for format_param, video_real_url in video_url_list:
606 # Extension
607 video_extension = self._video_extensions.get(format_param, 'flv')
608
32761d86
FV
609 video_format = '{0} - {1}'.format(format_param if format_param else video_extension,
610 self._video_dimensions.get(format_param, '???'))
59ae15a5
PH
611
612 results.append({
613 'id': video_id,
614 'url': video_real_url,
615 'uploader': video_uploader,
77c4beab 616 'uploader_id': video_uploader_id,
59ae15a5
PH
617 'upload_date': upload_date,
618 'title': video_title,
619 'ext': video_extension,
620 'format': video_format,
621 'thumbnail': video_thumbnail,
622 'description': video_description,
623 'player_url': player_url,
624 'subtitles': video_subtitles,
625 'duration': video_duration
626 })
627 return results
d77c3dfd
FV
628
629
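# Illustrative helper, not called by any extractor: a standalone restatement of the
# format-selection rule in YoutubeIE._real_extract() above, for a requested format
# string such as 'best', 'worst', '-1'/'all' or a slash-delimited list like '22/34/18'.
def _example_select_formats(req_format, existing_formats, url_map):
    """Return a list of (format, url) pairs according to req_format, or None."""
    if req_format is None or req_format == 'best':
        return [(existing_formats[0], url_map[existing_formats[0]])]    # best quality
    if req_format == 'worst':
        return [(existing_formats[-1], url_map[existing_formats[-1]])]  # worst quality
    if req_format in ('-1', 'all'):
        return [(f, url_map[f]) for f in existing_formats]              # all formats
    # Specific formats: pick the first available one in the slash-delimited
    # sequence, e.g. '22/34' with only '34' available selects '34'.
    for rf in req_format.split('/'):
        if rf in url_map:
            return [(rf, url_map[rf])]
    return None
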
630class MetacafeIE(InfoExtractor):
59ae15a5
PH
631 """Information Extractor for metacafe.com."""
632
633 _VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
634 _DISCLAIMER = 'http://www.metacafe.com/family_filter/'
635 _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
636 IE_NAME = u'metacafe'
637
638 def __init__(self, downloader=None):
639 InfoExtractor.__init__(self, downloader)
640
641 def report_disclaimer(self):
642 """Report disclaimer retrieval."""
643 self._downloader.to_screen(u'[metacafe] Retrieving disclaimer')
644
645 def report_age_confirmation(self):
646 """Report attempt to confirm age."""
647 self._downloader.to_screen(u'[metacafe] Confirming age')
648
649 def report_download_webpage(self, video_id):
650 """Report webpage download."""
651 self._downloader.to_screen(u'[metacafe] %s: Downloading webpage' % video_id)
652
653 def report_extraction(self, video_id):
654 """Report information extraction."""
655 self._downloader.to_screen(u'[metacafe] %s: Extracting information' % video_id)
656
657 def _real_initialize(self):
658 # Retrieve disclaimer
659 request = compat_urllib_request.Request(self._DISCLAIMER)
660 try:
661 self.report_disclaimer()
662 disclaimer = compat_urllib_request.urlopen(request).read()
663 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
e5f30ade 664 self._downloader.report_error(u'unable to retrieve disclaimer: %s' % compat_str(err))
59ae15a5
PH
665 return
666
667 # Confirm age
668 disclaimer_form = {
669 'filters': '0',
670 'submit': "Continue - I'm over 18",
671 }
672 request = compat_urllib_request.Request(self._FILTER_POST, compat_urllib_parse.urlencode(disclaimer_form))
673 try:
674 self.report_age_confirmation()
675 disclaimer = compat_urllib_request.urlopen(request).read()
676 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
e5f30ade 677 self._downloader.report_error(u'unable to confirm age: %s' % compat_str(err))
59ae15a5
PH
678 return
679
680 def _real_extract(self, url):
681 # Extract id and simplified title from URL
682 mobj = re.match(self._VALID_URL, url)
683 if mobj is None:
e5f30ade 684 self._downloader.report_error(u'invalid URL: %s' % url)
59ae15a5
PH
685 return
686
687 video_id = mobj.group(1)
688
689 # Check if video comes from YouTube
690 mobj2 = re.match(r'^yt-(.*)$', video_id)
691 if mobj2 is not None:
692 self._downloader.download(['http://www.youtube.com/watch?v=%s' % mobj2.group(1)])
693 return
694
695 # Retrieve video webpage to extract further information
696 request = compat_urllib_request.Request('http://www.metacafe.com/watch/%s/' % video_id)
697 try:
698 self.report_download_webpage(video_id)
699 webpage = compat_urllib_request.urlopen(request).read()
700 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_error(u'unable to retrieve video webpage: %s' % compat_str(err))
59ae15a5
PH
702 return
703
704 # Extract URL, uploader and title from webpage
705 self.report_extraction(video_id)
706 mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
707 if mobj is not None:
708 mediaURL = compat_urllib_parse.unquote(mobj.group(1))
709 video_extension = mediaURL[-3:]
710
711 # Extract gdaKey if available
712 mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage)
713 if mobj is None:
714 video_url = mediaURL
715 else:
716 gdaKey = mobj.group(1)
717 video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)
718 else:
719 mobj = re.search(r' name="flashvars" value="(.*?)"', webpage)
720 if mobj is None:
e5f30ade 721 self._downloader.report_error(u'unable to extract media URL')
59ae15a5
PH
722 return
723 vardict = compat_parse_qs(mobj.group(1))
724 if 'mediaData' not in vardict:
e5f30ade 725 self._downloader.report_error(u'unable to extract media URL')
59ae15a5
PH
726 return
727 mobj = re.search(r'"mediaURL":"(http.*?)","key":"(.*?)"', vardict['mediaData'][0])
728 if mobj is None:
e5f30ade 729 self._downloader.report_error(u'unable to extract media URL')
59ae15a5
PH
730 return
731 mediaURL = mobj.group(1).replace('\\/', '/')
732 video_extension = mediaURL[-3:]
733 video_url = '%s?__gda__=%s' % (mediaURL, mobj.group(2))
734
735 mobj = re.search(r'(?im)<title>(.*) - Video</title>', webpage)
736 if mobj is None:
e5f30ade 737 self._downloader.report_error(u'unable to extract title')
59ae15a5
PH
738 return
739 video_title = mobj.group(1).decode('utf-8')
740
741 mobj = re.search(r'submitter=(.*?);', webpage)
742 if mobj is None:
e5f30ade 743 self._downloader.report_error(u'unable to extract uploader nickname')
59ae15a5
PH
744 return
745 video_uploader = mobj.group(1)
746
747 return [{
748 'id': video_id.decode('utf-8'),
749 'url': video_url.decode('utf-8'),
750 'uploader': video_uploader.decode('utf-8'),
751 'upload_date': None,
752 'title': video_title,
753 'ext': video_extension.decode('utf-8'),
754 }]
d77c3dfd
FV
755
756
757class DailymotionIE(InfoExtractor):
59ae15a5
PH
758 """Information Extractor for Dailymotion"""
759
760 _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^/]+)'
761 IE_NAME = u'dailymotion'
b17c974a 762 _WORKING = False
59ae15a5
PH
763
764 def __init__(self, downloader=None):
765 InfoExtractor.__init__(self, downloader)
766
59ae15a5
PH
767 def report_extraction(self, video_id):
768 """Report information extraction."""
769 self._downloader.to_screen(u'[dailymotion] %s: Extracting information' % video_id)
770
771 def _real_extract(self, url):
772 # Extract id and simplified title from URL
773 mobj = re.match(self._VALID_URL, url)
774 if mobj is None:
e5f30ade 775 self._downloader.report_error(u'invalid URL: %s' % url)
59ae15a5
PH
776 return
777
778 video_id = mobj.group(1).split('_')[0].split('?')[0]
779
780 video_extension = 'mp4'
781
782 # Retrieve video webpage to extract further information
783 request = compat_urllib_request.Request(url)
784 request.add_header('Cookie', 'family_filter=off')
8e241d1a 785 webpage = self._download_webpage(request, video_id)
59ae15a5
PH
786
787 # Extract URL, uploader and title from webpage
788 self.report_extraction(video_id)
789 mobj = re.search(r'\s*var flashvars = (.*)', webpage)
790 if mobj is None:
e5f30ade 791 self._downloader.report_error(u'unable to extract media URL')
59ae15a5
PH
792 return
793 flashvars = compat_urllib_parse.unquote(mobj.group(1))
794
795 for key in ['hd1080URL', 'hd720URL', 'hqURL', 'sdURL', 'ldURL', 'video_url']:
796 if key in flashvars:
797 max_quality = key
798 self._downloader.to_screen(u'[dailymotion] Using %s' % key)
799 break
800 else:
e5f30ade 801 self._downloader.report_error(u'unable to extract video URL')
59ae15a5
PH
802 return
803
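        # Note the for/else above: the else branch only runs when none of the
        # quality keys (hd1080URL, hd720URL, ...) is present in flashvars, i.e.
        # the loop never hit the break.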
804 mobj = re.search(r'"' + max_quality + r'":"(.+?)"', flashvars)
805 if mobj is None:
e5f30ade 806 self._downloader.report_error(u'unable to extract video URL')
59ae15a5
PH
807 return
808
809 video_url = compat_urllib_parse.unquote(mobj.group(1)).replace('\\/', '/')
810
811 # TODO: support choosing qualities
812
813 mobj = re.search(r'<meta property="og:title" content="(?P<title>[^"]*)" />', webpage)
814 if mobj is None:
e5f30ade 815 self._downloader.report_error(u'unable to extract title')
59ae15a5 816 return
28ca6b5a 817 video_title = unescapeHTML(mobj.group('title'))
59ae15a5
PH
818
819 video_uploader = None
820 mobj = re.search(r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a>', webpage)
821 if mobj is None:
            # looking for official user
823 mobj_official = re.search(r'<span rel="author"[^>]+?>([^<]+?)</span>', webpage)
824 if mobj_official is None:
c9fa1cba 825 self._downloader.report_warning(u'unable to extract uploader nickname')
59ae15a5
PH
826 else:
827 video_uploader = mobj_official.group(1)
828 else:
829 video_uploader = mobj.group(1)
830
831 video_upload_date = None
832 mobj = re.search(r'<div class="[^"]*uploaded_cont[^"]*" title="[^"]*">([0-9]{2})-([0-9]{2})-([0-9]{4})</div>', webpage)
833 if mobj is not None:
834 video_upload_date = mobj.group(3) + mobj.group(2) + mobj.group(1)
835
836 return [{
28ca6b5a
PH
837 'id': video_id,
838 'url': video_url,
839 'uploader': video_uploader,
59ae15a5
PH
840 'upload_date': video_upload_date,
841 'title': video_title,
28ca6b5a 842 'ext': video_extension,
59ae15a5 843 }]
d77c3dfd
FV
844
845
d77c3dfd 846class PhotobucketIE(InfoExtractor):
59ae15a5
PH
847 """Information extractor for photobucket.com."""
848
849 _VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)'
850 IE_NAME = u'photobucket'
851
852 def __init__(self, downloader=None):
853 InfoExtractor.__init__(self, downloader)
854
855 def report_download_webpage(self, video_id):
856 """Report webpage download."""
857 self._downloader.to_screen(u'[photobucket] %s: Downloading webpage' % video_id)
858
859 def report_extraction(self, video_id):
860 """Report information extraction."""
861 self._downloader.to_screen(u'[photobucket] %s: Extracting information' % video_id)
862
863 def _real_extract(self, url):
864 # Extract id from URL
865 mobj = re.match(self._VALID_URL, url)
866 if mobj is None:
e5f30ade 867 self._downloader.report_error(u'Invalid URL: %s' % url)
59ae15a5
PH
868 return
869
870 video_id = mobj.group(1)
871
872 video_extension = 'flv'
873
874 # Retrieve video webpage to extract further information
875 request = compat_urllib_request.Request(url)
876 try:
877 self.report_download_webpage(video_id)
878 webpage = compat_urllib_request.urlopen(request).read()
879 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
e5f30ade 880 self._downloader.report_error(u'Unable to retrieve video webpage: %s' % compat_str(err))
59ae15a5
PH
881 return
882
883 # Extract URL, uploader, and title from webpage
884 self.report_extraction(video_id)
885 mobj = re.search(r'<link rel="video_src" href=".*\?file=([^"]+)" />', webpage)
886 if mobj is None:
e5f30ade 887 self._downloader.report_error(u'unable to extract media URL')
59ae15a5
PH
888 return
889 mediaURL = compat_urllib_parse.unquote(mobj.group(1))
890
891 video_url = mediaURL
892
893 mobj = re.search(r'<title>(.*) video by (.*) - Photobucket</title>', webpage)
894 if mobj is None:
e5f30ade 895 self._downloader.report_error(u'unable to extract title')
59ae15a5
PH
896 return
897 video_title = mobj.group(1).decode('utf-8')
898
899 video_uploader = mobj.group(2).decode('utf-8')
900
901 return [{
902 'id': video_id.decode('utf-8'),
903 'url': video_url.decode('utf-8'),
904 'uploader': video_uploader,
905 'upload_date': None,
906 'title': video_title,
907 'ext': video_extension.decode('utf-8'),
908 }]
d77c3dfd
FV
909
910
911class YahooIE(InfoExtractor):
59ae15a5
PH
912 """Information extractor for video.yahoo.com."""
913
93702113 914 _WORKING = False
59ae15a5
PH
915 # _VALID_URL matches all Yahoo! Video URLs
916 # _VPAGE_URL matches only the extractable '/watch/' URLs
917 _VALID_URL = r'(?:http://)?(?:[a-z]+\.)?video\.yahoo\.com/(?:watch|network)/([0-9]+)(?:/|\?v=)([0-9]+)(?:[#\?].*)?'
918 _VPAGE_URL = r'(?:http://)?video\.yahoo\.com/watch/([0-9]+)/([0-9]+)(?:[#\?].*)?'
919 IE_NAME = u'video.yahoo'
920
921 def __init__(self, downloader=None):
922 InfoExtractor.__init__(self, downloader)
923
924 def report_download_webpage(self, video_id):
925 """Report webpage download."""
926 self._downloader.to_screen(u'[video.yahoo] %s: Downloading webpage' % video_id)
927
928 def report_extraction(self, video_id):
929 """Report information extraction."""
930 self._downloader.to_screen(u'[video.yahoo] %s: Extracting information' % video_id)
931
932 def _real_extract(self, url, new_video=True):
933 # Extract ID from URL
934 mobj = re.match(self._VALID_URL, url)
935 if mobj is None:
e5f30ade 936 self._downloader.report_error(u'Invalid URL: %s' % url)
59ae15a5
PH
937 return
938
939 video_id = mobj.group(2)
940 video_extension = 'flv'
941
942 # Rewrite valid but non-extractable URLs as
943 # extractable English language /watch/ URLs
944 if re.match(self._VPAGE_URL, url) is None:
945 request = compat_urllib_request.Request(url)
946 try:
947 webpage = compat_urllib_request.urlopen(request).read()
948 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
e5f30ade 949 self._downloader.report_error(u'Unable to retrieve video webpage: %s' % compat_str(err))
59ae15a5
PH
950 return
951
952 mobj = re.search(r'\("id", "([0-9]+)"\);', webpage)
953 if mobj is None:
e5f30ade 954 self._downloader.report_error(u'Unable to extract id field')
59ae15a5
PH
955 return
956 yahoo_id = mobj.group(1)
957
958 mobj = re.search(r'\("vid", "([0-9]+)"\);', webpage)
959 if mobj is None:
e5f30ade 960 self._downloader.report_error(u'Unable to extract vid field')
59ae15a5
PH
961 return
962 yahoo_vid = mobj.group(1)
963
964 url = 'http://video.yahoo.com/watch/%s/%s' % (yahoo_vid, yahoo_id)
965 return self._real_extract(url, new_video=False)
966
967 # Retrieve video webpage to extract further information
968 request = compat_urllib_request.Request(url)
969 try:
970 self.report_download_webpage(video_id)
971 webpage = compat_urllib_request.urlopen(request).read()
972 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
e5f30ade 973 self._downloader.report_error(u'Unable to retrieve video webpage: %s' % compat_str(err))
59ae15a5
PH
974 return
975
976 # Extract uploader and title from webpage
977 self.report_extraction(video_id)
978 mobj = re.search(r'<meta name="title" content="(.*)" />', webpage)
979 if mobj is None:
e5f30ade 980 self._downloader.report_error(u'unable to extract video title')
59ae15a5
PH
981 return
982 video_title = mobj.group(1).decode('utf-8')
983
984 mobj = re.search(r'<h2 class="ti-5"><a href="http://video\.yahoo\.com/(people|profile)/[0-9]+" beacon=".*">(.*)</a></h2>', webpage)
985 if mobj is None:
e5f30ade 986 self._downloader.report_error(u'unable to extract video uploader')
59ae15a5
PH
987 return
988 video_uploader = mobj.group(1).decode('utf-8')
989
990 # Extract video thumbnail
991 mobj = re.search(r'<link rel="image_src" href="(.*)" />', webpage)
992 if mobj is None:
e5f30ade 993 self._downloader.report_error(u'unable to extract video thumbnail')
59ae15a5
PH
994 return
995 video_thumbnail = mobj.group(1).decode('utf-8')
996
997 # Extract video description
998 mobj = re.search(r'<meta name="description" content="(.*)" />', webpage)
999 if mobj is None:
e5f30ade 1000 self._downloader.report_error(u'unable to extract video description')
59ae15a5
PH
1001 return
1002 video_description = mobj.group(1).decode('utf-8')
1003 if not video_description:
1004 video_description = 'No description available.'
1005
1006 # Extract video height and width
1007 mobj = re.search(r'<meta name="video_height" content="([0-9]+)" />', webpage)
1008 if mobj is None:
e5f30ade 1009 self._downloader.report_error(u'unable to extract video height')
59ae15a5
PH
1010 return
1011 yv_video_height = mobj.group(1)
1012
1013 mobj = re.search(r'<meta name="video_width" content="([0-9]+)" />', webpage)
1014 if mobj is None:
e5f30ade 1015 self._downloader.report_error(u'unable to extract video width')
59ae15a5
PH
1016 return
1017 yv_video_width = mobj.group(1)
1018
1019 # Retrieve video playlist to extract media URL
1020 # I'm not completely sure what all these options are, but we
1021 # seem to need most of them, otherwise the server sends a 401.
1022 yv_lg = 'R0xx6idZnW2zlrKP8xxAIR' # not sure what this represents
1023 yv_bitrate = '700' # according to Wikipedia this is hard-coded
1024 request = compat_urllib_request.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id +
1025 '&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height +
1026 '&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797')
1027 try:
1028 self.report_download_webpage(video_id)
1029 webpage = compat_urllib_request.urlopen(request).read()
1030 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
e5f30ade 1031 self._downloader.report_error(u'Unable to retrieve video webpage: %s' % compat_str(err))
59ae15a5
PH
1032 return
1033
1034 # Extract media URL from playlist XML
1035 mobj = re.search(r'<STREAM APP="(http://.*)" FULLPATH="/?(/.*\.flv\?[^"]*)"', webpage)
1036 if mobj is None:
e5f30ade 1037 self._downloader.report_error(u'Unable to extract media URL')
59ae15a5
PH
1038 return
1039 video_url = compat_urllib_parse.unquote(mobj.group(1) + mobj.group(2)).decode('utf-8')
1040 video_url = unescapeHTML(video_url)
1041
1042 return [{
1043 'id': video_id.decode('utf-8'),
1044 'url': video_url,
1045 'uploader': video_uploader,
1046 'upload_date': None,
1047 'title': video_title,
1048 'ext': video_extension.decode('utf-8'),
1049 'thumbnail': video_thumbnail.decode('utf-8'),
1050 'description': video_description,
1051 }]
d77c3dfd
FV
1052
1053
1054class VimeoIE(InfoExtractor):
59ae15a5
PH
1055 """Information extractor for vimeo.com."""
1056
1057 # _VALID_URL matches Vimeo URLs
8edc2cf8 1058 _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo\.com/(?:(?:groups|album)/[^/]+/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)'
59ae15a5
PH
1059 IE_NAME = u'vimeo'
1060
1061 def __init__(self, downloader=None):
1062 InfoExtractor.__init__(self, downloader)
1063
1064 def report_download_webpage(self, video_id):
1065 """Report webpage download."""
1066 self._downloader.to_screen(u'[vimeo] %s: Downloading webpage' % video_id)
1067
1068 def report_extraction(self, video_id):
1069 """Report information extraction."""
1070 self._downloader.to_screen(u'[vimeo] %s: Extracting information' % video_id)
1071
1072 def _real_extract(self, url, new_video=True):
1073 # Extract ID from URL
1074 mobj = re.match(self._VALID_URL, url)
1075 if mobj is None:
e5f30ade 1076 self._downloader.report_error(u'Invalid URL: %s' % url)
59ae15a5
PH
1077 return
1078
8edc2cf8
PH
1079 video_id = mobj.group('id')
1080 if not mobj.group('proto'):
1081 url = 'https://' + url
1082 if mobj.group('direct_link'):
1083 url = 'https://vimeo.com/' + video_id
59ae15a5
PH
1084
1085 # Retrieve video webpage to extract further information
1086 request = compat_urllib_request.Request(url, None, std_headers)
1087 try:
1088 self.report_download_webpage(video_id)
f1171f7c
PH
1089 webpage_bytes = compat_urllib_request.urlopen(request).read()
1090 webpage = webpage_bytes.decode('utf-8')
59ae15a5 1091 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
e5f30ade 1092 self._downloader.report_error(u'Unable to retrieve video webpage: %s' % compat_str(err))
59ae15a5
PH
1093 return
1094
1095 # Now we begin extracting as much information as we can from what we
1096 # retrieved. First we extract the information common to all extractors,
1097 # and latter we extract those that are Vimeo specific.
1098 self.report_extraction(video_id)
1099
1100 # Extract the config JSON
59ae15a5 1101 try:
1ca63e3a 1102 config = webpage.split(' = {config:')[1].split(',assets:')[0]
59ae15a5
PH
1103 config = json.loads(config)
1104 except:
e5f30ade 1105 self._downloader.report_error(u'unable to extract info section')
59ae15a5 1106 return
cdb30764 1107
59ae15a5
PH
1108 # Extract title
1109 video_title = config["video"]["title"]
1110
77c4beab 1111 # Extract uploader and uploader_id
59ae15a5 1112 video_uploader = config["video"]["owner"]["name"]
77c4beab 1113 video_uploader_id = config["video"]["owner"]["url"].split('/')[-1]
59ae15a5
PH
1114
1115 # Extract video thumbnail
1116 video_thumbnail = config["video"]["thumbnail"]
1117
1118 # Extract video description
0dcfb234 1119 video_description = get_element_by_attribute("itemprop", "description", webpage)
59ae15a5
PH
1120 if video_description: video_description = clean_html(video_description)
1121 else: video_description = ''
1122
1123 # Extract upload date
1124 video_upload_date = None
6b3aef80 1125 mobj = re.search(r'<meta itemprop="dateCreated" content="(\d{4})-(\d{2})-(\d{2})T', webpage)
59ae15a5 1126 if mobj is not None:
6b3aef80 1127 video_upload_date = mobj.group(1) + mobj.group(2) + mobj.group(3)
59ae15a5
PH
1128
1129 # Vimeo specific: extract request signature and timestamp
1130 sig = config['request']['signature']
1131 timestamp = config['request']['timestamp']
1132
1133 # Vimeo specific: extract video codec and quality information
1134 # First consider quality, then codecs, then take everything
1135 # TODO bind to format param
1136 codecs = [('h264', 'mp4'), ('vp8', 'flv'), ('vp6', 'flv')]
1137 files = { 'hd': [], 'sd': [], 'other': []}
1138 for codec_name, codec_extension in codecs:
1139 if codec_name in config["video"]["files"]:
1140 if 'hd' in config["video"]["files"][codec_name]:
1141 files['hd'].append((codec_name, codec_extension, 'hd'))
1142 elif 'sd' in config["video"]["files"][codec_name]:
1143 files['sd'].append((codec_name, codec_extension, 'sd'))
1144 else:
1145 files['other'].append((codec_name, codec_extension, config["video"]["files"][codec_name][0]))
1146
1147 for quality in ('hd', 'sd', 'other'):
1148 if len(files[quality]) > 0:
1149 video_quality = files[quality][0][2]
1150 video_codec = files[quality][0][0]
1151 video_extension = files[quality][0][1]
1152 self._downloader.to_screen(u'[vimeo] %s: Downloading %s file at %s quality' % (video_id, video_codec.upper(), video_quality))
1153 break
1154 else:
e5f30ade 1155 self._downloader.report_error(u'no known codec found')
59ae15a5
PH
1156 return
1157
1158 video_url = "http://player.vimeo.com/play_redirect?clip_id=%s&sig=%s&time=%s&quality=%s&codecs=%s&type=moogaloop_local&embed_location=" \
1159 %(video_id, sig, timestamp, video_quality, video_codec.upper())
1160
1161 return [{
1162 'id': video_id,
1163 'url': video_url,
1164 'uploader': video_uploader,
77c4beab 1165 'uploader_id': video_uploader_id,
59ae15a5
PH
1166 'upload_date': video_upload_date,
1167 'title': video_title,
1168 'ext': video_extension,
1169 'thumbnail': video_thumbnail,
1170 'description': video_description,
1171 }]
d77c3dfd
FV
1172
1173
f2ad10a9 1174class ArteTvIE(InfoExtractor):
59ae15a5
PH
1175 """arte.tv information extractor."""
1176
1177 _VALID_URL = r'(?:http://)?videos\.arte\.tv/(?:fr|de)/videos/.*'
1178 _LIVE_URL = r'index-[0-9]+\.html$'
1179
1180 IE_NAME = u'arte.tv'
1181
1182 def __init__(self, downloader=None):
1183 InfoExtractor.__init__(self, downloader)
1184
1185 def report_download_webpage(self, video_id):
1186 """Report webpage download."""
1187 self._downloader.to_screen(u'[arte.tv] %s: Downloading webpage' % video_id)
1188
1189 def report_extraction(self, video_id):
1190 """Report information extraction."""
1191 self._downloader.to_screen(u'[arte.tv] %s: Extracting information' % video_id)
1192
1193 def fetch_webpage(self, url):
59ae15a5
PH
1194 request = compat_urllib_request.Request(url)
1195 try:
1196 self.report_download_webpage(url)
1197 webpage = compat_urllib_request.urlopen(request).read()
1198 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
e5f30ade 1199 self._downloader.report_error(u'Unable to retrieve video webpage: %s' % compat_str(err))
59ae15a5
PH
1200 return
1201 except ValueError as err:
e5f30ade 1202 self._downloader.report_error(u'Invalid URL: %s' % url)
59ae15a5
PH
1203 return
1204 return webpage
1205
1206 def grep_webpage(self, url, regex, regexFlags, matchTuples):
1207 page = self.fetch_webpage(url)
1208 mobj = re.search(regex, page, regexFlags)
1209 info = {}
1210
1211 if mobj is None:
e5f30ade 1212 self._downloader.report_error(u'Invalid URL: %s' % url)
59ae15a5
PH
1213 return
1214
1215 for (i, key, err) in matchTuples:
1216 if mobj.group(i) is None:
1217 self._downloader.trouble(err)
1218 return
1219 else:
1220 info[key] = mobj.group(i)
1221
1222 return info
1223
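    # grep_webpage() is used below like so (illustrative): matchTuples is a list of
    # (group_index, key, error_message) triples, so
    #   self.grep_webpage(url, r'src="(.*?/videothek_js.*?\.js)', 0,
    #                     [(1, 'url', u'ERROR: Invalid URL: %s' % url)])
    # returns {'url': '<matched path>'} or reports the given error message.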
1224 def extractLiveStream(self, url):
1225 video_lang = url.split('/')[-4]
1226 info = self.grep_webpage(
1227 url,
1228 r'src="(.*?/videothek_js.*?\.js)',
1229 0,
1230 [
1231 (1, 'url', u'ERROR: Invalid URL: %s' % url)
1232 ]
1233 )
1234 http_host = url.split('/')[2]
1235 next_url = 'http://%s%s' % (http_host, compat_urllib_parse.unquote(info.get('url')))
1236 info = self.grep_webpage(
1237 next_url,
1238 r'(s_artestras_scst_geoFRDE_' + video_lang + '.*?)\'.*?' +
1239 '(http://.*?\.swf).*?' +
1240 '(rtmp://.*?)\'',
1241 re.DOTALL,
1242 [
1243 (1, 'path', u'ERROR: could not extract video path: %s' % url),
1244 (2, 'player', u'ERROR: could not extract video player: %s' % url),
1245 (3, 'url', u'ERROR: could not extract video url: %s' % url)
1246 ]
1247 )
1248 video_url = u'%s/%s' % (info.get('url'), info.get('path'))
1249
1250 def extractPlus7Stream(self, url):
1251 video_lang = url.split('/')[-3]
1252 info = self.grep_webpage(
1253 url,
1254 r'param name="movie".*?videorefFileUrl=(http[^\'"&]*)',
1255 0,
1256 [
1257 (1, 'url', u'ERROR: Invalid URL: %s' % url)
1258 ]
1259 )
1260 next_url = compat_urllib_parse.unquote(info.get('url'))
1261 info = self.grep_webpage(
1262 next_url,
1263 r'<video lang="%s" ref="(http[^\'"&]*)' % video_lang,
1264 0,
1265 [
1266 (1, 'url', u'ERROR: Could not find <video> tag: %s' % url)
1267 ]
1268 )
1269 next_url = compat_urllib_parse.unquote(info.get('url'))
1270
1271 info = self.grep_webpage(
1272 next_url,
1273 r'<video id="(.*?)".*?>.*?' +
1274 '<name>(.*?)</name>.*?' +
1275 '<dateVideo>(.*?)</dateVideo>.*?' +
1276 '<url quality="hd">(.*?)</url>',
1277 re.DOTALL,
1278 [
1279 (1, 'id', u'ERROR: could not extract video id: %s' % url),
1280 (2, 'title', u'ERROR: could not extract video title: %s' % url),
1281 (3, 'date', u'ERROR: could not extract video date: %s' % url),
1282 (4, 'url', u'ERROR: could not extract video url: %s' % url)
1283 ]
1284 )
1285
1286 return {
1287 'id': info.get('id'),
1288 'url': compat_urllib_parse.unquote(info.get('url')),
1289 'uploader': u'arte.tv',
1290 'upload_date': info.get('date'),
93702113 1291 'title': info.get('title').decode('utf-8'),
59ae15a5
PH
1292 'ext': u'mp4',
1293 'format': u'NA',
1294 'player_url': None,
1295 }
1296
1297 def _real_extract(self, url):
1298 video_id = url.split('/')[-1]
1299 self.report_extraction(video_id)
1300
1301 if re.search(self._LIVE_URL, video_id) is not None:
1302 self.extractLiveStream(url)
1303 return
1304 else:
1305 info = self.extractPlus7Stream(url)
1306
1307 return [info]
f2ad10a9
CA
1308
1309
d77c3dfd 1310class GenericIE(InfoExtractor):
59ae15a5
PH
1311 """Generic last-resort information extractor."""
1312
1313 _VALID_URL = r'.*'
1314 IE_NAME = u'generic'
1315
1316 def __init__(self, downloader=None):
1317 InfoExtractor.__init__(self, downloader)
1318
1319 def report_download_webpage(self, video_id):
1320 """Report webpage download."""
3d342357
PH
1321 if not self._downloader.params.get('test', False):
1322 self._downloader.to_screen(u'WARNING: Falling back on generic information extractor.')
59ae15a5
PH
1323 self._downloader.to_screen(u'[generic] %s: Downloading webpage' % video_id)
1324
1325 def report_extraction(self, video_id):
1326 """Report information extraction."""
1327 self._downloader.to_screen(u'[generic] %s: Extracting information' % video_id)
1328
1329 def report_following_redirect(self, new_url):
1330 """Report information extraction."""
1331 self._downloader.to_screen(u'[redirect] Following redirect to %s' % new_url)
cdb30764 1332
59ae15a5
PH
1333 def _test_redirect(self, url):
1334 """Check if it is a redirect, like url shorteners, in case restart chain."""
1335 class HeadRequest(compat_urllib_request.Request):
1336 def get_method(self):
1337 return "HEAD"
1338
1339 class HEADRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
1340 """
cdb30764 1341 Subclass the HTTPRedirectHandler to make it use our
59ae15a5
PH
1342 HeadRequest also on the redirected URL
1343 """
cdb30764 1344 def redirect_request(self, req, fp, code, msg, headers, newurl):
59ae15a5 1345 if code in (301, 302, 303, 307):
cdb30764 1346 newurl = newurl.replace(' ', '%20')
59ae15a5
PH
1347 newheaders = dict((k,v) for k,v in req.headers.items()
1348 if k.lower() not in ("content-length", "content-type"))
cdb30764 1349 return HeadRequest(newurl,
59ae15a5 1350 headers=newheaders,
cdb30764
ND
1351 origin_req_host=req.get_origin_req_host(),
1352 unverifiable=True)
1353 else:
1354 raise compat_urllib_error.HTTPError(req.get_full_url(), code, msg, headers, fp)
59ae15a5
PH
1355
1356 class HTTPMethodFallback(compat_urllib_request.BaseHandler):
1357 """
1358 Fallback to GET if HEAD is not allowed (405 HTTP error)
1359 """
cdb30764 1360 def http_error_405(self, req, fp, code, msg, headers):
59ae15a5
PH
1361 fp.read()
1362 fp.close()
1363
1364 newheaders = dict((k,v) for k,v in req.headers.items()
1365 if k.lower() not in ("content-length", "content-type"))
cdb30764
ND
1366 return self.parent.open(compat_urllib_request.Request(req.get_full_url(),
1367 headers=newheaders,
1368 origin_req_host=req.get_origin_req_host(),
59ae15a5
PH
1369 unverifiable=True))
1370
1371 # Build our opener
cdb30764 1372 opener = compat_urllib_request.OpenerDirector()
59ae15a5
PH
1373 for handler in [compat_urllib_request.HTTPHandler, compat_urllib_request.HTTPDefaultErrorHandler,
1374 HTTPMethodFallback, HEADRedirectHandler,
7c038b3c 1375 compat_urllib_request.HTTPErrorProcessor, compat_urllib_request.HTTPSHandler]:
59ae15a5
PH
1376 opener.add_handler(handler())
1377
1378 response = opener.open(HeadRequest(url))
1379 new_url = response.geturl()
1380
1381 if url == new_url:
1382 return False
1383
1384 self.report_following_redirect(new_url)
1385 self._downloader.download([new_url])
1386 return True
1387
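    # _test_redirect issues a HEAD request (falling back to GET on a 405) so that
    # URL shorteners can be resolved without downloading the response body; e.g. a
    # hypothetical http://short.example/abc that 301-redirects to a YouTube watch
    # page is simply re-queued via self._downloader.download([new_url]).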
1388 def _real_extract(self, url):
1389 if self._test_redirect(url): return
1390
1391 video_id = url.split('/')[-1]
59ae15a5 1392 try:
3d342357 1393 webpage = self._download_webpage(url, video_id)
59ae15a5
PH
1394 except ValueError as err:
1395 # since this is the last-resort InfoExtractor, if
1396 # this error is thrown, it'll be thrown here
e5f30ade 1397 self._downloader.report_error(u'Invalid URL: %s' % url)
59ae15a5
PH
1398 return
1399
1400 self.report_extraction(video_id)
1401 # Start with something easy: JW Player in SWFObject
1402 mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
1403 if mobj is None:
1404 # Broaden the search a little bit
1405 mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
1013186a
PH
1406 if mobj is None:
1407 # Broaden the search a little bit: JWPlayer JS loader
1408 mobj = re.search(r'[^A-Za-z0-9]?file:\s*["\'](http[^\'"&]*)', webpage)
59ae15a5 1409 if mobj is None:
e5f30ade 1410 self._downloader.report_error(u'Invalid URL: %s' % url)
59ae15a5
PH
1411 return
1412
1413 # It's possible that one of the regexes
1414 # matched, but returned an empty group:
1415 if mobj.group(1) is None:
e5f30ade 1416 self._downloader.report_error(u'Invalid URL: %s' % url)
59ae15a5
PH
1417 return
1418
1419 video_url = compat_urllib_parse.unquote(mobj.group(1))
1420 video_id = os.path.basename(video_url)
1421
1422 # here's a fun little line of code for you:
1423 video_extension = os.path.splitext(video_id)[1][1:]
1424 video_id = os.path.splitext(video_id)[0]
1425
1426 # it's tempting to parse this further, but you would
1427 # have to take into account all the variations like
1428 # Video Title - Site Name
1429 # Site Name | Video Title
1430 # Video Title - Tagline | Site Name
1431 # and so on and so forth; it's just not practical
1432 mobj = re.search(r'<title>(.*)</title>', webpage)
1433 if mobj is None:
e5f30ade 1434 self._downloader.report_error(u'unable to extract title')
59ae15a5 1435 return
f1171f7c 1436 video_title = mobj.group(1)
59ae15a5
PH
1437
1438 # video uploader is domain name
1439 mobj = re.match(r'(?:https?://)?([^/]*)/.*', url)
1440 if mobj is None:
e5f30ade 1441 self._downloader.report_error(u'unable to extract video uploader')
59ae15a5 1442 return
f1171f7c 1443 video_uploader = mobj.group(1)
59ae15a5
PH
1444
1445 return [{
f1171f7c
PH
1446 'id': video_id,
1447 'url': video_url,
59ae15a5
PH
1448 'uploader': video_uploader,
1449 'upload_date': None,
1450 'title': video_title,
f1171f7c 1451 'ext': video_extension,
59ae15a5 1452 }]
d77c3dfd
FV
1453
1454
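# Illustrative sketch (not part of the original module): GenericIE above first
# probes the URL with HEAD requests to detect shortener-style redirects before
# it falls back to regex scraping. A minimal standalone version of that probe
# might look like this; the helper name is hypothetical.
def _example_resolve_redirect(url):
    """Return the final URL a HEAD request ends up at, or the input URL."""
    request = compat_urllib_request.Request(url)
    request.get_method = lambda: 'HEAD'  # fetch headers only
    try:
        response = compat_urllib_request.urlopen(request)
    except compat_urllib_error.HTTPError:
        # Some servers answer HEAD with 405; the real extractor retries with GET.
        return url
    return response.geturl()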
1455class YoutubeSearchIE(InfoExtractor):
59ae15a5
PH
1456 """Information Extractor for YouTube search queries."""
1457 _VALID_URL = r'ytsearch(\d+|all)?:[\s\S]+'
1458 _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
1459 _max_youtube_results = 1000
1460 IE_NAME = u'youtube:search'
1461
1462 def __init__(self, downloader=None):
1463 InfoExtractor.__init__(self, downloader)
1464
1465 def report_download_page(self, query, pagenum):
1466 """Report attempt to download search page with given number."""
1467 query = query.decode(preferredencoding())
1468 self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
1469
1470 def _real_extract(self, query):
1471 mobj = re.match(self._VALID_URL, query)
1472 if mobj is None:
e5f30ade 1473 self._downloader.report_error(u'invalid search query "%s"' % query)
59ae15a5
PH
1474 return
1475
1476 prefix, query = query.split(':')
1477 prefix = prefix[8:]
1478 query = query.encode('utf-8')
1479 if prefix == '':
1480 self._download_n_results(query, 1)
1481 return
1482 elif prefix == 'all':
1483 self._download_n_results(query, self._max_youtube_results)
1484 return
1485 else:
1486 try:
1487 n = int(prefix)
1488 if n <= 0:
e5f30ade 1489 self._downloader.report_error(u'invalid download number %s for query "%s"' % (n, query))
59ae15a5
PH
1490 return
1491 elif n > self._max_youtube_results:
2e5457be 1492 self._downloader.report_warning(u'ytsearch returns max %i results (you requested %i)' % (self._max_youtube_results, n))
59ae15a5
PH
1493 n = self._max_youtube_results
1494 self._download_n_results(query, n)
1495 return
1496 except ValueError: # parsing prefix as integer fails
1497 self._download_n_results(query, 1)
1498 return
1499
1500 def _download_n_results(self, query, n):
1501 """Downloads a specified number of results for a query"""
1502
1503 video_ids = []
1504 pagenum = 0
1505 limit = n
1506
1507 while (50 * pagenum) < limit:
1508 self.report_download_page(query, pagenum+1)
1509 result_url = self._API_URL % (compat_urllib_parse.quote_plus(query), (50*pagenum)+1)
1510 request = compat_urllib_request.Request(result_url)
1511 try:
d1b7a243 1512 data = compat_urllib_request.urlopen(request).read().decode('utf-8')
59ae15a5 1513 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
e5f30ade 1514 self._downloader.report_error(u'unable to download API page: %s' % compat_str(err))
59ae15a5
PH
1515 return
1516 api_response = json.loads(data)['data']
1517
9e07cf29
J
1518 if 'items' not in api_response:
1519 self._downloader.report_error(u'[youtube] No video results')
1520 return
1521
59ae15a5
PH
1522 new_ids = list(video['id'] for video in api_response['items'])
1523 video_ids += new_ids
1524
1525 limit = min(n, api_response['totalItems'])
1526 pagenum += 1
1527
1528 if len(video_ids) > n:
1529 video_ids = video_ids[:n]
1530 for id in video_ids:
1531 self._downloader.download(['http://www.youtube.com/watch?v=%s' % id])
1532 return
d77c3dfd
FV
1533
1534
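# Illustrative sketch (not part of the original module): YoutubeSearchIE above
# (and the Google/Yahoo search extractors below) all read an optional result
# count out of the query prefix, e.g. "ytsearch25:funny cats" asks for 25
# results and "ytsearchall:..." for the service maximum. A simplified version
# of that parsing, with a hypothetical helper name:
def _example_parse_search_prefix(query, keyword='ytsearch', default_n=1, max_n=1000):
    """Return (n, terms) for a query such as 'ytsearch25:funny cats'."""
    prefix, terms = query.split(':', 1)
    count = prefix[len(keyword):]  # '', 'all' or a decimal number
    if count == '':
        return default_n, terms
    if count == 'all':
        return max_n, terms
    return min(int(count), max_n), terms
# e.g. _example_parse_search_prefix('ytsearch5:python tutorial') -> (5, 'python tutorial')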
1535class GoogleSearchIE(InfoExtractor):
59ae15a5
PH
1536 """Information Extractor for Google Video search queries."""
1537 _VALID_URL = r'gvsearch(\d+|all)?:[\s\S]+'
1538 _TEMPLATE_URL = 'http://video.google.com/videosearch?q=%s+site:video.google.com&start=%s&hl=en'
1539 _VIDEO_INDICATOR = r'<a href="http://video\.google\.com/videoplay\?docid=([^"\&]+)'
1540 _MORE_PAGES_INDICATOR = r'class="pn" id="pnnext"'
1541 _max_google_results = 1000
1542 IE_NAME = u'video.google:search'
1543
1544 def __init__(self, downloader=None):
1545 InfoExtractor.__init__(self, downloader)
1546
1547 def report_download_page(self, query, pagenum):
1548 """Report attempt to download playlist page with given number."""
1549 query = query.decode(preferredencoding())
1550 self._downloader.to_screen(u'[video.google] query "%s": Downloading page %s' % (query, pagenum))
1551
1552 def _real_extract(self, query):
1553 mobj = re.match(self._VALID_URL, query)
1554 if mobj is None:
e5f30ade 1555 self._downloader.report_error(u'invalid search query "%s"' % query)
59ae15a5
PH
1556 return
1557
1558 prefix, query = query.split(':')
1559 prefix = prefix[8:]
1560 query = query.encode('utf-8')
1561 if prefix == '':
1562 self._download_n_results(query, 1)
1563 return
1564 elif prefix == 'all':
1565 self._download_n_results(query, self._max_google_results)
1566 return
1567 else:
1568 try:
1569 n = int(prefix)
1570 if n <= 0:
e5f30ade 1571 self._downloader.report_error(u'invalid download number %s for query "%s"' % (n, query))
59ae15a5
PH
1572 return
1573 elif n > self._max_google_results:
2e5457be 1574 self._downloader.report_warning(u'gvsearch returns max %i results (you requested %i)' % (self._max_google_results, n))
59ae15a5
PH
1575 n = self._max_google_results
1576 self._download_n_results(query, n)
1577 return
1578 except ValueError: # parsing prefix as integer fails
1579 self._download_n_results(query, 1)
1580 return
1581
1582 def _download_n_results(self, query, n):
1583 """Downloads a specified number of results for a query"""
1584
1585 video_ids = []
1586 pagenum = 0
1587
1588 while True:
1589 self.report_download_page(query, pagenum)
1590 result_url = self._TEMPLATE_URL % (compat_urllib_parse.quote_plus(query), pagenum*10)
1591 request = compat_urllib_request.Request(result_url)
1592 try:
1593 page = compat_urllib_request.urlopen(request).read()
1594 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
e5f30ade 1595 self._downloader.report_error(u'unable to download webpage: %s' % compat_str(err))
59ae15a5
PH
1596 return
1597
1598 # Extract video identifiers
1599 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1600 video_id = mobj.group(1)
1601 if video_id not in video_ids:
1602 video_ids.append(video_id)
1603 if len(video_ids) == n:
1604 # Specified n videos reached
1605 for id in video_ids:
1606 self._downloader.download(['http://video.google.com/videoplay?docid=%s' % id])
1607 return
1608
1609 if re.search(self._MORE_PAGES_INDICATOR, page) is None:
1610 for id in video_ids:
1611 self._downloader.download(['http://video.google.com/videoplay?docid=%s' % id])
1612 return
1613
1614 pagenum = pagenum + 1
d77c3dfd
FV
1615
1616
1617class YahooSearchIE(InfoExtractor):
59ae15a5 1618 """Information Extractor for Yahoo! Video search queries."""
93702113
FV
1619
1620 _WORKING = False
59ae15a5
PH
1621 _VALID_URL = r'yvsearch(\d+|all)?:[\s\S]+'
1622 _TEMPLATE_URL = 'http://video.yahoo.com/search/?p=%s&o=%s'
1623 _VIDEO_INDICATOR = r'href="http://video\.yahoo\.com/watch/([0-9]+/[0-9]+)"'
1624 _MORE_PAGES_INDICATOR = r'\s*Next'
1625 _max_yahoo_results = 1000
1626 IE_NAME = u'video.yahoo:search'
1627
1628 def __init__(self, downloader=None):
1629 InfoExtractor.__init__(self, downloader)
1630
1631 def report_download_page(self, query, pagenum):
1632 """Report attempt to download playlist page with given number."""
1633 query = query.decode(preferredencoding())
1634 self._downloader.to_screen(u'[video.yahoo] query "%s": Downloading page %s' % (query, pagenum))
1635
1636 def _real_extract(self, query):
1637 mobj = re.match(self._VALID_URL, query)
1638 if mobj is None:
e5f30ade 1639 self._downloader.report_error(u'invalid search query "%s"' % query)
59ae15a5
PH
1640 return
1641
1642 prefix, query = query.split(':')
1643 prefix = prefix[8:]
1644 query = query.encode('utf-8')
1645 if prefix == '':
1646 self._download_n_results(query, 1)
1647 return
1648 elif prefix == 'all':
1649 self._download_n_results(query, self._max_yahoo_results)
1650 return
1651 else:
1652 try:
1653 n = int(prefix)
1654 if n <= 0:
e5f30ade 1655 self._downloader.report_error(u'invalid download number %s for query "%s"' % (n, query))
59ae15a5
PH
1656 return
1657 elif n > self._max_yahoo_results:
2e5457be 1658 self._downloader.report_warning(u'yvsearch returns max %i results (you requested %i)' % (self._max_yahoo_results, n))
59ae15a5
PH
1659 n = self._max_yahoo_results
1660 self._download_n_results(query, n)
1661 return
1662 except ValueError: # parsing prefix as integer fails
1663 self._download_n_results(query, 1)
1664 return
1665
1666 def _download_n_results(self, query, n):
1667 """Downloads a specified number of results for a query"""
1668
1669 video_ids = []
1670 already_seen = set()
1671 pagenum = 1
1672
1673 while True:
1674 self.report_download_page(query, pagenum)
1675 result_url = self._TEMPLATE_URL % (compat_urllib_parse.quote_plus(query), pagenum)
1676 request = compat_urllib_request.Request(result_url)
1677 try:
1678 page = compat_urllib_request.urlopen(request).read()
1679 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
e5f30ade 1680 self._downloader.report_error(u'unable to download webpage: %s' % compat_str(err))
59ae15a5
PH
1681 return
1682
1683 # Extract video identifiers
1684 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1685 video_id = mobj.group(1)
1686 if video_id not in already_seen:
1687 video_ids.append(video_id)
1688 already_seen.add(video_id)
1689 if len(video_ids) == n:
1690 # Specified n videos reached
1691 for id in video_ids:
1692 self._downloader.download(['http://video.yahoo.com/watch/%s' % id])
1693 return
1694
1695 if re.search(self._MORE_PAGES_INDICATOR, page) is None:
1696 for id in video_ids:
1697 self._downloader.download(['http://video.yahoo.com/watch/%s' % id])
1698 return
1699
1700 pagenum = pagenum + 1
d77c3dfd
FV
1701
1702
1703class YoutubePlaylistIE(InfoExtractor):
59ae15a5
PH
1704 """Information Extractor for YouTube playlists."""
1705
6324fd1d
FV
1706 _VALID_URL = r"""(?:
1707 (?:https?://)?
1708 (?:\w+\.)?
1709 youtube\.com/
1710 (?:
89de9eb1
FV
1711 (?:course|view_play_list|my_playlists|artist|playlist|watch)
1712 \? (?:.*?&)*? (?:p|a|list)=
6324fd1d
FV
1713 | user/.*?/user/
1714 | p/
1715 | user/.*?#[pg]/c/
1716 )
89de9eb1
FV
1717 ((?:PL|EC|UU)?[0-9A-Za-z-_]{10,})
1718 .*
1719 |
1720 ((?:PL|EC|UU)[0-9A-Za-z-_]{10,})
1721 )"""
6324fd1d
FV
1722 _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json'
1723 _MAX_RESULTS = 50
59ae15a5
PH
1724 IE_NAME = u'youtube:playlist'
1725
1726 def __init__(self, downloader=None):
1727 InfoExtractor.__init__(self, downloader)
1728
89de9eb1
FV
1729 @classmethod
1730 def suitable(cls, url):
6324fd1d 1731 """Receives a URL and returns True if suitable for this IE."""
89de9eb1 1732 return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
6324fd1d 1733
59ae15a5
PH
1734 def report_download_page(self, playlist_id, pagenum):
1735 """Report attempt to download playlist page with given number."""
1736 self._downloader.to_screen(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum))
1737
1738 def _real_extract(self, url):
1739 # Extract playlist id
6324fd1d 1740 mobj = re.match(self._VALID_URL, url, re.VERBOSE)
59ae15a5 1741 if mobj is None:
e5f30ade 1742 self._downloader.report_error(u'invalid url: %s' % url)
59ae15a5
PH
1743 return
1744
6324fd1d 1745 # Download playlist videos from API
89de9eb1 1746 playlist_id = mobj.group(1) or mobj.group(2)
6324fd1d
FV
1747 page_num = 1
1748 videos = []
59ae15a5
PH
1749
1750 while True:
6324fd1d
FV
1751 self.report_download_page(playlist_id, page_num)
1752
1753 url = self._TEMPLATE_URL % (playlist_id, self._MAX_RESULTS, self._MAX_RESULTS * (page_num - 1) + 1)
59ae15a5 1754 try:
6324fd1d 1755 page = compat_urllib_request.urlopen(url).read().decode('utf8')
59ae15a5 1756 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
e5f30ade 1757 self._downloader.report_error(u'unable to download webpage: %s' % compat_str(err))
59ae15a5
PH
1758 return
1759
6324fd1d
FV
1760 try:
1761 response = json.loads(page)
1762 except ValueError as err:
e5f30ade 1763 self._downloader.report_error(u'Invalid JSON in API response: ' + compat_str(err))
6324fd1d 1764 return
59ae15a5 1765
89de9eb1 1766 if 'feed' not in response or 'entry' not in response['feed']:
e5f30ade 1767 self._downloader.report_error(u'Got a malformed response from YouTube API')
89de9eb1
FV
1768 return
1769 videos += [ (entry['yt$position']['$t'], entry['content']['src'])
1770 for entry in response['feed']['entry']
1771 if 'content' in entry ]
6324fd1d
FV
1772
1773 if len(response['feed']['entry']) < self._MAX_RESULTS:
59ae15a5 1774 break
6324fd1d 1775 page_num += 1
59ae15a5 1776
691db5ba 1777 videos = [v[1] for v in sorted(videos)]
6324fd1d 1778 total = len(videos)
9789a05c 1779
59ae15a5
PH
1780 playliststart = self._downloader.params.get('playliststart', 1) - 1
1781 playlistend = self._downloader.params.get('playlistend', -1)
1782 if playlistend == -1:
6324fd1d 1783 videos = videos[playliststart:]
59ae15a5 1784 else:
6324fd1d 1785 videos = videos[playliststart:playlistend]
59ae15a5 1786
6324fd1d 1787 if len(videos) == total:
9789a05c
FV
1788 self._downloader.to_screen(u'[youtube] PL %s: Found %i videos' % (playlist_id, total))
1789 else:
6324fd1d 1790 self._downloader.to_screen(u'[youtube] PL %s: Found %i videos, downloading %i' % (playlist_id, total, len(videos)))
9789a05c 1791
6324fd1d
FV
1792 for video in videos:
1793 self._downloader.download([video])
59ae15a5 1794 return
d77c3dfd
FV
1795
1796
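# Illustrative sketch (not part of the original module): YoutubePlaylistIE
# above walks the GData playlist feed 50 entries at a time, requesting
# start-index = MAX_RESULTS * (page - 1) + 1 and stopping as soon as a page
# comes back short. The loop shape, with a hypothetical fetch_page callable
# that returns the decoded JSON feed:
def _example_collect_playlist_entries(fetch_page, max_results=50):
    """Collect all 'entry' dicts from consecutive GData feed pages."""
    entries = []
    page_num = 1
    while True:
        start_index = max_results * (page_num - 1) + 1
        feed = fetch_page(start_index, max_results)
        page_entries = feed['feed'].get('entry', [])
        entries.extend(page_entries)
        if len(page_entries) < max_results:
            break  # a short page means nothing is left to fetch
        page_num += 1
    return entries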
902b2a0a 1797class YoutubeChannelIE(InfoExtractor):
59ae15a5
PH
1798 """Information Extractor for YouTube channels."""
1799
1800 _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)(?:/.*)?$"
1801 _TEMPLATE_URL = 'http://www.youtube.com/channel/%s/videos?sort=da&flow=list&view=0&page=%s&gl=US&hl=en'
9789a05c 1802 _MORE_PAGES_INDICATOR = u"Next \N{RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK}"
59ae15a5
PH
1803 IE_NAME = u'youtube:channel'
1804
1805 def report_download_page(self, channel_id, pagenum):
1806 """Report attempt to download channel page with given number."""
1807 self._downloader.to_screen(u'[youtube] Channel %s: Downloading page #%s' % (channel_id, pagenum))
1808
1809 def _real_extract(self, url):
1810 # Extract channel id
1811 mobj = re.match(self._VALID_URL, url)
1812 if mobj is None:
e5f30ade 1813 self._downloader.report_error(u'invalid url: %s' % url)
59ae15a5
PH
1814 return
1815
1816 # Download channel pages
1817 channel_id = mobj.group(1)
1818 video_ids = []
1819 pagenum = 1
1820
1821 while True:
1822 self.report_download_page(channel_id, pagenum)
1823 url = self._TEMPLATE_URL % (channel_id, pagenum)
1824 request = compat_urllib_request.Request(url)
1825 try:
9789a05c 1826 page = compat_urllib_request.urlopen(request).read().decode('utf8')
59ae15a5 1827 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
e5f30ade 1828 self._downloader.report_error(u'unable to download webpage: %s' % compat_str(err))
59ae15a5
PH
1829 return
1830
1831 # Extract video identifiers
1832 ids_in_page = []
1833 for mobj in re.finditer(r'href="/watch\?v=([0-9A-Za-z_-]+)&', page):
1834 if mobj.group(1) not in ids_in_page:
1835 ids_in_page.append(mobj.group(1))
1836 video_ids.extend(ids_in_page)
1837
9789a05c 1838 if self._MORE_PAGES_INDICATOR not in page:
59ae15a5
PH
1839 break
1840 pagenum = pagenum + 1
1841
9789a05c
FV
1842 self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))
1843
59ae15a5
PH
1844 for id in video_ids:
1845 self._downloader.download(['http://www.youtube.com/watch?v=%s' % id])
1846 return
902b2a0a
FV
1847
1848
d77c3dfd 1849class YoutubeUserIE(InfoExtractor):
59ae15a5 1850 """Information Extractor for YouTube users."""
d77c3dfd 1851
59ae15a5
PH
1852 _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/user/)|ytuser:)([A-Za-z0-9_-]+)'
1853 _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
1854 _GDATA_PAGE_SIZE = 50
1855 _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d'
1856 _VIDEO_INDICATOR = r'/watch\?v=(.+?)[\<&]'
1857 IE_NAME = u'youtube:user'
d77c3dfd 1858
59ae15a5
PH
1859 def __init__(self, downloader=None):
1860 InfoExtractor.__init__(self, downloader)
d77c3dfd 1861
59ae15a5
PH
1862 def report_download_page(self, username, start_index):
1863 """Report attempt to download user page."""
1864 self._downloader.to_screen(u'[youtube] user %s: Downloading video ids from %d to %d' %
1865 (username, start_index, start_index + self._GDATA_PAGE_SIZE))
d77c3dfd 1866
59ae15a5
PH
1867 def _real_extract(self, url):
1868 # Extract username
1869 mobj = re.match(self._VALID_URL, url)
1870 if mobj is None:
e5f30ade 1871 self._downloader.report_error(u'invalid url: %s' % url)
59ae15a5 1872 return
d77c3dfd 1873
59ae15a5 1874 username = mobj.group(1)
d77c3dfd 1875
59ae15a5
PH
1876 # Download video ids using YouTube Data API. Result size per
1877 # query is limited (currently to 50 videos) so we need to query
1878 # page by page until there are no video ids - it means we got
1879 # all of them.
d77c3dfd 1880
59ae15a5
PH
1881 video_ids = []
1882 pagenum = 0
d77c3dfd 1883
59ae15a5
PH
1884 while True:
1885 start_index = pagenum * self._GDATA_PAGE_SIZE + 1
1886 self.report_download_page(username, start_index)
d77c3dfd 1887
59ae15a5 1888 request = compat_urllib_request.Request(self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index))
d77c3dfd 1889
59ae15a5 1890 try:
80d3177e 1891 page = compat_urllib_request.urlopen(request).read().decode('utf-8')
59ae15a5 1892 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
e5f30ade 1893 self._downloader.report_error(u'unable to download webpage: %s' % compat_str(err))
59ae15a5 1894 return
d77c3dfd 1895
59ae15a5
PH
1896 # Extract video identifiers
1897 ids_in_page = []
d77c3dfd 1898
59ae15a5
PH
1899 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1900 if mobj.group(1) not in ids_in_page:
1901 ids_in_page.append(mobj.group(1))
d77c3dfd 1902
59ae15a5 1903 video_ids.extend(ids_in_page)
d77c3dfd 1904
59ae15a5
PH
1905 # A little optimization - if current page is not
1906 # "full", ie. does not contain PAGE_SIZE video ids then
1907 # we can assume that this page is the last one - there
1908 # are no more ids on further pages - no need to query
1909 # again.
d77c3dfd 1910
59ae15a5
PH
1911 if len(ids_in_page) < self._GDATA_PAGE_SIZE:
1912 break
d77c3dfd 1913
59ae15a5 1914 pagenum += 1
d77c3dfd 1915
59ae15a5
PH
1916 all_ids_count = len(video_ids)
1917 playliststart = self._downloader.params.get('playliststart', 1) - 1
1918 playlistend = self._downloader.params.get('playlistend', -1)
d77c3dfd 1919
59ae15a5
PH
1920 if playlistend == -1:
1921 video_ids = video_ids[playliststart:]
1922 else:
1923 video_ids = video_ids[playliststart:playlistend]
d77c3dfd 1924
59ae15a5
PH
1925 self._downloader.to_screen(u"[youtube] user %s: Collected %d video ids (downloading %d of them)" %
1926 (username, all_ids_count, len(video_ids)))
d77c3dfd 1927
59ae15a5
PH
1928 for video_id in video_ids:
1929 self._downloader.download(['http://www.youtube.com/watch?v=%s' % video_id])
d77c3dfd
FV
1930
1931
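# Illustrative sketch (not part of the original module): YoutubeUserIE above
# and BlipTVUserIE below share the same two ideas - stop paging as soon as a
# page holds fewer ids than the page size, then apply the --playlist-start /
# --playlist-end window. The windowing step, with a hypothetical helper name:
def _example_apply_playlist_window(video_ids, playliststart=1, playlistend=-1):
    """Return the slice of ids selected by the 1-based start/end options."""
    start = playliststart - 1      # options are 1-based, Python slices 0-based
    if playlistend == -1:          # -1 means "until the end of the list"
        return video_ids[start:]
    return video_ids[start:playlistend]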
eeeb4daa 1932class BlipTVUserIE(InfoExtractor):
59ae15a5 1933 """Information Extractor for blip.tv users."""
eeeb4daa 1934
59ae15a5
PH
1935 _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?blip\.tv/)|bliptvuser:)([^/]+)/*$'
1936 _PAGE_SIZE = 12
1937 IE_NAME = u'blip.tv:user'
eeeb4daa 1938
59ae15a5
PH
1939 def __init__(self, downloader=None):
1940 InfoExtractor.__init__(self, downloader)
eeeb4daa 1941
59ae15a5
PH
1942 def report_download_page(self, username, pagenum):
1943 """Report attempt to download user page."""
1944 self._downloader.to_screen(u'[%s] user %s: Downloading video ids from page %d' %
1945 (self.IE_NAME, username, pagenum))
eeeb4daa 1946
59ae15a5
PH
1947 def _real_extract(self, url):
1948 # Extract username
1949 mobj = re.match(self._VALID_URL, url)
1950 if mobj is None:
e5f30ade 1951 self._downloader.report_error(u'invalid url: %s' % url)
59ae15a5 1952 return
eeeb4daa 1953
59ae15a5 1954 username = mobj.group(1)
eeeb4daa 1955
59ae15a5 1956 page_base = 'http://m.blip.tv/pr/show_get_full_episode_list?users_id=%s&lite=0&esi=1'
eeeb4daa 1957
59ae15a5 1958 request = compat_urllib_request.Request(url)
eeeb4daa 1959
59ae15a5
PH
1960 try:
1961 page = compat_urllib_request.urlopen(request).read().decode('utf-8')
1962 mobj = re.search(r'data-users-id="([^"]+)"', page)
1963 page_base = page_base % mobj.group(1)
1964 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
e5f30ade 1965 self._downloader.report_error(u'unable to download webpage: %s' % compat_str(err))
59ae15a5 1966 return
eeeb4daa
JCGS
1967
1968
59ae15a5
PH
1969 # Download video ids using BlipTV Ajax calls. Result size per
1970 # query is limited (currently to 12 videos) so we need to query
1971 # page by page until there are no video ids - it means we got
1972 # all of them.
eeeb4daa 1973
59ae15a5
PH
1974 video_ids = []
1975 pagenum = 1
eeeb4daa 1976
59ae15a5
PH
1977 while True:
1978 self.report_download_page(username, pagenum)
450e7099
PH
1979 url = page_base + "&page=" + str(pagenum)
1980 request = compat_urllib_request.Request(url)
59ae15a5
PH
1981 try:
1982 page = compat_urllib_request.urlopen(request).read().decode('utf-8')
1983 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
e5f30ade 1984 self._downloader.report_error(u'unable to download webpage: %s' % compat_str(err))
59ae15a5 1985 return
eeeb4daa 1986
59ae15a5
PH
1987 # Extract video identifiers
1988 ids_in_page = []
eeeb4daa 1989
59ae15a5
PH
1990 for mobj in re.finditer(r'href="/([^"]+)"', page):
1991 if mobj.group(1) not in ids_in_page:
1992 ids_in_page.append(unescapeHTML(mobj.group(1)))
eeeb4daa 1993
59ae15a5 1994 video_ids.extend(ids_in_page)
eeeb4daa 1995
59ae15a5
PH
1996 # A little optimization - if current page is not
1997 # "full", ie. does not contain PAGE_SIZE video ids then
1998 # we can assume that this page is the last one - there
1999 # are no more ids on further pages - no need to query
2000 # again.
eeeb4daa 2001
59ae15a5
PH
2002 if len(ids_in_page) < self._PAGE_SIZE:
2003 break
eeeb4daa 2004
59ae15a5 2005 pagenum += 1
eeeb4daa 2006
59ae15a5
PH
2007 all_ids_count = len(video_ids)
2008 playliststart = self._downloader.params.get('playliststart', 1) - 1
2009 playlistend = self._downloader.params.get('playlistend', -1)
eeeb4daa 2010
59ae15a5
PH
2011 if playlistend == -1:
2012 video_ids = video_ids[playliststart:]
2013 else:
2014 video_ids = video_ids[playliststart:playlistend]
eeeb4daa 2015
59ae15a5
PH
2016 self._downloader.to_screen(u"[%s] user %s: Collected %d video ids (downloading %d of them)" %
2017 (self.IE_NAME, username, all_ids_count, len(video_ids)))
eeeb4daa 2018
59ae15a5
PH
2019 for video_id in video_ids:
2020 self._downloader.download([u'http://blip.tv/'+video_id])
eeeb4daa
JCGS
2021
2022
d77c3dfd 2023class DepositFilesIE(InfoExtractor):
59ae15a5
PH
2024 """Information extractor for depositfiles.com"""
2025
2026 _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles\.com/(?:../(?#locale))?files/(.+)'
59ae15a5
PH
2027
2028 def report_download_webpage(self, file_id):
2029 """Report webpage download."""
2030 self._downloader.to_screen(u'[DepositFiles] %s: Downloading webpage' % file_id)
2031
2032 def report_extraction(self, file_id):
2033 """Report information extraction."""
2034 self._downloader.to_screen(u'[DepositFiles] %s: Extracting information' % file_id)
2035
2036 def _real_extract(self, url):
2037 file_id = url.split('/')[-1]
2038 # Rebuild url in english locale
2039 url = 'http://depositfiles.com/en/files/' + file_id
2040
2041 # Retrieve file webpage with 'Free download' button pressed
2042 free_download_indication = { 'gateway_result' : '1' }
2043 request = compat_urllib_request.Request(url, compat_urllib_parse.urlencode(free_download_indication))
2044 try:
2045 self.report_download_webpage(file_id)
2046 webpage = compat_urllib_request.urlopen(request).read()
2047 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
e5f30ade 2048 self._downloader.report_error(u'Unable to retrieve file webpage: %s' % compat_str(err))
59ae15a5
PH
2049 return
2050
2051 # Search for the real file URL
2052 mobj = re.search(r'<form action="(http://fileshare.+?)"', webpage)
2053 if (mobj is None) or (mobj.group(1) is None):
2054 # Try to figure out reason of the error.
2055 mobj = re.search(r'<strong>(Attention.*?)</strong>', webpage, re.DOTALL)
2056 if (mobj is not None) and (mobj.group(1) is not None):
2057 restriction_message = re.sub(r'\s+', ' ', mobj.group(1)).strip()
e5f30ade 2058 self._downloader.report_error(u'%s' % restriction_message)
59ae15a5 2059 else:
e5f30ade 2060 self._downloader.report_error(u'unable to extract download URL from: %s' % url)
59ae15a5
PH
2061 return
2062
2063 file_url = mobj.group(1)
2064 file_extension = os.path.splitext(file_url)[1][1:]
2065
2066 # Search for file title
2067 mobj = re.search(r'<b title="(.*?)">', webpage)
2068 if mobj is None:
e5f30ade 2069 self._downloader.report_error(u'unable to extract title')
59ae15a5
PH
2070 return
2071 file_title = mobj.group(1).decode('utf-8')
2072
2073 return [{
2074 'id': file_id.decode('utf-8'),
2075 'url': file_url.decode('utf-8'),
2076 'uploader': None,
2077 'upload_date': None,
2078 'title': file_title,
2079 'ext': file_extension.decode('utf-8'),
2080 }]
d77c3dfd
FV
2081
2082
2083class FacebookIE(InfoExtractor):
59ae15a5
PH
2084 """Information Extractor for Facebook"""
2085
59ae15a5
PH
2086 _VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook\.com/(?:video/video|photo)\.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)'
2087 _LOGIN_URL = 'https://login.facebook.com/login.php?m&next=http%3A%2F%2Fm.facebook.com%2Fhome.php&'
2088 _NETRC_MACHINE = 'facebook'
59ae15a5
PH
2089 IE_NAME = u'facebook'
2090
59ae15a5
PH
2091 def report_login(self):
2092 """Report attempt to log in."""
b954070d 2093 self._downloader.to_screen(u'[%s] Logging in' % self.IE_NAME)
59ae15a5
PH
2094
2095 def _real_initialize(self):
2096 if self._downloader is None:
2097 return
2098
2099 useremail = None
2100 password = None
2101 downloader_params = self._downloader.params
2102
2103 # Attempt to use provided username and password or .netrc data
2104 if downloader_params.get('username', None) is not None:
2105 useremail = downloader_params['username']
2106 password = downloader_params['password']
2107 elif downloader_params.get('usenetrc', False):
2108 try:
2109 info = netrc.netrc().authenticators(self._NETRC_MACHINE)
2110 if info is not None:
2111 useremail = info[0]
2112 password = info[2]
2113 else:
2114 raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
2115 except (IOError, netrc.NetrcParseError) as err:
2e5457be 2116 self._downloader.report_warning(u'parsing .netrc: %s' % compat_str(err))
59ae15a5
PH
2117 return
2118
2119 if useremail is None:
2120 return
2121
2122 # Log in
2123 login_form = {
2124 'email': useremail,
2125 'pass': password,
2126 'login': 'Log+In'
2127 }
2128 request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
2129 try:
2130 self.report_login()
2131 login_results = compat_urllib_request.urlopen(request).read()
2132 if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None:
2e5457be 2133 self._downloader.report_warning(u'unable to log in: bad username/password, or exceeded login rate limit (~3/min). Check credentials or wait.')
59ae15a5
PH
2134 return
2135 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2e5457be 2136 self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
59ae15a5
PH
2137 return
2138
2139 def _real_extract(self, url):
2140 mobj = re.match(self._VALID_URL, url)
2141 if mobj is None:
e5f30ade 2142 self._downloader.report_error(u'invalid URL: %s' % url)
59ae15a5
PH
2143 return
2144 video_id = mobj.group('ID')
2145
b954070d
PH
2146 url = 'https://www.facebook.com/video/video.php?v=%s' % video_id
2147 webpage = self._download_webpage(url, video_id)
2148
2149 BEFORE = '[["allowFullScreen","true"],["allowScriptAccess","always"],["salign","tl"],["scale","noscale"],["wmode","opaque"]].forEach(function(param) {swf.addParam(param[0], param[1]);});\n'
2150 AFTER = '.forEach(function(variable) {swf.addVariable(variable[0], variable[1]);});'
2151 m = re.search(re.escape(BEFORE) + '(.*?)' + re.escape(AFTER), webpage)
2152 if not m:
2153 raise ExtractorError(u'Cannot parse data')
2154 data = dict(json.loads(m.group(1)))
edba5137
PH
2155 params_raw = compat_urllib_parse.unquote(data['params'])
2156 params = json.loads(params_raw)
2157 video_url = params['hd_src']
7796e8c2
PH
2158 if not video_url:
2159 video_url = params['sd_src']
2160 if not video_url:
2161 raise ExtractorError(u'Cannot find video URL')
edba5137 2162 video_duration = int(params['video_duration'])
b954070d
PH
2163
2164 m = re.search('<h2 class="uiHeaderTitle">([^<]+)</h2>', webpage)
2165 if not m:
2166 raise ExtractorError(u'Cannot find title in webpage')
2167 video_title = unescapeHTML(m.group(1))
2168
2169 info = {
2170 'id': video_id,
2171 'title': video_title,
2172 'url': video_url,
2173 'ext': 'mp4',
2174 'duration': video_duration,
edba5137 2175 'thumbnail': params['thumbnail_src'],
b954070d
PH
2176 }
2177 return [info]
59ae15a5 2178
d77c3dfd
FV
2179
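# Illustrative sketch (not part of the original module): FacebookIE above cuts
# a JSON blob out of inline JavaScript between two literal marker strings and
# then prefers the HD stream over the SD fallback. The stream selection on a
# hypothetical, already-decoded params dict:
def _example_pick_facebook_stream(params):
    """Return the best available stream URL from the decoded params dict."""
    video_url = params.get('hd_src') or params.get('sd_src')
    if not video_url:
        raise ExtractorError(u'Cannot find video URL')
    return video_url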
2180class BlipTVIE(InfoExtractor):
59ae15a5
PH
2181 """Information extractor for blip.tv"""
2182
2183 _VALID_URL = r'^(?:https?://)?(?:\w+\.)?blip\.tv(/.+)$'
2184 _URL_EXT = r'^.*\.([a-z0-9]+)$'
2185 IE_NAME = u'blip.tv'
2186
2187 def report_extraction(self, file_id):
2188 """Report information extraction."""
2189 self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, file_id))
2190
2191 def report_direct_download(self, title):
2192 """Report information extraction."""
2193 self._downloader.to_screen(u'[%s] %s: Direct download detected' % (self.IE_NAME, title))
2194
2195 def _real_extract(self, url):
2196 mobj = re.match(self._VALID_URL, url)
2197 if mobj is None:
e5f30ade 2198 self._downloader.report_error(u'invalid URL: %s' % url)
59ae15a5
PH
2199 return
2200
f7b567ff
PH
2201 urlp = compat_urllib_parse_urlparse(url)
2202 if urlp.path.startswith('/play/'):
7f9d41a5
JCGS
2203 request = compat_urllib_request.Request(url)
2204 response = compat_urllib_request.urlopen(request)
2205 redirecturl = response.geturl()
f7b567ff
PH
2206 rurlp = compat_urllib_parse_urlparse(redirecturl)
2207 file_id = compat_parse_qs(rurlp.fragment)['file'][0].rpartition('/')[2]
2208 url = 'http://blip.tv/a/a-' + file_id
2209 return self._real_extract(url)
2210
7f9d41a5 2211
59ae15a5
PH
2212 if '?' in url:
2213 cchar = '&'
2214 else:
2215 cchar = '?'
2216 json_url = url + cchar + 'skin=json&version=2&no_wrap=1'
55c05398 2217 request = compat_urllib_request.Request(json_url)
3446dfb7 2218 request.add_header('User-Agent', 'iTunes/10.6.1')
59ae15a5
PH
2219 self.report_extraction(mobj.group(1))
2220 info = None
2221 try:
2222 urlh = compat_urllib_request.urlopen(request)
2223 if urlh.headers.get('Content-Type', '').startswith('video/'): # Direct download
2224 basename = url.split('/')[-1]
2225 title,ext = os.path.splitext(basename)
2226 title = title.decode('UTF-8')
2227 ext = ext.replace('.', '')
2228 self.report_direct_download(title)
2229 info = {
2230 'id': title,
2231 'url': url,
2232 'uploader': None,
2233 'upload_date': None,
2234 'title': title,
2235 'ext': ext,
2236 'urlhandle': urlh
2237 }
2238 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
3446dfb7 2239 raise ExtractorError(u'ERROR: unable to download video info webpage: %s' % compat_str(err))
59ae15a5
PH
2240 if info is None: # Regular URL
2241 try:
55c05398
PH
2242 json_code_bytes = urlh.read()
2243 json_code = json_code_bytes.decode('utf-8')
59ae15a5 2244 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
e5f30ade 2245 self._downloader.report_error(u'unable to read video info webpage: %s' % compat_str(err))
59ae15a5
PH
2246 return
2247
2248 try:
2249 json_data = json.loads(json_code)
2250 if 'Post' in json_data:
2251 data = json_data['Post']
2252 else:
2253 data = json_data
2254
2255 upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d')
2256 video_url = data['media']['url']
2257 umobj = re.match(self._URL_EXT, video_url)
2258 if umobj is None:
2259 raise ValueError('Can not determine filename extension')
2260 ext = umobj.group(1)
2261
2262 info = {
2263 'id': data['item_id'],
2264 'url': video_url,
2265 'uploader': data['display_name'],
2266 'upload_date': upload_date,
2267 'title': data['title'],
2268 'ext': ext,
2269 'format': data['media']['mimeType'],
2270 'thumbnail': data['thumbnailUrl'],
2271 'description': data['description'],
3446dfb7
PH
2272 'player_url': data['embedUrl'],
2273 'user_agent': 'iTunes/10.6.1',
59ae15a5
PH
2274 }
2275 except (ValueError,KeyError) as err:
e5f30ade 2276 self._downloader.report_error(u'unable to parse video information: %s' % repr(err))
59ae15a5
PH
2277 return
2278
59ae15a5 2279 return [info]
d77c3dfd
FV
2280
2281
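# Illustrative sketch (not part of the original module): BlipTVIE above gets
# machine-readable metadata simply by appending "skin=json&version=2&no_wrap=1"
# to the page URL, reusing '&' when the URL already carries a query string.
# That rewrite in isolation, with a hypothetical helper name:
def _example_bliptv_json_url(url):
    """Return the JSON metadata URL for a blip.tv page URL."""
    separator = '&' if '?' in url else '?'
    return url + separator + 'skin=json&version=2&no_wrap=1'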
2282class MyVideoIE(InfoExtractor):
59ae15a5
PH
2283 """Information Extractor for myvideo.de."""
2284
2285 _VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/watch/([0-9]+)/([^?/]+).*'
2286 IE_NAME = u'myvideo'
2287
2288 def __init__(self, downloader=None):
2289 InfoExtractor.__init__(self, downloader)
cdb30764 2290
59ae15a5
PH
2291 def report_extraction(self, video_id):
2292 """Report information extraction."""
2293 self._downloader.to_screen(u'[myvideo] %s: Extracting information' % video_id)
2294
2295 def _real_extract(self,url):
2296 mobj = re.match(self._VALID_URL, url)
2297 if mobj is None:
e5f30ade 2298 self._downloader.report_error(u'invalid URL: %s' % url)
59ae15a5
PH
2299 return
2300
2301 video_id = mobj.group(1)
2302
2303 # Get video webpage
5f955171
PH
2304 webpage_url = 'http://www.myvideo.de/watch/%s' % video_id
2305 webpage = self._download_webpage(webpage_url, video_id)
59ae15a5
PH
2306
2307 self.report_extraction(video_id)
6d436336 2308 mobj = re.search(r'<link rel=\'image_src\' href=\'(http://is[0-9].myvideo\.de/de/movie[0-9]+/[a-f0-9]+)/thumbs/.*?\.jpg\' />',
59ae15a5
PH
2309 webpage)
2310 if mobj is None:
e5f30ade 2311 self._downloader.report_error(u'unable to extract media URL')
59ae15a5
PH
2312 return
2313 video_url = mobj.group(1) + ('/%s.flv' % video_id)
2314
2315 mobj = re.search('<title>([^<]+)</title>', webpage)
2316 if mobj is None:
e5f30ade 2317 self._downloader.report_error(u'unable to extract title')
59ae15a5
PH
2318 return
2319
2320 video_title = mobj.group(1)
2321
2322 return [{
2323 'id': video_id,
2324 'url': video_url,
2325 'uploader': None,
2326 'upload_date': None,
2327 'title': video_title,
2328 'ext': u'flv',
2329 }]
d77c3dfd
FV
2330
2331class ComedyCentralIE(InfoExtractor):
59ae15a5
PH
2332 """Information extractor for The Daily Show and Colbert Report """
2333
ca6849e6 2334 # urls can be abbreviations like :thedailyshow or :colbert
cdb30764 2335 # urls for episodes like:
ca6849e6 2336 # or urls for clips like: http://www.thedailyshow.com/watch/mon-december-10-2012/any-given-gun-day
2337 # or: http://www.colbertnation.com/the-colbert-report-videos/421667/november-29-2012/moon-shattering-news
cdb30764 2338 # or: http://www.colbertnation.com/the-colbert-report-collections/422008/festival-of-lights/79524
ca6849e6 2339 _VALID_URL = r"""^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport)
2340 |(https?://)?(www\.)?
2341 (?P<showname>thedailyshow|colbertnation)\.com/
2342 (full-episodes/(?P<episode>.*)|
2343 (?P<clip>
2344 (the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
2345 |(watch/(?P<date>[^/]*)/(?P<tdstitle>.*)))))
cdb30764 2346 $"""
59ae15a5
PH
2347
2348 _available_formats = ['3500', '2200', '1700', '1200', '750', '400']
2349
2350 _video_extensions = {
2351 '3500': 'mp4',
2352 '2200': 'mp4',
2353 '1700': 'mp4',
2354 '1200': 'mp4',
2355 '750': 'mp4',
2356 '400': 'mp4',
2357 }
2358 _video_dimensions = {
2359 '3500': '1280x720',
2360 '2200': '960x540',
2361 '1700': '768x432',
2362 '1200': '640x360',
2363 '750': '512x288',
2364 '400': '384x216',
2365 }
2366
89de9eb1
FV
2367 @classmethod
2368 def suitable(cls, url):
ca6849e6 2369 """Receives a URL and returns True if suitable for this IE."""
89de9eb1 2370 return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
ca6849e6 2371
59ae15a5
PH
2372 def report_extraction(self, episode_id):
2373 self._downloader.to_screen(u'[comedycentral] %s: Extracting information' % episode_id)
2374
32635ec6
PH
2375 def report_config_download(self, episode_id, media_id):
2376 self._downloader.to_screen(u'[comedycentral] %s: Downloading configuration for %s' % (episode_id, media_id))
59ae15a5
PH
2377
2378 def report_index_download(self, episode_id):
2379 self._downloader.to_screen(u'[comedycentral] %s: Downloading show index' % episode_id)
2380
59ae15a5
PH
2381 def _print_formats(self, formats):
2382 print('Available formats:')
2383 for x in formats:
2384 print('%s\t:\t%s\t[%s]' %(x, self._video_extensions.get(x, 'mp4'), self._video_dimensions.get(x, '???')))
2385
2386
2387 def _real_extract(self, url):
ca6849e6 2388 mobj = re.match(self._VALID_URL, url, re.VERBOSE)
59ae15a5 2389 if mobj is None:
e5f30ade 2390 self._downloader.report_error(u'invalid URL: %s' % url)
59ae15a5
PH
2391 return
2392
2393 if mobj.group('shortname'):
2394 if mobj.group('shortname') in ('tds', 'thedailyshow'):
2395 url = u'http://www.thedailyshow.com/full-episodes/'
2396 else:
2397 url = u'http://www.colbertnation.com/full-episodes/'
ca6849e6 2398 mobj = re.match(self._VALID_URL, url, re.VERBOSE)
59ae15a5
PH
2399 assert mobj is not None
2400
ca6849e6 2401 if mobj.group('clip'):
2402 if mobj.group('showname') == 'thedailyshow':
2403 epTitle = mobj.group('tdstitle')
2404 else:
2405 epTitle = mobj.group('cntitle')
2406 dlNewest = False
59ae15a5 2407 else:
ca6849e6 2408 dlNewest = not mobj.group('episode')
2409 if dlNewest:
2410 epTitle = mobj.group('showname')
2411 else:
2412 epTitle = mobj.group('episode')
59ae15a5
PH
2413
2414 req = compat_urllib_request.Request(url)
2415 self.report_extraction(epTitle)
2416 try:
2417 htmlHandle = compat_urllib_request.urlopen(req)
2418 html = htmlHandle.read()
93148102 2419 webpage = html.decode('utf-8')
59ae15a5 2420 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
e5f30ade 2421 self._downloader.report_error(u'unable to download webpage: %s' % compat_str(err))
59ae15a5
PH
2422 return
2423 if dlNewest:
2424 url = htmlHandle.geturl()
ca6849e6 2425 mobj = re.match(self._VALID_URL, url, re.VERBOSE)
59ae15a5 2426 if mobj is None:
e5f30ade 2427 self._downloader.report_error(u'Invalid redirected URL: ' + url)
59ae15a5
PH
2428 return
2429 if mobj.group('episode') == '':
e5f30ade 2430 self._downloader.report_error(u'Redirected URL is still not specific: ' + url)
59ae15a5
PH
2431 return
2432 epTitle = mobj.group('episode')
2433
93148102 2434 mMovieParams = re.findall('(?:<param name="movie" value="|var url = ")(http://media.mtvnservices.com/([^"]*(?:episode|video).*?:.*?))"', webpage)
59ae15a5
PH
2435
2436 if len(mMovieParams) == 0:
2437 # The Colbert Report embeds the information without a URL prefix;
2438 # extract the alternate reference instead and then add the URL
2439 # prefix manually.
2440
93148102 2441 altMovieParams = re.findall('data-mgid="([^"]*(?:episode|video).*?:.*?)"', webpage)
59ae15a5 2442 if len(altMovieParams) == 0:
e5f30ade 2443 self._downloader.report_error(u'unable to find Flash URL in webpage ' + url)
59ae15a5
PH
2444 return
2445 else:
2446 mMovieParams = [("http://media.mtvnservices.com/" + altMovieParams[0], altMovieParams[0])]
cdb30764 2447
59ae15a5
PH
2448 uri = mMovieParams[0][1]
2449 indexUrl = 'http://shadow.comedycentral.com/feeds/video_player/mrss/?' + compat_urllib_parse.urlencode({'uri': uri})
2450 self.report_index_download(epTitle)
2451 try:
2452 indexXml = compat_urllib_request.urlopen(indexUrl).read()
2453 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
e5f30ade 2454 self._downloader.report_error(u'unable to download episode index: ' + compat_str(err))
59ae15a5
PH
2455 return
2456
2457 results = []
2458
2459 idoc = xml.etree.ElementTree.fromstring(indexXml)
2460 itemEls = idoc.findall('.//item')
7717ae19 2461 for partNum,itemEl in enumerate(itemEls):
59ae15a5
PH
2462 mediaId = itemEl.findall('./guid')[0].text
2463 shortMediaId = mediaId.split(':')[-1]
2464 showId = mediaId.split(':')[-2].replace('.com', '')
2465 officialTitle = itemEl.findall('./title')[0].text
2466 officialDate = itemEl.findall('./pubDate')[0].text
2467
2468 configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' +
2469 compat_urllib_parse.urlencode({'uri': mediaId}))
2470 configReq = compat_urllib_request.Request(configUrl)
32635ec6 2471 self.report_config_download(epTitle, shortMediaId)
59ae15a5
PH
2472 try:
2473 configXml = compat_urllib_request.urlopen(configReq).read()
2474 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
e5f30ade 2475 self._downloader.report_error(u'unable to download webpage: %s' % compat_str(err))
59ae15a5
PH
2476 return
2477
2478 cdoc = xml.etree.ElementTree.fromstring(configXml)
2479 turls = []
2480 for rendition in cdoc.findall('.//rendition'):
2481 finfo = (rendition.attrib['bitrate'], rendition.findall('./src')[0].text)
2482 turls.append(finfo)
2483
2484 if len(turls) == 0:
c9fa1cba 2485 self._downloader.report_error(u'unable to download ' + mediaId + ': No videos found')
59ae15a5 2486 continue
cdb30764 2487
59ae15a5
PH
2488 if self._downloader.params.get('listformats', None):
2489 self._print_formats([i[0] for i in turls])
2490 return
2491
2492 # For now, just pick the highest bitrate
32635ec6 2493 format,rtmp_video_url = turls[-1]
59ae15a5
PH
2494
2495 # Get the format arg from the arg stream
2496 req_format = self._downloader.params.get('format', None)
2497
2498 # Select format if we can find one
2499 for f,v in turls:
2500 if f == req_format:
32635ec6 2501 format, rtmp_video_url = f, v
59ae15a5
PH
2502 break
2503
32635ec6
PH
2504 m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp.comedystor/.*)$', rtmp_video_url)
2505 if not m:
2506 raise ExtractorError(u'Cannot transform RTMP url')
2507 base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/'
2508 video_url = base + m.group('finalid')
59ae15a5 2509
7717ae19 2510 effTitle = showId + u'-' + epTitle + u' part ' + compat_str(partNum+1)
59ae15a5
PH
2511 info = {
2512 'id': shortMediaId,
2513 'url': video_url,
2514 'uploader': showId,
2515 'upload_date': officialDate,
2516 'title': effTitle,
2517 'ext': 'mp4',
2518 'format': format,
2519 'thumbnail': None,
2520 'description': officialTitle,
59ae15a5 2521 }
59ae15a5 2522 results.append(info)
cdb30764 2523
59ae15a5 2524 return results
d77c3dfd
FV
2525
2526
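# Illustrative sketch (not part of the original module): ComedyCentralIE above
# rewrites the RTMP rendition URL into a plain HTTP URL on the llnwd.net
# mirror, keeping only the "gsp.comedystor/..." tail. The same transformation
# factored out, with a hypothetical helper name:
def _example_rtmp_to_http(rtmp_video_url):
    """Map an rtmp(e)://.../gsp.comedystor/... URL to its HTTP mirror."""
    m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp\.comedystor/.*)$', rtmp_video_url)
    if not m:
        raise ExtractorError(u'Cannot transform RTMP url')
    base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/'
    return base + m.group('finalid')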
2527class EscapistIE(InfoExtractor):
59ae15a5
PH
2528 """Information extractor for The Escapist """
2529
2530 _VALID_URL = r'^(https?://)?(www\.)?escapistmagazine\.com/videos/view/(?P<showname>[^/]+)/(?P<episode>[^/?]+)[/?]?.*$'
2531 IE_NAME = u'escapist'
2532
2533 def report_extraction(self, showName):
2534 self._downloader.to_screen(u'[escapist] %s: Extracting information' % showName)
2535
2536 def report_config_download(self, showName):
2537 self._downloader.to_screen(u'[escapist] %s: Downloading configuration' % showName)
2538
2539 def _real_extract(self, url):
2540 mobj = re.match(self._VALID_URL, url)
2541 if mobj is None:
e5f30ade 2542 self._downloader.report_error(u'invalid URL: %s' % url)
59ae15a5
PH
2543 return
2544 showName = mobj.group('showname')
2545 videoId = mobj.group('episode')
2546
2547 self.report_extraction(showName)
2548 try:
2549 webPage = compat_urllib_request.urlopen(url)
2550 webPageBytes = webPage.read()
2551 m = re.match(r'text/html; charset="?([^"]+)"?', webPage.headers['Content-Type'])
2552 webPage = webPageBytes.decode(m.group(1) if m else 'utf-8')
2553 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
e5f30ade 2554 self._downloader.report_error(u'unable to download webpage: ' + compat_str(err))
59ae15a5
PH
2555 return
2556
2557 descMatch = re.search('<meta name="description" content="([^"]*)"', webPage)
2558 description = unescapeHTML(descMatch.group(1))
2559 imgMatch = re.search('<meta property="og:image" content="([^"]*)"', webPage)
2560 imgUrl = unescapeHTML(imgMatch.group(1))
2561 playerUrlMatch = re.search('<meta property="og:video" content="([^"]*)"', webPage)
2562 playerUrl = unescapeHTML(playerUrlMatch.group(1))
2563 configUrlMatch = re.search('config=(.*)$', playerUrl)
2564 configUrl = compat_urllib_parse.unquote(configUrlMatch.group(1))
2565
2566 self.report_config_download(showName)
2567 try:
93702113
FV
2568 configJSON = compat_urllib_request.urlopen(configUrl)
2569 m = re.match(r'text/html; charset="?([^"]+)"?', configJSON.headers['Content-Type'])
2570 configJSON = configJSON.read().decode(m.group(1) if m else 'utf-8')
59ae15a5 2571 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
e5f30ade 2572 self._downloader.report_error(u'unable to download configuration: ' + compat_str(err))
59ae15a5
PH
2573 return
2574
2575 # Technically, it's JavaScript, not JSON
2576 configJSON = configJSON.replace("'", '"')
2577
2578 try:
2579 config = json.loads(configJSON)
2580 except (ValueError,) as err:
e5f30ade 2581 self._downloader.report_error(u'Invalid JSON in configuration file: ' + compat_str(err))
59ae15a5
PH
2582 return
2583
2584 playlist = config['playlist']
2585 videoUrl = playlist[1]['url']
2586
2587 info = {
2588 'id': videoId,
2589 'url': videoUrl,
2590 'uploader': showName,
2591 'upload_date': None,
2592 'title': showName,
47dcd621 2593 'ext': 'mp4',
59ae15a5
PH
2594 'thumbnail': imgUrl,
2595 'description': description,
2596 'player_url': playerUrl,
2597 }
2598
2599 return [info]
d77c3dfd 2600
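# Illustrative sketch (not part of the original module): EscapistIE above pulls
# the player configuration URL out of the og:video meta tag and then loads the
# config as JSON after swapping single quotes for double quotes (the file is
# really JavaScript, so this is a best-effort conversion):
def _example_parse_js_config(config_js):
    """Parse a single-quoted JavaScript object literal as JSON (best effort)."""
    return json.loads(config_js.replace("'", '"'))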
d77c3dfd 2601class CollegeHumorIE(InfoExtractor):
59ae15a5
PH
2602 """Information extractor for collegehumor.com"""
2603
0eb0faa2 2604 _WORKING = False
59ae15a5
PH
2605 _VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/video/(?P<videoid>[0-9]+)/(?P<shorttitle>.*)$'
2606 IE_NAME = u'collegehumor'
2607
799c0763 2608 def report_manifest(self, video_id):
59ae15a5 2609 """Report information extraction."""
799c0763 2610 self._downloader.to_screen(u'[%s] %s: Downloading XML manifest' % (self.IE_NAME, video_id))
59ae15a5
PH
2611
2612 def report_extraction(self, video_id):
2613 """Report information extraction."""
2614 self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id))
2615
2616 def _real_extract(self, url):
2617 mobj = re.match(self._VALID_URL, url)
2618 if mobj is None:
e5f30ade 2619 self._downloader.report_error(u'invalid URL: %s' % url)
59ae15a5
PH
2620 return
2621 video_id = mobj.group('videoid')
2622
59ae15a5
PH
2623 info = {
2624 'id': video_id,
59ae15a5
PH
2625 'uploader': None,
2626 'upload_date': None,
2627 }
2628
2629 self.report_extraction(video_id)
799c0763 2630 xmlUrl = 'http://www.collegehumor.com/moogaloop/video/' + video_id
59ae15a5
PH
2631 try:
2632 metaXml = compat_urllib_request.urlopen(xmlUrl).read()
2633 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
e5f30ade 2634 self._downloader.report_error(u'unable to download video info XML: %s' % compat_str(err))
59ae15a5
PH
2635 return
2636
2637 mdoc = xml.etree.ElementTree.fromstring(metaXml)
2638 try:
2639 videoNode = mdoc.findall('./video')[0]
2640 info['description'] = videoNode.findall('./description')[0].text
2641 info['title'] = videoNode.findall('./caption')[0].text
59ae15a5 2642 info['thumbnail'] = videoNode.findall('./thumbnail')[0].text
799c0763 2643 manifest_url = videoNode.findall('./file')[0].text
59ae15a5 2644 except IndexError:
c9fa1cba 2645 self._downloader.report_error(u'Invalid metadata XML file')
59ae15a5
PH
2646 return
2647
799c0763
PH
2648 manifest_url += '?hdcore=2.10.3'
2649 self.report_manifest(video_id)
2650 try:
2651 manifestXml = compat_urllib_request.urlopen(manifest_url).read()
2652 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
e5f30ade 2653 self._downloader.report_error(u'unable to download video info XML: %s' % compat_str(err))
799c0763
PH
2654 return
2655
2656 adoc = xml.etree.ElementTree.fromstring(manifestXml)
2657 try:
2658 media_node = adoc.findall('./{http://ns.adobe.com/f4m/1.0}media')[0]
2659 node_id = media_node.attrib['url']
2660 video_id = adoc.findall('./{http://ns.adobe.com/f4m/1.0}id')[0].text
2661 except IndexError as err:
c9fa1cba 2662 self._downloader.report_error(u'Invalid manifest file')
799c0763
PH
2663 return
2664
2665 url_pr = compat_urllib_parse_urlparse(manifest_url)
2666 url = url_pr.scheme + '://' + url_pr.netloc + '/z' + video_id[:-2] + '/' + node_id + 'Seg1-Frag1'
2667
2668 info['url'] = url
2669 info['ext'] = 'f4f'
59ae15a5 2670 return [info]
d77c3dfd
FV
2671
2672
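# Illustrative sketch (not part of the original module): CollegeHumorIE above
# reads an Adobe HDS (f4m) manifest, so it has to address tags through the
# f4m XML namespace. Pulling the first media node out of a manifest string
# would look like this, with a hypothetical helper name:
def _example_f4m_first_media_url(manifest_xml):
    """Return the 'url' attribute of the first media node in an f4m manifest."""
    adoc = xml.etree.ElementTree.fromstring(manifest_xml)
    media_node = adoc.findall('./{http://ns.adobe.com/f4m/1.0}media')[0]
    return media_node.attrib['url']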
2673class XVideosIE(InfoExtractor):
59ae15a5 2674 """Information extractor for xvideos.com"""
d77c3dfd 2675
59ae15a5
PH
2676 _VALID_URL = r'^(?:https?://)?(?:www\.)?xvideos\.com/video([0-9]+)(?:.*)'
2677 IE_NAME = u'xvideos'
d77c3dfd 2678
59ae15a5
PH
2679 def report_extraction(self, video_id):
2680 """Report information extraction."""
2681 self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id))
d77c3dfd 2682
59ae15a5
PH
2683 def _real_extract(self, url):
2684 mobj = re.match(self._VALID_URL, url)
2685 if mobj is None:
e5f30ade 2686 self._downloader.report_error(u'invalid URL: %s' % url)
59ae15a5 2687 return
8588a86f 2688 video_id = mobj.group(1)
d77c3dfd 2689
5f955171 2690 webpage = self._download_webpage(url, video_id)
d77c3dfd 2691
59ae15a5 2692 self.report_extraction(video_id)
d77c3dfd
FV
2693
2694
59ae15a5
PH
2695 # Extract video URL
2696 mobj = re.search(r'flv_url=(.+?)&', webpage)
2697 if mobj is None:
e5f30ade 2698 self._downloader.report_error(u'unable to extract video url')
59ae15a5 2699 return
8588a86f 2700 video_url = compat_urllib_parse.unquote(mobj.group(1))
d77c3dfd
FV
2701
2702
59ae15a5
PH
2703 # Extract title
2704 mobj = re.search(r'<title>(.*?)\s+-\s+XVID', webpage)
2705 if mobj is None:
e5f30ade 2706 self._downloader.report_error(u'unable to extract video title')
59ae15a5 2707 return
8588a86f 2708 video_title = mobj.group(1)
d77c3dfd
FV
2709
2710
59ae15a5
PH
2711 # Extract video thumbnail
2712 mobj = re.search(r'http://(?:img.*?\.)xvideos.com/videos/thumbs/[a-fA-F0-9]+/[a-fA-F0-9]+/[a-fA-F0-9]+/[a-fA-F0-9]+/([a-fA-F0-9.]+jpg)', webpage)
2713 if mobj is None:
e5f30ade 2714 self._downloader.report_error(u'unable to extract video thumbnail')
59ae15a5 2715 return
8588a86f 2716 video_thumbnail = mobj.group(0)
d77c3dfd 2717
59ae15a5
PH
2718 info = {
2719 'id': video_id,
2720 'url': video_url,
2721 'uploader': None,
2722 'upload_date': None,
2723 'title': video_title,
2724 'ext': 'flv',
2725 'thumbnail': video_thumbnail,
2726 'description': None,
2727 }
d77c3dfd 2728
59ae15a5 2729 return [info]
d77c3dfd
FV
2730
2731
2732class SoundcloudIE(InfoExtractor):
59ae15a5
PH
2733 """Information extractor for soundcloud.com
2734 To access the media, the uid of the song and a stream token
2735 must be extracted from the page source, and the script must make
2736 a request to media.soundcloud.com/crossdomain.xml. The media can
2737 then be grabbed by requesting a URL composed of the stream token
2738 and uid.
2739 """
2740
2741 _VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)'
2742 IE_NAME = u'soundcloud'
2743
2744 def __init__(self, downloader=None):
2745 InfoExtractor.__init__(self, downloader)
2746
8fd3afd5 2747 def report_resolve(self, video_id):
59ae15a5 2748 """Report information extraction."""
8fd3afd5 2749 self._downloader.to_screen(u'[%s] %s: Resolving id' % (self.IE_NAME, video_id))
59ae15a5
PH
2750
2751 def report_extraction(self, video_id):
2752 """Report information extraction."""
8fd3afd5 2753 self._downloader.to_screen(u'[%s] %s: Retrieving stream' % (self.IE_NAME, video_id))
59ae15a5
PH
2754
2755 def _real_extract(self, url):
2756 mobj = re.match(self._VALID_URL, url)
2757 if mobj is None:
e5f30ade 2758 self._downloader.report_error(u'invalid URL: %s' % url)
59ae15a5
PH
2759 return
2760
2761 # extract uploader (which is in the url)
15c8d833 2762 uploader = mobj.group(1)
59ae15a5 2763 # extract simple title (uploader + slug of song title)
15c8d833 2764 slug_title = mobj.group(2)
59ae15a5
PH
2765 simple_title = uploader + u'-' + slug_title
2766
8fd3afd5 2767 self.report_resolve('%s/%s' % (uploader, slug_title))
59ae15a5 2768
8fd3afd5
PH
2769 url = 'http://soundcloud.com/%s/%s' % (uploader, slug_title)
2770 resolv_url = 'http://api.soundcloud.com/resolve.json?url=' + url + '&client_id=b45b1aa10f1ac2941910a7f0d10f8e28'
2771 request = compat_urllib_request.Request(resolv_url)
59ae15a5 2772 try:
8fd3afd5
PH
2773 info_json_bytes = compat_urllib_request.urlopen(request).read()
2774 info_json = info_json_bytes.decode('utf-8')
59ae15a5 2775 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
e5f30ade 2776 self._downloader.report_error(u'unable to download video webpage: %s' % compat_str(err))
59ae15a5
PH
2777 return
2778
8fd3afd5
PH
2779 info = json.loads(info_json)
2780 video_id = info['id']
59ae15a5
PH
2781 self.report_extraction('%s/%s' % (uploader, slug_title))
2782
8fd3afd5 2783 streams_url = 'https://api.sndcdn.com/i1/tracks/' + str(video_id) + '/streams?client_id=b45b1aa10f1ac2941910a7f0d10f8e28'
c7214f9a 2784 request = compat_urllib_request.Request(streams_url)
8fd3afd5
PH
2785 try:
2786 stream_json_bytes = compat_urllib_request.urlopen(request).read()
2787 stream_json = stream_json_bytes.decode('utf-8')
2788 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
e5f30ade 2789 self._downloader.report_error(u'unable to download stream definitions: %s' % compat_str(err))
b4cd069d 2790 return
59ae15a5 2791
8fd3afd5 2792 streams = json.loads(stream_json)
c7214f9a 2793 mediaURL = streams['http_mp3_128_url']
59ae15a5
PH
2794
2795 return [{
c7214f9a 2796 'id': info['id'],
59ae15a5 2797 'url': mediaURL,
c7214f9a
PH
2798 'uploader': info['user']['username'],
2799 'upload_date': info['created_at'],
2800 'title': info['title'],
59ae15a5 2801 'ext': u'mp3',
c7214f9a 2802 'description': info['description'],
59ae15a5 2803 }]
d77c3dfd
FV
2804
2805
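# Illustrative sketch (not part of the original module): SoundcloudIE above
# first resolves the human-facing page URL to a track id via the public
# resolve endpoint and then asks the streams endpoint for a direct MP3 URL.
# Building those two API URLs (the client_id is the one hard-coded above and
# is not guaranteed to remain valid):
def _example_soundcloud_api_urls(page_url, client_id='b45b1aa10f1ac2941910a7f0d10f8e28'):
    """Return (resolve_url, streams_url_template) for a Soundcloud page URL."""
    resolve_url = 'http://api.soundcloud.com/resolve.json?url=' + page_url + '&client_id=' + client_id
    streams_url = 'https://api.sndcdn.com/i1/tracks/%s/streams?client_id=' + client_id
    return resolve_url, streams_url
# Usage: resolve_url, streams_tpl = _example_soundcloud_api_urls(url); streams_tpl % track_id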
2806class InfoQIE(InfoExtractor):
59ae15a5 2807 """Information extractor for infoq.com"""
59ae15a5 2808 _VALID_URL = r'^(?:https?://)?(?:www\.)?infoq\.com/[^/]+/[^/]+$'
59ae15a5 2809
59ae15a5
PH
2810 def report_extraction(self, video_id):
2811 """Report information extraction."""
2812 self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id))
2813
2814 def _real_extract(self, url):
2815 mobj = re.match(self._VALID_URL, url)
2816 if mobj is None:
e5f30ade 2817 self._downloader.report_error(u'invalid URL: %s' % url)
59ae15a5
PH
2818 return
2819
4fcca4bb 2820 webpage = self._download_webpage(url, video_id=url)
59ae15a5
PH
2821 self.report_extraction(url)
2822
59ae15a5
PH
2823 # Extract video URL
2824 mobj = re.search(r"jsclassref='([^']*)'", webpage)
2825 if mobj is None:
e5f30ade 2826 self._downloader.report_error(u'unable to extract video url')
59ae15a5 2827 return
4fcca4bb
PH
2828 real_id = compat_urllib_parse.unquote(base64.b64decode(mobj.group(1).encode('ascii')).decode('utf-8'))
2829 video_url = 'rtmpe://video.infoq.com/cfx/st/' + real_id
59ae15a5
PH
2830
2831 # Extract title
2832 mobj = re.search(r'contentTitle = "(.*?)";', webpage)
2833 if mobj is None:
e5f30ade 2834 self._downloader.report_error(u'unable to extract video title')
59ae15a5 2835 return
4fcca4bb 2836 video_title = mobj.group(1)
59ae15a5
PH
2837
2838 # Extract description
2839 video_description = u'No description available.'
2840 mobj = re.search(r'<meta name="description" content="(.*)"(?:\s*/)?>', webpage)
2841 if mobj is not None:
4fcca4bb 2842 video_description = mobj.group(1)
59ae15a5
PH
2843
2844 video_filename = video_url.split('/')[-1]
2845 video_id, extension = video_filename.split('.')
2846
2847 info = {
2848 'id': video_id,
2849 'url': video_url,
2850 'uploader': None,
2851 'upload_date': None,
2852 'title': video_title,
2853 'ext': extension, # Extension is always(?) mp4, but seems to be flv
2854 'thumbnail': None,
2855 'description': video_description,
2856 }
2857
2858 return [info]
d77c3dfd
FV
2859
2860class MixcloudIE(InfoExtractor):
59ae15a5 2861 """Information extractor for www.mixcloud.com"""
93702113
FV
2862
2863 _WORKING = False # New API, but it seems good http://www.mixcloud.com/developers/documentation/
59ae15a5
PH
2864 _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([\w\d-]+)/([\w\d-]+)'
2865 IE_NAME = u'mixcloud'
2866
2867 def __init__(self, downloader=None):
2868 InfoExtractor.__init__(self, downloader)
2869
2870 def report_download_json(self, file_id):
2871 """Report JSON download."""
2872 self._downloader.to_screen(u'[%s] Downloading json' % self.IE_NAME)
2873
2874 def report_extraction(self, file_id):
2875 """Report information extraction."""
2876 self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, file_id))
2877
2878 def get_urls(self, jsonData, fmt, bitrate='best'):
2879 """Get urls from 'audio_formats' section in json"""
2880 file_url = None
2881 try:
2882 bitrate_list = jsonData[fmt]
2883 if bitrate is None or bitrate == 'best' or bitrate not in bitrate_list:
2884 bitrate = max(bitrate_list) # select highest
2885
2886 url_list = jsonData[fmt][bitrate]
2887 except TypeError: # we have no bitrate info.
2888 url_list = jsonData[fmt]
2889 return url_list
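# Sketch of the 'audio_formats' structure this helper assumes (shape and
# values are illustrative, not taken from Mixcloud documentation):
#   {"mp3": {"320": ["http://.../a.mp3", "http://.../b.mp3"], "128": [...]},
#    "ogg": ["http://.../a.ogg"]}
# With a bitrate dict, get_urls(jsonData, 'mp3') takes max() of the bitrate
# keys and returns that URL list; a plain list is returned unchanged via the
# TypeError fallback.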
2890
2891 def check_urls(self, url_list):
2892 """Returns 1st active url from list"""
2893 for url in url_list:
2894 try:
2895 compat_urllib_request.urlopen(url)
2896 return url
2897 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2898 url = None
2899
2900 return None
2901
2902 def _print_formats(self, formats):
2903 print('Available formats:')
2904 for fmt in formats.keys():
2905 for b in formats[fmt]:
2906 try:
2907 ext = formats[fmt][b][0]
2908 print('%s\t%s\t[%s]' % (fmt, b, ext.split('.')[-1]))
2909 except TypeError: # we have no bitrate info
2910 ext = formats[fmt][0]
2911 print('%s\t%s\t[%s]' % (fmt, '??', ext.split('.')[-1]))
2912 break
2913
2914 def _real_extract(self, url):
2915 mobj = re.match(self._VALID_URL, url)
2916 if mobj is None:
e5f30ade 2917 self._downloader.report_error(u'invalid URL: %s' % url)
59ae15a5
PH
2918 return
2919 # extract uploader & filename from url
2920 uploader = mobj.group(1)
2921 file_id = uploader + "-" + mobj.group(2)
2922
2923 # construct API request
2924 file_url = 'http://www.mixcloud.com/api/1/cloudcast/' + '/'.join(url.split('/')[-3:-1]) + '.json'
2925 # retrieve .json file with links to files
2926 request = compat_urllib_request.Request(file_url)
2927 try:
2928 self.report_download_json(file_url)
2929 jsonData = compat_urllib_request.urlopen(request).read()
2930 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
e5f30ade 2931 self._downloader.report_error(u'Unable to retrieve file: %s' % compat_str(err))
59ae15a5
PH
2932 return
2933
2934 # parse JSON
2935 json_data = json.loads(jsonData)
2936 player_url = json_data['player_swf_url']
2937 formats = dict(json_data['audio_formats'])
2938
2939 req_format = self._downloader.params.get('format', None)
2940 bitrate = None
2941
2942 if self._downloader.params.get('listformats', None):
2943 self._print_formats(formats)
2944 return
2945
2946 if req_format is None or req_format == 'best':
2947 for format_param in formats.keys():
2948 url_list = self.get_urls(formats, format_param)
2949 # check urls
2950 file_url = self.check_urls(url_list)
2951 if file_url is not None:
2952 break # got it!
2953 else:
99b0a129 2954 if req_format not in formats:
e5f30ade 2955 self._downloader.report_error(u'format is not available')
59ae15a5
PH
2956 return
2957
2958 url_list = self.get_urls(formats, req_format)
2959 file_url = self.check_urls(url_list)
2960 format_param = req_format
2961
2962 return [{
2963 'id': file_id,
2964 'url': file_url,
2965 'uploader': uploader,
2966 'upload_date': None,
2967 'title': json_data['name'],
2968 'ext': file_url.split('.')[-1],
2969 'format': (format_param is None and u'NA' or format_param),
2970 'thumbnail': json_data['thumbnail_url'],
2971 'description': json_data['description'],
2972 'player_url': player_url,
2973 }]
d77c3dfd
FV
2974
2975class StanfordOpenClassroomIE(InfoExtractor):
59ae15a5
PH
2976 """Information extractor for Stanford's Open ClassRoom"""
2977
2978 _VALID_URL = r'^(?:https?://)?openclassroom.stanford.edu(?P<path>/?|(/MainFolder/(?:HomePage|CoursePage|VideoPage)\.php([?]course=(?P<course>[^&]+)(&video=(?P<video>[^&]+))?(&.*)?)?))$'
2979 IE_NAME = u'stanfordoc'
2980
2981 def report_download_webpage(self, objid):
2982 """Report information extraction."""
2983 self._downloader.to_screen(u'[%s] %s: Downloading webpage' % (self.IE_NAME, objid))
2984
2985 def report_extraction(self, video_id):
2986 """Report information extraction."""
2987 self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id))
2988
2989 def _real_extract(self, url):
2990 mobj = re.match(self._VALID_URL, url)
2991 if mobj is None:
f0bad2b0 2992 raise ExtractorError(u'Invalid URL: %s' % url)
59ae15a5
PH
2993
2994 if mobj.group('course') and mobj.group('video'): # A specific video
2995 course = mobj.group('course')
2996 video = mobj.group('video')
2997 info = {
2998 'id': course + '_' + video,
2999 'uploader': None,
3000 'upload_date': None,
3001 }
3002
3003 self.report_extraction(info['id'])
3004 baseUrl = 'http://openclassroom.stanford.edu/MainFolder/courses/' + course + '/videos/'
3005 xmlUrl = baseUrl + video + '.xml'
3006 try:
3007 metaXml = compat_urllib_request.urlopen(xmlUrl).read()
3008 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
e5f30ade 3009 self._downloader.report_error(u'unable to download video info XML: %s' % compat_str(err))
59ae15a5
PH
3010 return
3011 mdoc = xml.etree.ElementTree.fromstring(metaXml)
3012 try:
3013 info['title'] = mdoc.findall('./title')[0].text
3014 info['url'] = baseUrl + mdoc.findall('./videoFile')[0].text
3015 except IndexError:
c9fa1cba 3016 self._downloader.report_error(u'Invalid metadata XML file')
59ae15a5
PH
3017 return
3018 info['ext'] = info['url'].rpartition('.')[2]
3019 return [info]
3020 elif mobj.group('course'): # A course page
3021 course = mobj.group('course')
3022 info = {
3023 'id': course,
3024 'type': 'playlist',
3025 'uploader': None,
3026 'upload_date': None,
3027 }
3028
f0bad2b0
PH
3029 coursepage = self._download_webpage(url, info['id'],
3030 note='Downloading course info page',
3031 errnote='Unable to download course info page')
59ae15a5
PH
3032
3033 m = re.search('<h1>([^<]+)</h1>', coursepage)
3034 if m:
3035 info['title'] = unescapeHTML(m.group(1))
3036 else:
3037 info['title'] = info['id']
3038
3039 m = re.search('<description>([^<]+)</description>', coursepage)
3040 if m:
3041 info['description'] = unescapeHTML(m.group(1))
3042
3043 links = orderedSet(re.findall('<a href="(VideoPage.php\?[^"]+)">', coursepage))
3044 info['list'] = [
3045 {
3046 'type': 'reference',
3047 'url': 'http://openclassroom.stanford.edu/MainFolder/' + unescapeHTML(vpage),
3048 }
3049 for vpage in links]
3050 results = []
3051 for entry in info['list']:
3052 assert entry['type'] == 'reference'
3053 results += self.extract(entry['url'])
3054 return results
59ae15a5
PH
3055 else: # Root page
3056 info = {
3057 'id': 'Stanford OpenClassroom',
3058 'type': 'playlist',
3059 'uploader': None,
3060 'upload_date': None,
3061 }
3062
3063 self.report_download_webpage(info['id'])
3064 rootURL = 'http://openclassroom.stanford.edu/MainFolder/HomePage.php'
3065 try:
3066 rootpage = compat_urllib_request.urlopen(rootURL).read()
3067 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
e5f30ade 3068 self._downloader.report_error(u'unable to download course info page: ' + compat_str(err))
59ae15a5
PH
3069 return
3070
3071 info['title'] = info['id']
3072
3073 links = orderedSet(re.findall('<a href="(CoursePage.php\?[^"]+)">', rootpage))
3074 info['list'] = [
3075 {
3076 'type': 'reference',
3077 'url': 'http://openclassroom.stanford.edu/MainFolder/' + unescapeHTML(cpage),
3078 }
3079 for cpage in links]
3080
3081 results = []
3082 for entry in info['list']:
3083 assert entry['type'] == 'reference'
3084 results += self.extract(entry['url'])
3085 return results
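# Sketch of the per-video metadata XML consumed in the single-video branch
# above (element names follow the findall() calls; the root tag and values
# are assumptions):
#   <document>
#     <title>Lecture 1: Introduction</title>
#     <videoFile>Lecture1.flv</videoFile>
#   </document>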
d77c3dfd
FV
3086
3087class MTVIE(InfoExtractor):
59ae15a5
PH
3088 """Information extractor for MTV.com"""
3089
3090 _VALID_URL = r'^(?P<proto>https?://)?(?:www\.)?mtv\.com/videos/[^/]+/(?P<videoid>[0-9]+)/[^/]+$'
3091 IE_NAME = u'mtv'
3092
59ae15a5
PH
3093 def report_extraction(self, video_id):
3094 """Report information extraction."""
3095 self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id))
3096
3097 def _real_extract(self, url):
3098 mobj = re.match(self._VALID_URL, url)
3099 if mobj is None:
e5f30ade 3100 self._downloader.report_error(u'invalid URL: %s' % url)
59ae15a5
PH
3101 return
3102 if not mobj.group('proto'):
3103 url = 'http://' + url
3104 video_id = mobj.group('videoid')
59ae15a5 3105
5f955171 3106 webpage = self._download_webpage(url, video_id)
59ae15a5
PH
3107
3108 mobj = re.search(r'<meta name="mtv_vt" content="([^"]+)"/>', webpage)
3109 if mobj is None:
e5f30ade 3110 self._downloader.report_error(u'unable to extract song name')
59ae15a5
PH
3111 return
3112 song_name = unescapeHTML(mobj.group(1))
3113 mobj = re.search(r'<meta name="mtv_an" content="([^"]+)"/>', webpage)
3114 if mobj is None:
e5f30ade 3115 self._downloader.report_error(u'unable to extract performer')
59ae15a5
PH
3116 return
3117 performer = unescapeHTML(mobj.group(1))
cdb30764 3118 video_title = performer + ' - ' + song_name
59ae15a5
PH
3119
3120 mobj = re.search(r'<meta name="mtvn_uri" content="([^"]+)"/>', webpage)
3121 if mobj is None:
e5f30ade 3122 self._downloader.report_error(u'unable to extract mtvn_uri')
59ae15a5
PH
3123 return
3124 mtvn_uri = mobj.group(1)
3125
3126 mobj = re.search(r'MTVN.Player.defaultPlaylistId = ([0-9]+);', webpage)
3127 if mobj is None:
e5f30ade 3128 self._downloader.report_error(u'unable to extract content id')
59ae15a5
PH
3129 return
3130 content_id = mobj.group(1)
3131
3132 videogen_url = 'http://www.mtv.com/player/includes/mediaGen.jhtml?uri=' + mtvn_uri + '&id=' + content_id + '&vid=' + video_id + '&ref=www.mtvn.com&viewUri=' + mtvn_uri
3133 self.report_extraction(video_id)
3134 request = compat_urllib_request.Request(videogen_url)
3135 try:
3136 metadataXml = compat_urllib_request.urlopen(request).read()
3137 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
e5f30ade 3138 self._downloader.report_error(u'unable to download video metadata: %s' % compat_str(err))
59ae15a5
PH
3139 return
3140
3141 mdoc = xml.etree.ElementTree.fromstring(metadataXml)
3142 renditions = mdoc.findall('.//rendition')
3143
3144 # For now, always pick the highest quality.
3145 rendition = renditions[-1]
3146
3147 try:
3148 _,_,ext = rendition.attrib['type'].partition('/')
3149 format = ext + '-' + rendition.attrib['width'] + 'x' + rendition.attrib['height'] + '_' + rendition.attrib['bitrate']
3150 video_url = rendition.find('./src').text
3151 except KeyError:
3152 self._downloader.report_error(u'Invalid rendition field.')
3153 return
3154
3155 info = {
3156 'id': video_id,
3157 'url': video_url,
3158 'uploader': performer,
3159 'upload_date': None,
3160 'title': video_title,
3161 'ext': ext,
3162 'format': format,
3163 }
3164
3165 return [info]
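# Sketch of the mediaGen XML assumed by the rendition parsing above (tag and
# attribute names mirror the attrib/find() lookups; nesting and values are
# illustrative):
#   <rendition type="video/mp4" width="640" height="360" bitrate="800">
#     <src>rtmp://example.mtvnservices.com/some_clip.mp4</src>
#   </rendition>
# The last <rendition> in document order is taken as the highest quality.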
6de7ef9b 3166
302efc19 3167
302efc19 3168class YoukuIE(InfoExtractor):
59ae15a5 3169 _VALID_URL = r'(?:http://)?v\.youku\.com/v_show/id_(?P<ID>[A-Za-z0-9]+)\.html'
59ae15a5
PH
3170
3171 def report_download_webpage(self, file_id):
3172 """Report webpage download."""
a34dd63b 3173 self._downloader.to_screen(u'[%s] %s: Downloading webpage' % (self.IE_NAME, file_id))
59ae15a5
PH
3174
3175 def report_extraction(self, file_id):
3176 """Report information extraction."""
a34dd63b 3177 self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, file_id))
59ae15a5
PH
3178
3179 def _gen_sid(self):
3180 nowTime = int(time.time() * 1000)
3181 random1 = random.randint(1000,1998)
3182 random2 = random.randint(1000,9999)
3183
3184 return "%d%d%d" %(nowTime,random1,random2)
3185
3186 def _get_file_ID_mix_string(self, seed):
3187 mixed = []
3188 source = list("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/\:._-1234567890")
3189 seed = float(seed)
3190 for i in range(len(source)):
3191 seed = (seed * 211 + 30031 ) % 65536
3192 index = math.floor(seed / 65536 * len(source) )
3193 mixed.append(source[int(index)])
3194 source.remove(source[int(index)])
3195 #return ''.join(mixed)
3196 return mixed
3197
3198 def _get_file_id(self, fileId, seed):
3199 mixed = self._get_file_ID_mix_string(seed)
3200 ids = fileId.split('*')
3201 realId = []
3202 for ch in ids:
3203 if ch:
3204 realId.append(mixed[int(ch)])
3205 return ''.join(realId)
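# Worked example (values made up): _get_file_ID_mix_string(seed) shuffles the
# 68-character source alphabet deterministically from the numeric seed, and a
# fileId such as "12*3*7*" is decoded by taking positions 12, 3 and 7 of that
# shuffled list and joining the characters; the trailing empty segment from
# the final '*' is skipped by the `if ch:` test above.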
3206
3207 def _real_extract(self, url):
3208 mobj = re.match(self._VALID_URL, url)
3209 if mobj is None:
e5f30ade 3210 self._downloader.report_error(u'invalid URL: %s' % url)
59ae15a5
PH
3211 return
3212 video_id = mobj.group('ID')
3213
3214 info_url = 'http://v.youku.com/player/getPlayList/VideoIDS/' + video_id
3215
3216 request = compat_urllib_request.Request(info_url, None, std_headers)
3217 try:
3218 self.report_download_webpage(video_id)
3219 jsondata = compat_urllib_request.urlopen(request).read()
3220 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
e5f30ade 3221 self._downloader.report_error(u'Unable to retrieve video webpage: %s' % compat_str(err))
59ae15a5
PH
3222 return
3223
3224 self.report_extraction(video_id)
3225 try:
8f6f40d9
PH
3226 jsonstr = jsondata.decode('utf-8')
3227 config = json.loads(jsonstr)
59ae15a5
PH
3228
3229 video_title = config['data'][0]['title']
3230 seed = config['data'][0]['seed']
3231
3232 format = self._downloader.params.get('format', None)
1a2c3c0f 3233 supported_format = list(config['data'][0]['streamfileids'].keys())
59ae15a5
PH
3234
3235 if format is None or format == 'best':
3236 if 'hd2' in supported_format:
3237 format = 'hd2'
3238 else:
3239 format = 'flv'
3240 ext = u'flv'
3241 elif format == 'worst':
3242 format = 'mp4'
3243 ext = u'mp4'
3244 else:
3245 format = 'flv'
3246 ext = u'flv'
3247
3248
3249 fileid = config['data'][0]['streamfileids'][format]
e2a8ff24 3250 keys = [s['k'] for s in config['data'][0]['segs'][format]]
8f6f40d9 3251 except (UnicodeDecodeError, ValueError, KeyError):
e5f30ade 3252 self._downloader.report_error(u'unable to extract info section')
59ae15a5
PH
3253 return
3254
3255 files_info=[]
3256 sid = self._gen_sid()
3257 fileid = self._get_file_id(fileid, seed)
3258
3259 #column 8,9 of fileid represent the segment number
3260 #fileid[7:9] should be changed
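# e.g. (illustrative): with index == 3, '%02X' % 3 == '03', so characters
# 8-9 of the decoded fileid are swapped for the segment number while
# fileid[10:] keeps the tail; the same hex index also appears in the
# sid/%s_%02X part of the download URL built below.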
3261 for index, key in enumerate(keys):
3262
3263 temp_fileid = '%s%02X%s' % (fileid[0:8], index, fileid[10:])
3264 download_url = 'http://f.youku.com/player/getFlvPath/sid/%s_%02X/st/flv/fileid/%s?k=%s' % (sid, index, temp_fileid, key)
3265
3266 info = {
3267 'id': '%s_part%02d' % (video_id, index),
3268 'url': download_url,
3269 'uploader': None,
3270 'upload_date': None,
3271 'title': video_title,
3272 'ext': ext,
3273 }
3274 files_info.append(info)
3275
3276 return files_info
5dc846fa
FV
3277
3278
6de7ef9b 3279class XNXXIE(InfoExtractor):
59ae15a5
PH
3280 """Information extractor for xnxx.com"""
3281
caec7618 3282 _VALID_URL = r'^(?:https?://)?video\.xnxx\.com/video([0-9]+)/(.*)'
59ae15a5
PH
3283 IE_NAME = u'xnxx'
3284 VIDEO_URL_RE = r'flv_url=(.*?)&amp;'
3285 VIDEO_TITLE_RE = r'<title>(.*?)\s+-\s+XNXX.COM'
3286 VIDEO_THUMB_RE = r'url_bigthumb=(.*?)&amp;'
3287
3288 def report_webpage(self, video_id):
3289 """Report information extraction"""
3290 self._downloader.to_screen(u'[%s] %s: Downloading webpage' % (self.IE_NAME, video_id))
3291
3292 def report_extraction(self, video_id):
3293 """Report information extraction"""
3294 self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id))
3295
3296 def _real_extract(self, url):
3297 mobj = re.match(self._VALID_URL, url)
3298 if mobj is None:
e5f30ade 3299 self._downloader.report_error(u'invalid URL: %s' % url)
59ae15a5 3300 return
bec102a8 3301 video_id = mobj.group(1)
59ae15a5
PH
3302
3303 self.report_webpage(video_id)
3304
3305 # Get webpage content
3306 try:
bec102a8
PH
3307 webpage_bytes = compat_urllib_request.urlopen(url).read()
3308 webpage = webpage_bytes.decode('utf-8')
59ae15a5 3309 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
e5f30ade 3310 self._downloader.report_error(u'unable to download video webpage: %s' % err)
59ae15a5
PH
3311 return
3312
3313 result = re.search(self.VIDEO_URL_RE, webpage)
3314 if result is None:
e5f30ade 3315 self._downloader.report_error(u'unable to extract video url')
59ae15a5 3316 return
bec102a8 3317 video_url = compat_urllib_parse.unquote(result.group(1))
59ae15a5
PH
3318
3319 result = re.search(self.VIDEO_TITLE_RE, webpage)
3320 if result is None:
e5f30ade 3321 self._downloader.report_error(u'unable to extract video title')
59ae15a5 3322 return
bec102a8 3323 video_title = result.group(1)
59ae15a5
PH
3324
3325 result = re.search(self.VIDEO_THUMB_RE, webpage)
3326 if result is None:
e5f30ade 3327 self._downloader.report_error(u'unable to extract video thumbnail')
59ae15a5 3328 return
bec102a8 3329 video_thumbnail = result.group(1)
59ae15a5
PH
3330
3331 return [{
3332 'id': video_id,
3333 'url': video_url,
3334 'uploader': None,
3335 'upload_date': None,
3336 'title': video_title,
3337 'ext': 'flv',
3338 'thumbnail': video_thumbnail,
3339 'description': None,
3340 }]
fd873c69
FV
3341
3342
d443aca8 3343class GooglePlusIE(InfoExtractor):
59ae15a5
PH
3344 """Information extractor for plus.google.com."""
3345
93702113 3346 _VALID_URL = r'(?:https://)?plus\.google\.com/(?:[^/]+/)*?posts/(\w+)'
59ae15a5
PH
3347 IE_NAME = u'plus.google'
3348
3349 def __init__(self, downloader=None):
3350 InfoExtractor.__init__(self, downloader)
3351
3352 def report_extract_entry(self, url):
3353 """Report downloading extry"""
93702113 3354 self._downloader.to_screen(u'[plus.google] Downloading entry: %s' % url)
59ae15a5
PH
3355
3356 def report_date(self, upload_date):
3357 """Report downloading extry"""
3358 self._downloader.to_screen(u'[plus.google] Entry date: %s' % upload_date)
3359
3360 def report_uploader(self, uploader):
3361 """Report downloading extry"""
93702113 3362 self._downloader.to_screen(u'[plus.google] Uploader: %s' % uploader)
59ae15a5
PH
3363
3364 def report_title(self, video_title):
3365 """Report downloading extry"""
93702113 3366 self._downloader.to_screen(u'[plus.google] Title: %s' % video_title)
59ae15a5
PH
3367
3368 def report_extract_vid_page(self, video_page):
3369 """Report information extraction."""
93702113 3370 self._downloader.to_screen(u'[plus.google] Extracting video page: %s' % video_page)
59ae15a5
PH
3371
3372 def _real_extract(self, url):
3373 # Extract id from URL
3374 mobj = re.match(self._VALID_URL, url)
3375 if mobj is None:
e5f30ade 3376 self._downloader.report_error(u'Invalid URL: %s' % url)
59ae15a5
PH
3377 return
3378
3379 post_url = mobj.group(0)
93702113 3380 video_id = mobj.group(1)
59ae15a5
PH
3381
3382 video_extension = 'flv'
3383
3384 # Step 1, Retrieve post webpage to extract further information
3385 self.report_extract_entry(post_url)
3386 request = compat_urllib_request.Request(post_url)
3387 try:
93702113 3388 webpage = compat_urllib_request.urlopen(request).read().decode('utf-8')
59ae15a5 3389 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
e5f30ade 3390 self._downloader.report_error(u'Unable to retrieve entry webpage: %s' % compat_str(err))
59ae15a5
PH
3391 return
3392
3393 # Extract update date
3394 upload_date = None
3395 pattern = 'title="Timestamp">(.*?)</a>'
3396 mobj = re.search(pattern, webpage)
3397 if mobj:
3398 upload_date = mobj.group(1)
3399 # Convert timestring to a format suitable for filename
3400 upload_date = datetime.datetime.strptime(upload_date, "%Y-%m-%d")
3401 upload_date = upload_date.strftime('%Y%m%d')
3402 self.report_date(upload_date)
3403
3404 # Extract uploader
3405 uploader = None
3406 pattern = r'rel\="author".*?>(.*?)</a>'
3407 mobj = re.search(pattern, webpage)
3408 if mobj:
3409 uploader = mobj.group(1)
3410 self.report_uploader(uploader)
3411
3412 # Extract title
3413 # Get the first line for title
3414 video_title = u'NA'
3415 pattern = r'<meta name\=\"Description\" content\=\"(.*?)[\n<"]'
3416 mobj = re.search(pattern, webpage)
3417 if mobj:
3418 video_title = mobj.group(1)
3419 self.report_title(video_title)
3420
3421 # Step 2, Stimulate clicking the image box to launch video
3422 pattern = '"(https\://plus\.google\.com/photos/.*?)",,"image/jpeg","video"\]'
3423 mobj = re.search(pattern, webpage)
3424 if mobj is None:
e5f30ade 3425 self._downloader.report_error(u'unable to extract video page URL')
return
59ae15a5
PH
3426
3427 video_page = mobj.group(1)
3428 request = compat_urllib_request.Request(video_page)
3429 try:
93702113 3430 webpage = compat_urllib_request.urlopen(request).read().decode('utf-8')
59ae15a5 3431 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
e5f30ade 3432 self._downloader.report_error(u'Unable to retrieve video webpage: %s' % compat_str(err))
59ae15a5
PH
3433 return
3434 self.report_extract_vid_page(video_page)
3435
3436
3437 # Extract video links on video page
3438 """Extract video links of all sizes"""
3439 pattern = '\d+,\d+,(\d+),"(http\://redirector\.googlevideo\.com.*?)"'
3440 mobj = re.findall(pattern, webpage)
3441 if len(mobj) == 0:
e5f30ade 3442 self._downloader.report_error(u'unable to extract video links')
return
59ae15a5
PH
3443
3444 # Sort in resolution
3445 links = sorted(mobj)
3446
3447 # Choose the lowest of the sort, i.e. highest resolution
3448 video_url = links[-1]
3449 # Only get the url. The resolution part in the tuple has no use anymore
3450 video_url = video_url[-1]
3451 # Treat escaped \u0026 style hex
93702113
FV
3452 try:
3453 video_url = video_url.decode("unicode_escape")
3454 except AttributeError: # Python 3
3455 video_url = bytes(video_url, 'ascii').decode('unicode-escape')
59ae15a5
PH
3456
3457
3458 return [{
93702113 3459 'id': video_id,
59ae15a5 3460 'url': video_url,
93702113
FV
3461 'uploader': uploader,
3462 'upload_date': upload_date,
3463 'title': video_title,
3464 'ext': video_extension,
59ae15a5 3465 }]
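# Example of the escape handling above (the URL and parameters are made-up
# placeholders): a captured link such as
#   http://redirector.googlevideo.com/videoplayback?id=abc\u0026itag=22
# becomes, after the unicode-escape round-trip,
#   http://redirector.googlevideo.com/videoplayback?id=abc&itag=22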
4cc3d074
PH
3466
3467class NBAIE(InfoExtractor):
3468 _VALID_URL = r'^(?:https?://)?(?:watch\.|www\.)?nba\.com/(?:nba/)?video(/[^?]*)(\?.*)?$'
3469 IE_NAME = u'nba'
3470
4cc3d074
PH
3471 def _real_extract(self, url):
3472 mobj = re.match(self._VALID_URL, url)
3473 if mobj is None:
e5f30ade 3474 self._downloader.report_error(u'invalid URL: %s' % url)
4cc3d074
PH
3475 return
3476
3477 video_id = mobj.group(1)
3478 if video_id.endswith('/index.html'):
3479 video_id = video_id[:-len('/index.html')]
3480
5f955171 3481 webpage = self._download_webpage(url, video_id)
4cc3d074
PH
3482
3483 video_url = u'http://ht-mobile.cdn.turner.com/nba/big' + video_id + '_nba_1280x720.mp4'
3484 def _findProp(rexp, default=None):
3485 m = re.search(rexp, webpage)
3486 if m:
3487 return unescapeHTML(m.group(1))
3488 else:
3489 return default
3490
3491 shortened_video_id = video_id.rpartition('/')[2]
3492 title = _findProp(r'<meta property="og:title" content="(.*?)"', shortened_video_id).replace('NBA.com: ', '')
3493 info = {
3494 'id': shortened_video_id,
3495 'url': video_url,
3496 'ext': 'mp4',
3497 'title': title,
3498 'upload_date': _findProp(r'<b>Date:</b> (.*?)</div>'),
3499 'description': _findProp(r'<div class="description">(.*?)</h1>'),
3500 }
3501 return [info]
0b40544f
DV
3502
3503class JustinTVIE(InfoExtractor):
3504 """Information extractor for justin.tv and twitch.tv"""
2ab1c5ed
DV
3505 # TODO: One broadcast may be split into multiple videos. The key
3506 # 'broadcast_id' is the same for all parts, and 'broadcast_part'
3507 # starts at 1 and increases. Can we treat all parts as one video?
3508
4096b609
DV
3509 _VALID_URL = r"""(?x)^(?:http://)?(?:www\.)?(?:twitch|justin)\.tv/
3510 ([^/]+)(?:/b/([^/]+))?/?(?:\#.*)?$"""
3511 _JUSTIN_PAGE_LIMIT = 100
0b40544f
DV
3512 IE_NAME = u'justin.tv'
3513
3514 def report_extraction(self, file_id):
3515 """Report information extraction."""
3516 self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, file_id))
3517
4096b609
DV
3518 def report_download_page(self, channel, offset):
3519 """Report attempt to download a single page of videos."""
3520 self._downloader.to_screen(u'[%s] %s: Downloading video information from %d to %d' %
3521 (self.IE_NAME, channel, offset, offset + self._JUSTIN_PAGE_LIMIT))
3522
2ab1c5ed
DV
3523 # Return count of items, list of *valid* items
3524 def _parse_page(self, url):
0b40544f 3525 try:
2ab1c5ed 3526 urlh = compat_urllib_request.urlopen(url)
0b40544f
DV
3527 webpage_bytes = urlh.read()
3528 webpage = webpage_bytes.decode('utf-8', 'ignore')
3529 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
e5f30ade 3530 self._downloader.report_error(u'unable to download video info JSON: %s' % compat_str(err))
0b40544f 3531 return
cdb30764 3532
0b40544f 3533 response = json.loads(webpage)
fa1bf9c6 3534 if type(response) != list:
3535 error_text = response.get('error', 'unknown error')
e5f30ade 3536 self._downloader.report_error(u'Justin.tv API: %s' % error_text)
fa1bf9c6 3537 return
0b40544f
DV
3538 info = []
3539 for clip in response:
3540 video_url = clip['video_file_url']
3541 if video_url:
3542 video_extension = os.path.splitext(video_url)[1][1:]
fa1bf9c6 3543 video_date = re.sub('-', '', clip['start_time'][:10])
3544 video_uploader_id = clip.get('user_id', clip.get('channel_id'))
97f194c1
PH
3545 video_id = clip['id']
3546 video_title = clip.get('title', video_id)
0b40544f 3547 info.append({
97f194c1 3548 'id': video_id,
0b40544f 3549 'url': video_url,
97f194c1 3550 'title': video_title,
fa1bf9c6 3551 'uploader': clip.get('channel_name', video_uploader_id),
3552 'uploader_id': video_uploader_id,
0b40544f
DV
3553 'upload_date': video_date,
3554 'ext': video_extension,
3555 })
2ab1c5ed
DV
3556 return (len(response), info)
3557
3558 def _real_extract(self, url):
3559 mobj = re.match(self._VALID_URL, url)
3560 if mobj is None:
e5f30ade 3561 self._downloader.report_error(u'invalid URL: %s' % url)
2ab1c5ed 3562 return
cdb30764 3563
2ab1c5ed
DV
3564 api = 'http://api.justin.tv'
3565 video_id = mobj.group(mobj.lastindex)
3566 paged = False
3567 if mobj.lastindex == 1:
3568 paged = True
3569 api += '/channel/archives/%s.json'
3570 else:
fa1bf9c6 3571 api += '/broadcast/by_archive/%s.json'
2ab1c5ed 3572 api = api % (video_id,)
cdb30764 3573
2ab1c5ed 3574 self.report_extraction(video_id)
cdb30764 3575
2ab1c5ed
DV
3576 info = []
3577 offset = 0
4096b609
DV
3578 limit = self._JUSTIN_PAGE_LIMIT
3579 while True:
3580 if paged:
3581 self.report_download_page(video_id, offset)
2ab1c5ed
DV
3582 page_url = api + ('?offset=%d&limit=%d' % (offset, limit))
3583 page_count, page_info = self._parse_page(page_url)
3584 info.extend(page_info)
3585 if not paged or page_count != limit:
3586 break
3587 offset += limit
0b40544f 3588 return info
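# Sketch of the page URLs built above (placeholders instead of real ids):
#   channel archives: http://api.justin.tv/channel/archives/<channel>.json?offset=0&limit=100
#   single broadcast: http://api.justin.tv/broadcast/by_archive/<broadcast id>.json?offset=0&limit=100
# For channels the offset advances by _JUSTIN_PAGE_LIMIT until a page returns
# fewer than `limit` clips.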
21a9c6aa
PH
3589
3590class FunnyOrDieIE(InfoExtractor):
3591 _VALID_URL = r'^(?:https?://)?(?:www\.)?funnyordie\.com/videos/(?P<id>[0-9a-f]+)/.*$'
21a9c6aa 3592
21a9c6aa
PH
3593 def _real_extract(self, url):
3594 mobj = re.match(self._VALID_URL, url)
3595 if mobj is None:
e5f30ade 3596 self._downloader.report_error(u'invalid URL: %s' % url)
21a9c6aa
PH
3597 return
3598
3599 video_id = mobj.group('id')
5f955171 3600 webpage = self._download_webpage(url, video_id)
21a9c6aa
PH
3601
3602 m = re.search(r'<video[^>]*>\s*<source[^>]*>\s*<source src="(?P<url>[^"]+)"', webpage, re.DOTALL)
3603 if not m:
e5f30ade 3604 self._downloader.report_error(u'unable to find video information')
return
21a9c6aa 3605 video_url = unescapeHTML(m.group('url'))
21a9c6aa
PH
3606
3607 m = re.search(r"class='player_page_h1'>\s+<a.*?>(?P<title>.*?)</a>", webpage)
3608 if not m:
3609 self._downloader.report_error(u'Cannot find video title')
return
3610 title = unescapeHTML(m.group('title'))
3611
3612 m = re.search(r'<meta property="og:description" content="(?P<desc>.*?)"', webpage)
3613 if m:
3614 desc = unescapeHTML(m.group('desc'))
3615 else:
3616 desc = None
3617
3618 info = {
3619 'id': video_id,
3620 'url': video_url,
3621 'ext': 'mp4',
3622 'title': title,
3623 'description': desc,
3624 }
3625 return [info]
d0d4f277 3626
e314ba67 3627class SteamIE(InfoExtractor):
6324fd1d 3628 _VALID_URL = r"""http://store.steampowered.com/
e314ba67
JMF
3629 (?P<urltype>video|app)/ #If the page is only for videos or for a game
3630 (?P<gameID>\d+)/?
3631 (?P<videoID>\d*)(?P<extra>\??) #For urltype == video we sometimes get the videoID
3632 """
4aeae91f 3633
89de9eb1
FV
3634 @classmethod
3635 def suitable(cls, url):
e314ba67 3636 """Receives a URL and returns True if suitable for this IE."""
89de9eb1 3637 return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
5f955171 3638
e314ba67
JMF
3639 def _real_extract(self, url):
3640 m = re.match(self._VALID_URL, url, re.VERBOSE)
3641 urlRE = r"'movie_(?P<videoID>\d+)': \{\s*FILENAME: \"(?P<videoURL>[\w:/\.\?=]+)\"(,\s*MOVIE_NAME: \"(?P<videoName>[\w:/\.\?=\+-]+)\")?\s*\},"
3642 gameID = m.group('gameID')
3643 videourl = 'http://store.steampowered.com/video/%s/' % gameID
5f955171 3644 webpage = self._download_webpage(videourl, gameID)
e314ba67 3645 mweb = re.finditer(urlRE, webpage)
5e9d042d
JMF
3646 namesRE = r'<span class="title">(?P<videoName>.+?)</span>'
3647 titles = re.finditer(namesRE, webpage)
60bd48b1
JMF
3648 thumbsRE = r'<img class="movie_thumb" src="(?P<thumbnail>.+?)">'
3649 thumbs = re.finditer(thumbsRE, webpage)
e314ba67 3650 videos = []
60bd48b1 3651 for vid,vtitle,thumb in zip(mweb,titles,thumbs):
e314ba67 3652 video_id = vid.group('videoID')
5f955171
PH
3653 title = vtitle.group('videoName')
3654 video_url = vid.group('videoURL')
60bd48b1 3655 video_thumb = thumb.group('thumbnail')
e314ba67 3656 if not video_url:
e5f30ade 3657 self._downloader.report_error(u'Cannot find video url for %s' % video_id)
continue
e314ba67
JMF
3658 info = {
3659 'id':video_id,
3660 'url':video_url,
3661 'ext': 'flv',
60bd48b1
JMF
3662 'title': unescapeHTML(title),
3663 'thumbnail': video_thumb
e314ba67
JMF
3664 }
3665 videos.append(info)
3666 return videos
ef0c8d5f 3667
278986ea 3668class UstreamIE(InfoExtractor):
ef0c8d5f 3669 _VALID_URL = r'https?://www\.ustream\.tv/recorded/(?P<videoID>\d+)'
278986ea 3670 IE_NAME = u'ustream'
ef0c8d5f 3671
278986ea
JMF
3672 def _real_extract(self, url):
3673 m = re.match(self._VALID_URL, url)
3674 video_id = m.group('videoID')
3675 video_url = u'http://tcdn.ustream.tv/video/%s' % video_id
d830b7c2 3676 webpage = self._download_webpage(url, video_id)
278986ea
JMF
3677 m = re.search(r'data-title="(?P<title>.+)"',webpage)
3678 title = m.group('title')
3679 m = re.search(r'<a class="state" data-content-type="channel" data-content-id="(?P<uploader>\d+)"',webpage)
3680 uploader = m.group('uploader')
3681 info = {
3682 'id':video_id,
3683 'url':video_url,
3684 'ext': 'flv',
3685 'title': title,
3686 'uploader': uploader
3687 }
3688 return [info]
4aeae91f 3689
ca0a0bbe
PH
3690class RBMARadioIE(InfoExtractor):
3691 _VALID_URL = r'https?://(?:www\.)?rbmaradio\.com/shows/(?P<videoID>[^/]+)$'
3692
3693 def _real_extract(self, url):
3694 m = re.match(self._VALID_URL, url)
3695 video_id = m.group('videoID')
3696
3697 webpage = self._download_webpage(url, video_id)
3698 m = re.search(r'<script>window.gon = {.*?};gon\.show=(.+?);</script>', webpage)
3699 if not m:
3700 raise ExtractorError(u'Cannot find metadata')
3701 json_data = m.group(1)
3702
3703 try:
3704 data = json.loads(json_data)
3705 except ValueError as e:
3706 raise ExtractorError(u'Invalid JSON: ' + str(e))
3707
3708 video_url = data['akamai_url'] + '&cbr=256'
3709 url_parts = compat_urllib_parse_urlparse(video_url)
3710 video_ext = url_parts.path.rpartition('.')[2]
3711 info = {
3712 'id': video_id,
3713 'url': video_url,
3714 'ext': video_ext,
3715 'title': data['title'],
3716 'description': data.get('teaser_text'),
3717 'location': data.get('country_of_origin'),
3718 'uploader': data.get('host', {}).get('name'),
3719 'uploader_id': data.get('host', {}).get('slug'),
187f491a 3720 'thumbnail': data.get('image', {}).get('large_url_2x'),
ca0a0bbe
PH
3721 'duration': data.get('duration'),
3722 }
3723 return [info]
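# Sketch of the gon.show JSON consumed above (keys mirror the data.get()
# lookups; values are illustrative):
#   {"akamai_url": "http://...", "title": "...", "teaser_text": "...",
#    "country_of_origin": "...", "duration": 3600,
#    "host": {"name": "...", "slug": "..."},
#    "image": {"large_url_2x": "http://..."}}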
4aeae91f 3724
991ba7fa
JC
3725
3726class YouPornIE(InfoExtractor):
3727 """Information extractor for youporn.com."""
991ba7fa 3728 _VALID_URL = r'^(?:https?://)?(?:\w+\.)?youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+)'
6324fd1d 3729
991ba7fa
JC
3730 def _print_formats(self, formats):
3731 """Print all available formats"""
565f7519 3732 print(u'Available formats:')
ca6710ee
JC
3733 print(u'ext\t\tformat')
3734 print(u'---------------------------------')
991ba7fa 3735 for format in formats:
ca6710ee 3736 print(u'%s\t\t%s' % (format['ext'], format['format']))
991ba7fa
JC
3737
3738 def _specific(self, req_format, formats):
3739 for x in formats:
3740 if(x["format"]==req_format):
3741 return x
3742 return None
3743
991ba7fa
JC
3744 def _real_extract(self, url):
3745 mobj = re.match(self._VALID_URL, url)
3746 if mobj is None:
e5f30ade 3747 self._downloader.report_error(u'invalid URL: %s' % url)
991ba7fa
JC
3748 return
3749
ca6710ee 3750 video_id = mobj.group('videoid')
991ba7fa 3751
629fcdd1
PH
3752 req = compat_urllib_request.Request(url)
3753 req.add_header('Cookie', 'age_verified=1')
3754 webpage = self._download_webpage(req, video_id)
991ba7fa
JC
3755
3756 # Get the video title
e711babb 3757 result = re.search(r'<h1.*?>(?P<title>.*)</h1>', webpage)
991ba7fa 3758 if result is None:
e711babb 3759 raise ExtractorError(u'Unable to extract video title')
ca6710ee 3760 video_title = result.group('title').strip()
991ba7fa
JC
3761
3762 # Get the video date
e711babb 3763 result = re.search(r'Date:</label>(?P<date>.*) </li>', webpage)
991ba7fa 3764 if result is None:
2e5457be 3765 self._downloader.report_warning(u'unable to extract video date')
629fcdd1
PH
3766 upload_date = None
3767 else:
3768 upload_date = result.group('date').strip()
991ba7fa
JC
3769
3770 # Get the video uploader
e711babb 3771 result = re.search(r'Submitted:</label>(?P<uploader>.*)</li>', webpage)
991ba7fa 3772 if result is None:
2e5457be 3773 self._downloader.report_warning(u'unable to extract uploader')
629fcdd1
PH
3774 video_uploader = None
3775 else:
3776 video_uploader = result.group('uploader').strip()
3777 video_uploader = clean_html( video_uploader )
991ba7fa
JC
3778
3779 # Get all of the formats available
ca6710ee
JC
3780 DOWNLOAD_LIST_RE = r'(?s)<ul class="downloadList">(?P<download_list>.*?)</ul>'
3781 result = re.search(DOWNLOAD_LIST_RE, webpage)
991ba7fa 3782 if result is None:
629fcdd1 3783 raise ExtractorError(u'Unable to extract download list')
ca6710ee 3784 download_list_html = result.group('download_list').strip()
991ba7fa
JC
3785
3786 # Get all of the links from the page
ca6710ee
JC
3787 LINK_RE = r'(?s)<a href="(?P<url>[^"]+)">'
3788 links = re.findall(LINK_RE, download_list_html)
991ba7fa 3789 if(len(links) == 0):
629fcdd1 3790 raise ExtractorError(u'no known formats available for video')
6324fd1d
FV
3791
3792 self._downloader.to_screen(u'[youporn] Links found: %d' % len(links))
991ba7fa
JC
3793
3794 formats = []
3795 for link in links:
3796
3797 # A link looks like this:
3798 # http://cdn1.download.youporn.phncdn.com/201210/31/8004515/480p_370k_8004515/YouPorn%20-%20Nubile%20Films%20The%20Pillow%20Fight.mp4?nvb=20121113051249&nva=20121114051249&ir=1200&sr=1200&hash=014b882080310e95fb6a0
3799 # A path looks like this:
3800 # /201210/31/8004515/480p_370k_8004515/YouPorn%20-%20Nubile%20Films%20The%20Pillow%20Fight.mp4
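# Worked through from that sample path: path.split('/')[4] is
# '480p_370k_8004515', so format starts out as ['480p', '370k'],
# size == '480p', bitrate == '370k', format becomes '480p-370k' and the
# extension is 'mp4'.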
ca6710ee
JC
3801 video_url = unescapeHTML( link )
3802 path = compat_urllib_parse_urlparse( video_url ).path
991ba7fa
JC
3803 extension = os.path.splitext( path )[1][1:]
3804 format = path.split('/')[4].split('_')[:2]
3805 size = format[0]
3806 bitrate = format[1]
3807 format = "-".join( format )
3808 title = u'%s-%s-%s' % (video_title, size, bitrate)
3809
3810 formats.append({
3811 'id': video_id,
3812 'url': video_url,
3813 'uploader': video_uploader,
3814 'upload_date': upload_date,
3815 'title': title,
3816 'ext': extension,
3817 'format': format,
3818 'thumbnail': None,
3819 'description': None,
3820 'player_url': None
3821 })
3822
3823 if self._downloader.params.get('listformats', None):
3824 self._print_formats(formats)
3825 return
3826
3827 req_format = self._downloader.params.get('format', None)
991ba7fa
JC
3828 self._downloader.to_screen(u'[youporn] Format: %s' % req_format)
3829
991ba7fa
JC
3830 if req_format is None or req_format == 'best':
3831 return [formats[0]]
3832 elif req_format == 'worst':
3833 return [formats[-1]]
3834 elif req_format in ('-1', 'all'):
3835 return formats
3836 else:
3837 format = self._specific( req_format, formats )
3838 if format is None:
e5f30ade 3839 self._downloader.report_error(u'requested format not available')
991ba7fa
JC
3840 return
3841 return [format]
3842
6324fd1d 3843
991ba7fa
JC
3844
3845class PornotubeIE(InfoExtractor):
3846 """Information extractor for pornotube.com."""
991ba7fa 3847 _VALID_URL = r'^(?:https?://)?(?:\w+\.)?pornotube\.com(/c/(?P<channel>[0-9]+))?(/m/(?P<videoid>[0-9]+))(/(?P<title>.+))$'
991ba7fa 3848
991ba7fa
JC
3849 def _real_extract(self, url):
3850 mobj = re.match(self._VALID_URL, url)
3851 if mobj is None:
e5f30ade 3852 self._downloader.report_error(u'invalid URL: %s' % url)
991ba7fa
JC
3853 return
3854
ca6710ee
JC
3855 video_id = mobj.group('videoid')
3856 video_title = mobj.group('title')
991ba7fa
JC
3857
3858 # Get webpage content
ca6710ee 3859 webpage = self._download_webpage(url, video_id)
991ba7fa
JC
3860
3861 # Get the video URL
ca6710ee
JC
3862 VIDEO_URL_RE = r'url: "(?P<url>http://video[0-9].pornotube.com/.+\.flv)",'
3863 result = re.search(VIDEO_URL_RE, webpage)
991ba7fa 3864 if result is None:
e5f30ade 3865 self._downloader.report_error(u'unable to extract video url')
991ba7fa 3866 return
ca6710ee 3867 video_url = compat_urllib_parse.unquote(result.group('url'))
991ba7fa
JC
3868
3869 #Get the uploaded date
ca6710ee
JC
3870 VIDEO_UPLOADED_RE = r'<div class="video_added_by">Added (?P<date>[0-9\/]+) by'
3871 result = re.search(VIDEO_UPLOADED_RE, webpage)
991ba7fa 3872 if result is None:
e5f30ade 3873 self._downloader.report_error(u'unable to extract video upload date')
991ba7fa 3874 return
ca6710ee 3875 upload_date = result.group('date')
991ba7fa
JC
3876
3877 info = {'id': video_id,
3878 'url': video_url,
3879 'uploader': None,
3880 'upload_date': upload_date,
3881 'title': video_title,
3882 'ext': 'flv',
565f7519 3883 'format': 'flv'}
991ba7fa
JC
3884
3885 return [info]
3886
991ba7fa
JC
3887class YouJizzIE(InfoExtractor):
3888 """Information extractor for youjizz.com."""
ca6710ee 3889 _VALID_URL = r'^(?:https?://)?(?:\w+\.)?youjizz\.com/videos/(?P<videoid>[^.]+).html$'
991ba7fa 3890
991ba7fa 3891 def _real_extract(self, url):
ca6710ee
JC
3892 mobj = re.match(self._VALID_URL, url)
3893 if mobj is None:
e5f30ade 3894 self._downloader.report_error(u'invalid URL: %s' % url)
991ba7fa 3895 return
ca6710ee
JC
3896
3897 video_id = mobj.group('videoid')
3898
3899 # Get webpage content
3900 webpage = self._download_webpage(url, video_id)
991ba7fa
JC
3901
3902 # Get the video title
db16276b 3903 result = re.search(r'<title>(?P<title>.*)</title>', webpage)
991ba7fa 3904 if result is None:
db16276b 3905 raise ExtractorError(u'unable to extract video title')
ca6710ee 3906 video_title = result.group('title').strip()
991ba7fa
JC
3907
3908 # Get the embed page
db16276b 3909 result = re.search(r'https?://www.youjizz.com/videos/embed/(?P<videoid>[0-9]+)', webpage)
991ba7fa 3910 if result is None:
db16276b 3911 raise ExtractorError(u'unable to extract embed page')
991ba7fa 3912
ca6710ee
JC
3913 embed_page_url = result.group(0).strip()
3914 video_id = result.group('videoid')
6324fd1d 3915
ca6710ee
JC
3916 webpage = self._download_webpage(embed_page_url, video_id)
3917
991ba7fa 3918 # Get the video URL
db16276b 3919 result = re.search(r'so.addVariable\("file",encodeURIComponent\("(?P<source>[^"]+)"\)\);', webpage)
991ba7fa 3920 if result is None:
db16276b 3921 raise ExtractorError(u'unable to extract video url')
ca6710ee 3922 video_url = result.group('source')
991ba7fa
JC
3923
3924 info = {'id': video_id,
3925 'url': video_url,
991ba7fa
JC
3926 'title': video_title,
3927 'ext': 'flv',
3928 'format': 'flv',
991ba7fa
JC
3929 'player_url': embed_page_url}
3930
3931 return [info]
3932
ccf65f9d
PH
3933class EightTracksIE(InfoExtractor):
3934 IE_NAME = '8tracks'
25580f32 3935 _VALID_URL = r'https?://8tracks.com/(?P<user>[^/]+)/(?P<id>[^/#]+)(?:#.*)?$'
ccf65f9d
PH
3936
3937 def _real_extract(self, url):
3938 mobj = re.match(self._VALID_URL, url)
3939 if mobj is None:
3940 raise ExtractorError(u'Invalid URL: %s' % url)
3941 playlist_id = mobj.group('id')
3942
3943 webpage = self._download_webpage(url, playlist_id)
3944
2a9983b7 3945 m = re.search(r"PAGE.mix = (.*?);\n", webpage, flags=re.DOTALL)
ccf65f9d
PH
3946 if not m:
3947 raise ExtractorError(u'Cannot find trax information')
3948 json_like = m.group(1)
3949 data = json.loads(json_like)
3950
3951 session = str(random.randint(0, 1000000000))
3952 mix_id = data['id']
3953 track_count = data['tracks_count']
3954 first_url = 'http://8tracks.com/sets/%s/play?player=sm&mix_id=%s&format=jsonh' % (session, mix_id)
3955 next_url = first_url
3956 res = []
3957 for i in itertools.count():
3958 api_json = self._download_webpage(next_url, playlist_id,
3959 note=u'Downloading song information %s/%s' % (str(i+1), track_count),
3960 errnote=u'Failed to download song information')
3961 api_data = json.loads(api_json)
3962 track_data = api_data[u'set']['track']
3963 info = {
3964 'id': track_data['id'],
3965 'url': track_data['track_file_stream_url'],
da4de959
PH
3966 'title': track_data['performer'] + u' - ' + track_data['name'],
3967 'raw_title': track_data['name'],
3968 'uploader_id': data['user']['login'],
ccf65f9d
PH
3969 'ext': 'm4a',
3970 }
3971 res.append(info)
3972 if api_data['set']['at_last_track']:
3973 break
3974 next_url = 'http://8tracks.com/sets/%s/next?player=sm&mix_id=%s&format=jsonh&track_id=%s' % (session, mix_id, track_data['id'])
3975 return res
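# Sketch of one jsonh page consumed above (field names mirror the key
# lookups; values are made up):
#   {"set": {"at_last_track": false,
#            "track": {"id": 123, "name": "Some Song", "performer": "Artist",
#                      "track_file_stream_url": "http://..."}}}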
991ba7fa 3976
da06e2da
OK
3977class KeekIE(InfoExtractor):
3978 _VALID_URL = r'http://(?:www\.)?keek\.com/(?:!|\w+/keeks/)(?P<videoID>\w+)'
3979 IE_NAME = u'keek'
3980
3981 def _real_extract(self, url):
3982 m = re.match(self._VALID_URL, url)
3983 video_id = m.group('videoID')
3984 video_url = u'http://cdn.keek.com/keek/video/%s' % video_id
3985 thumbnail = u'http://cdn.keek.com/keek/thumbnail/%s/w100/h75' % video_id
3986 webpage = self._download_webpage(url, video_id)
3987 m = re.search(r'<meta property="og:title" content="(?P<title>.+)"', webpage)
f0877a44 3988 title = unescapeHTML(m.group('title'))
f10b2a9c
FV
3989 m = re.search(r'<div class="user-name-and-bio">[\S\s]+?<h2>(?P<uploader>.+?)</h2>', webpage)
3990 uploader = clean_html(m.group('uploader'))
da06e2da 3991 info = {
f10b2a9c
FV
3992 'id': video_id,
3993 'url': video_url,
da06e2da
OK
3994 'ext': 'mp4',
3995 'title': title,
3996 'thumbnail': thumbnail,
3997 'uploader': uploader
f0877a44 3998 }
da06e2da
OK
3999 return [info]
4000
3a468f2d 4001class TEDIE(InfoExtractor):
414638cd
JMF
4002 _VALID_URL=r'''http://www.ted.com/
4003 (
4004 ((?P<type_playlist>playlists)/(?P<playlist_id>\d+)) # We have a playlist
4005 |
4006 ((?P<type_talk>talks)) # We have a simple talk
4007 )
4008 /(?P<name>\w+) # Here goes the name and then ".html"
4009 '''
4010
89de9eb1
FV
4011 @classmethod
4012 def suitable(cls, url):
414638cd 4013 """Receives a URL and returns True if suitable for this IE."""
89de9eb1 4014 return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
414638cd 4015
3a468f2d 4016 def _real_extract(self, url):
414638cd
JMF
4017 m=re.match(self._VALID_URL, url, re.VERBOSE)
4018 if m.group('type_talk'):
4019 return [self._talk_info(url)]
4020 else:
4021 playlist_id=m.group('playlist_id')
4022 name=m.group('name')
4023 self._downloader.to_screen(u'[%s] Getting info of playlist %s: "%s"' % (self.IE_NAME,playlist_id,name))
4024 return self._playlist_videos_info(url,name,playlist_id)
4025
4026 def _talk_video_link(self,mediaSlug):
4027 '''Returns the video link for that mediaSlug'''
4028 return 'http://download.ted.com/talks/%s.mp4' % mediaSlug
4029
4030 def _playlist_videos_info(self,url,name,playlist_id=0):
4031 '''Returns the videos of the playlist'''
4032 video_RE=r'''
4033 <li\ id="talk_(\d+)"([.\s]*?)data-id="(?P<video_id>\d+)"
4034 ([.\s]*?)data-playlist_item_id="(\d+)"
4035 ([.\s]*?)data-mediaslug="(?P<mediaSlug>.+?)"
4036 '''
c85538db 4037 video_name_RE=r'<p\ class="talk-title"><a href="(?P<talk_url>/talks/(.+).html)">(?P<fullname>.+?)</a></p>'
414638cd
JMF
4038 webpage=self._download_webpage(url, playlist_id, 'Downloading playlist webpage')
4039 m_videos=re.finditer(video_RE,webpage,re.VERBOSE)
4040 m_names=re.finditer(video_name_RE,webpage)
4041 info=[]
4042 for m_video, m_name in zip(m_videos,m_names):
c85538db
JMF
4043 video_id=m_video.group('video_id')
4044 talk_url='http://www.ted.com%s' % m_name.group('talk_url')
4045 info.append(self._talk_info(talk_url,video_id))
414638cd 4046 return info
c85538db 4047
414638cd
JMF
4048 def _talk_info(self, url, video_id=0):
4049 """Return the video for the talk in the url"""
4050 m=re.match(self._VALID_URL, url,re.VERBOSE)
4051 videoName=m.group('name')
4052 webpage=self._download_webpage(url, video_id, 'Downloading \"%s\" page' % videoName)
4053 # If the url includes the language we get the title translated
c85538db 4054 title_RE=r'<h1><span id="altHeadline" >(?P<title>.*)</span></h1>'
3a468f2d
JMF
4055 title=re.search(title_RE, webpage).group('title')
4056 info_RE=r'''<script\ type="text/javascript">var\ talkDetails\ =(.*?)
4057 "id":(?P<videoID>[\d]+).*?
4058 "mediaSlug":"(?P<mediaSlug>[\w\d]+?)"'''
c85538db
JMF
4059 thumb_RE=r'</span>[\s.]*</div>[\s.]*<img src="(?P<thumbnail>.*?)"'
4060 thumb_match=re.search(thumb_RE,webpage)
3a468f2d
JMF
4061 info_match=re.search(info_RE,webpage,re.VERBOSE)
4062 video_id=info_match.group('videoID')
4063 mediaSlug=info_match.group('mediaSlug')
414638cd 4064 video_url=self._talk_video_link(mediaSlug)
3a468f2d 4065 info = {
414638cd
JMF
4066 'id': video_id,
4067 'url': video_url,
3a468f2d 4068 'ext': 'mp4',
c85538db
JMF
4069 'title': title,
4070 'thumbnail': thumb_match.group('thumbnail')
414638cd
JMF
4071 }
4072 return info
da06e2da 4073
58994225 4074class MySpassIE(InfoExtractor):
1ad5d872 4075 _VALID_URL = r'http://www.myspass.de/.*'
6324fd1d 4076
1ad5d872 4077 def _real_extract(self, url):
4078 META_DATA_URL_TEMPLATE = 'http://www.myspass.de/myspass/includes/apps/video/getvideometadataxml.php?id=%s'
58994225 4079
1ad5d872 4080 # video id is the last path element of the URL
4081 # usually there is a trailing slash, so also try the second but last
4082 url_path = compat_urllib_parse_urlparse(url).path
4083 url_parent_path, video_id = os.path.split(url_path)
4084 if not video_id:
4085 _, video_id = os.path.split(url_parent_path)
6324fd1d 4086
1ad5d872 4087 # get metadata
4088 metadata_url = META_DATA_URL_TEMPLATE % video_id
4089 metadata_text = self._download_webpage(metadata_url, video_id)
4090 metadata = xml.etree.ElementTree.fromstring(metadata_text.encode('utf-8'))
6324fd1d 4091
1ad5d872 4092 # extract values from metadata
4093 url_flv_el = metadata.find('url_flv')
4094 if url_flv_el is None:
e5f30ade 4095 self._downloader.report_error(u'unable to extract download url')
1ad5d872 4096 return
4097 video_url = url_flv_el.text
4098 extension = os.path.splitext(video_url)[1][1:]
4099 title_el = metadata.find('title')
4100 if title_el is None:
e5f30ade 4101 self._downloader.report_error(u'unable to extract title')
1ad5d872 4102 return
4103 title = title_el.text
4104 format_id_el = metadata.find('format_id')
4105 if format_id_el is None:
4106 format = extension
4107 else:
4108 format = format_id_el.text
4109 description_el = metadata.find('description')
4110 if description_el is not None:
4111 description = description_el.text
4112 else:
4113 description = None
4114 imagePreview_el = metadata.find('imagePreview')
4115 if imagePreview_el is not None:
4116 thumbnail = imagePreview_el.text
4117 else:
4118 thumbnail = None
4119 info = {
4120 'id': video_id,
4121 'url': video_url,
4122 'title': title,
4123 'ext': extension,
4124 'format': format,
4125 'thumbnail': thumbnail,
4126 'description': description
4127 }
4128 return [info]
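# Sketch of the metadata XML fields read above (element names come from the
# find() calls; the root tag and values are assumptions):
#   <video>
#     <url_flv>http://videos.myspass.de/.../clip.flv</url_flv>
#     <title>Some episode title</title>
#     <format_id>hq</format_id>
#     <description>...</description>
#     <imagePreview>http://.../thumb.jpg</imagePreview>
#   </video>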
4129
e32b06e9 4130class SpiegelIE(InfoExtractor):
c3971870 4131 _VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<videoID>[0-9]+)(?:\.html)?(?:#.*)$'
e32b06e9
PH
4132
4133 def _real_extract(self, url):
4134 m = re.match(self._VALID_URL, url)
4135 video_id = m.group('videoID')
4136
4137 webpage = self._download_webpage(url, video_id)
4138 m = re.search(r'<div class="spVideoTitle">(.*?)</div>', webpage)
4139 if not m:
4140 raise ExtractorError(u'Cannot find title')
4141 video_title = unescapeHTML(m.group(1))
4142
4143 xml_url = u'http://video2.spiegel.de/flash/' + video_id + u'.xml'
4144 xml_code = self._download_webpage(xml_url, video_id,
4145 note=u'Downloading XML', errnote=u'Failed to download XML')
4146
4147 idoc = xml.etree.ElementTree.fromstring(xml_code)
4148 last_type = idoc[-1]
4149 filename = last_type.findall('./filename')[0].text
4150 duration = float(last_type.findall('./duration')[0].text)
4151
4152 video_url = 'http://video2.spiegel.de/flash/' + filename
4153 video_ext = filename.rpartition('.')[2]
4154 info = {
4155 'id': video_id,
4156 'url': video_url,
4157 'ext': video_ext,
4158 'title': video_title,
4159 'duration': duration,
4160 }
4161 return [info]
4162
4163
4aeae91f
PH
4164def gen_extractors():
4165 """ Return a list of an instance of every supported extractor.
4166 The order does matter; the first extractor matched is the one handling the URL.
4167 """
4168 return [
4169 YoutubePlaylistIE(),
4170 YoutubeChannelIE(),
4171 YoutubeUserIE(),
4172 YoutubeSearchIE(),
4173 YoutubeIE(),
4174 MetacafeIE(),
4175 DailymotionIE(),
4176 GoogleSearchIE(),
4177 PhotobucketIE(),
4178 YahooIE(),
4179 YahooSearchIE(),
4180 DepositFilesIE(),
4181 FacebookIE(),
4182 BlipTVUserIE(),
4183 BlipTVIE(),
4184 VimeoIE(),
4185 MyVideoIE(),
4186 ComedyCentralIE(),
4187 EscapistIE(),
4188 CollegeHumorIE(),
4189 XVideosIE(),
4190 SoundcloudIE(),
4191 InfoQIE(),
4192 MixcloudIE(),
4193 StanfordOpenClassroomIE(),
4194 MTVIE(),
4195 YoukuIE(),
4196 XNXXIE(),
18be482a
JC
4197 YouJizzIE(),
4198 PornotubeIE(),
4199 YouPornIE(),
4aeae91f
PH
4200 GooglePlusIE(),
4201 ArteTvIE(),
4202 NBAIE(),
4203 JustinTVIE(),
4204 FunnyOrDieIE(),
4aeae91f
PH
4205 SteamIE(),
4206 UstreamIE(),
ca0a0bbe 4207 RBMARadioIE(),
ccf65f9d 4208 EightTracksIE(),
da06e2da 4209 KeekIE(),
3a468f2d 4210 TEDIE(),
58994225 4211 MySpassIE(),
e32b06e9 4212 SpiegelIE(),
4aeae91f
PH
4213 GenericIE()
4214 ]
4215
4216