]> jfr.im git - yt-dlp.git/blame - youtube-dl
Fix stty detection
[yt-dlp.git] / youtube-dl
CommitLineData
4fa74b52
RG
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
2770590d
GV
3
4__author__ = (
2c8d32de
PH
5 'Ricardo Garcia Gonzalez',
6 'Danny Colligan',
7 'Benjamin Johnson',
8 'Vasyl\' Vavrychuk',
9 'Witold Baryluk',
10 'Paweł Paprota',
11 'Gergely Imreh',
2770590d
GV
12 )
13
2c8d32de
PH
14__license__ = 'Public Domain'
15__version__ = '2011.08.24-phihag'
2770590d 16
80066952 17import cookielib
a1f03c7b 18import datetime
1987c232 19import gzip
4fa74b52
RG
20import htmlentitydefs
21import httplib
2546e767 22import locale
4fa74b52
RG
23import math
24import netrc
25import os
26import os.path
27import re
28import socket
29import string
0487b407 30import subprocess
4fa74b52
RG
31import sys
32import time
33import urllib
34import urllib2
c6b55a8d 35import warnings
1987c232 36import zlib
a04e80a4 37
0a3c8b62
PH
38if os.name == 'nt':
39 import ctypes
40
41try:
42 import email.utils
43except ImportError: # Python 2.4
44 import email.Utils
c6b55a8d
PH
45try:
46 import cStringIO as StringIO
47except ImportError:
48 import StringIO
49
a04e80a4
RG
50# parse_qs was moved from the cgi module to the urlparse module recently.
51try:
52 from urlparse import parse_qs
53except ImportError:
54 from cgi import parse_qs
4fa74b52 55
c6b55a8d
PH
56try:
57 import lxml.etree
2b70537d 58except ImportError:
c6b55a8d
PH
59 pass # Handled below
60
# Default HTTP headers attached to every request by YoutubeDLHandler.
# A per-request 'Youtubedl-No-Compression' header suppresses Accept-Encoding
# (see YoutubeDLHandler.http_request below).
std_headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:5.0.1) Gecko/20100101 Firefox/5.0.1',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}

# Unicode string of characters considered "simple" for titles (ASCII letters
# and digits only).
simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii')
70
437d76c1
PH
# Use the stdlib json module when available (Python >= 2.6); otherwise fall
# back to a minimal pure-Python parser ("trivialjson") that only provides
# json.loads().
try:
    import json
except ImportError: # Python <2.6, use trivialjson (https://github.com/phihag/trivialjson):
    import re
    class json(object):
        @staticmethod
        def loads(s):
            """Parse a UTF-8 encoded JSON document and return the value."""
            s = s.decode('UTF-8')
            def raiseError(msg, i):
                # Every parse error reports the offset and the remaining input.
                raise ValueError(msg + ' at position ' + str(i) + ' of ' + repr(s) + ': ' + repr(s[i:]))
            def skipSpace(i, expectMore=True):
                """Skip whitespace; optionally fail on premature end of input."""
                while i < len(s) and s[i] in ' \t\r\n':
                    i += 1
                if expectMore:
                    if i >= len(s):
                        raiseError('Premature end', i)
                return i
            def decodeEscape(match):
                """Decode one backslash escape (including UTF-16 surrogate pairs)."""
                esc = match.group(1)
                _STATIC = {
                    '"': '"',
                    '\\': '\\',
                    '/': '/',
                    'b': unichr(0x8),
                    'f': unichr(0xc),
                    'n': '\n',
                    'r': '\r',
                    't': '\t',
                }
                if esc in _STATIC:
                    return _STATIC[esc]
                if esc[0] == 'u':
                    if len(esc) == 1+4:
                        return unichr(int(esc[1:5], 16))
                    if len(esc) == 5+6 and esc[5:7] == '\\u':
                        # Surrogate pair \uD8xx\uDCxx -> single code point
                        hi = int(esc[1:5], 16)
                        low = int(esc[7:11], 16)
                        return unichr((hi - 0xd800) * 0x400 + low - 0xdc00 + 0x10000)
                raise ValueError('Unknown escape ' + str(esc))
            def parseString(i):
                """Parse a string literal; i points at the opening quote."""
                i += 1
                e = i
                while True:
                    # Find the closing quote, skipping over escaped quotes
                    # (an odd number of preceding backslashes escapes it).
                    e = s.index('"', e)
                    bslashes = 0
                    while s[e-bslashes-1] == '\\':
                        bslashes += 1
                    if bslashes % 2 == 1:
                        e += 1
                        continue
                    break
                rexp = re.compile(r'\\(u[dD][89aAbB][0-9a-fA-F]{2}\\u[0-9a-fA-F]{4}|u[0-9a-fA-F]{4}|.|$)')
                stri = rexp.sub(decodeEscape, s[i:e])
                return (e+1,stri)
            def parseObj(i):
                """Parse an object; i points at the opening brace."""
                i += 1
                res = {}
                i = skipSpace(i)
                if s[i] == '}': # Empty dictionary
                    return (i+1,res)
                while True:
                    if s[i] != '"':
                        raiseError('Expected a string object key', i)
                    i,key = parseString(i)
                    i = skipSpace(i)
                    if i >= len(s) or s[i] != ':':
                        raiseError('Expected a colon', i)
                    i,val = parse(i+1)
                    res[key] = val
                    i = skipSpace(i)
                    if s[i] == '}':
                        return (i+1, res)
                    if s[i] != ',':
                        raiseError('Expected comma or closing curly brace', i)
                    i = skipSpace(i+1)
            def parseArray(i):
                """Parse an array; i points at the opening bracket."""
                res = []
                i = skipSpace(i+1)
                if s[i] == ']': # Empty array
                    return (i+1,res)
                while True:
                    i,val = parse(i)
                    res.append(val)
                    i = skipSpace(i) # Raise exception if premature end
                    if s[i] == ']':
                        return (i+1, res)
                    if s[i] != ',':
                        raiseError('Expected a comma or closing bracket', i)
                    i = skipSpace(i+1)
            def parseDiscrete(i):
                """Parse the literals true/false/null."""
                for k,v in {'true': True, 'false': False, 'null': None}.items():
                    if s.startswith(k, i):
                        return (i+len(k), v)
                raiseError('Not a boolean (or null)', i)
            def parseNumber(i):
                """Parse an int or float in JSON number syntax."""
                mobj = re.match('^(-?(0|[1-9][0-9]*)(\.[0-9]*)?([eE][+-]?[0-9]+)?)', s[i:])
                if mobj is None:
                    raiseError('Not a number', i)
                nums = mobj.group(1)
                if '.' in nums or 'e' in nums or 'E' in nums:
                    return (i+len(nums), float(nums))
                return (i+len(nums), int(nums))
            CHARMAP = {'{': parseObj, '[': parseArray, '"': parseString, 't': parseDiscrete, 'f': parseDiscrete, 'n': parseDiscrete}
            def parse(i):
                # Dispatch on the first significant character; anything not in
                # CHARMAP is assumed to start a number.
                i = skipSpace(i)
                i,res = CHARMAP.get(s[i], parseNumber)(i)
                i = skipSpace(i, False)
                return (i,res)
            i,res = parse(0)
            if i < len(s):
                raise ValueError('Extra data at end of input (index ' + str(i) + ' of ' + repr(s) + ': ' + repr(s[i:]) + ')')
            return res
183
eae2666c
RG
def preferredencoding():
    """Get preferred encoding.

    Returns the best encoding scheme for the system, based on
    locale.getpreferredencoding() and some further tweaks; falls back to
    UTF-8 when the reported encoding is missing or unusable.
    """
    # The original wrapped this in a one-shot generator and used a bare
    # `except:`, which also swallowed KeyboardInterrupt/SystemExit. A plain
    # try/except with a named exception class is equivalent and safer.
    try:
        pref = locale.getpreferredencoding()
        # Verify Python actually knows this codec before trusting it.
        u'TEST'.encode(pref)
    except Exception:
        pref = 'UTF-8'
    return pref
eae2666c 199
490fd7ae
RG
200def htmlentity_transform(matchobj):
201 """Transforms an HTML entity to a Unicode character.
d3975459 202
490fd7ae
RG
203 This function receives a match object and is intended to be used with
204 the re.sub() function.
205 """
206 entity = matchobj.group(1)
207
208 # Known non-numeric HTML entity
209 if entity in htmlentitydefs.name2codepoint:
210 return unichr(htmlentitydefs.name2codepoint[entity])
211
212 # Unicode character
213 mobj = re.match(ur'(?u)#(x?\d+)', entity)
214 if mobj is not None:
215 numstr = mobj.group(1)
216 if numstr.startswith(u'x'):
217 base = 16
218 numstr = u'0%s' % numstr
219 else:
220 base = 10
221 return unichr(long(numstr, base))
222
223 # Unknown entity in name, return its literal representation
224 return (u'&%s;' % entity)
225
226def sanitize_title(utitle):
31bcb480 227 """Sanitizes a video title so it could be used as part of a filename."""
490fd7ae 228 utitle = re.sub(ur'(?u)&(.+?);', htmlentity_transform, utitle)
490fd7ae
RG
229 return utitle.replace(unicode(os.sep), u'%')
230
31bcb480
RG
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == u'-':
            # '-' means "write to standard output". On Windows stdout must be
            # switched to binary mode or the video data gets mangled.
            if sys.platform == 'win32':
                import msvcrt
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            return (sys.stdout, filename)
        stream = open(filename, open_mode)
        return (stream, filename)
    except (IOError, OSError), err:
        # In case of error, try to remove win32 forbidden chars
        filename = re.sub(ur'[/<>:"\|\?\*]', u'#', filename)

        # An exception here should be caught in the caller
        stream = open(filename, open_mode)
        return (stream, filename)
256
09bd408c
GI
def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp.

    Returns None when the string cannot be parsed.
    """
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        return None
    return email.utils.mktime_tz(parsed)
264
e5bf0f55
RG
class DownloadError(Exception):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """
    pass
273
class SameFileError(Exception):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
    pass
281
65cd34c5
RG
class PostProcessingError(Exception):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """
    pass
289
class UnavailableVideoError(Exception):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
    pass
297
class ContentTooShortError(Exception):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """
    # Both counters are expressed in bytes
    downloaded = None
    expected = None

    def __init__(self, downloaded, expected):
        self.downloaded, self.expected = downloaded, expected
7b7759f5 312
1987c232
RG
class YoutubeDLHandler(urllib2.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-No-Compression", which will be
    removed before making the real request.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    @staticmethod
    def deflate(data):
        # Some servers send raw deflate data, others a zlib-wrapped stream;
        # try the raw form first, then fall back.
        try:
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    @staticmethod
    def addinfourl_wrapper(stream, headers, url, code):
        # Older urllib2.addinfourl has no 'code' constructor argument;
        # emulate it by setting the attribute afterwards.
        if hasattr(urllib2.addinfourl, 'getcode'):
            return urllib2.addinfourl(stream, headers, url, code)
        ret = urllib2.addinfourl(stream, headers, url)
        ret.code = code
        return ret

    def http_request(self, req):
        # Force the standard headers, overriding any caller-supplied values.
        for h in std_headers:
            if h in req.headers:
                del req.headers[h]
            req.add_header(h, std_headers[h])
        # Internal marker header: strip Accept-encoding and the marker itself
        # before the request goes on the wire.
        if 'Youtubedl-no-compression' in req.headers:
            if 'Accept-encoding' in req.headers:
                del req.headers['Accept-encoding']
            del req.headers['Youtubedl-no-compression']
        return req

    def http_response(self, req, resp):
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            gz = gzip.GzipFile(fileobj=StringIO.StringIO(resp.read()), mode='r')
            resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = StringIO.StringIO(self.deflate(resp.read()))
            resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
        return resp
370
4fa74b52
RG
class FileDownloader(object):
    """File Downloader class.

    File downloader objects are the ones responsible of downloading the
    actual video file and writing it to disk if the user has requested
    it, among some other tasks. In most cases there should be one per
    program. As, given a video URL, the downloader doesn't know how to
    extract all the needed information, task that InfoExtractors do, it
    has to pass the URL to one of them.

    For this, file downloader objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the file downloader handles it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    asks the FileDownloader to process the video information, possibly
    downloading the video.

    File downloaders accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The FileDownloader also
    registers itself as the downloader in charge for the InfoExtractors
    that are added to it, so this is a "mutual registration".

    Available options:

    username:         Username for authentication purposes.
    password:         Password for authentication purposes.
    usenetrc:         Use netrc for authentication instead.
    quiet:            Do not print messages to stdout.
    forceurl:         Force printing final URL.
    forcetitle:       Force printing title.
    forcethumbnail:   Force printing thumbnail URL.
    forcedescription: Force printing description.
    forcefilename:    Force printing final filename.
    simulate:         Do not download the video files.
    format:           Video format code.
    format_limit:     Highest quality format to try.
    outtmpl:          Template for output names.
    ignoreerrors:     Do not stop on download errors.
    ratelimit:        Download speed limit, in bytes/sec.
    nooverwrites:     Prevent overwriting files.
    retries:          Number of times to retry for HTTP error 5xx
    continuedl:       Try to continue downloads if possible.
    noprogress:       Do not print the progress bar.
    playliststart:    Playlist item to start at.
    playlistend:      Playlist item to end at.
    logtostderr:      Log messages to stderr instead of stdout.
    consoletitle:     Display progress in console window's titlebar.
    nopart:           Do not use temporary .part files.
    updatetime:       Use the Last-modified header to set output file timestamps.
    writedescription: Write the video description to a .description file
    writeinfojson:    Write the video description to a .info.json file
    """

    params = None               # Options dictionary (see docstring above)
    _ies = []                   # Registered InfoExtractors
    _pps = []                   # Registered PostProcessors
    _download_retcode = None    # Exit code that download() will return
    _num_downloads = None       # Ordinal of the current download (%(autonumber)s)
    _screen_file = None         # Stream used by to_screen() (stdout or stderr)
4fa74b52
RG
433
    def __init__(self, params):
        """Create a FileDownloader object with the given options."""
        self._ies = []
        self._pps = []
        self._download_retcode = 0
        self._num_downloads = 0
        # Honour 'logtostderr' by selecting the screen-output stream once.
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self.params = params
d3975459 442
4fa74b52
RG
443 @staticmethod
444 def pmkdir(filename):
445 """Create directory components in filename. Similar to Unix "mkdir -p"."""
446 components = filename.split(os.sep)
447 aggregate = [os.sep.join(components[0:x]) for x in xrange(1, len(components))]
3af1e172 448 aggregate = ['%s%s' % (x, os.sep) for x in aggregate] # Finish names with separator
4fa74b52
RG
449 for dir in aggregate:
450 if not os.path.exists(dir):
451 os.mkdir(dir)
3fb2c487 452
4fa74b52
RG
453 @staticmethod
454 def format_bytes(bytes):
455 if bytes is None:
456 return 'N/A'
8497c36d
RG
457 if type(bytes) is str:
458 bytes = float(bytes)
459 if bytes == 0.0:
4fa74b52
RG
460 exponent = 0
461 else:
8497c36d 462 exponent = long(math.log(bytes, 1024.0))
4fa74b52 463 suffix = 'bkMGTPEZY'[exponent]
4fa74b52
RG
464 converted = float(bytes) / float(1024**exponent)
465 return '%.2f%s' % (converted, suffix)
466
467 @staticmethod
468 def calc_percent(byte_counter, data_len):
469 if data_len is None:
470 return '---.-%'
471 return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))
472
473 @staticmethod
474 def calc_eta(start, now, total, current):
475 if total is None:
476 return '--:--'
477 dif = now - start
478 if current == 0 or dif < 0.001: # One millisecond
479 return '--:--'
480 rate = float(current) / dif
481 eta = long((float(total) - float(current)) / rate)
482 (eta_mins, eta_secs) = divmod(eta, 60)
483 if eta_mins > 99:
484 return '--:--'
485 return '%02d:%02d' % (eta_mins, eta_secs)
486
5121ef20 487 @staticmethod
4fa74b52
RG
488 def calc_speed(start, now, bytes):
489 dif = now - start
490 if bytes == 0 or dif < 0.001: # One millisecond
9fcd8355 491 return '%10s' % '---b/s'
4fa74b52
RG
492 return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))
493
    @staticmethod
    def best_block_size(elapsed_time, bytes):
        """Pick the next read size from the previous block's throughput."""
        # Allowed band: between half and double the last block, at least 1
        # byte, never above 4 MB.
        new_min = max(bytes / 2.0, 1.0)
        new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
        if elapsed_time < 0.001:
            return long(new_max)
        rate = bytes / elapsed_time
        # NOTE: branch order matters; when bytes is large, new_min can exceed
        # new_max, and the max check must win.
        if rate > new_max:
            return long(new_max)
        if rate < new_min:
            return long(new_min)
        return long(rate)
4fa74b52 506
acd3d842
RG
    @staticmethod
    def parse_bytes(bytestr):
        """Parse a string indicating a byte quantity into a long integer.

        Accepts an optional one-letter 1024-based suffix (k/M/G/...);
        returns None when the string does not match.
        """
        matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
        if matchobj is None:
            return None
        number = float(matchobj.group(1))
        # The suffix's index in 'bkmgtpezy' is its power of 1024 ('' -> 0).
        multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
        return long(round(number * multiplier))
516
4fa74b52
RG
    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        self._ies.append(ie)
        # Mutual registration: the IE gets a back-reference to this downloader.
        ie.set_downloader(self)
d3975459 521
65cd34c5
RG
    def add_post_processor(self, pp):
        """Add a PostProcessor object to the end of the chain."""
        self._pps.append(pp)
        # Mutual registration: the PP gets a back-reference to this downloader.
        pp.set_downloader(self)
d3975459 526
    def to_screen(self, message, skip_eol=False, ignore_encoding_errors=False):
        """Print message to stdout if not in quiet mode.

        With skip_eol=True no newline is appended (used for progress lines);
        with ignore_encoding_errors=True a UnicodeEncodeError is swallowed
        instead of propagated.
        """
        try:
            if not self.params.get('quiet', False):
                terminator = [u'\n', u''][skip_eol]
                print >>self._screen_file, (u'%s%s' % (message, terminator)).encode(preferredencoding()),
                self._screen_file.flush()
        except (UnicodeEncodeError), err:
            if not ignore_encoding_errors:
                raise
d3975459 537
7e5cab67
RG
    def to_stderr(self, message):
        """Print message to stderr (always, regardless of quiet mode)."""
        print >>sys.stderr, message.encode(preferredencoding())
d3975459 541
ccbd296b
MM
    def to_cons_title(self, message):
        """Set console/terminal window title to message (if 'consoletitle' is set)."""
        if not self.params.get('consoletitle', False):
            return
        if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
            # c_wchar_p() might not be necessary if `message` is
            # already of type unicode()
            ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        elif 'TERM' in os.environ:
            # xterm-style escape sequence: ESC ] 0 ; <title> BEL
            sys.stderr.write('\033]0;%s\007' % message.encode(preferredencoding()))
552
22899cea
RG
    def fixed_template(self):
        """Checks if the output template is fixed (contains no %(...)s fields)."""
        return (re.search(ur'(?u)%\(.+?\)s', self.params['outtmpl']) is None)
9fcd8355 556
0086d1ec
RG
    def trouble(self, message=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.
        """
        if message is not None:
            self.to_stderr(message)
        if not self.params.get('ignoreerrors', False):
            raise DownloadError(message)
        # Errors are being ignored: record the failure in the exit code only.
        self._download_retcode = 1
0086d1ec 569
acd3d842
RG
    def slow_down(self, start_time, byte_counter):
        """Sleep if the download speed is over the rate limit."""
        rate_limit = self.params.get('ratelimit', None)
        if rate_limit is None or byte_counter == 0:
            return
        now = time.time()
        elapsed = now - start_time
        if elapsed <= 0.0:
            return
        speed = float(byte_counter) / elapsed
        if speed > rate_limit:
            # Sleep exactly long enough for the average speed since
            # start_time to drop back to the configured limit.
            time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
3fb2c487
RG
582
    def temp_name(self, filename):
        """Returns a temporary filename for the given filename."""
        # Write directly to the target when .part files are disabled, when
        # streaming to stdout ('-'), or when the target exists but is not a
        # regular file (e.g. a named pipe or device).
        if self.params.get('nopart', False) or filename == u'-' or \
                (os.path.exists(filename) and not os.path.isfile(filename)):
            return filename
        return filename + u'.part'
589
8cc42e7c
RG
590 def undo_temp_name(self, filename):
591 if filename.endswith(u'.part'):
592 return filename[:-len(u'.part')]
593 return filename
594
62cf7aaf
RG
    def try_rename(self, old_filename, new_filename):
        """Rename the temporary file to its final name, reporting any failure."""
        try:
            if old_filename == new_filename:
                return
            os.rename(old_filename, new_filename)
        except (IOError, OSError), err:
            self.trouble(u'ERROR: unable to rename file')
e3018902
RG
602
603 def try_utime(self, filename, last_modified_hdr):
604 """Try to set the last-modified time of the given file."""
605 if last_modified_hdr is None:
606 return
607 if not os.path.isfile(filename):
608 return
609 timestr = last_modified_hdr
610 if timestr is None:
611 return
612 filetime = timeconvert(timestr)
613 if filetime is None:
614 return
615 try:
616 os.utime(filename,(time.time(), filetime))
617 except:
618 pass
acd3d842 619
    def report_writedescription(self, descfn):
        """ Report that the description file is being written """
        # Encoding problems in the filename must not abort the download.
        self.to_screen(u'[info] Writing video description to: %s' % descfn, ignore_encoding_errors=True)
623
    def report_writeinfojson(self, infofn):
        """ Report that the metadata file has been written """
        # Encoding problems in the filename must not abort the download.
        self.to_screen(u'[info] Video description metadata as JSON to: %s' % infofn, ignore_encoding_errors=True)
8b95c387 627
bafa5cd9
RG
    def report_destination(self, filename):
        """Report destination filename."""
        self.to_screen(u'[download] Destination: %s' % filename, ignore_encoding_errors=True)
d3975459 631
bafa5cd9
RG
    def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
        """Report download progress."""
        if self.params.get('noprogress', False):
            return
        # '\r' with skip_eol=True redraws the progress line in place.
        self.to_screen(u'\r[download] %s of %s at %s ETA %s' %
                (percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
        self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
                (percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
7db85b2c
RG
640
    def report_resuming_byte(self, resume_len):
        """Report attempt to resume at given byte."""
        self.to_screen(u'[download] Resuming download at byte %s' % resume_len)
d3975459 644
    def report_retry(self, count, retries):
        """Report retry in case of HTTP error 5xx"""
        self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))
d3975459 648
7db85b2c
RG
    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        try:
            self.to_screen(u'[download] %s has already been downloaded' % file_name)
        except (UnicodeEncodeError), err:
            # Fall back to a message without the (unencodable) filename.
            self.to_screen(u'[download] The file has already been downloaded')
d3975459 655
7db85b2c
RG
    def report_unable_to_resume(self):
        """Report it was impossible to resume download."""
        self.to_screen(u'[download] Unable to resume')
d3975459 659
bafa5cd9
RG
    def report_finish(self):
        """Report download finished."""
        if self.params.get('noprogress', False):
            self.to_screen(u'[download] Download completed')
        else:
            # The progress line is already on screen; just terminate it.
            self.to_screen(u'')
d3975459 666
df372a65
RG
    def increment_downloads(self):
        """Increment the ordinal that assigns a number to each file."""
        self._num_downloads += 1
bafa5cd9 670
9f796346
GI
    def prepare_filename(self, info_dict):
        """Generate the output filename.

        Expands the 'outtmpl' template with the video info plus the computed
        %(epoch)s and %(autonumber)s fields; returns None on template errors.
        """
        try:
            template_dict = dict(info_dict)
            # Template fields computed at download time
            template_dict['epoch'] = unicode(long(time.time()))
            template_dict['autonumber'] = unicode('%05d' % self._num_downloads)
            filename = self.params['outtmpl'] % template_dict
            return filename
        except (ValueError, KeyError), err:
            self.trouble(u'ERROR: invalid system charset or erroneous output template')
            return None
682
c8619e01
RG
    def process_info(self, info_dict):
        """Process a single dictionary returned by an InfoExtractor.

        Handles simulate/forced printing, overwrite protection, directory
        creation, optional .description/.info.json sidecar files, the actual
        download, and postprocessing.
        """
        filename = self.prepare_filename(info_dict)
        # Do nothing else if in simulate mode
        if self.params.get('simulate', False):
            # Forced printings
            if self.params.get('forcetitle', False):
                print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')
            if self.params.get('forceurl', False):
                print info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace')
            if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
                print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace')
            if self.params.get('forcedescription', False) and 'description' in info_dict:
                print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace')
            if self.params.get('forcefilename', False) and filename is not None:
                print filename.encode(preferredencoding(), 'xmlcharrefreplace')

            return

        if filename is None:
            return
        if self.params.get('nooverwrites', False) and os.path.exists(filename):
            self.to_stderr(u'WARNING: file exists and will be skipped')
            return

        try:
            self.pmkdir(filename)
        except (OSError, IOError), err:
            self.trouble(u'ERROR: unable to create directories: %s' % str(err))
            return

        if self.params.get('writedescription', False):
            try:
                descfn = filename + '.description'
                self.report_writedescription(descfn)
                descfile = open(descfn, 'wb')
                try:
                    descfile.write(info_dict['description'].encode('utf-8'))
                finally:
                    descfile.close()
            except (OSError, IOError):
                self.trouble(u'ERROR: Cannot write description file: %s' % str(descfn))
                return

        if self.params.get('writeinfojson', False):
            infofn = filename + '.info.json'
            self.report_writeinfojson(infofn)
            try:
                # The trivialjson fallback class has no dump(); probing the
                # attribute here raises AttributeError in that case.
                json.dump
            except (NameError,AttributeError):
                self.trouble(u'ERROR: No JSON encoder found. Update to Python 2.6+, setup a json module, or leave out --write-info-json.')
                return
            try:
                infof = open(infofn, 'wb')
                try:
                    json.dump(info_dict, infof)
                finally:
                    infof.close()
            except (OSError, IOError):
                self.trouble(u'ERROR: Cannot write metadata to JSON file: %s' % str(infofn))
                return

        try:
            success = self._do_download(filename, info_dict['url'].encode('utf-8'), info_dict.get('player_url', None))
        except (OSError, IOError), err:
            raise UnavailableVideoError
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self.trouble(u'ERROR: unable to download video data: %s' % str(err))
            return
        except (ContentTooShortError, ), err:
            self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
            return

        if success:
            try:
                self.post_process(filename, info_dict)
            except (PostProcessingError), err:
                self.trouble(u'ERROR: postprocessing: %s' % str(err))
                return
c8619e01 762
4fa74b52
RG
    def download(self, url_list):
        """Download a given list of URLs.

        Returns the process exit code (0 on success, 1 if any ignored error
        occurred).
        """
        # A fixed template would make every URL write to the same file.
        if len(url_list) > 1 and self.fixed_template():
            raise SameFileError(self.params['outtmpl'])

        for url in url_list:
            suitable_found = False
            for ie in self._ies:
                # Go to next InfoExtractor if not suitable
                if not ie.suitable(url):
                    continue

                # Suitable InfoExtractor found
                suitable_found = True

                # Extract information from URL and process it
                ie.extract(url)

                # Suitable InfoExtractor had been found; go to next URL
                break

            if not suitable_found:
                self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url)

        return self._download_retcode
65cd34c5
RG
788
    def post_process(self, filename, ie_info):
        """Run the postprocessing chain on the given file."""
        info = dict(ie_info)
        info['filepath'] = filename
        for pp in self._pps:
            info = pp.run(info)
            # A PostProcessor returning None stops the chain.
            if info is None:
                break
d3975459 797
    def _download_with_rtmpdump(self, filename, url, player_url):
        """Download an rtmp:// URL by shelling out to the rtmpdump binary.

        Returns True on success, False on failure (after reporting trouble).
        """
        self.report_destination(filename)
        tmpfilename = self.temp_name(filename)

        # Check for rtmpdump first
        try:
            subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
        except (OSError, IOError):
            self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
            return False

        # Download using rtmpdump. rtmpdump returns exit code 2 when
        # the connection was interrumpted and resuming appears to be
        # possible. This is part of rtmpdump's normal usage, AFAIK.
        basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename]
        retval = subprocess.call(basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)])
        while retval == 2 or retval == 1:
            prevsize = os.path.getsize(tmpfilename)
            self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
            time.sleep(5.0) # This seems to be needed
            # Resume (-e); '-k 1' is added after an exit code of 1.
            retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
            cursize = os.path.getsize(tmpfilename)
            # No progress and exit code 1: give up retrying.
            if prevsize == cursize and retval == 1:
                break
        if retval == 0:
            self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(tmpfilename))
            self.try_rename(tmpfilename, filename)
            return True
        else:
            self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval)
            return False
829
e616ec0c 830 def _do_download(self, filename, url, player_url):
62cf7aaf 831 # Check file already present
3fb2c487 832 if self.params.get('continuedl', False) and os.path.isfile(filename) and not self.params.get('nopart', False):
62cf7aaf
RG
833 self.report_file_already_downloaded(filename)
834 return True
835
0487b407
RG
836 # Attempt to download using rtmpdump
837 if url.startswith('rtmp'):
e616ec0c 838 return self._download_with_rtmpdump(filename, url, player_url)
0487b407 839
62cf7aaf 840 tmpfilename = self.temp_name(filename)
55e7c75e 841 stream = None
9c457d2a 842 open_mode = 'wb'
1987c232
RG
843
844 # Do not include the Accept-Encoding header
845 headers = {'Youtubedl-no-compression': 'True'}
846 basic_request = urllib2.Request(url, None, headers)
847 request = urllib2.Request(url, None, headers)
7db85b2c 848
9c457d2a 849 # Establish possible resume length
62cf7aaf
RG
850 if os.path.isfile(tmpfilename):
851 resume_len = os.path.getsize(tmpfilename)
55e7c75e
RG
852 else:
853 resume_len = 0
9c457d2a
RG
854
855 # Request parameters in case of being able to resume
850ab765 856 if self.params.get('continuedl', False) and resume_len != 0:
7db85b2c
RG
857 self.report_resuming_byte(resume_len)
858 request.add_header('Range','bytes=%d-' % resume_len)
9c457d2a 859 open_mode = 'ab'
55e7c75e 860
7031008c
RG
861 count = 0
862 retries = self.params.get('retries', 0)
101e0d1e 863 while count <= retries:
7031008c
RG
864 # Establish connection
865 try:
866 data = urllib2.urlopen(request)
867 break
868 except (urllib2.HTTPError, ), err:
ac249f42 869 if (err.code < 500 or err.code >= 600) and err.code != 416:
101e0d1e 870 # Unexpected HTTP error
7031008c 871 raise
101e0d1e
RG
872 elif err.code == 416:
873 # Unable to resume (requested range not satisfiable)
874 try:
875 # Open the connection again without the range header
876 data = urllib2.urlopen(basic_request)
877 content_length = data.info()['Content-Length']
878 except (urllib2.HTTPError, ), err:
ac249f42 879 if err.code < 500 or err.code >= 600:
101e0d1e
RG
880 raise
881 else:
882 # Examine the reported length
268fb2bd 883 if (content_length is not None and
7a9054ec 884 (resume_len - 100 < long(content_length) < resume_len + 100)):
268fb2bd
RG
885 # The file had already been fully downloaded.
886 # Explanation to the above condition: in issue #175 it was revealed that
887 # YouTube sometimes adds or removes a few bytes from the end of the file,
888 # changing the file size slightly and causing problems for some users. So
889 # I decided to implement a suggested change and consider the file
890 # completely downloaded if the file size differs less than 100 bytes from
891 # the one in the hard drive.
101e0d1e 892 self.report_file_already_downloaded(filename)
62cf7aaf 893 self.try_rename(tmpfilename, filename)
101e0d1e
RG
894 return True
895 else:
896 # The length does not match, we start the download over
897 self.report_unable_to_resume()
898 open_mode = 'wb'
899 break
900 # Retry
901 count += 1
902 if count <= retries:
903 self.report_retry(count, retries)
904
905 if count > retries:
906 self.trouble(u'ERROR: giving up after %s retries' % retries)
907 return False
7db85b2c 908
4fa74b52 909 data_len = data.info().get('Content-length', None)
106d091e
RG
910 if data_len is not None:
911 data_len = long(data_len) + resume_len
4fa74b52 912 data_len_str = self.format_bytes(data_len)
106d091e 913 byte_counter = 0 + resume_len
4fa74b52
RG
914 block_size = 1024
915 start = time.time()
916 while True:
bafa5cd9 917 # Download and write
4fa74b52
RG
918 before = time.time()
919 data_block = data.read(block_size)
920 after = time.time()
975a91d0 921 if len(data_block) == 0:
4fa74b52 922 break
975a91d0 923 byte_counter += len(data_block)
55e7c75e
RG
924
925 # Open file just in time
926 if stream is None:
927 try:
62cf7aaf 928 (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
8cc42e7c 929 filename = self.undo_temp_name(tmpfilename)
55e7c75e
RG
930 self.report_destination(filename)
931 except (OSError, IOError), err:
db7e31b8 932 self.trouble(u'ERROR: unable to open for writing: %s' % str(err))
55e7c75e 933 return False
131efd1a
RG
934 try:
935 stream.write(data_block)
936 except (IOError, OSError), err:
d67e0974
RG
937 self.trouble(u'\nERROR: unable to write data: %s' % str(err))
938 return False
975a91d0 939 block_size = self.best_block_size(after - before, len(data_block))
4fa74b52 940
55e7c75e
RG
941 # Progress message
942 percent_str = self.calc_percent(byte_counter, data_len)
975a91d0
RG
943 eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
944 speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len)
55e7c75e
RG
945 self.report_progress(percent_str, data_len_str, speed_str, eta_str)
946
acd3d842 947 # Apply rate limit
975a91d0 948 self.slow_down(start, byte_counter - resume_len)
acd3d842 949
6f0ff3ba 950 stream.close()
bafa5cd9 951 self.report_finish()
b905e5f5 952 if data_len is not None and byte_counter != data_len:
d69a1c91 953 raise ContentTooShortError(byte_counter, long(data_len))
62cf7aaf 954 self.try_rename(tmpfilename, filename)
e3018902 955
09bd408c 956 # Update file modification time
e3018902
RG
957 if self.params.get('updatetime', True):
958 self.try_utime(filename, data.info().get('last-modified', None))
959
55e7c75e 960 return True
4fa74b52
RG
961
class InfoExtractor(object):
	"""Base class for all information extractors.

	An information extractor ("IE") takes a URL and produces, for each
	video the URL refers to, a dictionary that the FileDownloader then
	processes (possibly downloading the video). Each dictionary must
	contain the fields: id, url, uploader, title, stitle, ext, format and
	player_url (which may be None). Optional fields — used only by the
	forced-printing functions — are thumbnail and description.

	Subclasses should override _real_initialize(), _real_extract() and the
	static suitable() method, and are typically registered with the main
	downloader.
	"""

	# Class-level defaults; __init__/set_downloader overwrite them per instance.
	_ready = False
	_downloader = None

	def __init__(self, downloader=None):
		"""Create the extractor, optionally attaching a downloader."""
		self._ready = False
		self.set_downloader(downloader)

	@staticmethod
	def suitable(url):
		"""Return True if this IE can handle url (the base class never can)."""
		return False

	def set_downloader(self, downloader):
		"""Attach the FileDownloader this IE reports to."""
		self._downloader = downloader

	def initialize(self):
		"""Run one-time setup (authentication, etc) exactly once."""
		if self._ready:
			return
		self._real_initialize()
		self._ready = True

	def extract(self, url):
		"""Ensure initialization, then run the real extraction for url."""
		self.initialize()
		return self._real_extract(url)

	def _real_initialize(self):
		"""Subclass hook: actual initialization. Default does nothing."""
		pass

	def _real_extract(self, url):
		"""Subclass hook: actual extraction. Default does nothing."""
		pass
1032
class YoutubeIE(InfoExtractor):
	"""Information extractor for youtube.com.

	Handles language selection, optional login (explicit credentials or
	.netrc) and age confirmation in _real_initialize(), then extracts the
	real video URL(s), title and metadata in _real_extract().
	"""

	_VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?:(?:(?:v|embed|e)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=))?)?([0-9A-Za-z_-]+)(?(1).+)?$'
	_LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
	_LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en'
	_AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
	_NETRC_MACHINE = 'youtube'
	# Listed in order of quality
	_available_formats = ['38', '37', '22', '45', '35', '34', '43', '18', '6', '5', '17', '13']
	# Maps itag -> container extension; anything unknown falls back to 'flv'.
	_video_extensions = {
		'13': '3gp',
		'17': 'mp4',
		'18': 'mp4',
		'22': 'mp4',
		'37': 'mp4',
		'38': 'video', # You actually don't know if this will be MOV, AVI or whatever
		'43': 'webm',
		'45': 'webm',
	}

	@staticmethod
	def suitable(url):
		"""Return True if url matches any supported YouTube URL form."""
		return (re.match(YoutubeIE._VALID_URL, url) is not None)

	def report_lang(self):
		"""Report attempt to set language."""
		self._downloader.to_screen(u'[youtube] Setting language')

	def report_login(self):
		"""Report attempt to log in."""
		self._downloader.to_screen(u'[youtube] Logging in')

	def report_age_confirmation(self):
		"""Report attempt to confirm age."""
		self._downloader.to_screen(u'[youtube] Confirming age')

	def report_video_webpage_download(self, video_id):
		"""Report attempt to download video webpage."""
		self._downloader.to_screen(u'[youtube] %s: Downloading video webpage' % video_id)

	def report_video_info_webpage_download(self, video_id):
		"""Report attempt to download video info webpage."""
		self._downloader.to_screen(u'[youtube] %s: Downloading video info webpage' % video_id)

	def report_information_extraction(self, video_id):
		"""Report attempt to extract video information."""
		self._downloader.to_screen(u'[youtube] %s: Extracting video information' % video_id)

	def report_unavailable_format(self, video_id, format):
		"""Report that a requested format is not available."""
		self._downloader.to_screen(u'[youtube] %s: Format %s not available' % (video_id, format))

	def report_rtmp_download(self):
		"""Indicate the download will use the RTMP protocol."""
		self._downloader.to_screen(u'[youtube] RTMP download detected')

	def _real_initialize(self):
		# Without a downloader there is nowhere to report or read params from.
		if self._downloader is None:
			return

		username = None
		password = None
		downloader_params = self._downloader.params

		# Attempt to use provided username and password or .netrc data
		if downloader_params.get('username', None) is not None:
			username = downloader_params['username']
			password = downloader_params['password']
		elif downloader_params.get('usenetrc', False):
			try:
				info = netrc.netrc().authenticators(self._NETRC_MACHINE)
				if info is not None:
					username = info[0]
					password = info[2]
				else:
					raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
			except (IOError, netrc.NetrcParseError), err:
				self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
				return

		# Set language (stores the preference in the urllib2 cookie state;
		# the response body itself is discarded)
		request = urllib2.Request(self._LANG_URL)
		try:
			self.report_lang()
			urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err))
			return

		# No authentication to be performed
		if username is None:
			return

		# Log in
		login_form = {
			'current_form': 'loginForm',
			'next': '/',
			'action_login': 'Log In',
			'username': username,
			'password': password,
		}
		request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form))
		try:
			self.report_login()
			login_results = urllib2.urlopen(request).read()
			# If the login form is still present, the credentials were rejected.
			if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
				self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
				return
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
			return

		# Confirm age
		age_form = {
			'next_url': '/',
			'action_confirm': 'Confirm',
		}
		request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form))
		try:
			self.report_age_confirmation()
			age_results = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
			return

	def _real_extract(self, url):
		# Extract video id from URL
		mobj = re.match(self._VALID_URL, url)
		if mobj is None:
			self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
			return
		video_id = mobj.group(2)

		# Get video webpage
		self.report_video_webpage_download(video_id)
		request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en&amp;has_verified=1' % video_id)
		try:
			video_webpage = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
			return

		# Attempt to extract SWF player URL (needed later for rtmpdump -W)
		mobj = re.search(r'swfConfig.*?"(http:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
		if mobj is not None:
			# Un-escape the JS-escaped URL (\/ -> /).
			player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
		else:
			player_url = None

		# Get video info: try the 'el' variants in order until one of them
		# yields a response containing a 'token'.
		self.report_video_info_webpage_download(video_id)
		for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
			video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
					% (video_id, el_type))
			request = urllib2.Request(video_info_url)
			try:
				video_info_webpage = urllib2.urlopen(request).read()
				video_info = parse_qs(video_info_webpage)
				if 'token' in video_info:
					break
			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
				self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
				return
		if 'token' not in video_info:
			if 'reason' in video_info:
				self._downloader.trouble(u'ERROR: YouTube said: %s' % video_info['reason'][0].decode('utf-8'))
			else:
				self._downloader.trouble(u'ERROR: "token" parameter not in video info for unknown reason')
			return

		# Start extracting information
		self.report_information_extraction(video_id)

		# uploader
		if 'author' not in video_info:
			self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
			return
		video_uploader = urllib.unquote_plus(video_info['author'][0])

		# title
		if 'title' not in video_info:
			self._downloader.trouble(u'ERROR: unable to extract video title')
			return
		video_title = urllib.unquote_plus(video_info['title'][0])
		video_title = video_title.decode('utf-8')
		video_title = sanitize_title(video_title)

		# simplified title: collapse anything outside the allowed charset to '_'
		simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
		simple_title = simple_title.strip(ur'_')

		# thumbnail image
		if 'thumbnail_url' not in video_info:
			self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
			video_thumbnail = ''
		else:	# don't panic if we can't find it
			video_thumbnail = urllib.unquote_plus(video_info['thumbnail_url'][0])

		# upload date
		upload_date = u'NA'
		mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
		if mobj is not None:
			# Normalize separators to spaces before trying each date format.
			upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
			format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y']
			for expression in format_expressions:
				try:
					upload_date = datetime.datetime.strptime(upload_date, expression).strftime('%Y%m%d')
				except:
					# NOTE(review): bare except deliberately skips formats that
					# don't parse; once one format succeeds, the remaining
					# iterations fail on the reformatted value and are ignored.
					pass

		# description
		try:
			lxml.etree
		except NameError:
			# lxml is not installed (see the import fallback at file top):
			# fall back to a crude regex, and only when the description is
			# actually requested.
			video_description = u'No description available.'
			if self._downloader.params.get('forcedescription', False) or self._downloader.params.get('writedescription', False):
				mobj = re.search(r'<meta name="description" content="(.*)"(?:\s*/)?>', video_webpage)
				if mobj is not None:
					video_description = mobj.group(1).decode('utf-8')
		else:
			html_parser = lxml.etree.HTMLParser(encoding='utf-8')
			vwebpage_doc = lxml.etree.parse(StringIO.StringIO(video_webpage), html_parser)
			video_description = u''.join(vwebpage_doc.xpath('id("eow-description")//text()'))
			# TODO use another parser

		# token
		# NOTE(review): video_token is not used further in this method —
		# presumably kept for parity with older extraction code; verify callers.
		video_token = urllib.unquote_plus(video_info['token'][0])

		# Decide which formats to download
		req_format = self._downloader.params.get('format', None)

		if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
			self.report_rtmp_download()
			video_url_list = [(None, video_info['conn'][0])]
		elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1:
			url_data_strs = video_info['url_encoded_fmt_stream_map'][0].split(',')
			url_data = [parse_qs(uds) for uds in url_data_strs]
			url_data = filter(lambda ud: 'itag' in ud and 'url' in ud, url_data)
			url_map = dict((ud['itag'][0], ud['url'][0]) for ud in url_data)

			format_limit = self._downloader.params.get('format_limit', None)
			if format_limit is not None and format_limit in self._available_formats:
				format_list = self._available_formats[self._available_formats.index(format_limit):]
			else:
				format_list = self._available_formats
			existing_formats = [x for x in format_list if x in url_map]
			if len(existing_formats) == 0:
				self._downloader.trouble(u'ERROR: no known formats available for video')
				return
			if req_format is None:
				video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
			elif req_format == '-1':
				video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
			else:
				# Specific format
				if req_format not in url_map:
					self._downloader.trouble(u'ERROR: requested format not available')
					return
				video_url_list = [(req_format, url_map[req_format])] # Specific format
		else:
			self._downloader.trouble(u'ERROR: no conn or url_encoded_fmt_stream_map information found in video info')
			return

		for format_param, video_real_url in video_url_list:
			# At this point we have a new video
			self._downloader.increment_downloads()

			# Extension
			video_extension = self._video_extensions.get(format_param, 'flv')

			try:
				# Process video information
				self._downloader.process_info({
					'id': video_id.decode('utf-8'),
					'url': video_real_url.decode('utf-8'),
					'uploader': video_uploader.decode('utf-8'),
					'upload_date': upload_date,
					'title': video_title,
					'stitle': simple_title,
					'ext': video_extension.decode('utf-8'),
					'format': (format_param is None and u'NA' or format_param.decode('utf-8')),
					'thumbnail': video_thumbnail.decode('utf-8'),
					'description': video_description,
					'player_url': player_url,
				})
			except UnavailableVideoError, err:
				self._downloader.trouble(u'\nERROR: unable to download video')
42bcd27d 1321
4fa74b52 1322
020f7150
RG
class MetacafeIE(InfoExtractor):
	"""Information Extractor for metacafe.com.

	Metacafe pages sometimes embed YouTube videos ('yt-<id>' ids); those
	are delegated to the YoutubeIE instance passed to the constructor.
	"""

	_VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
	_DISCLAIMER = 'http://www.metacafe.com/family_filter/'
	_FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
	# YoutubeIE instance used for delegation; set in __init__.
	_youtube_ie = None

	def __init__(self, youtube_ie, downloader=None):
		"""Receives the YoutubeIE to delegate 'yt-' videos to."""
		InfoExtractor.__init__(self, downloader)
		self._youtube_ie = youtube_ie

	@staticmethod
	def suitable(url):
		"""Return True if url is a metacafe watch URL."""
		return (re.match(MetacafeIE._VALID_URL, url) is not None)

	def report_disclaimer(self):
		"""Report disclaimer retrieval."""
		self._downloader.to_screen(u'[metacafe] Retrieving disclaimer')

	def report_age_confirmation(self):
		"""Report attempt to confirm age."""
		self._downloader.to_screen(u'[metacafe] Confirming age')

	def report_download_webpage(self, video_id):
		"""Report webpage download."""
		self._downloader.to_screen(u'[metacafe] %s: Downloading webpage' % video_id)

	def report_extraction(self, video_id):
		"""Report information extraction."""
		self._downloader.to_screen(u'[metacafe] %s: Extracting information' % video_id)

	def _real_initialize(self):
		# Retrieve disclaimer (response body unused; presumably fetched to
		# establish session state before posting the filter form — TODO confirm)
		request = urllib2.Request(self._DISCLAIMER)
		try:
			self.report_disclaimer()
			disclaimer = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % str(err))
			return

		# Confirm age
		disclaimer_form = {
			'filters': '0',
			'submit': "Continue - I'm over 18",
		}
		request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form))
		try:
			self.report_age_confirmation()
			disclaimer = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
			return

	def _real_extract(self, url):
		# Extract id and simplified title from URL
		mobj = re.match(self._VALID_URL, url)
		if mobj is None:
			self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
			return

		video_id = mobj.group(1)

		# Check if video comes from YouTube; if so, delegate before counting
		# this as a new download (the delegated IE does its own counting).
		mobj2 = re.match(r'^yt-(.*)$', video_id)
		if mobj2 is not None:
			self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % mobj2.group(1))
			return

		# At this point we have a new video
		self._downloader.increment_downloads()

		simple_title = mobj.group(2).decode('utf-8')

		# Retrieve video webpage to extract further information
		request = urllib2.Request('http://www.metacafe.com/watch/%s/' % video_id)
		try:
			self.report_download_webpage(video_id)
			webpage = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
			return

		# Extract URL, uploader and title from webpage
		self.report_extraction(video_id)
		mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
		if mobj is not None:
			mediaURL = urllib.unquote(mobj.group(1))
			# Extension taken from the last 3 characters of the media URL.
			video_extension = mediaURL[-3:]

			# Extract gdaKey if available
			mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage)
			if mobj is None:
				video_url = mediaURL
			else:
				gdaKey = mobj.group(1)
				video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)
		else:
			# Newer page layout: the media data lives in the flashvars blob.
			mobj = re.search(r' name="flashvars" value="(.*?)"', webpage)
			if mobj is None:
				self._downloader.trouble(u'ERROR: unable to extract media URL')
				return
			vardict = parse_qs(mobj.group(1))
			if 'mediaData' not in vardict:
				self._downloader.trouble(u'ERROR: unable to extract media URL')
				return
			mobj = re.search(r'"mediaURL":"(http.*?)","key":"(.*?)"', vardict['mediaData'][0])
			if mobj is None:
				self._downloader.trouble(u'ERROR: unable to extract media URL')
				return
			mediaURL = mobj.group(1).replace('\\/', '/')
			video_extension = mediaURL[-3:]
			video_url = '%s?__gda__=%s' % (mediaURL, mobj.group(2))

		mobj = re.search(r'(?im)<title>(.*) - Video</title>', webpage)
		if mobj is None:
			self._downloader.trouble(u'ERROR: unable to extract title')
			return
		video_title = mobj.group(1).decode('utf-8')
		video_title = sanitize_title(video_title)

		mobj = re.search(r'(?ms)By:\s*<a .*?>(.+?)<', webpage)
		if mobj is None:
			self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
			return
		video_uploader = mobj.group(1)

		try:
			# Process video information
			self._downloader.process_info({
				'id': video_id.decode('utf-8'),
				'url': video_url.decode('utf-8'),
				'uploader': video_uploader.decode('utf-8'),
				'upload_date': u'NA',
				'title': video_title,
				'stitle': simple_title,
				'ext': video_extension.decode('utf-8'),
				'format': u'NA',
				'player_url': None,
			})
		except UnavailableVideoError:
			self._downloader.trouble(u'\nERROR: unable to download video')
25af2bce 1467
4135fa45
WB
1468class DailymotionIE(InfoExtractor):
1469 """Information Extractor for Dailymotion"""
1470
1471 _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^_/]+)_([^/]+)'
4135fa45
WB
1472
1473 def __init__(self, downloader=None):
1474 InfoExtractor.__init__(self, downloader)
1475
1476 @staticmethod
1477 def suitable(url):
1478 return (re.match(DailymotionIE._VALID_URL, url) is not None)
1479
4135fa45
WB
1480 def report_download_webpage(self, video_id):
1481 """Report webpage download."""
331ce0a0 1482 self._downloader.to_screen(u'[dailymotion] %s: Downloading webpage' % video_id)
d3975459 1483
4135fa45
WB
1484 def report_extraction(self, video_id):
1485 """Report information extraction."""
331ce0a0 1486 self._downloader.to_screen(u'[dailymotion] %s: Extracting information' % video_id)
4135fa45
WB
1487
1488 def _real_initialize(self):
1489 return
1490
4135fa45
WB
1491 def _real_extract(self, url):
1492 # Extract id and simplified title from URL
1493 mobj = re.match(self._VALID_URL, url)
1494 if mobj is None:
1495 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
1496 return
1497
df372a65 1498 # At this point we have a new video
9bf7fa52 1499 self._downloader.increment_downloads()
4135fa45
WB
1500 video_id = mobj.group(1)
1501
1502 simple_title = mobj.group(2).decode('utf-8')
1503 video_extension = 'flv'
1504
1505 # Retrieve video webpage to extract further information
1506 request = urllib2.Request(url)
1507 try:
1508 self.report_download_webpage(video_id)
1509 webpage = urllib2.urlopen(request).read()
1510 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1511 self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
1512 return
1513
1514 # Extract URL, uploader and title from webpage
1515 self.report_extraction(video_id)
1516 mobj = re.search(r'(?i)addVariable\(\"video\"\s*,\s*\"([^\"]*)\"\)', webpage)
1517 if mobj is None:
1518 self._downloader.trouble(u'ERROR: unable to extract media URL')
1519 return
1520 mediaURL = urllib.unquote(mobj.group(1))
1521
1522 # if needed add http://www.dailymotion.com/ if relative URL
1523
1524 video_url = mediaURL
1525
1526 # '<meta\s+name="title"\s+content="Dailymotion\s*[:\-]\s*(.*?)"\s*\/\s*>'
1527 mobj = re.search(r'(?im)<title>Dailymotion\s*[\-:]\s*(.+?)</title>', webpage)
1528 if mobj is None:
1529 self._downloader.trouble(u'ERROR: unable to extract title')
1530 return
1531 video_title = mobj.group(1).decode('utf-8')
1532 video_title = sanitize_title(video_title)
1533
c02d8e40 1534 mobj = re.search(r'(?im)<Attribute name="owner">(.+?)</Attribute>', webpage)
4135fa45
WB
1535 if mobj is None:
1536 self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
1537 return
1538 video_uploader = mobj.group(1)
1539
1540 try:
1541 # Process video information
1542 self._downloader.process_info({
1543 'id': video_id.decode('utf-8'),
1544 'url': video_url.decode('utf-8'),
1545 'uploader': video_uploader.decode('utf-8'),
138b11f3 1546 'upload_date': u'NA',
4135fa45
WB
1547 'title': video_title,
1548 'stitle': simple_title,
1549 'ext': video_extension.decode('utf-8'),
1550 'format': u'NA',
1551 'player_url': None,
1552 })
73f4e7af 1553 except UnavailableVideoError:
09cc744c 1554 self._downloader.trouble(u'\nERROR: unable to download video')
4135fa45 1555
class GoogleIE(InfoExtractor):
	"""Information extractor for video.google.com.

	Tries the direct download_url first (mp4); falls back to the embedded
	flv player URL. Description and thumbnail are also extracted, though
	they are not currently included in the process_info() dictionary —
	NOTE(review): unlike YoutubeIE; verify whether that is intentional.
	"""

	_VALID_URL = r'(?:http://)?video\.google\.(?:com(?:\.au)?|co\.(?:uk|jp|kr|cr)|ca|de|es|fr|it|nl|pl)/videoplay\?docid=([^\&]+).*'

	def __init__(self, downloader=None):
		InfoExtractor.__init__(self, downloader)

	@staticmethod
	def suitable(url):
		"""Return True if url is a Google Video playback URL."""
		return (re.match(GoogleIE._VALID_URL, url) is not None)

	def report_download_webpage(self, video_id):
		"""Report webpage download."""
		self._downloader.to_screen(u'[video.google] %s: Downloading webpage' % video_id)

	def report_extraction(self, video_id):
		"""Report information extraction."""
		self._downloader.to_screen(u'[video.google] %s: Extracting information' % video_id)

	def _real_initialize(self):
		# No session setup needed for Google Video.
		return

	def _real_extract(self, url):
		# Extract id from URL
		mobj = re.match(self._VALID_URL, url)
		if mobj is None:
			self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
			return

		# At this point we have a new video
		self._downloader.increment_downloads()
		video_id = mobj.group(1)

		video_extension = 'mp4'

		# Retrieve video webpage to extract further information
		request = urllib2.Request('http://video.google.com/videoplay?docid=%s&hl=en&oe=utf-8' % video_id)
		try:
			self.report_download_webpage(video_id)
			webpage = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
			return

		# Extract URL, uploader, and title from webpage
		self.report_extraction(video_id)
		mobj = re.search(r"download_url:'([^']+)'", webpage)
		if mobj is None:
			# No direct download URL: fall back to the flv player URL.
			video_extension = 'flv'
			mobj = re.search(r"(?i)videoUrl\\x3d(.+?)\\x26", webpage)
		if mobj is None:
			self._downloader.trouble(u'ERROR: unable to extract media URL')
			return
		mediaURL = urllib.unquote(mobj.group(1))
		# Undo the JS hex escapes for '=' and '&'.
		mediaURL = mediaURL.replace('\\x3d', '\x3d')
		mediaURL = mediaURL.replace('\\x26', '\x26')

		video_url = mediaURL

		mobj = re.search(r'<title>(.*)</title>', webpage)
		if mobj is None:
			self._downloader.trouble(u'ERROR: unable to extract title')
			return
		video_title = mobj.group(1).decode('utf-8')
		video_title = sanitize_title(video_title)
		# NOTE(review): unlike YoutubeIE, the simplified title is not
		# stripped of leading/trailing underscores here.
		simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)

		# Extract video description
		mobj = re.search(r'<span id=short-desc-content>([^<]*)</span>', webpage)
		if mobj is None:
			self._downloader.trouble(u'ERROR: unable to extract video description')
			return
		video_description = mobj.group(1).decode('utf-8')
		if not video_description:
			video_description = 'No description available.'

		# Extract video thumbnail
		if self._downloader.params.get('forcethumbnail', False):
			request = urllib2.Request('http://video.google.com/videosearch?q=%s+site:video.google.com&hl=en' % abs(int(video_id)))
			try:
				webpage = urllib2.urlopen(request).read()
			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
				self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
				return
			mobj = re.search(r'<img class=thumbnail-img (?:.* )?src=(http.*)>', webpage)
			if mobj is None:
				self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
				return
			video_thumbnail = mobj.group(1)
		else:	# we need something to pass to process_info
			video_thumbnail = ''

		try:
			# Process video information
			self._downloader.process_info({
				'id': video_id.decode('utf-8'),
				'url': video_url.decode('utf-8'),
				'uploader': u'NA',
				'upload_date': u'NA',
				'title': video_title,
				'stitle': simple_title,
				'ext': video_extension.decode('utf-8'),
				'format': u'NA',
				'player_url': None,
			})
		except UnavailableVideoError:
			self._downloader.trouble(u'\nERROR: unable to download video')
49c0028a 1665
1666
1667class PhotobucketIE(InfoExtractor):
1668 """Information extractor for photobucket.com."""
1669
1670 _VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)'
1671
1672 def __init__(self, downloader=None):
1673 InfoExtractor.__init__(self, downloader)
1674
1675 @staticmethod
1676 def suitable(url):
1677 return (re.match(PhotobucketIE._VALID_URL, url) is not None)
1678
1679 def report_download_webpage(self, video_id):
1680 """Report webpage download."""
331ce0a0 1681 self._downloader.to_screen(u'[photobucket] %s: Downloading webpage' % video_id)
49c0028a 1682
1683 def report_extraction(self, video_id):
1684 """Report information extraction."""
331ce0a0 1685 self._downloader.to_screen(u'[photobucket] %s: Extracting information' % video_id)
49c0028a 1686
1687 def _real_initialize(self):
1688 return
1689
1690 def _real_extract(self, url):
1691 # Extract id from URL
1692 mobj = re.match(self._VALID_URL, url)
1693 if mobj is None:
1694 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1695 return
1696
df372a65 1697 # At this point we have a new video
9bf7fa52 1698 self._downloader.increment_downloads()
49c0028a 1699 video_id = mobj.group(1)
1700
1701 video_extension = 'flv'
1702
1703 # Retrieve video webpage to extract further information
1704 request = urllib2.Request(url)
1705 try:
1706 self.report_download_webpage(video_id)
1707 webpage = urllib2.urlopen(request).read()
1708 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1709 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1710 return
1711
1712 # Extract URL, uploader, and title from webpage
1713 self.report_extraction(video_id)
1714 mobj = re.search(r'<link rel="video_src" href=".*\?file=([^"]+)" />', webpage)
1715 if mobj is None:
1716 self._downloader.trouble(u'ERROR: unable to extract media URL')
1717 return
1718 mediaURL = urllib.unquote(mobj.group(1))
1719
1720 video_url = mediaURL
1721
1722 mobj = re.search(r'<title>(.*) video by (.*) - Photobucket</title>', webpage)
1723 if mobj is None:
1724 self._downloader.trouble(u'ERROR: unable to extract title')
1725 return
1726 video_title = mobj.group(1).decode('utf-8')
490fd7ae 1727 video_title = sanitize_title(video_title)
31cbdaaf 1728 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
49c0028a 1729
1730 video_uploader = mobj.group(2).decode('utf-8')
1731
1732 try:
1733 # Process video information
1734 self._downloader.process_info({
1735 'id': video_id.decode('utf-8'),
1736 'url': video_url.decode('utf-8'),
490fd7ae 1737 'uploader': video_uploader,
138b11f3 1738 'upload_date': u'NA',
490fd7ae 1739 'title': video_title,
31cbdaaf 1740 'stitle': simple_title,
490fd7ae 1741 'ext': video_extension.decode('utf-8'),
6ba562b0 1742 'format': u'NA',
e616ec0c 1743 'player_url': None,
490fd7ae 1744 })
73f4e7af 1745 except UnavailableVideoError:
09cc744c 1746 self._downloader.trouble(u'\nERROR: unable to download video')
490fd7ae
RG
1747
1748
61945318
RG
1749class YahooIE(InfoExtractor):
1750 """Information extractor for video.yahoo.com."""
1751
1752 # _VALID_URL matches all Yahoo! Video URLs
1753 # _VPAGE_URL matches only the extractable '/watch/' URLs
1754 _VALID_URL = r'(?:http://)?(?:[a-z]+\.)?video\.yahoo\.com/(?:watch|network)/([0-9]+)(?:/|\?v=)([0-9]+)(?:[#\?].*)?'
1755 _VPAGE_URL = r'(?:http://)?video\.yahoo\.com/watch/([0-9]+)/([0-9]+)(?:[#\?].*)?'
1756
1757 def __init__(self, downloader=None):
1758 InfoExtractor.__init__(self, downloader)
1759
1760 @staticmethod
1761 def suitable(url):
1762 return (re.match(YahooIE._VALID_URL, url) is not None)
1763
1764 def report_download_webpage(self, video_id):
1765 """Report webpage download."""
331ce0a0 1766 self._downloader.to_screen(u'[video.yahoo] %s: Downloading webpage' % video_id)
61945318
RG
1767
1768 def report_extraction(self, video_id):
1769 """Report information extraction."""
331ce0a0 1770 self._downloader.to_screen(u'[video.yahoo] %s: Extracting information' % video_id)
61945318
RG
1771
1772 def _real_initialize(self):
1773 return
1774
df372a65 1775 def _real_extract(self, url, new_video=True):
61945318
RG
1776 # Extract ID from URL
1777 mobj = re.match(self._VALID_URL, url)
1778 if mobj is None:
1779 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1780 return
1781
df372a65 1782 # At this point we have a new video
9bf7fa52 1783 self._downloader.increment_downloads()
61945318
RG
1784 video_id = mobj.group(2)
1785 video_extension = 'flv'
1786
1787 # Rewrite valid but non-extractable URLs as
1788 # extractable English language /watch/ URLs
1789 if re.match(self._VPAGE_URL, url) is None:
1790 request = urllib2.Request(url)
1791 try:
1792 webpage = urllib2.urlopen(request).read()
1793 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1794 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1795 return
1796
1797 mobj = re.search(r'\("id", "([0-9]+)"\);', webpage)
1798 if mobj is None:
1799 self._downloader.trouble(u'ERROR: Unable to extract id field')
1800 return
1801 yahoo_id = mobj.group(1)
1802
1803 mobj = re.search(r'\("vid", "([0-9]+)"\);', webpage)
1804 if mobj is None:
1805 self._downloader.trouble(u'ERROR: Unable to extract vid field')
1806 return
1807 yahoo_vid = mobj.group(1)
1808
1809 url = 'http://video.yahoo.com/watch/%s/%s' % (yahoo_vid, yahoo_id)
df372a65 1810 return self._real_extract(url, new_video=False)
61945318
RG
1811
1812 # Retrieve video webpage to extract further information
1813 request = urllib2.Request(url)
1814 try:
1815 self.report_download_webpage(video_id)
1816 webpage = urllib2.urlopen(request).read()
1817 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1818 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1819 return
1820
1821 # Extract uploader and title from webpage
1822 self.report_extraction(video_id)
1823 mobj = re.search(r'<meta name="title" content="(.*)" />', webpage)
1824 if mobj is None:
1825 self._downloader.trouble(u'ERROR: unable to extract video title')
1826 return
1827 video_title = mobj.group(1).decode('utf-8')
1828 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1829
1830 mobj = re.search(r'<h2 class="ti-5"><a href="http://video\.yahoo\.com/(people|profile)/[0-9]+" beacon=".*">(.*)</a></h2>', webpage)
1831 if mobj is None:
1832 self._downloader.trouble(u'ERROR: unable to extract video uploader')
1833 return
1834 video_uploader = mobj.group(1).decode('utf-8')
1835
7e58d568
RG
1836 # Extract video thumbnail
1837 mobj = re.search(r'<link rel="image_src" href="(.*)" />', webpage)
1838 if mobj is None:
1839 self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
1840 return
1841 video_thumbnail = mobj.group(1).decode('utf-8')
1842
1843 # Extract video description
1844 mobj = re.search(r'<meta name="description" content="(.*)" />', webpage)
1845 if mobj is None:
1846 self._downloader.trouble(u'ERROR: unable to extract video description')
1847 return
1848 video_description = mobj.group(1).decode('utf-8')
1849 if not video_description: video_description = 'No description available.'
1850
61945318
RG
1851 # Extract video height and width
1852 mobj = re.search(r'<meta name="video_height" content="([0-9]+)" />', webpage)
1853 if mobj is None:
1854 self._downloader.trouble(u'ERROR: unable to extract video height')
1855 return
1856 yv_video_height = mobj.group(1)
1857
1858 mobj = re.search(r'<meta name="video_width" content="([0-9]+)" />', webpage)
1859 if mobj is None:
1860 self._downloader.trouble(u'ERROR: unable to extract video width')
1861 return
1862 yv_video_width = mobj.group(1)
1863
1864 # Retrieve video playlist to extract media URL
1865 # I'm not completely sure what all these options are, but we
1866 # seem to need most of them, otherwise the server sends a 401.
1867 yv_lg = 'R0xx6idZnW2zlrKP8xxAIR' # not sure what this represents
1868 yv_bitrate = '700' # according to Wikipedia this is hard-coded
1869 request = urllib2.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id +
7a9054ec
GV
1870 '&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height +
1871 '&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797')
61945318
RG
1872 try:
1873 self.report_download_webpage(video_id)
1874 webpage = urllib2.urlopen(request).read()
1875 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1876 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1877 return
1878
1879 # Extract media URL from playlist XML
1880 mobj = re.search(r'<STREAM APP="(http://.*)" FULLPATH="/?(/.*\.flv\?[^"]*)"', webpage)
1881 if mobj is None:
1882 self._downloader.trouble(u'ERROR: Unable to extract media URL')
1883 return
1884 video_url = urllib.unquote(mobj.group(1) + mobj.group(2)).decode('utf-8')
1885 video_url = re.sub(r'(?u)&(.+?);', htmlentity_transform, video_url)
1886
1887 try:
1888 # Process video information
1889 self._downloader.process_info({
1890 'id': video_id.decode('utf-8'),
1891 'url': video_url,
1892 'uploader': video_uploader,
138b11f3 1893 'upload_date': u'NA',
61945318
RG
1894 'title': video_title,
1895 'stitle': simple_title,
1896 'ext': video_extension.decode('utf-8'),
7e58d568
RG
1897 'thumbnail': video_thumbnail.decode('utf-8'),
1898 'description': video_description,
1899 'thumbnail': video_thumbnail,
1900 'description': video_description,
e616ec0c 1901 'player_url': None,
61945318 1902 })
73f4e7af 1903 except UnavailableVideoError:
09cc744c 1904 self._downloader.trouble(u'\nERROR: unable to download video')
61945318
RG
1905
1906
490fd7ae
RG
1907class GenericIE(InfoExtractor):
1908 """Generic last-resort information extractor."""
1909
1910 def __init__(self, downloader=None):
1911 InfoExtractor.__init__(self, downloader)
1912
1913 @staticmethod
1914 def suitable(url):
1915 return True
1916
1917 def report_download_webpage(self, video_id):
1918 """Report webpage download."""
331ce0a0
RG
1919 self._downloader.to_screen(u'WARNING: Falling back on generic information extractor.')
1920 self._downloader.to_screen(u'[generic] %s: Downloading webpage' % video_id)
490fd7ae
RG
1921
1922 def report_extraction(self, video_id):
1923 """Report information extraction."""
331ce0a0 1924 self._downloader.to_screen(u'[generic] %s: Extracting information' % video_id)
490fd7ae
RG
1925
1926 def _real_initialize(self):
1927 return
1928
1929 def _real_extract(self, url):
df372a65 1930 # At this point we have a new video
9bf7fa52 1931 self._downloader.increment_downloads()
df372a65 1932
490fd7ae
RG
1933 video_id = url.split('/')[-1]
1934 request = urllib2.Request(url)
1935 try:
1936 self.report_download_webpage(video_id)
1937 webpage = urllib2.urlopen(request).read()
1938 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1939 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1940 return
1941 except ValueError, err:
1942 # since this is the last-resort InfoExtractor, if
1943 # this error is thrown, it'll be thrown here
1944 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1945 return
1946
a9806fd8 1947 self.report_extraction(video_id)
490fd7ae
RG
1948 # Start with something easy: JW Player in SWFObject
1949 mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
1950 if mobj is None:
1951 # Broaden the search a little bit
1952 mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
1953 if mobj is None:
1954 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1955 return
1956
1957 # It's possible that one of the regexes
1958 # matched, but returned an empty group:
1959 if mobj.group(1) is None:
1960 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1961 return
1962
1963 video_url = urllib.unquote(mobj.group(1))
1964 video_id = os.path.basename(video_url)
1965
1966 # here's a fun little line of code for you:
1967 video_extension = os.path.splitext(video_id)[1][1:]
1968 video_id = os.path.splitext(video_id)[0]
1969
1970 # it's tempting to parse this further, but you would
1971 # have to take into account all the variations like
1972 # Video Title - Site Name
1973 # Site Name | Video Title
1974 # Video Title - Tagline | Site Name
1975 # and so on and so forth; it's just not practical
1976 mobj = re.search(r'<title>(.*)</title>', webpage)
1977 if mobj is None:
1978 self._downloader.trouble(u'ERROR: unable to extract title')
1979 return
1980 video_title = mobj.group(1).decode('utf-8')
1981 video_title = sanitize_title(video_title)
31cbdaaf 1982 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
490fd7ae
RG
1983
1984 # video uploader is domain name
1985 mobj = re.match(r'(?:https?://)?([^/]*)/.*', url)
1986 if mobj is None:
1987 self._downloader.trouble(u'ERROR: unable to extract title')
1988 return
1989 video_uploader = mobj.group(1).decode('utf-8')
1990
1991 try:
1992 # Process video information
1993 self._downloader.process_info({
1994 'id': video_id.decode('utf-8'),
1995 'url': video_url.decode('utf-8'),
1996 'uploader': video_uploader,
138b11f3 1997 'upload_date': u'NA',
490fd7ae 1998 'title': video_title,
31cbdaaf 1999 'stitle': simple_title,
49c0028a 2000 'ext': video_extension.decode('utf-8'),
6ba562b0 2001 'format': u'NA',
e616ec0c 2002 'player_url': None,
49c0028a 2003 })
73f4e7af 2004 except UnavailableVideoError, err:
09cc744c 2005 self._downloader.trouble(u'\nERROR: unable to download video')
49c0028a 2006
2007
25af2bce
RG
2008class YoutubeSearchIE(InfoExtractor):
2009 """Information Extractor for YouTube search queries."""
2010 _VALID_QUERY = r'ytsearch(\d+|all)?:[\s\S]+'
2011 _TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en'
2012 _VIDEO_INDICATOR = r'href="/watch\?v=.+?"'
304a4d85 2013 _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
25af2bce 2014 _youtube_ie = None
fd9288c3 2015 _max_youtube_results = 1000
25af2bce 2016
f995f712 2017 def __init__(self, youtube_ie, downloader=None):
25af2bce
RG
2018 InfoExtractor.__init__(self, downloader)
2019 self._youtube_ie = youtube_ie
d3975459 2020
25af2bce
RG
2021 @staticmethod
2022 def suitable(url):
2023 return (re.match(YoutubeSearchIE._VALID_QUERY, url) is not None)
2024
2025 def report_download_page(self, query, pagenum):
2026 """Report attempt to download playlist page with given number."""
490fd7ae 2027 query = query.decode(preferredencoding())
331ce0a0 2028 self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
25af2bce
RG
2029
2030 def _real_initialize(self):
2031 self._youtube_ie.initialize()
d3975459 2032
25af2bce
RG
2033 def _real_extract(self, query):
2034 mobj = re.match(self._VALID_QUERY, query)
2035 if mobj is None:
147753eb 2036 self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
6f21f686 2037 return
25af2bce
RG
2038
2039 prefix, query = query.split(':')
2040 prefix = prefix[8:]
490fd7ae 2041 query = query.encode('utf-8')
f995f712 2042 if prefix == '':
6f21f686
RG
2043 self._download_n_results(query, 1)
2044 return
f995f712 2045 elif prefix == 'all':
6f21f686
RG
2046 self._download_n_results(query, self._max_youtube_results)
2047 return
f995f712 2048 else:
25af2bce 2049 try:
e1f18b8a 2050 n = long(prefix)
25af2bce 2051 if n <= 0:
147753eb 2052 self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
6f21f686 2053 return
257453b9 2054 elif n > self._max_youtube_results:
6f21f686 2055 self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)' % (self._max_youtube_results, n))
257453b9 2056 n = self._max_youtube_results
6f21f686
RG
2057 self._download_n_results(query, n)
2058 return
e1f18b8a 2059 except ValueError: # parsing prefix as integer fails
6f21f686
RG
2060 self._download_n_results(query, 1)
2061 return
25af2bce
RG
2062
2063 def _download_n_results(self, query, n):
2064 """Downloads a specified number of results for a query"""
2065
2066 video_ids = []
2067 already_seen = set()
2068 pagenum = 1
2069
2070 while True:
2071 self.report_download_page(query, pagenum)
a9633f14 2072 result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1987c232 2073 request = urllib2.Request(result_url)
25af2bce
RG
2074 try:
2075 page = urllib2.urlopen(request).read()
2076 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
147753eb 2077 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
6f21f686 2078 return
25af2bce
RG
2079
2080 # Extract video identifiers
2081 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2082 video_id = page[mobj.span()[0]:mobj.span()[1]].split('=')[2][:-1]
2083 if video_id not in already_seen:
2084 video_ids.append(video_id)
2085 already_seen.add(video_id)
2086 if len(video_ids) == n:
2087 # Specified n videos reached
25af2bce 2088 for id in video_ids:
6f21f686
RG
2089 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
2090 return
25af2bce 2091
304a4d85 2092 if re.search(self._MORE_PAGES_INDICATOR, page) is None:
25af2bce 2093 for id in video_ids:
6f21f686
RG
2094 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
2095 return
25af2bce
RG
2096
2097 pagenum = pagenum + 1
2098
7e58d568
RG
2099class GoogleSearchIE(InfoExtractor):
2100 """Information Extractor for Google Video search queries."""
2101 _VALID_QUERY = r'gvsearch(\d+|all)?:[\s\S]+'
2102 _TEMPLATE_URL = 'http://video.google.com/videosearch?q=%s+site:video.google.com&start=%s&hl=en'
2103 _VIDEO_INDICATOR = r'videoplay\?docid=([^\&>]+)\&'
2104 _MORE_PAGES_INDICATOR = r'<span>Next</span>'
2105 _google_ie = None
2106 _max_google_results = 1000
2107
2108 def __init__(self, google_ie, downloader=None):
2109 InfoExtractor.__init__(self, downloader)
2110 self._google_ie = google_ie
d3975459 2111
7e58d568
RG
2112 @staticmethod
2113 def suitable(url):
2114 return (re.match(GoogleSearchIE._VALID_QUERY, url) is not None)
2115
2116 def report_download_page(self, query, pagenum):
2117 """Report attempt to download playlist page with given number."""
2118 query = query.decode(preferredencoding())
331ce0a0 2119 self._downloader.to_screen(u'[video.google] query "%s": Downloading page %s' % (query, pagenum))
7e58d568
RG
2120
2121 def _real_initialize(self):
2122 self._google_ie.initialize()
d3975459 2123
7e58d568
RG
2124 def _real_extract(self, query):
2125 mobj = re.match(self._VALID_QUERY, query)
2126 if mobj is None:
2127 self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
2128 return
2129
2130 prefix, query = query.split(':')
2131 prefix = prefix[8:]
2132 query = query.encode('utf-8')
2133 if prefix == '':
2134 self._download_n_results(query, 1)
2135 return
2136 elif prefix == 'all':
2137 self._download_n_results(query, self._max_google_results)
2138 return
2139 else:
2140 try:
2141 n = long(prefix)
2142 if n <= 0:
2143 self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
2144 return
2145 elif n > self._max_google_results:
2146 self._downloader.to_stderr(u'WARNING: gvsearch returns max %i results (you requested %i)' % (self._max_google_results, n))
2147 n = self._max_google_results
2148 self._download_n_results(query, n)
2149 return
2150 except ValueError: # parsing prefix as integer fails
2151 self._download_n_results(query, 1)
2152 return
2153
2154 def _download_n_results(self, query, n):
2155 """Downloads a specified number of results for a query"""
2156
2157 video_ids = []
2158 already_seen = set()
2159 pagenum = 1
2160
2161 while True:
2162 self.report_download_page(query, pagenum)
2163 result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1987c232 2164 request = urllib2.Request(result_url)
7e58d568
RG
2165 try:
2166 page = urllib2.urlopen(request).read()
2167 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2168 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2169 return
2170
2171 # Extract video identifiers
2172 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2173 video_id = mobj.group(1)
2174 if video_id not in already_seen:
2175 video_ids.append(video_id)
2176 already_seen.add(video_id)
2177 if len(video_ids) == n:
2178 # Specified n videos reached
2179 for id in video_ids:
2180 self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
2181 return
2182
2183 if re.search(self._MORE_PAGES_INDICATOR, page) is None:
2184 for id in video_ids:
2185 self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
2186 return
2187
2188 pagenum = pagenum + 1
2189
2190class YahooSearchIE(InfoExtractor):
2191 """Information Extractor for Yahoo! Video search queries."""
2192 _VALID_QUERY = r'yvsearch(\d+|all)?:[\s\S]+'
2193 _TEMPLATE_URL = 'http://video.yahoo.com/search/?p=%s&o=%s'
2194 _VIDEO_INDICATOR = r'href="http://video\.yahoo\.com/watch/([0-9]+/[0-9]+)"'
2195 _MORE_PAGES_INDICATOR = r'\s*Next'
2196 _yahoo_ie = None
2197 _max_yahoo_results = 1000
2198
2199 def __init__(self, yahoo_ie, downloader=None):
2200 InfoExtractor.__init__(self, downloader)
2201 self._yahoo_ie = yahoo_ie
d3975459 2202
7e58d568
RG
2203 @staticmethod
2204 def suitable(url):
2205 return (re.match(YahooSearchIE._VALID_QUERY, url) is not None)
2206
2207 def report_download_page(self, query, pagenum):
2208 """Report attempt to download playlist page with given number."""
2209 query = query.decode(preferredencoding())
331ce0a0 2210 self._downloader.to_screen(u'[video.yahoo] query "%s": Downloading page %s' % (query, pagenum))
7e58d568
RG
2211
2212 def _real_initialize(self):
2213 self._yahoo_ie.initialize()
d3975459 2214
7e58d568
RG
2215 def _real_extract(self, query):
2216 mobj = re.match(self._VALID_QUERY, query)
2217 if mobj is None:
2218 self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
2219 return
2220
2221 prefix, query = query.split(':')
2222 prefix = prefix[8:]
2223 query = query.encode('utf-8')
2224 if prefix == '':
2225 self._download_n_results(query, 1)
2226 return
2227 elif prefix == 'all':
2228 self._download_n_results(query, self._max_yahoo_results)
2229 return
2230 else:
2231 try:
2232 n = long(prefix)
2233 if n <= 0:
2234 self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
2235 return
2236 elif n > self._max_yahoo_results:
2237 self._downloader.to_stderr(u'WARNING: yvsearch returns max %i results (you requested %i)' % (self._max_yahoo_results, n))
2238 n = self._max_yahoo_results
2239 self._download_n_results(query, n)
2240 return
2241 except ValueError: # parsing prefix as integer fails
2242 self._download_n_results(query, 1)
2243 return
2244
2245 def _download_n_results(self, query, n):
2246 """Downloads a specified number of results for a query"""
2247
2248 video_ids = []
2249 already_seen = set()
2250 pagenum = 1
2251
2252 while True:
2253 self.report_download_page(query, pagenum)
2254 result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1987c232 2255 request = urllib2.Request(result_url)
7e58d568
RG
2256 try:
2257 page = urllib2.urlopen(request).read()
2258 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2259 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2260 return
2261
2262 # Extract video identifiers
2263 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2264 video_id = mobj.group(1)
2265 if video_id not in already_seen:
2266 video_ids.append(video_id)
2267 already_seen.add(video_id)
2268 if len(video_ids) == n:
2269 # Specified n videos reached
2270 for id in video_ids:
2271 self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
2272 return
2273
2274 if re.search(self._MORE_PAGES_INDICATOR, page) is None:
2275 for id in video_ids:
2276 self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
2277 return
2278
2279 pagenum = pagenum + 1
2280
0c2dc87d
RG
2281class YoutubePlaylistIE(InfoExtractor):
2282 """Information Extractor for YouTube playlists."""
2283
d119b54d 2284 _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists|artist)\?.*?(p|a)=|user/.*?/user/|p/|user/.*?#[pg]/c/)([0-9A-Za-z]+)(?:/.*?/([0-9A-Za-z_-]+))?.*'
f74e22ae 2285 _TEMPLATE_URL = 'http://www.youtube.com/%s?%s=%s&page=%s&gl=US&hl=en'
0c2dc87d 2286 _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
ce5cafea 2287 _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
0c2dc87d
RG
2288 _youtube_ie = None
2289
2290 def __init__(self, youtube_ie, downloader=None):
2291 InfoExtractor.__init__(self, downloader)
2292 self._youtube_ie = youtube_ie
d3975459 2293
0c2dc87d
RG
2294 @staticmethod
2295 def suitable(url):
2296 return (re.match(YoutubePlaylistIE._VALID_URL, url) is not None)
2297
2298 def report_download_page(self, playlist_id, pagenum):
2299 """Report attempt to download playlist page with given number."""
331ce0a0 2300 self._downloader.to_screen(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum))
0c2dc87d
RG
2301
2302 def _real_initialize(self):
2303 self._youtube_ie.initialize()
d3975459 2304
0c2dc87d
RG
2305 def _real_extract(self, url):
2306 # Extract playlist id
2307 mobj = re.match(self._VALID_URL, url)
2308 if mobj is None:
147753eb 2309 self._downloader.trouble(u'ERROR: invalid url: %s' % url)
6f21f686 2310 return
0c2dc87d 2311
d119b54d
RG
2312 # Single video case
2313 if mobj.group(3) is not None:
2314 self._youtube_ie.extract(mobj.group(3))
2315 return
2316
0c2dc87d 2317 # Download playlist pages
f74e22ae
GI
2318 # prefix is 'p' as default for playlists but there are other types that need extra care
2319 playlist_prefix = mobj.group(1)
2320 if playlist_prefix == 'a':
2321 playlist_access = 'artist'
2322 else:
7cc3c6fd 2323 playlist_prefix = 'p'
f74e22ae
GI
2324 playlist_access = 'view_play_list'
2325 playlist_id = mobj.group(2)
0c2dc87d
RG
2326 video_ids = []
2327 pagenum = 1
2328
2329 while True:
2330 self.report_download_page(playlist_id, pagenum)
f74e22ae 2331 request = urllib2.Request(self._TEMPLATE_URL % (playlist_access, playlist_prefix, playlist_id, pagenum))
0c2dc87d
RG
2332 try:
2333 page = urllib2.urlopen(request).read()
2334 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
147753eb 2335 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
6f21f686 2336 return
0c2dc87d
RG
2337
2338 # Extract video identifiers
27d98b6e 2339 ids_in_page = []
0c2dc87d 2340 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
27d98b6e
RG
2341 if mobj.group(1) not in ids_in_page:
2342 ids_in_page.append(mobj.group(1))
2343 video_ids.extend(ids_in_page)
0c2dc87d 2344
ce5cafea 2345 if re.search(self._MORE_PAGES_INDICATOR, page) is None:
0c2dc87d
RG
2346 break
2347 pagenum = pagenum + 1
2348
8cc44341
RG
2349 playliststart = self._downloader.params.get('playliststart', 1) - 1
2350 playlistend = self._downloader.params.get('playlistend', -1)
2351 video_ids = video_ids[playliststart:playlistend]
2352
0c2dc87d 2353 for id in video_ids:
6f21f686
RG
2354 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
2355 return
0c2dc87d 2356
c39c05cd
A
2357class YoutubeUserIE(InfoExtractor):
2358 """Information Extractor for YouTube users."""
2359
5aba6ea4 2360 _VALID_URL = r'(?:(?:(?:http://)?(?:\w+\.)?youtube.com/user/)|ytuser:)([A-Za-z0-9_-]+)'
c39c05cd 2361 _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
5aba6ea4
RG
2362 _GDATA_PAGE_SIZE = 50
2363 _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d'
2364 _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
c39c05cd
A
2365 _youtube_ie = None
2366
2367 def __init__(self, youtube_ie, downloader=None):
2368 InfoExtractor.__init__(self, downloader)
2369 self._youtube_ie = youtube_ie
d3975459 2370
c39c05cd
A
2371 @staticmethod
2372 def suitable(url):
2373 return (re.match(YoutubeUserIE._VALID_URL, url) is not None)
2374
5aba6ea4 2375 def report_download_page(self, username, start_index):
c39c05cd 2376 """Report attempt to download user page."""
5aba6ea4
RG
2377 self._downloader.to_screen(u'[youtube] user %s: Downloading video ids from %d to %d' %
2378 (username, start_index, start_index + self._GDATA_PAGE_SIZE))
c39c05cd
A
2379
2380 def _real_initialize(self):
2381 self._youtube_ie.initialize()
d3975459 2382
c39c05cd
A
2383 def _real_extract(self, url):
2384 # Extract username
2385 mobj = re.match(self._VALID_URL, url)
2386 if mobj is None:
2387 self._downloader.trouble(u'ERROR: invalid url: %s' % url)
2388 return
2389
c39c05cd 2390 username = mobj.group(1)
5aba6ea4
RG
2391
2392 # Download video ids using YouTube Data API. Result size per
2393 # query is limited (currently to 50 videos) so we need to query
2394 # page by page until there are no video ids - it means we got
2395 # all of them.
2396
c39c05cd 2397 video_ids = []
5aba6ea4 2398 pagenum = 0
c39c05cd 2399
5aba6ea4
RG
2400 while True:
2401 start_index = pagenum * self._GDATA_PAGE_SIZE + 1
2402 self.report_download_page(username, start_index)
c39c05cd 2403
5aba6ea4 2404 request = urllib2.Request(self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index))
c39c05cd 2405
5aba6ea4
RG
2406 try:
2407 page = urllib2.urlopen(request).read()
2408 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2409 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2410 return
c39c05cd 2411
5aba6ea4
RG
2412 # Extract video identifiers
2413 ids_in_page = []
2414
2415 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2416 if mobj.group(1) not in ids_in_page:
2417 ids_in_page.append(mobj.group(1))
2418
2419 video_ids.extend(ids_in_page)
2420
2421 # A little optimization - if current page is not
2422 # "full", ie. does not contain PAGE_SIZE video ids then
2423 # we can assume that this page is the last one - there
2424 # are no more ids on further pages - no need to query
2425 # again.
2426
2427 if len(ids_in_page) < self._GDATA_PAGE_SIZE:
2428 break
2429
2430 pagenum += 1
2431
2432 all_ids_count = len(video_ids)
8cc44341
RG
2433 playliststart = self._downloader.params.get('playliststart', 1) - 1
2434 playlistend = self._downloader.params.get('playlistend', -1)
204c9398 2435
5aba6ea4
RG
2436 if playlistend == -1:
2437 video_ids = video_ids[playliststart:]
2438 else:
2439 video_ids = video_ids[playliststart:playlistend]
7a9054ec 2440
5aba6ea4 2441 self._downloader.to_screen("[youtube] user %s: Collected %d video ids (downloading %d of them)" %
7a9054ec 2442 (username, all_ids_count, len(video_ids)))
5aba6ea4
RG
2443
2444 for video_id in video_ids:
2445 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % video_id)
2446
c39c05cd 2447
27179cfd
VV
2448class DepositFilesIE(InfoExtractor):
2449 """Information extractor for depositfiles.com"""
2450
2451 _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles.com/(?:../(?#locale))?files/(.+)'
2452
2453 def __init__(self, downloader=None):
2454 InfoExtractor.__init__(self, downloader)
2455
2456 @staticmethod
2457 def suitable(url):
2458 return (re.match(DepositFilesIE._VALID_URL, url) is not None)
2459
2460 def report_download_webpage(self, file_id):
2461 """Report webpage download."""
2462 self._downloader.to_screen(u'[DepositFiles] %s: Downloading webpage' % file_id)
2463
2464 def report_extraction(self, file_id):
2465 """Report information extraction."""
2466 self._downloader.to_screen(u'[DepositFiles] %s: Extracting information' % file_id)
2467
2468 def _real_initialize(self):
2469 return
2470
2471 def _real_extract(self, url):
2472 # At this point we have a new file
2473 self._downloader.increment_downloads()
2474
2475 file_id = url.split('/')[-1]
2476 # Rebuild url in english locale
2477 url = 'http://depositfiles.com/en/files/' + file_id
2478
2479 # Retrieve file webpage with 'Free download' button pressed
2480 free_download_indication = { 'gateway_result' : '1' }
1987c232 2481 request = urllib2.Request(url, urllib.urlencode(free_download_indication))
27179cfd
VV
2482 try:
2483 self.report_download_webpage(file_id)
2484 webpage = urllib2.urlopen(request).read()
2485 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2486 self._downloader.trouble(u'ERROR: Unable to retrieve file webpage: %s' % str(err))
2487 return
2488
2489 # Search for the real file URL
2490 mobj = re.search(r'<form action="(http://fileshare.+?)"', webpage)
2491 if (mobj is None) or (mobj.group(1) is None):
2492 # Try to figure out reason of the error.
2493 mobj = re.search(r'<strong>(Attention.*?)</strong>', webpage, re.DOTALL)
2494 if (mobj is not None) and (mobj.group(1) is not None):
2495 restriction_message = re.sub('\s+', ' ', mobj.group(1)).strip()
2496 self._downloader.trouble(u'ERROR: %s' % restriction_message)
2497 else:
2498 self._downloader.trouble(u'ERROR: unable to extract download URL from: %s' % url)
2499 return
2500
2501 file_url = mobj.group(1)
2502 file_extension = os.path.splitext(file_url)[1][1:]
2503
2504 # Search for file title
2505 mobj = re.search(r'<b title="(.*?)">', webpage)
2506 if mobj is None:
2507 self._downloader.trouble(u'ERROR: unable to extract title')
2508 return
2509 file_title = mobj.group(1).decode('utf-8')
2510
2511 try:
2512 # Process file information
2513 self._downloader.process_info({
2514 'id': file_id.decode('utf-8'),
2515 'url': file_url.decode('utf-8'),
2516 'uploader': u'NA',
2517 'upload_date': u'NA',
2518 'title': file_title,
2519 'stitle': file_title,
2520 'ext': file_extension.decode('utf-8'),
2521 'format': u'NA',
2522 'player_url': None,
2523 })
2524 except UnavailableVideoError, err:
2525 self._downloader.trouble(u'ERROR: unable to download file')
2526
9f5f9602
GI
class FacebookIE(InfoExtractor):
	"""Information Extractor for Facebook

	Scrapes video metadata and stream URLs out of the video page's
	inline Javascript. Can log in with credentials from the downloader
	params or from ~/.netrc (machine 'facebook').
	"""

	_VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook.com/video/video.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)'
	_LOGIN_URL = 'https://login.facebook.com/login.php?m&next=http%3A%2F%2Fm.facebook.com%2Fhome.php&'
	_NETRC_MACHINE = 'facebook'
	# Format identifiers, best quality first
	_available_formats = ['highqual', 'lowqual']
	# Map format identifier -> file extension
	_video_extensions = {
		'highqual': 'mp4',
		'lowqual': 'mp4',
	}

	def __init__(self, downloader=None):
		InfoExtractor.__init__(self, downloader)

	@staticmethod
	def suitable(url):
		return (re.match(FacebookIE._VALID_URL, url) is not None)

	def _reporter(self, message):
		"""Add header and report message."""
		self._downloader.to_screen(u'[facebook] %s' % message)

	def report_login(self):
		"""Report attempt to log in."""
		self._reporter(u'Logging in')

	def report_video_webpage_download(self, video_id):
		"""Report attempt to download video webpage."""
		self._reporter(u'%s: Downloading video webpage' % video_id)

	def report_information_extraction(self, video_id):
		"""Report attempt to extract video information."""
		self._reporter(u'%s: Extracting video information' % video_id)

	def _parse_page(self, video_webpage):
		"""Extract video information from page"""
		# General data: each key is scraped with its own regex; keys whose
		# pattern does not match are simply absent from the result dict.
		data = {'title': r'class="video_title datawrap">(.*?)</',
			'description': r'<div class="datawrap">(.*?)</div>',
			'owner': r'\("video_owner_name", "(.*?)"\)',
			'upload_date': r'data-date="(.*?)"',
			'thumbnail': r'\("thumb_url", "(?P<THUMB>.*?)"\)',
			}
		video_info = {}
		for piece in data.keys():
			mobj = re.search(data[piece], video_webpage)
			if mobj is not None:
				# Values are \uXXXX-escaped inside the page's Javascript
				video_info[piece] = urllib.unquote_plus(mobj.group(1).decode("unicode_escape"))

		# Video urls
		video_urls = {}
		for fmt in self._available_formats:
			mobj = re.search(r'\("%s_src\", "(.+?)"\)' % fmt, video_webpage)
			if mobj is not None:
				# URL is in a Javascript segment inside an escaped Unicode format within
				# the generally utf-8 page
				video_urls[fmt] = urllib.unquote_plus(mobj.group(1).decode("unicode_escape"))
		video_info['video_urls'] = video_urls

		return video_info

	def _real_initialize(self):
		"""Log in to Facebook if credentials are available; warnings only,
		never fatal - extraction proceeds unauthenticated on failure."""
		if self._downloader is None:
			return

		useremail = None
		password = None
		downloader_params = self._downloader.params

		# Attempt to use provided username and password or .netrc data
		if downloader_params.get('username', None) is not None:
			useremail = downloader_params['username']
			password = downloader_params['password']
		elif downloader_params.get('usenetrc', False):
			try:
				info = netrc.netrc().authenticators(self._NETRC_MACHINE)
				if info is not None:
					useremail = info[0]
					password = info[2]
				else:
					raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
			except (IOError, netrc.NetrcParseError), err:
				self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
				return

		if useremail is None:
			# Nothing to log in with; proceed anonymously
			return

		# Log in
		login_form = {
			'email': useremail,
			'pass': password,
			'login': 'Log+In'
			}
		request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form))
		try:
			self.report_login()
			login_results = urllib2.urlopen(request).read()
			# A login form in the response means the credentials were rejected
			if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None:
				self._downloader.to_stderr(u'WARNING: unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
				return
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
			return

	def _real_extract(self, url):
		"""Download the video page, parse out metadata and stream URLs,
		and pass each selected format to the downloader."""
		mobj = re.match(self._VALID_URL, url)
		if mobj is None:
			self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
			return
		video_id = mobj.group('ID')

		# Get video webpage
		self.report_video_webpage_download(video_id)
		request = urllib2.Request('https://www.facebook.com/video/video.php?v=%s' % video_id)
		try:
			page = urllib2.urlopen(request)
			video_webpage = page.read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
			return

		# Start extracting information
		self.report_information_extraction(video_id)

		# Extract information
		video_info = self._parse_page(video_webpage)

		# uploader
		if 'owner' not in video_info:
			self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
			return
		video_uploader = video_info['owner']

		# title
		if 'title' not in video_info:
			self._downloader.trouble(u'ERROR: unable to extract video title')
			return
		video_title = video_info['title']
		video_title = video_title.decode('utf-8')
		video_title = sanitize_title(video_title)

		# simplified title
		simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
		simple_title = simple_title.strip(ur'_')

		# thumbnail image (missing thumbnail is only a warning)
		if 'thumbnail' not in video_info:
			self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
			video_thumbnail = ''
		else:
			video_thumbnail = video_info['thumbnail']

		# upload date
		upload_date = u'NA'
		if 'upload_date' in video_info:
			upload_time = video_info['upload_date']
			timetuple = email.utils.parsedate_tz(upload_time)
			if timetuple is not None:
				try:
					upload_date = time.strftime('%Y%m%d', timetuple[0:9])
				except:
					# Unparseable date: keep the 'NA' placeholder
					pass

		# description
		video_description = video_info.get('description', 'No description available.')

		url_map = video_info['video_urls']
		if len(url_map.keys()) > 0:
			# Decide which formats to download
			req_format = self._downloader.params.get('format', None)
			format_limit = self._downloader.params.get('format_limit', None)

			if format_limit is not None and format_limit in self._available_formats:
				format_list = self._available_formats[self._available_formats.index(format_limit):]
			else:
				format_list = self._available_formats
			existing_formats = [x for x in format_list if x in url_map]
			if len(existing_formats) == 0:
				self._downloader.trouble(u'ERROR: no known formats available for video')
				return
			if req_format is None:
				video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
			elif req_format == '-1':
				video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
			else:
				# Specific format
				if req_format not in url_map:
					self._downloader.trouble(u'ERROR: requested format not available')
					return
				video_url_list = [(req_format, url_map[req_format])] # Specific format

		# NOTE(review): if url_map is empty, video_url_list is unbound here
		# and this raises UnboundLocalError - confirm whether an empty
		# url_map can actually occur for a valid video page.
		for format_param, video_real_url in video_url_list:

			# At this point we have a new video
			self._downloader.increment_downloads()

			# Extension
			video_extension = self._video_extensions.get(format_param, 'mp4')

			try:
				# Process video information
				self._downloader.process_info({
					'id': video_id.decode('utf-8'),
					'url': video_real_url.decode('utf-8'),
					'uploader': video_uploader.decode('utf-8'),
					'upload_date': upload_date,
					'title': video_title,
					'stitle': simple_title,
					'ext': video_extension.decode('utf-8'),
					'format': (format_param is None and u'NA' or format_param.decode('utf-8')),
					'thumbnail': video_thumbnail.decode('utf-8'),
					'description': video_description.decode('utf-8'),
					'player_url': None,
				})
			except UnavailableVideoError, err:
				self._downloader.trouble(u'\nERROR: unable to download video')
2745
7745f5d8
PH
class BlipTVIE(InfoExtractor):
	"""Information extractor for blip.tv

	Fetches the JSON representation of a blip.tv page (skin=json) and
	builds the download info dictionary from it.
	"""

	_VALID_URL = r'^(?:https?://)?(?:\w+\.)?blip\.tv(/.+)$'
	# Captures the filename extension of a media URL
	_URL_EXT = r'^.*\.([a-z0-9]+)$'

	@staticmethod
	def suitable(url):
		return (re.match(BlipTVIE._VALID_URL, url) is not None)

	def report_extraction(self, file_id):
		"""Report information extraction."""
		self._downloader.to_screen(u'[blip.tv] %s: Extracting information' % file_id)

	def _simplify_title(self, title):
		"""Collapse runs of non-filename-safe characters into '_'."""
		res = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', title)
		res = res.strip(ur'_')
		return res

	def _real_extract(self, url):
		"""Fetch the JSON metadata for url and hand it to the downloader."""
		mobj = re.match(self._VALID_URL, url)
		if mobj is None:
			self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
			return

		# Append the JSON-skin parameters, respecting an existing query string
		if '?' in url:
			cchar = '&'
		else:
			cchar = '?'
		json_url = url + cchar + 'skin=json&version=2&no_wrap=1'
		request = urllib2.Request(json_url)
		self.report_extraction(mobj.group(1))
		try:
			json_code = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
			return
		try:
			json_data = json.loads(json_code)
			# Responses may wrap the payload in a 'Post' object
			if 'Post' in json_data:
				data = json_data['Post']
			else:
				data = json_data

			# NOTE(review): '%H' (24-hour) combined with '%p' (AM/PM) looks
			# suspicious - confirm the actual blip.tv datestamp format.
			upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d')
			video_url = data['media']['url']
			umobj = re.match(self._URL_EXT, video_url)
			if umobj is None:
				raise ValueError('Can not determine filename extension')
			ext = umobj.group(1)

			self._downloader.increment_downloads()

			info = {
				'id': data['item_id'],
				'url': video_url,
				'uploader': data['display_name'],
				'upload_date': upload_date,
				'title': data['title'],
				'stitle': self._simplify_title(data['title']),
				'ext': ext,
				'format': data['media']['mimeType'],
				'thumbnail': data['thumbnailUrl'],
				'description': data['description'],
				'player_url': data['embedUrl']
			}
		except (ValueError,KeyError), err:
			# Covers json.loads failures, missing keys and bad dates alike
			self._downloader.trouble(u'ERROR: unable to parse video information: %s' % repr(err))
			return

		try:
			self._downloader.process_info(info)
		except UnavailableVideoError, err:
			self._downloader.trouble(u'\nERROR: unable to download video')
2820
2821
65cd34c5
RG
class PostProcessor(object):
	"""Base class for post-processing steps.

	Instances are registered with a downloader through its
	add_post_processor() method. After every successful download the
	downloader walks its chain of PostProcessors, calling run() on each
	one - first with an initial information dictionary, then with
	whatever the previous processor returned.

	The chain stops as soon as a processor returns None, or when the
	last processor has run.

	PostProcessor objects follow a "mutual registration" process
	similar to InfoExtractor objects.
	"""

	_downloader = None

	def __init__(self, downloader=None):
		self.set_downloader(downloader)

	def set_downloader(self, downloader):
		"""Sets the downloader for this PP."""
		self._downloader = downloader

	def run(self, information):
		"""Run the PostProcessor.

		The "information" argument is a dictionary like the ones
		composed by InfoExtractors, plus one extra field, "filepath",
		naming the downloaded file on disk.

		Returning None stops the postprocessing chain; returning a
		dictionary (possibly with fields changed) passes it on to the
		next processor. A PostProcessingError may be raised to signal
		failure to the calling downloader.

		This default implementation passes the data through untouched.
		"""
		return information
d3975459 2867
3072fab1
RG
class FFmpegExtractAudioPP(PostProcessor):
	"""Post-processor that extracts the audio track of a downloaded video.

	Requires the external programs ffmpeg and ffprobe. preferredcodec
	may be 'best' (keep aac/mp3 streams losslessly, transcode anything
	else to mp3), 'aac' or 'mp3'.
	"""

	def __init__(self, downloader=None, preferredcodec=None):
		PostProcessor.__init__(self, downloader)
		if preferredcodec is None:
			preferredcodec = 'best'
		self._preferredcodec = preferredcodec

	@staticmethod
	def get_audio_codec(path):
		"""Return the audio codec name reported by ffprobe, or None on error."""
		try:
			cmd = ['ffprobe', '-show_streams', '--', path]
			# open() instead of the removed-in-py3 file() builtin, and
			# close the devnull handle instead of leaking it
			devnull = open(os.path.devnull, 'w')
			try:
				handle = subprocess.Popen(cmd, stderr=devnull, stdout=subprocess.PIPE)
				output = handle.communicate()[0]
				if handle.wait() != 0:
					return None
			finally:
				devnull.close()
		except (IOError, OSError):
			return None
		# ffprobe emits key=value lines per stream; a stream's codec_name
		# line precedes its codec_type line
		audio_codec = None
		for line in output.split('\n'):
			if line.startswith('codec_name='):
				audio_codec = line.split('=')[1].strip()
			elif line.strip() == 'codec_type=audio' and audio_codec is not None:
				return audio_codec
		return None

	@staticmethod
	def run_ffmpeg(path, out_path, codec, more_opts):
		"""Transcode path to out_path with the given codec; True on success."""
		try:
			cmd = ['ffmpeg', '-y', '-i', path, '-vn', '-acodec', codec] + more_opts + ['--', out_path]
			# Same leak fix as get_audio_codec: close devnull deterministically
			devnull = open(os.path.devnull, 'w')
			try:
				ret = subprocess.call(cmd, stdout=devnull, stderr=subprocess.STDOUT)
			finally:
				devnull.close()
			return (ret == 0)
		except (IOError, OSError):
			return False

	def run(self, information):
		"""Extract audio from information['filepath'], delete the original
		video file and point 'filepath' at the new audio file."""
		path = information['filepath']

		filecodec = self.get_audio_codec(path)
		if filecodec is None:
			self._downloader.to_stderr(u'WARNING: unable to obtain file audio codec with ffprobe')
			return None

		more_opts = []
		if self._preferredcodec == 'best' or self._preferredcodec == filecodec:
			if filecodec == 'aac' or filecodec == 'mp3':
				# Lossless if possible
				acodec = 'copy'
				extension = filecodec
				if filecodec == 'aac':
					more_opts = ['-f', 'adts']
			else:
				# MP3 otherwise.
				acodec = 'libmp3lame'
				extension = 'mp3'
				more_opts = ['-ab', '128k']
		else:
			# We convert the audio (lossy)
			acodec = {'mp3': 'libmp3lame', 'aac': 'aac'}[self._preferredcodec]
			extension = self._preferredcodec
			more_opts = ['-ab', '128k']
			if self._preferredcodec == 'aac':
				more_opts += ['-f', 'adts']

		(prefix, ext) = os.path.splitext(path)
		new_path = prefix + '.' + extension
		self._downloader.to_screen(u'[ffmpeg] Destination: %s' % new_path)
		status = self.run_ffmpeg(path, new_path, acodec, more_opts)

		if not status:
			self._downloader.to_stderr(u'WARNING: error running ffmpeg')
			return None

		try:
			os.remove(path)
		except (IOError, OSError):
			self._downloader.to_stderr(u'WARNING: Unable to remove downloaded video file')
			return None

		information['filepath'] = new_path
		return information
2949
5fb3df4a
GV
2950
2951def updateSelf(downloader, filename):
2952 ''' Update the program file with the latest version from the repository '''
2953 # Note: downloader only used for options
2954 if not os.access(filename, os.W_OK):
2955 sys.exit('ERROR: no write permissions on %s' % filename)
2956
2957 downloader.to_screen('Updating to latest stable version...')
2958
4fa74b52 2959 try:
5fb3df4a
GV
2960 latest_url = 'http://github.com/rg3/youtube-dl/raw/master/LATEST_VERSION'
2961 latest_version = urllib.urlopen(latest_url).read().strip()
2962 prog_url = 'http://github.com/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version
2963 newcontent = urllib.urlopen(prog_url).read()
2964 except (IOError, OSError), err:
2965 sys.exit('ERROR: unable to download latest version')
f9f1e798 2966
5fb3df4a
GV
2967 try:
2968 stream = open(filename, 'w')
2969 stream.write(newcontent)
2970 stream.close()
2971 except (IOError, OSError), err:
2972 sys.exit('ERROR: unable to overwrite current version')
4bec29ef 2973
5fb3df4a 2974 downloader.to_screen('Updated to version %s' % latest_version)
80066952 2975
4f9f96f6
GV
def parseOpts():
	"""Build the optparse command-line parser and parse sys.argv.

	Returns a (parser, opts, args) triple so the caller can both read
	the parsed options and use the parser for error reporting.
	"""
	# Deferred imports
	import getpass
	import optparse

	def _format_option_string(option):
		''' ('-o', '--option') -> -o, --format METAVAR'''

		opts = []

		if option._short_opts: opts.append(option._short_opts[0])
		if option._long_opts: opts.append(option._long_opts[0])
		# Separator between the short and the long form
		if len(opts) > 1: opts.insert(1, ', ')

		if option.takes_value(): opts.append(' %s' % option.metavar)

		return "".join(opts)

	def _find_term_columns():
		"""Best-effort terminal width: $COLUMNS, then `stty size`, else None."""
		columns = os.environ.get('COLUMNS', None)
		if columns:
			return int(columns)

		try:
			sp = subprocess.Popen(['stty', 'size'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
			out,err = sp.communicate()
			return int(out.split()[1])
		except:
			# stty may be missing, or stdout may not be a terminal;
			# silently fall back to the default width
			pass
		return None

	max_width = 80
	max_help_position = 80

	# No need to wrap help messages if we're on a wide console
	columns = _find_term_columns()
	if columns: max_width = columns

	fmt = optparse.IndentedHelpFormatter(width=max_width, max_help_position=max_help_position)
	fmt.format_option_strings = _format_option_string

	kw = {
		'version' : __version__,
		'formatter' : fmt,
		'usage' : '%prog [options] url...',
		'conflict_handler' : 'resolve',
	}

	parser = optparse.OptionParser(**kw)

	# option groups
	general = optparse.OptionGroup(parser, 'General Options')
	authentication = optparse.OptionGroup(parser, 'Authentication Options')
	video_format = optparse.OptionGroup(parser, 'Video Format Options')
	postproc = optparse.OptionGroup(parser, 'Post-processing Options')
	filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
	verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')

	general.add_option('-h', '--help',
			action='help', help='print this help text and exit')
	general.add_option('-v', '--version',
			action='version', help='print program version and exit')
	general.add_option('-U', '--update',
			action='store_true', dest='update_self', help='update this program to latest stable version')
	general.add_option('-i', '--ignore-errors',
			action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
	general.add_option('-r', '--rate-limit',
			dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)')
	general.add_option('-R', '--retries',
			dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10)
	general.add_option('--playlist-start',
			dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1)
	general.add_option('--playlist-end',
			dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)
	general.add_option('--dump-user-agent',
			action='store_true', dest='dump_user_agent',
			help='display the current browser identification', default=False)

	authentication.add_option('-u', '--username',
			dest='username', metavar='USERNAME', help='account username')
	authentication.add_option('-p', '--password',
			dest='password', metavar='PASSWORD', help='account password')
	authentication.add_option('-n', '--netrc',
			action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)


	video_format.add_option('-f', '--format',
			action='store', dest='format', metavar='FORMAT', help='video format code')
	video_format.add_option('--all-formats',
			action='store_const', dest='format', help='download all available video formats', const='-1')
	video_format.add_option('--max-quality',
			action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download')


	verbosity.add_option('-q', '--quiet',
			action='store_true', dest='quiet', help='activates quiet mode', default=False)
	verbosity.add_option('-s', '--simulate',
			action='store_true', dest='simulate', help='do not download video', default=False)
	verbosity.add_option('-g', '--get-url',
			action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
	verbosity.add_option('-e', '--get-title',
			action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
	verbosity.add_option('--get-thumbnail',
			action='store_true', dest='getthumbnail',
			help='simulate, quiet but print thumbnail URL', default=False)
	verbosity.add_option('--get-description',
			action='store_true', dest='getdescription',
			help='simulate, quiet but print video description', default=False)
	verbosity.add_option('--get-filename',
			action='store_true', dest='getfilename',
			help='simulate, quiet but print output filename', default=False)
	verbosity.add_option('--no-progress',
			action='store_true', dest='noprogress', help='do not print progress bar', default=False)
	verbosity.add_option('--console-title',
			action='store_true', dest='consoletitle',
			help='display progress in console titlebar', default=False)


	filesystem.add_option('-t', '--title',
			action='store_true', dest='usetitle', help='use title in file name', default=False)
	filesystem.add_option('-l', '--literal',
			action='store_true', dest='useliteral', help='use literal title in file name', default=False)
	filesystem.add_option('-A', '--auto-number',
			action='store_true', dest='autonumber',
			help='number downloaded files starting from 00000', default=False)
	filesystem.add_option('-o', '--output',
			dest='outtmpl', metavar='TEMPLATE', help='output filename template')
	filesystem.add_option('-a', '--batch-file',
			dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
	filesystem.add_option('-w', '--no-overwrites',
			action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
	filesystem.add_option('-c', '--continue',
			action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False)
	filesystem.add_option('--cookies',
			dest='cookiefile', metavar='FILE', help='file to dump cookie jar to')
	filesystem.add_option('--no-part',
			action='store_true', dest='nopart', help='do not use .part files', default=False)
	filesystem.add_option('--no-mtime',
			action='store_false', dest='updatetime',
			help='do not use the Last-modified header to set the file modification time', default=True)
	filesystem.add_option('--write-description',
			action='store_true', dest='writedescription',
			help='write video description to a .description file', default=False)
	filesystem.add_option('--write-info-json',
			action='store_true', dest='writeinfojson',
			help='write video metadata to a .info.json file', default=False)


	postproc.add_option('--extract-audio', action='store_true', dest='extractaudio', default=False,
			help='convert video files to audio-only files (requires ffmpeg and ffprobe)')
	postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best',
			help='"best", "aac" or "mp3"; best by default')


	parser.add_option_group(general)
	parser.add_option_group(filesystem)
	parser.add_option_group(verbosity)
	parser.add_option_group(video_format)
	parser.add_option_group(authentication)
	parser.add_option_group(postproc)

	opts, args = parser.parse_args()

	return parser, opts, args
3140
5adcaa43
GV
3141def main():
3142 parser, opts, args = parseOpts()
4f9f96f6 3143
5adcaa43
GV
3144 # Open appropriate CookieJar
3145 if opts.cookiefile is None:
3146 jar = cookielib.CookieJar()
3147 else:
8cc44341 3148 try:
5adcaa43
GV
3149 jar = cookielib.MozillaCookieJar(opts.cookiefile)
3150 if os.path.isfile(opts.cookiefile) and os.access(opts.cookiefile, os.R_OK):
3151 jar.load()
3152 except (IOError, OSError), err:
3153 sys.exit(u'ERROR: unable to open cookie file')
80066952 3154
5adcaa43
GV
3155 # Dump user agent
3156 if opts.dump_user_agent:
3157 print std_headers['User-Agent']
3158 sys.exit(0)
e7cf18cb 3159
5adcaa43
GV
3160 # General configuration
3161 cookie_processor = urllib2.HTTPCookieProcessor(jar)
3162 urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler(), cookie_processor, YoutubeDLHandler()))
3163 socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)
80066952 3164
5adcaa43
GV
3165 # Batch file verification
3166 batchurls = []
3167 if opts.batchfile is not None:
8cc44341 3168 try:
5adcaa43
GV
3169 if opts.batchfile == '-':
3170 batchfd = sys.stdin
4bec29ef 3171 else:
5adcaa43
GV
3172 batchfd = open(opts.batchfile, 'r')
3173 batchurls = batchfd.readlines()
3174 batchurls = [x.strip() for x in batchurls]
3175 batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)]
3176 except IOError:
3177 sys.exit(u'ERROR: batch file could not be read')
3178 all_urls = batchurls + args
3179
3180 # Conflicting, missing and erroneous options
3181 if opts.usenetrc and (opts.username is not None or opts.password is not None):
3182 parser.error(u'using .netrc conflicts with giving username/password')
3183 if opts.password is not None and opts.username is None:
3184 parser.error(u'account username missing')
3185 if opts.outtmpl is not None and (opts.useliteral or opts.usetitle or opts.autonumber):
3186 parser.error(u'using output template conflicts with using title, literal title or auto number')
3187 if opts.usetitle and opts.useliteral:
3188 parser.error(u'using title conflicts with using literal title')
	# --- Tail of main(): validate the remaining options, wire up the
	# information extractors and the FileDownloader, then run the download.
	# (The beginning of main(), including option parsing, precedes this chunk.)

	# Prompt interactively for the password when a username was given without one.
	if opts.username is not None and opts.password is None:
		opts.password = getpass.getpass(u'Type account password and press return:')
	# --ratelimit accepts human-readable suffixes (e.g. "50k"); normalize to a number.
	if opts.ratelimit is not None:
		numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
		if numeric_limit is None:
			parser.error(u'invalid rate limit specified')
		opts.ratelimit = numeric_limit
	if opts.retries is not None:
		try:
			# Python 2 long(): accepts arbitrarily large retry counts.
			opts.retries = long(opts.retries)
		except (TypeError, ValueError), err:
			parser.error(u'invalid retry count specified')
	# Playlist bounds: start must be a positive 1-based index.
	try:
		opts.playliststart = int(opts.playliststart)
		if opts.playliststart <= 0:
			raise ValueError(u'Playlist start must be positive')
	except (TypeError, ValueError), err:
		parser.error(u'invalid playlist start number specified')
	# End of -1 means "until the end of the playlist"; otherwise it must be
	# positive and not precede the start index.
	try:
		opts.playlistend = int(opts.playlistend)
		if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart):
			raise ValueError(u'Playlist end must be greater than playlist start')
	except (TypeError, ValueError), err:
		parser.error(u'invalid playlist end number specified')
	if opts.extractaudio:
		if opts.audioformat not in ['best', 'aac', 'mp3']:
			parser.error(u'invalid audio format specified')

	# Information extractors.  Several IEs delegate to the YouTube/Google
	# extractors for the actual video pages, so those are passed in here.
	youtube_ie = YoutubeIE()
	metacafe_ie = MetacafeIE(youtube_ie)
	dailymotion_ie = DailymotionIE()
	youtube_pl_ie = YoutubePlaylistIE(youtube_ie)
	youtube_user_ie = YoutubeUserIE(youtube_ie)
	youtube_search_ie = YoutubeSearchIE(youtube_ie)
	google_ie = GoogleIE()
	google_search_ie = GoogleSearchIE(google_ie)
	photobucket_ie = PhotobucketIE()
	yahoo_ie = YahooIE()
	yahoo_search_ie = YahooSearchIE(yahoo_ie)
	deposit_files_ie = DepositFilesIE()
	facebook_ie = FacebookIE()
	bliptv_ie = BlipTVIE()
	generic_ie = GenericIE()

	# File downloader: central object that receives all user options and
	# drives extraction + download.
	fd = FileDownloader({
		'usenetrc': opts.usenetrc,
		'username': opts.username,
		'password': opts.password,
		# Any of the --get-* options implies quiet, simulate-only operation.
		'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename),
		'forceurl': opts.geturl,
		'forcetitle': opts.gettitle,
		'forcethumbnail': opts.getthumbnail,
		'forcedescription': opts.getdescription,
		'forcefilename': opts.getfilename,
		'simulate': (opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename),
		'format': opts.format,
		'format_limit': opts.format_limit,
		# Output template: an explicit -o template wins; otherwise the first
		# matching default in this or-chain is used, ending in the plain
		# '%(id)s.%(ext)s' fallback.  Order of the clauses is significant.
		'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
			or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s')
			or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s')
			or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s')
			or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(stitle)s-%(id)s.%(ext)s')
			or (opts.useliteral and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s')
			or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
			or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
			or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
			or u'%(id)s.%(ext)s'),
		'ignoreerrors': opts.ignoreerrors,
		'ratelimit': opts.ratelimit,
		'nooverwrites': opts.nooverwrites,
		'retries': opts.retries,
		'continuedl': opts.continue_dl,
		'noprogress': opts.noprogress,
		'playliststart': opts.playliststart,
		'playlistend': opts.playlistend,
		# '-o -' streams the video to stdout, so progress must go to stderr.
		'logtostderr': opts.outtmpl == '-',
		'consoletitle': opts.consoletitle,
		'nopart': opts.nopart,
		'updatetime': opts.updatetime,
		'writedescription': opts.writedescription,
		'writeinfojson': opts.writeinfojson,
		})
	# Registration order matters: more specific extractors (searches,
	# playlists, users) are tried before the plain video-page extractors.
	fd.add_info_extractor(youtube_search_ie)
	fd.add_info_extractor(youtube_pl_ie)
	fd.add_info_extractor(youtube_user_ie)
	fd.add_info_extractor(metacafe_ie)
	fd.add_info_extractor(dailymotion_ie)
	fd.add_info_extractor(youtube_ie)
	fd.add_info_extractor(google_ie)
	fd.add_info_extractor(google_search_ie)
	fd.add_info_extractor(photobucket_ie)
	fd.add_info_extractor(yahoo_ie)
	fd.add_info_extractor(yahoo_search_ie)
	fd.add_info_extractor(deposit_files_ie)
	fd.add_info_extractor(facebook_ie)
	fd.add_info_extractor(bliptv_ie)

	# This must come last since it's the
	# fallback if none of the others work
	fd.add_info_extractor(generic_ie)

	# PostProcessors
	if opts.extractaudio:
		fd.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat))

	# Update version: replace the running script file with the latest release.
	if opts.update_self:
		updateSelf(fd, sys.argv[0])

	# Maybe do nothing: no URLs is only acceptable together with --update.
	if len(all_urls) < 1:
		if not opts.update_self:
			parser.error(u'you must provide at least one URL')
		else:
			sys.exit()
	retcode = fd.download(all_urls)

	# Dump cookie jar if requested.
	# NOTE(review): `jar` is presumably created earlier in main() when
	# --cookies is given — confirm against the part of main() above this chunk.
	if opts.cookiefile is not None:
		try:
			jar.save()
		except (IOError, OSError), err:
			sys.exit(u'ERROR: unable to save cookie jar')

	# Propagate the downloader's aggregate return code as the exit status.
	sys.exit(retcode)
80066952 3316
4fa74b52 3317
5adcaa43
GV
# Script entry point: run main() and translate the expected failure modes
# into clean exit statuses/messages instead of Python tracebacks.
if __name__ == '__main__':
	try:
		main()
	except DownloadError:
		# Download failure: exit with status 1.  NOTE(review): assumes the
		# error was already reported before the exception propagated — confirm.
		sys.exit(1)
	except SameFileError:
		sys.exit(u'ERROR: fixed output name but more than one file to download')
	except KeyboardInterrupt:
		# Ctrl-C: print a short message rather than a KeyboardInterrupt traceback.
		sys.exit(u'\nERROR: Interrupted by user')
e9cb9c28
GV
3327
3328# vim: set ts=4 sw=4 sts=4 noet ai si filetype=python: