1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3
4__author__ = (
5 'Ricardo Garcia Gonzalez',
6 'Danny Colligan',
7 'Benjamin Johnson',
8 'Vasyl\' Vavrychuk',
9 'Witold Baryluk',
10 'Paweł Paprota',
11 'Gergely Imreh',
12 'Rogério Brito',
13 'Philipp Hagemeister',
14 'Sören Schulze',
15 )
16
17__license__ = 'Public Domain'
18__version__ = '2011.09.15'
19
20UPDATE_URL = 'https://raw.github.com/rg3/youtube-dl/master/youtube-dl'
21
22import cookielib
23import datetime
24import gzip
25import htmlentitydefs
26import HTMLParser
27import httplib
28import locale
29import math
30import netrc
31import os
32import os.path
33import re
34import socket
35import string
36import subprocess
37import sys
38import time
39import urllib
40import urllib2
41import warnings
42import zlib
43
44if os.name == 'nt':
45 import ctypes
46
47try:
48 import email.utils
49except ImportError: # Python 2.4
50 import email.Utils
51try:
52 import cStringIO as StringIO
53except ImportError:
54 import StringIO
55
56# parse_qs was moved from the cgi module to the urlparse module recently.
57try:
58 from urlparse import parse_qs
59except ImportError:
60 from cgi import parse_qs
61
62try:
63 import lxml.etree
64except ImportError:
65 pass # Handled below
66
67try:
68 import xml.etree.ElementTree
69except ImportError: # Python<2.5: Not officially supported, but let it slip
70 warnings.warn('xml.etree.ElementTree support is missing. Consider upgrading to Python >= 2.5 if you get related errors.')
71
72std_headers = {
73 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:5.0.1) Gecko/20100101 Firefox/5.0.1',
74 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
75 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
76 'Accept-Encoding': 'gzip, deflate',
77 'Accept-Language': 'en-us,en;q=0.5',
78}
79
80simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii')
81
82try:
83 import json
84except ImportError: # Python <2.6, use trivialjson (https://github.com/phihag/trivialjson):
85 import re
86 class json(object):
87 @staticmethod
88 def loads(s):
89 s = s.decode('UTF-8')
90 def raiseError(msg, i):
91 raise ValueError(msg + ' at position ' + str(i) + ' of ' + repr(s) + ': ' + repr(s[i:]))
92 def skipSpace(i, expectMore=True):
93 while i < len(s) and s[i] in ' \t\r\n':
94 i += 1
95 if expectMore:
96 if i >= len(s):
97 raiseError('Premature end', i)
98 return i
99 def decodeEscape(match):
100 esc = match.group(1)
101 _STATIC = {
102 '"': '"',
103 '\\': '\\',
104 '/': '/',
105 'b': unichr(0x8),
106 'f': unichr(0xc),
107 'n': '\n',
108 'r': '\r',
109 't': '\t',
110 }
111 if esc in _STATIC:
112 return _STATIC[esc]
113 if esc[0] == 'u':
114 if len(esc) == 1+4:
115 return unichr(int(esc[1:5], 16))
116 if len(esc) == 5+6 and esc[5:7] == '\\u':
117 hi = int(esc[1:5], 16)
118 low = int(esc[7:11], 16)
119 return unichr((hi - 0xd800) * 0x400 + low - 0xdc00 + 0x10000)
120 raise ValueError('Unknown escape ' + str(esc))
121 def parseString(i):
122 i += 1
123 e = i
124 while True:
125 e = s.index('"', e)
126 bslashes = 0
127 while s[e-bslashes-1] == '\\':
128 bslashes += 1
129 if bslashes % 2 == 1:
130 e += 1
131 continue
132 break
133 rexp = re.compile(r'\\(u[dD][89aAbB][0-9a-fA-F]{2}\\u[0-9a-fA-F]{4}|u[0-9a-fA-F]{4}|.|$)')
134 stri = rexp.sub(decodeEscape, s[i:e])
135 return (e+1,stri)
136 def parseObj(i):
137 i += 1
138 res = {}
139 i = skipSpace(i)
140 if s[i] == '}': # Empty dictionary
141 return (i+1,res)
142 while True:
143 if s[i] != '"':
144 raiseError('Expected a string object key', i)
145 i,key = parseString(i)
146 i = skipSpace(i)
147 if i >= len(s) or s[i] != ':':
148 raiseError('Expected a colon', i)
149 i,val = parse(i+1)
150 res[key] = val
151 i = skipSpace(i)
152 if s[i] == '}':
153 return (i+1, res)
154 if s[i] != ',':
155 raiseError('Expected comma or closing curly brace', i)
156 i = skipSpace(i+1)
157 def parseArray(i):
158 res = []
159 i = skipSpace(i+1)
160 if s[i] == ']': # Empty array
161 return (i+1,res)
162 while True:
163 i,val = parse(i)
164 res.append(val)
165 i = skipSpace(i) # Raise exception if premature end
166 if s[i] == ']':
167 return (i+1, res)
168 if s[i] != ',':
169 raiseError('Expected a comma or closing bracket', i)
170 i = skipSpace(i+1)
171 def parseDiscrete(i):
172 for k,v in {'true': True, 'false': False, 'null': None}.items():
173 if s.startswith(k, i):
174 return (i+len(k), v)
175 raiseError('Not a boolean (or null)', i)
176 def parseNumber(i):
177 mobj = re.match('^(-?(0|[1-9][0-9]*)(\.[0-9]*)?([eE][+-]?[0-9]+)?)', s[i:])
178 if mobj is None:
179 raiseError('Not a number', i)
180 nums = mobj.group(1)
181 if '.' in nums or 'e' in nums or 'E' in nums:
182 return (i+len(nums), float(nums))
183 return (i+len(nums), int(nums))
184 CHARMAP = {'{': parseObj, '[': parseArray, '"': parseString, 't': parseDiscrete, 'f': parseDiscrete, 'n': parseDiscrete}
185 def parse(i):
186 i = skipSpace(i)
187 i,res = CHARMAP.get(s[i], parseNumber)(i)
188 i = skipSpace(i, False)
189 return (i,res)
190 i,res = parse(0)
191 if i < len(s):
192 raise ValueError('Extra data at end of input (index ' + str(i) + ' of ' + repr(s) + ': ' + repr(s[i:]) + ')')
193 return res
194
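# Editor's note (illustrative addition): the fallback class above only implements
# loads(); for example json.loads('{"a": [1, 2.5, "\\u00e9"]}') would return
# {u'a': [1, 2.5, u'\xe9']}. It provides no dump(), which is why process_info()
# further down checks for json.dump before honouring --write-info-json.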
195def preferredencoding():
196 """Get preferred encoding.
197
198 Returns the best encoding scheme for the system, based on
199 locale.getpreferredencoding() and some further tweaks.
200 """
201 def yield_preferredencoding():
202 try:
203 pref = locale.getpreferredencoding()
204 u'TEST'.encode(pref)
205 except:
206 pref = 'UTF-8'
207 while True:
208 yield pref
209 return yield_preferredencoding().next()
210
211
212def htmlentity_transform(matchobj):
213 """Transforms an HTML entity to a Unicode character.
214
215 This function receives a match object and is intended to be used with
216 the re.sub() function.
217 """
218 entity = matchobj.group(1)
219
220 # Known non-numeric HTML entity
221 if entity in htmlentitydefs.name2codepoint:
222 return unichr(htmlentitydefs.name2codepoint[entity])
223
224 # Unicode character
225 mobj = re.match(ur'(?u)#(x?\d+)', entity)
226 if mobj is not None:
227 numstr = mobj.group(1)
228 if numstr.startswith(u'x'):
229 base = 16
230 numstr = u'0%s' % numstr
231 else:
232 base = 10
233 return unichr(long(numstr, base))
234
235 # Unknown entity in name, return its literal representation
236 return (u'&%s;' % entity)
237
238
239def sanitize_title(utitle):
31bcb480 240 """Sanitizes a video title so it could be used as part of a filename."""
241 utitle = re.sub(ur'(?u)&(.+?);', htmlentity_transform, utitle)
242 return utitle.replace(unicode(os.sep), u'%')
243
244
245def sanitize_open(filename, open_mode):
246 """Try to open the given filename, and slightly tweak it if this fails.
247
248 Attempts to open the given filename. If this fails, it tries to change
249 the filename slightly, step by step, until it's either able to open it
250 or it fails and raises a final exception, like the standard open()
251 function.
252
253 It returns the tuple (stream, definitive_file_name).
254 """
255 try:
256 if filename == u'-':
257 if sys.platform == 'win32':
258 import msvcrt
259 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
260 return (sys.stdout, filename)
261 stream = open(filename, open_mode)
262 return (stream, filename)
263 except (IOError, OSError), err:
264 # In case of error, try to remove win32 forbidden chars
265 filename = re.sub(ur'[/<>:"\|\?\*]', u'#', filename)
266
267 # An exception here should be caught in the caller
268 stream = open(filename, open_mode)
269 return (stream, filename)
270
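# Editor's note (illustrative): with a hypothetical title-based name such as
# u'AC/DC: Live.mp4', the first open() above fails on most systems and the
# fallback retries with the forbidden characters replaced, i.e. u'AC#DC# Live.mp4'.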
271
272def timeconvert(timestr):
273 """Convert RFC 2822 defined time string into system timestamp"""
274 timestamp = None
275 timetuple = email.utils.parsedate_tz(timestr)
276 if timetuple is not None:
277 timestamp = email.utils.mktime_tz(timetuple)
278 return timestamp
279
280
281class DownloadError(Exception):
282 """Download Error exception.
283
284 This exception may be thrown by FileDownloader objects if they are not
285 configured to continue on errors. They will contain the appropriate
286 error message.
287 """
288 pass
289
290
291class SameFileError(Exception):
292 """Same File exception.
293
294 This exception will be thrown by FileDownloader objects if they detect
295 multiple files would have to be downloaded to the same file on disk.
296 """
297 pass
298
299
300class PostProcessingError(Exception):
301 """Post Processing exception.
302
303 This exception may be raised by PostProcessor's .run() method to
304 indicate an error in the postprocessing task.
305 """
306 pass
307
308
309class UnavailableVideoError(Exception):
310 """Unavailable Format exception.
311
312 This exception will be thrown when a video is requested
313 in a format that is not available for that video.
314 """
315 pass
316
317
318class ContentTooShortError(Exception):
319 """Content Too Short exception.
320
321 This exception may be raised by FileDownloader objects when a file they
322 download is too small for what the server announced first, indicating
323 the connection was probably interrupted.
324 """
325 # Both in bytes
326 downloaded = None
327 expected = None
328
329 def __init__(self, downloaded, expected):
330 self.downloaded = downloaded
331 self.expected = expected
332
333
334class YoutubeDLHandler(urllib2.HTTPHandler):
335 """Handler for HTTP requests and responses.
336
337 This class, when installed with an OpenerDirector, automatically adds
338 the standard headers to every HTTP request and handles gzipped and
339 deflated responses from web servers. If compression is to be avoided in
340 a particular request, the original request in the program code only has
341 to include the HTTP header "Youtubedl-No-Compression", which will be
342 removed before making the real request.
343
344 Part of this code was copied from:
345
346 http://techknack.net/python-urllib2-handlers/
347
348 Andrew Rowls, the author of that code, agreed to release it to the
349 public domain.
350 """
351
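 # Illustrative sketch (editor's addition, not part of the original script): how a
 # handler like this is typically installed, and how the no-compression header
 # described above would be used. The opener built here is hypothetical; the real
 # opener setup is not shown in this excerpt.
 #
 #   cookie_jar = cookielib.CookieJar()
 #   opener = urllib2.build_opener(urllib2.ProxyHandler(), YoutubeDLHandler(),
 #                                 urllib2.HTTPCookieProcessor(cookie_jar))
 #   urllib2.install_opener(opener)
 #   req = urllib2.Request('http://www.example.com/video', None,
 #                         {'Youtubedl-No-Compression': 'True'})
 #   uncompressed_page = urllib2.urlopen(req).read()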
352 @staticmethod
353 def deflate(data):
354 try:
355 return zlib.decompress(data, -zlib.MAX_WBITS)
356 except zlib.error:
357 return zlib.decompress(data)
358
359 @staticmethod
360 def addinfourl_wrapper(stream, headers, url, code):
361 if hasattr(urllib2.addinfourl, 'getcode'):
362 return urllib2.addinfourl(stream, headers, url, code)
363 ret = urllib2.addinfourl(stream, headers, url)
364 ret.code = code
365 return ret
366
367 def http_request(self, req):
368 for h in std_headers:
369 if h in req.headers:
370 del req.headers[h]
371 req.add_header(h, std_headers[h])
372 if 'Youtubedl-no-compression' in req.headers:
373 if 'Accept-encoding' in req.headers:
374 del req.headers['Accept-encoding']
375 del req.headers['Youtubedl-no-compression']
376 return req
377
378 def http_response(self, req, resp):
379 old_resp = resp
380 # gzip
381 if resp.headers.get('Content-encoding', '') == 'gzip':
382 gz = gzip.GzipFile(fileobj=StringIO.StringIO(resp.read()), mode='r')
383 resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
384 resp.msg = old_resp.msg
385 # deflate
386 if resp.headers.get('Content-encoding', '') == 'deflate':
387 gz = StringIO.StringIO(self.deflate(resp.read()))
388 resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
389 resp.msg = old_resp.msg
390 return resp
391
392
393class FileDownloader(object):
394 """File Downloader class.
395
396 File downloader objects are responsible for downloading the
397 actual video file and writing it to disk if the user has requested
398 it, among other tasks. In most cases there should be one per
399 program. Given a video URL, the downloader does not know how to
400 extract all the needed information (that is the InfoExtractors' job),
401 so it has to pass the URL to one of them.
402
403 For this, file downloader objects have a method that allows
404 InfoExtractors to be registered in a given order. When it is passed
405 a URL, the file downloader hands it to the first InfoExtractor it
406 finds that reports being able to handle it. The InfoExtractor extracts
407 all the information about the video or videos the URL refers to, and
408 asks the FileDownloader to process the video information, possibly
409 downloading the video.
410
411 File downloaders accept a lot of parameters. In order not to saturate
412 the object constructor with arguments, it receives a dictionary of
413 options instead. These options are available through the params
414 attribute for the InfoExtractors to use. The FileDownloader also
415 registers itself as the downloader in charge for the InfoExtractors
416 that are added to it, so this is a "mutual registration".
417
418 Available options:
419
420 username: Username for authentication purposes.
421 password: Password for authentication purposes.
422 usenetrc: Use netrc for authentication instead.
423 quiet: Do not print messages to stdout.
424 forceurl: Force printing final URL.
425 forcetitle: Force printing title.
426 forcethumbnail: Force printing thumbnail URL.
427 forcedescription: Force printing description.
428 forcefilename: Force printing final filename.
429 simulate: Do not download the video files.
430 format: Video format code.
431 format_limit: Highest quality format to try.
432 outtmpl: Template for output names.
433 ignoreerrors: Do not stop on download errors.
434 ratelimit: Download speed limit, in bytes/sec.
435 nooverwrites: Prevent overwriting files.
436 retries: Number of times to retry for HTTP error 5xx
437 continuedl: Try to continue downloads if possible.
438 noprogress: Do not print the progress bar.
439 playliststart: Playlist item to start at.
440 playlistend: Playlist item to end at.
441 matchtitle: Download only matching titles.
442 rejecttitle: Reject downloads for matching titles.
443 logtostderr: Log messages to stderr instead of stdout.
444 consoletitle: Display progress in console window's titlebar.
445 nopart: Do not use temporary .part files.
446 updatetime: Use the Last-modified header to set output file timestamps.
447 writedescription: Write the video description to a .description file.
448 writeinfojson: Write the video metadata to a .info.json file.
449 """
450
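 # Illustrative sketch (editor's addition): a minimal params dictionary and the
 # "mutual registration" described above. The option values and the URL below are
 # hypothetical; the real dictionary is assembled from the command-line options.
 #
 #   fd = FileDownloader({
 #       'outtmpl': u'%(stitle)s-%(id)s.%(ext)s',
 #       'format': '22/18',      # try itag 22 first, fall back to 18
 #       'continuedl': True,
 #       'retries': 10,
 #   })
 #   fd.add_info_extractor(YoutubeIE())
 #   retcode = fd.download([u'http://www.youtube.com/watch?v=XXXXXXXXXXX'])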
451 params = None
452 _ies = []
453 _pps = []
454 _download_retcode = None
455 _num_downloads = None
456 _screen_file = None
457
458 def __init__(self, params):
459 """Create a FileDownloader object with the given options."""
460 self._ies = []
461 self._pps = []
462 self._download_retcode = 0
463 self._num_downloads = 0
464 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
465 self.params = params
466
467 @staticmethod
468 def format_bytes(bytes):
469 if bytes is None:
470 return 'N/A'
471 if type(bytes) is str:
472 bytes = float(bytes)
473 if bytes == 0.0:
474 exponent = 0
475 else:
476 exponent = long(math.log(bytes, 1024.0))
477 suffix = 'bkMGTPEZY'[exponent]
478 converted = float(bytes) / float(1024 ** exponent)
479 return '%.2f%s' % (converted, suffix)
480
481 @staticmethod
482 def calc_percent(byte_counter, data_len):
483 if data_len is None:
484 return '---.-%'
485 return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))
486
487 @staticmethod
488 def calc_eta(start, now, total, current):
489 if total is None:
490 return '--:--'
491 dif = now - start
492 if current == 0 or dif < 0.001: # One millisecond
493 return '--:--'
494 rate = float(current) / dif
495 eta = long((float(total) - float(current)) / rate)
496 (eta_mins, eta_secs) = divmod(eta, 60)
497 if eta_mins > 99:
498 return '--:--'
499 return '%02d:%02d' % (eta_mins, eta_secs)
500
501 @staticmethod
502 def calc_speed(start, now, bytes):
503 dif = now - start
504 if bytes == 0 or dif < 0.001: # One millisecond
505 return '%10s' % '---b/s'
506 return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))
507
508 @staticmethod
509 def best_block_size(elapsed_time, bytes):
510 new_min = max(bytes / 2.0, 1.0)
511 new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
512 if elapsed_time < 0.001:
513 return long(new_max)
514 rate = bytes / elapsed_time
515 if rate > new_max:
516 return long(new_max)
517 if rate < new_min:
518 return long(new_min)
519 return long(rate)
520
521 @staticmethod
522 def parse_bytes(bytestr):
523 """Parse a string indicating a byte quantity into a long integer."""
524 matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
525 if matchobj is None:
526 return None
527 number = float(matchobj.group(1))
528 multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
529 return long(round(number * multiplier))
530
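 # Editor's note (illustrative): parse_bytes() backs byte-quantity options such as
 # the download rate limit; e.g. parse_bytes('50k') == 51200, parse_bytes('0.5m')
 # == 524288, and a bare figure like parse_bytes('4096') == 4096.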
531 def add_info_extractor(self, ie):
532 """Add an InfoExtractor object to the end of the list."""
533 self._ies.append(ie)
534 ie.set_downloader(self)
535
536 def add_post_processor(self, pp):
537 """Add a PostProcessor object to the end of the chain."""
538 self._pps.append(pp)
539 pp.set_downloader(self)
540
541 def to_screen(self, message, skip_eol=False, ignore_encoding_errors=False):
9fcd8355 542 """Print message to stdout if not in quiet mode."""
543 try:
544 if not self.params.get('quiet', False):
545 terminator = [u'\n', u''][skip_eol]
546 print >>self._screen_file, (u'%s%s' % (message, terminator)).encode(preferredencoding()),
547 self._screen_file.flush()
548 except (UnicodeEncodeError), err:
549 if not ignore_encoding_errors:
550 raise
551
552 def to_stderr(self, message):
553 """Print message to stderr."""
554 print >>sys.stderr, message.encode(preferredencoding())
555
556 def to_cons_title(self, message):
557 """Set console/terminal window title to message."""
558 if not self.params.get('consoletitle', False):
559 return
560 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
561 # c_wchar_p() might not be necessary if `message` is
562 # already of type unicode()
563 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
564 elif 'TERM' in os.environ:
565 sys.stderr.write('\033]0;%s\007' % message.encode(preferredencoding()))
566
567 def fixed_template(self):
568 """Checks if the output template is fixed."""
569 return (re.search(ur'(?u)%\(.+?\)s', self.params['outtmpl']) is None)
570
571 def trouble(self, message=None):
572 """Determine action to take when a download problem appears.
573
574 Depending on whether the downloader has been configured to ignore
575 download errors or not, this method may raise an exception or
576 simply set the error return code after printing the message.
577 """
578 if message is not None:
579 self.to_stderr(message)
580 if not self.params.get('ignoreerrors', False):
581 raise DownloadError(message)
582 self._download_retcode = 1
583
584 def slow_down(self, start_time, byte_counter):
585 """Sleep if the download speed is over the rate limit."""
586 rate_limit = self.params.get('ratelimit', None)
587 if rate_limit is None or byte_counter == 0:
588 return
589 now = time.time()
590 elapsed = now - start_time
591 if elapsed <= 0.0:
592 return
593 speed = float(byte_counter) / elapsed
594 if speed > rate_limit:
595 time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
596
597 def temp_name(self, filename):
598 """Returns a temporary filename for the given filename."""
599 if self.params.get('nopart', False) or filename == u'-' or \
600 (os.path.exists(filename) and not os.path.isfile(filename)):
601 return filename
602 return filename + u'.part'
603
604 def undo_temp_name(self, filename):
605 if filename.endswith(u'.part'):
606 return filename[:-len(u'.part')]
607 return filename
608
609 def try_rename(self, old_filename, new_filename):
610 try:
611 if old_filename == new_filename:
612 return
613 os.rename(old_filename, new_filename)
614 except (IOError, OSError), err:
615 self.trouble(u'ERROR: unable to rename file')
616
617 def try_utime(self, filename, last_modified_hdr):
618 """Try to set the last-modified time of the given file."""
619 if last_modified_hdr is None:
620 return
621 if not os.path.isfile(filename):
622 return
623 timestr = last_modified_hdr
624 if timestr is None:
625 return
626 filetime = timeconvert(timestr)
627 if filetime is None:
628 return filetime
629 try:
630 os.utime(filename, (time.time(), filetime))
631 except:
632 pass
633 return filetime
634
635 def report_writedescription(self, descfn):
636 """ Report that the description file is being written """
637 self.to_screen(u'[info] Writing video description to: %s' % descfn, ignore_encoding_errors=True)
638
639 def report_writeinfojson(self, infofn):
640 """ Report that the metadata file has been written """
641 self.to_screen(u'[info] Video description metadata as JSON to: %s' % infofn, ignore_encoding_errors=True)
642
643 def report_destination(self, filename):
644 """Report destination filename."""
645 self.to_screen(u'[download] Destination: %s' % filename, ignore_encoding_errors=True)
646
647 def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
648 """Report download progress."""
649 if self.params.get('noprogress', False):
650 return
651 self.to_screen(u'\r[download] %s of %s at %s ETA %s' %
652 (percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
653 self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
654 (percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
655
656 def report_resuming_byte(self, resume_len):
657 """Report attempt to resume at given byte."""
658 self.to_screen(u'[download] Resuming download at byte %s' % resume_len)
659
660 def report_retry(self, count, retries):
661 """Report retry in case of HTTP error 5xx"""
662 self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))
663
664 def report_file_already_downloaded(self, file_name):
665 """Report file has already been fully downloaded."""
43ab0ca4 666 try:
667 self.to_screen(u'[download] %s has already been downloaded' % file_name)
668 except (UnicodeEncodeError), err:
669 self.to_screen(u'[download] The file has already been downloaded')
670
671 def report_unable_to_resume(self):
672 """Report it was impossible to resume download."""
673 self.to_screen(u'[download] Unable to resume')
674
675 def report_finish(self):
676 """Report download finished."""
677 if self.params.get('noprogress', False):
678 self.to_screen(u'[download] Download completed')
679 else:
680 self.to_screen(u'')
681
682 def increment_downloads(self):
683 """Increment the ordinal that assigns a number to each file."""
684 self._num_downloads += 1
685
686 def prepare_filename(self, info_dict):
687 """Generate the output filename."""
688 try:
689 template_dict = dict(info_dict)
690 template_dict['epoch'] = unicode(long(time.time()))
691 template_dict['autonumber'] = unicode('%05d' % self._num_downloads)
692 filename = self.params['outtmpl'] % template_dict
693 return filename
694 except (ValueError, KeyError), err:
695 self.trouble(u'ERROR: invalid system charset or erroneous output template')
696 return None
697
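 # Illustrative example (editor's addition): with the template dictionary built
 # above, an output template such as u'%(uploader)s/%(stitle)s-%(id)s.%(ext)s'
 # would expand to something like u'SomeUploader/some_video_title-abc123defgh.mp4';
 # '%(epoch)s' and '%(autonumber)s' are also available, as set in prepare_filename().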
698 def process_info(self, info_dict):
699 """Process a single dictionary returned by an InfoExtractor."""
9f796346 700 filename = self.prepare_filename(info_dict)
701
702 # Forced printings
703 if self.params.get('forcetitle', False):
704 print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')
705 if self.params.get('forceurl', False):
706 print info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace')
707 if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
708 print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace')
709 if self.params.get('forcedescription', False) and 'description' in info_dict:
710 print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace')
711 if self.params.get('forcefilename', False) and filename is not None:
712 print filename.encode(preferredencoding(), 'xmlcharrefreplace')
713 if self.params.get('forceformat', False):
714 print info_dict['format'].encode(preferredencoding(), 'xmlcharrefreplace')
715
716 # Do nothing else if in simulate mode
717 if self.params.get('simulate', False):
718 return
719
720 if filename is None:
721 return
722
723 matchtitle = self.params.get('matchtitle', False)
724 rejecttitle = self.params.get('rejecttitle', False)
725 title = info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')
726 if matchtitle and not re.search(matchtitle, title, re.IGNORECASE):
727 self.to_screen(u'[download] "%s" title did not match pattern "%s"' % (title, matchtitle))
728 return
729 if rejecttitle and re.search(rejecttitle, title, re.IGNORECASE):
730 self.to_screen(u'[download] "%s" title matched reject pattern "%s"' % (title, rejecttitle))
731 return
732
733 if self.params.get('nooverwrites', False) and os.path.exists(filename):
734 self.to_stderr(u'WARNING: file exists and will be skipped')
735 return
736
737 try:
738 dn = os.path.dirname(filename)
739 if dn != '' and not os.path.exists(dn):
740 os.makedirs(dn)
741 except (OSError, IOError), err:
742 self.trouble(u'ERROR: unable to create directory ' + unicode(err))
743 return
744
745 if self.params.get('writedescription', False):
746 try:
747 descfn = filename + '.description'
748 self.report_writedescription(descfn)
749 descfile = open(descfn, 'wb')
750 try:
751 descfile.write(info_dict['description'].encode('utf-8'))
752 finally:
753 descfile.close()
754 except (OSError, IOError):
755 self.trouble(u'ERROR: Cannot write description file ' + descfn)
756 return
757
758 if self.params.get('writeinfojson', False):
759 infofn = filename + '.info.json'
760 self.report_writeinfojson(infofn)
761 try:
762 json.dump
763 except (NameError,AttributeError):
764 self.trouble(u'ERROR: No JSON encoder found. Update to Python 2.6+, setup a json module, or leave out --write-info-json.')
765 return
766 try:
767 infof = open(infofn, 'wb')
768 try:
769 json.dump(info_dict, infof)
770 finally:
771 infof.close()
772 except (OSError, IOError):
773 self.trouble(u'ERROR: Cannot write metadata to JSON file ' + infofn)
774 return
775
776 if not self.params.get('skip_download', False):
777 try:
778 success, add_data = self._do_download(filename, info_dict['url'].encode('utf-8'), info_dict.get('player_url', None))
779 info_dict.update(add_data)
780 except (OSError, IOError), err:
781 raise UnavailableVideoError
782 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
783 self.trouble(u'ERROR: unable to download video data: %s' % str(err))
784 return
785 except (ContentTooShortError, ), err:
786 self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
55e7c75e 787 return
788
789 if success:
790 try:
791 self.post_process(filename, info_dict)
792 except (PostProcessingError), err:
793 self.trouble(u'ERROR: postprocessing: %s' % str(err))
794 return
795
796 def download(self, url_list):
797 """Download a given list of URLs."""
798 if len(url_list) > 1 and self.fixed_template():
799 raise SameFileError(self.params['outtmpl'])
800
801 for url in url_list:
802 suitable_found = False
803 for ie in self._ies:
804 # Go to next InfoExtractor if not suitable
805 if not ie.suitable(url):
806 continue
807
808 # Suitable InfoExtractor found
809 suitable_found = True
810
811 # Extract information from URL and process it
812 ie.extract(url)
813
814 # A suitable InfoExtractor has been found; go to next URL
4fa74b52 815 break
816
4fa74b52 817 if not suitable_found:
818 self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url)
819
820 return self._download_retcode
821
822 def post_process(self, filename, ie_info):
823 """Run the postprocessing chain on the given file."""
824 info = dict(ie_info)
825 info['filepath'] = filename
826 for pp in self._pps:
827 info = pp.run(info)
828 if info is None:
829 break
830
831 def _download_with_rtmpdump(self, filename, url, player_url):
832 self.report_destination(filename)
833 tmpfilename = self.temp_name(filename)
834
835 # Check for rtmpdump first
836 try:
837 subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
838 except (OSError, IOError):
839 self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
840 return False
841
842 # Download using rtmpdump. rtmpdump returns exit code 2 when
843 # the connection was interrupted and resuming appears to be
844 # possible. This is part of rtmpdump's normal usage, AFAIK.
845 basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename]
846 retval = subprocess.call(basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)])
847 while retval == 2 or retval == 1:
848 prevsize = os.path.getsize(tmpfilename)
849 self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
850 time.sleep(5.0) # This seems to be needed
851 retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
852 cursize = os.path.getsize(tmpfilename)
853 if prevsize == cursize and retval == 1:
854 break
855 # Some rtmp streams seem to abort after ~ 99.8%. Don't complain for those
856 if prevsize == cursize and retval == 2 and cursize > 1024:
857 self.to_screen(u'\r[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
858 retval = 0
859 break
860 if retval == 0:
861 self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(tmpfilename))
862 self.try_rename(tmpfilename, filename)
863 return True
864 else:
865 self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval)
866 return False
867
868 def _do_download(self, filename, url, player_url):
869 # Check file already present
870 if self.params.get('continuedl', False) and os.path.isfile(filename) and not self.params.get('nopart', False):
871 self.report_file_already_downloaded(filename)
872 return True, {} # keep the (success, extra_info) return shape expected by process_info()
873
874 # Attempt to download using rtmpdump
875 if url.startswith('rtmp'):
876 return (self._download_with_rtmpdump(filename, url, player_url), {})
877
878 tmpfilename = self.temp_name(filename)
879 stream = None
880 open_mode = 'wb'
881
882 # Do not include the Accept-Encoding header
883 headers = {'Youtubedl-no-compression': 'True'}
884 basic_request = urllib2.Request(url, None, headers)
885 request = urllib2.Request(url, None, headers)
886
887 # Establish possible resume length
888 if os.path.isfile(tmpfilename):
889 resume_len = os.path.getsize(tmpfilename)
890 else:
891 resume_len = 0
892
893 # Request parameters in case of being able to resume
894 if self.params.get('continuedl', False) and resume_len != 0:
895 self.report_resuming_byte(resume_len)
896 request.add_header('Range', 'bytes=%d-' % resume_len)
897 open_mode = 'ab'
898
899 count = 0
900 retries = self.params.get('retries', 0)
901 while count <= retries:
902 # Establish connection
903 try:
904 data = urllib2.urlopen(request)
905 break
906 except (urllib2.HTTPError, ), err:
907 if (err.code < 500 or err.code >= 600) and err.code != 416:
908 # Unexpected HTTP error
909 raise
910 elif err.code == 416:
911 # Unable to resume (requested range not satisfiable)
912 try:
913 # Open the connection again without the range header
914 data = urllib2.urlopen(basic_request)
915 content_length = data.info()['Content-Length']
916 except (urllib2.HTTPError, ), err:
917 if err.code < 500 or err.code >= 600:
918 raise
919 else:
920 # Examine the reported length
921 if (content_length is not None and
922 (resume_len - 100 < long(content_length) < resume_len + 100)):
923 # The file had already been fully downloaded.
924 # Explanation to the above condition: in issue #175 it was revealed that
925 # YouTube sometimes adds or removes a few bytes from the end of the file,
926 # changing the file size slightly and causing problems for some users. So
927 # I decided to implement a suggested change and consider the file
928 # completely downloaded if the file size differs less than 100 bytes from
929 # the one in the hard drive.
930 self.report_file_already_downloaded(filename)
931 self.try_rename(tmpfilename, filename)
932 return True, {}
933 else:
934 # The length does not match, we start the download over
935 self.report_unable_to_resume()
936 open_mode = 'wb'
937 break
938 # Retry
939 count += 1
940 if count <= retries:
941 self.report_retry(count, retries)
942
943 if count > retries:
944 self.trouble(u'ERROR: giving up after %s retries' % retries)
945 return False, {}
946
947 data_len = data.info().get('Content-length', None)
948 if data_len is not None:
949 data_len = long(data_len) + resume_len
950 data_len_str = self.format_bytes(data_len)
951 byte_counter = 0 + resume_len
952 block_size = 1024
953 start = time.time()
954 while True:
bafa5cd9 955 # Download and write
956 before = time.time()
957 data_block = data.read(block_size)
958 after = time.time()
959 if len(data_block) == 0:
960 break
961 byte_counter += len(data_block)
962
963 # Open file just in time
964 if stream is None:
965 try:
966 (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
967 assert stream is not None
968 filename = self.undo_temp_name(tmpfilename)
969 self.report_destination(filename)
970 except (OSError, IOError), err:
971 self.trouble(u'ERROR: unable to open for writing: %s' % str(err))
972 return False, {}
973 try:
974 stream.write(data_block)
975 except (IOError, OSError), err:
976 self.trouble(u'\nERROR: unable to write data: %s' % str(err))
977 return False, {}
978 block_size = self.best_block_size(after - before, len(data_block))
979
980 # Progress message
981 percent_str = self.calc_percent(byte_counter, data_len)
982 eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
983 speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len)
984 self.report_progress(percent_str, data_len_str, speed_str, eta_str)
985
986 # Apply rate limit
987 self.slow_down(start, byte_counter - resume_len)
988
989 if stream is None:
990 self.trouble(u'\nERROR: Did not get any data blocks')
991 return False, {}
992 stream.close()
993 self.report_finish()
994 if data_len is not None and byte_counter != data_len:
995 raise ContentTooShortError(byte_counter, long(data_len))
996 self.try_rename(tmpfilename, filename)
997
998 # Update file modification time
999 filetime = None
1000 if self.params.get('updatetime', True):
1001 filetime = self.try_utime(filename, data.info().get('last-modified', None))
1002
1003 return True, {'filetime': filetime}
1004
1005
1006class InfoExtractor(object):
1007 """Information Extractor class.
1008
1009 Information extractors are the classes that, given a URL, extract
1010 information from the video (or videos) the URL refers to. This
1011 information includes the real video URL, the video title and simplified
1012 title, author and others. The information is stored in a dictionary
1013 which is then passed to the FileDownloader. The FileDownloader
1014 processes this information possibly downloading the video to the file
1015 system, among other possible outcomes. The dictionaries must include
1016 the following fields:
1017
1018 id: Video identifier.
1019 url: Final video URL.
1020 uploader: Nickname of the video uploader.
1021 title: Literal title.
1022 stitle: Simplified title.
1023 ext: Video filename extension.
1024 format: Video format.
1025 player_url: SWF Player URL (may be None).
1026
1027 The following fields are optional. Their primary purpose is to allow
1028 youtube-dl to serve as the backend for a video search function, such
1029 as the one in youtube2mp3. They are only used when their respective
1030 forced printing functions are called:
1031
1032 thumbnail: Full URL to a video thumbnail image.
1033 description: One-line video description.
1034
1035 Subclasses of this one should re-define the _real_initialize() and
1036 _real_extract() methods and define a _VALID_URL regexp.
1037 Probably, they should also be added to the list of extractors.
1038 """
1039
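 # Illustrative sketch (editor's addition, not part of the original file): the
 # rough shape a new extractor could take under the contract described above.
 # The URL pattern, field values and the ExampleIE name are all hypothetical.
 #
 #   class ExampleIE(InfoExtractor):
 #       _VALID_URL = r'(?:http://)?(?:www\.)?example\.com/video/(\d+)'
 #       IE_NAME = u'example'
 #
 #       def _real_extract(self, url):
 #           video_id = re.match(self._VALID_URL, url).group(1)
 #           self._downloader.increment_downloads()
 #           self._downloader.process_info({
 #               'id': video_id, 'url': u'http://media.example.com/%s.mp4' % video_id,
 #               'uploader': u'NA', 'upload_date': u'NA', 'title': u'Example video',
 #               'stitle': u'Example_video', 'ext': u'mp4', 'format': u'NA',
 #               'player_url': None,
 #           })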
1040 _ready = False
1041 _downloader = None
1042
1043 def __init__(self, downloader=None):
1044 """Constructor. Receives an optional downloader."""
1045 self._ready = False
1046 self.set_downloader(downloader)
1047
1048 def suitable(self, url):
1049 """Receives a URL and returns True if suitable for this IE."""
1050 return re.match(self._VALID_URL, url) is not None
1051
1052 def initialize(self):
1c5e2302 1053 """Initializes an instance (authentication, etc)."""
4fa74b52
RG
1054 if not self._ready:
1055 self._real_initialize()
1056 self._ready = True
1057
1058 def extract(self, url):
1059 """Extracts URL information and returns it in list of dicts."""
1060 self.initialize()
1061 return self._real_extract(url)
1062
1063 def set_downloader(self, downloader):
1064 """Sets the downloader for this IE."""
1065 self._downloader = downloader
1066
1067 def _real_initialize(self):
1068 """Real initialization process. Redefine in subclasses."""
1069 pass
1070
1071 def _real_extract(self, url):
1072 """Real extraction process. Redefine in subclasses."""
1073 pass
1074
1075
1076class YoutubeIE(InfoExtractor):
1077 """Information extractor for youtube.com."""
1078
1079 _VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?:(?:(?:v|embed|e)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=))?)?([0-9A-Za-z_-]+)(?(1).+)?$'
1080 _LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
1081 _LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en'
1082 _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
1083 _NETRC_MACHINE = 'youtube'
1084 # Listed in order of quality
1085 _available_formats = ['38', '37', '45', '22', '43', '35', '34', '18', '6', '5', '17', '13']
1086 _video_extensions = {
1087 '13': '3gp',
1088 '17': 'mp4',
1089 '18': 'mp4',
1090 '22': 'mp4',
1091 '37': 'mp4',
1092 '38': 'video', # You actually don't know if this will be MOV, AVI or whatever
1093 '43': 'webm',
1094 '45': 'webm',
1095 }
1096 IE_NAME = u'youtube'
1097
1098 def report_lang(self):
1099 """Report attempt to set language."""
1100 self._downloader.to_screen(u'[youtube] Setting language')
1101
1102 def report_login(self):
1103 """Report attempt to log in."""
1104 self._downloader.to_screen(u'[youtube] Logging in')
1105
1106 def report_age_confirmation(self):
1107 """Report attempt to confirm age."""
1108 self._downloader.to_screen(u'[youtube] Confirming age')
1109
1110 def report_video_webpage_download(self, video_id):
1111 """Report attempt to download video webpage."""
1112 self._downloader.to_screen(u'[youtube] %s: Downloading video webpage' % video_id)
1113
1114 def report_video_info_webpage_download(self, video_id):
1115 """Report attempt to download video info webpage."""
1116 self._downloader.to_screen(u'[youtube] %s: Downloading video info webpage' % video_id)
1117
1118 def report_information_extraction(self, video_id):
1119 """Report attempt to extract video information."""
1120 self._downloader.to_screen(u'[youtube] %s: Extracting video information' % video_id)
1121
7b7759f5 1122 def report_unavailable_format(self, video_id, format):
1123 """Report extracted video URL."""
1124 self._downloader.to_screen(u'[youtube] %s: Format %s not available' % (video_id, format))
1125
1126 def report_rtmp_download(self):
1127 """Indicate the download will use the RTMP protocol."""
1128 self._downloader.to_screen(u'[youtube] RTMP download detected')
1129
1130 def _real_initialize(self):
1131 if self._downloader is None:
1132 return
1133
1134 username = None
1135 password = None
1136 downloader_params = self._downloader.params
1137
1138 # Attempt to use provided username and password or .netrc data
1139 if downloader_params.get('username', None) is not None:
1140 username = downloader_params['username']
1141 password = downloader_params['password']
1142 elif downloader_params.get('usenetrc', False):
1143 try:
1144 info = netrc.netrc().authenticators(self._NETRC_MACHINE)
1145 if info is not None:
1146 username = info[0]
1147 password = info[2]
1148 else:
1149 raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
1150 except (IOError, netrc.NetrcParseError), err:
1151 self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
1152 return
1153
1154 # Set language
1155 request = urllib2.Request(self._LANG_URL)
1156 try:
1157 self.report_lang()
1158 urllib2.urlopen(request).read()
1159 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1160 self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err))
1161 return
1162
1163 # No authentication to be performed
1164 if username is None:
1165 return
1166
1167 # Log in
1168 login_form = {
1169 'current_form': 'loginForm',
1170 'next': '/',
1171 'action_login': 'Log In',
1172 'username': username,
1173 'password': password,
1174 }
1175 request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form))
1176 try:
1177 self.report_login()
1178 login_results = urllib2.urlopen(request).read()
1179 if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
1180 self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
1181 return
1182 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1183 self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
1184 return
1185
1186 # Confirm age
1187 age_form = {
1188 'next_url': '/',
1189 'action_confirm': 'Confirm',
1190 }
1191 request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form))
1192 try:
1193 self.report_age_confirmation()
1194 age_results = urllib2.urlopen(request).read()
1195 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1196 self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
1197 return
1198
1199 def _real_extract(self, url):
1200 # Extract video id from URL
1201 mobj = re.match(self._VALID_URL, url)
1202 if mobj is None:
1203 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
1204 return
1205 video_id = mobj.group(2)
1206
1207 # Get video webpage
1208 self.report_video_webpage_download(video_id)
1209 request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en&amp;has_verified=1' % video_id)
1210 try:
1211 video_webpage = urllib2.urlopen(request).read()
1212 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1213 self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
1214 return
1215
1216 # Attempt to extract SWF player URL
1217 mobj = re.search(r'swfConfig.*?"(http:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1218 if mobj is not None:
1219 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1220 else:
1221 player_url = None
1222
1223 # Get video info
1224 self.report_video_info_webpage_download(video_id)
1225 for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
1226 video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
1227 % (video_id, el_type))
1228 request = urllib2.Request(video_info_url)
1229 try:
1230 video_info_webpage = urllib2.urlopen(request).read()
1231 video_info = parse_qs(video_info_webpage)
1232 if 'token' in video_info:
1233 break
1234 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1235 self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
1236 return
1237 if 'token' not in video_info:
1238 if 'reason' in video_info:
1239 self._downloader.trouble(u'ERROR: YouTube said: %s' % video_info['reason'][0].decode('utf-8'))
1240 else:
1241 self._downloader.trouble(u'ERROR: "token" parameter not in video info for unknown reason')
1242 return
1243
1244 # Start extracting information
1245 self.report_information_extraction(video_id)
1246
1247 # uploader
1248 if 'author' not in video_info:
1249 self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
1250 return
1251 video_uploader = urllib.unquote_plus(video_info['author'][0])
1252
1253 # title
1254 if 'title' not in video_info:
1255 self._downloader.trouble(u'ERROR: unable to extract video title')
1256 return
1257 video_title = urllib.unquote_plus(video_info['title'][0])
1258 video_title = video_title.decode('utf-8')
1259 video_title = sanitize_title(video_title)
1260
1261 # simplified title
1262 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1263 simple_title = simple_title.strip(ur'_')
1264
1265 # thumbnail image
1266 if 'thumbnail_url' not in video_info:
1267 self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
1268 video_thumbnail = ''
1269 else: # don't panic if we can't find it
1270 video_thumbnail = urllib.unquote_plus(video_info['thumbnail_url'][0])
1271
1272 # upload date
1273 upload_date = u'NA'
1274 mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
1275 if mobj is not None:
1276 upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
1277 format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y']
1278 for expression in format_expressions:
1279 try:
1280 upload_date = datetime.datetime.strptime(upload_date, expression).strftime('%Y%m%d')
1281 except:
1282 pass
1283
1284 # description
1285 try:
1286 lxml.etree
1287 except NameError:
1288 video_description = u'No description available.'
1289 if self._downloader.params.get('forcedescription', False) or self._downloader.params.get('writedescription', False):
1290 mobj = re.search(r'<meta name="description" content="(.*)"(?:\s*/)?>', video_webpage)
1291 if mobj is not None:
1292 video_description = mobj.group(1).decode('utf-8')
1293 else:
1294 html_parser = lxml.etree.HTMLParser(encoding='utf-8')
1295 vwebpage_doc = lxml.etree.parse(StringIO.StringIO(video_webpage), html_parser)
1296 video_description = u''.join(vwebpage_doc.xpath('id("eow-description")//text()'))
1297 # TODO use another parser
1298
1299 # token
1300 video_token = urllib.unquote_plus(video_info['token'][0])
1301
1302 # Decide which formats to download
1303 req_format = self._downloader.params.get('format', None)
1304
1305 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1306 self.report_rtmp_download()
1307 video_url_list = [(None, video_info['conn'][0])]
1308 elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1:
1309 url_data_strs = video_info['url_encoded_fmt_stream_map'][0].split(',')
1310 url_data = [parse_qs(uds) for uds in url_data_strs]
1311 url_data = filter(lambda ud: 'itag' in ud and 'url' in ud, url_data)
1312 url_map = dict((ud['itag'][0], ud['url'][0]) for ud in url_data)
1313
1314 format_limit = self._downloader.params.get('format_limit', None)
1315 if format_limit is not None and format_limit in self._available_formats:
1316 format_list = self._available_formats[self._available_formats.index(format_limit):]
1317 else:
1318 format_list = self._available_formats
1319 existing_formats = [x for x in format_list if x in url_map]
1320 if len(existing_formats) == 0:
1321 self._downloader.trouble(u'ERROR: no known formats available for video')
1322 return
1323 if req_format is None or req_format == 'best':
1324 video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
c52b01f3
K
1325 elif req_format == 'worst':
1326 video_url_list = [(existing_formats[-1], url_map[existing_formats[-1]])] # worst quality
1327 elif req_format in ('-1', 'all'):
1328 video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
1329 else:
1330 # Specific formats. We pick the first in a slash-delimited sequence.
1331 # For example, if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
1332 req_formats = req_format.split('/')
1333 video_url_list = None
1334 for rf in req_formats:
1335 if rf in url_map:
1336 video_url_list = [(rf, url_map[rf])]
1337 break
1338 if video_url_list is None:
1339 self._downloader.trouble(u'ERROR: requested format not available')
1340 return
1341 else:
1342 self._downloader.trouble(u'ERROR: no conn or url_encoded_fmt_stream_map information found in video info')
1343 return
1344
1345 for format_param, video_real_url in video_url_list:
1346 # At this point we have a new video
1347 self._downloader.increment_downloads()
1348
1349 # Extension
1350 video_extension = self._video_extensions.get(format_param, 'flv')
1351
1352 try:
1353 # Process video information
1354 self._downloader.process_info({
1355 'id': video_id.decode('utf-8'),
1356 'url': video_real_url.decode('utf-8'),
1357 'uploader': video_uploader.decode('utf-8'),
1358 'upload_date': upload_date,
1359 'title': video_title,
1360 'stitle': simple_title,
1361 'ext': video_extension.decode('utf-8'),
1362 'format': (format_param is None and u'NA' or format_param.decode('utf-8')),
1363 'thumbnail': video_thumbnail.decode('utf-8'),
1364 'description': video_description,
1365 'player_url': player_url,
1366 })
1367 except UnavailableVideoError, err:
1368 self._downloader.trouble(u'\nERROR: unable to download video')
1369
1370
1371class MetacafeIE(InfoExtractor):
1372 """Information Extractor for metacafe.com."""
1373
1374 _VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
1375 _DISCLAIMER = 'http://www.metacafe.com/family_filter/'
1376 _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
1377 _youtube_ie = None
1378 IE_NAME = u'metacafe'
1379
1380 def __init__(self, youtube_ie, downloader=None):
1381 InfoExtractor.__init__(self, downloader)
1382 self._youtube_ie = youtube_ie
1383
1384 def report_disclaimer(self):
1385 """Report disclaimer retrieval."""
331ce0a0 1386 self._downloader.to_screen(u'[metacafe] Retrieving disclaimer')
1387
1388 def report_age_confirmation(self):
1389 """Report attempt to confirm age."""
1390 self._downloader.to_screen(u'[metacafe] Confirming age')
1391
1392 def report_download_webpage(self, video_id):
1393 """Report webpage download."""
1394 self._downloader.to_screen(u'[metacafe] %s: Downloading webpage' % video_id)
1395
1396 def report_extraction(self, video_id):
1397 """Report information extraction."""
1398 self._downloader.to_screen(u'[metacafe] %s: Extracting information' % video_id)
1399
1400 def _real_initialize(self):
1401 # Retrieve disclaimer
1402 request = urllib2.Request(self._DISCLAIMER)
1403 try:
1404 self.report_disclaimer()
1405 disclaimer = urllib2.urlopen(request).read()
1406 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1407 self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % str(err))
1408 return
1409
1410 # Confirm age
1411 disclaimer_form = {
1412 'filters': '0',
1413 'submit': "Continue - I'm over 18",
1414 }
1415 request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form))
1416 try:
1417 self.report_age_confirmation()
1418 disclaimer = urllib2.urlopen(request).read()
1419 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1420 self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
1421 return
1422
1423 def _real_extract(self, url):
1424 # Extract id and simplified title from URL
1425 mobj = re.match(self._VALID_URL, url)
1426 if mobj is None:
147753eb 1427 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
6f21f686 1428 return
020f7150
RG
1429
1430 video_id = mobj.group(1)
1431
1432 # Check if video comes from YouTube
1433 mobj2 = re.match(r'^yt-(.*)$', video_id)
1434 if mobj2 is not None:
6f21f686
RG
1435 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % mobj2.group(1))
1436 return
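		# For illustration (hypothetical IDs): a URL like
		#   http://www.metacafe.com/watch/yt-abc123/some_title/
		# is handed off to the YouTube extractor as
		#   http://www.youtube.com/watch?v=abc123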
020f7150 1437
df372a65 1438 # At this point we have a new video
9bf7fa52 1439 self._downloader.increment_downloads()
df372a65 1440
020f7150 1441 simple_title = mobj.group(2).decode('utf-8')
020f7150
RG
1442
1443 # Retrieve video webpage to extract further information
1444 request = urllib2.Request('http://www.metacafe.com/watch/%s/' % video_id)
1445 try:
1446 self.report_download_webpage(video_id)
1447 webpage = urllib2.urlopen(request).read()
1448 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
147753eb 1449 self._downloader.trouble(u'ERROR: unable to retrieve video webpage: %s' % str(err))
6f21f686 1450 return
020f7150
RG
1451
1452 # Extract URL, uploader and title from webpage
1453 self.report_extraction(video_id)
18963a36 1454 mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
c6c555cf
RG
1455 if mobj is not None:
1456 mediaURL = urllib.unquote(mobj.group(1))
6b57e8c5 1457 video_extension = mediaURL[-3:]
d3975459 1458
c6c555cf
RG
1459 # Extract gdaKey if available
1460 mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage)
1461 if mobj is None:
1462 video_url = mediaURL
1463 else:
1464 gdaKey = mobj.group(1)
1465 video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)
109626fc 1466 else:
c6c555cf
RG
1467 mobj = re.search(r' name="flashvars" value="(.*?)"', webpage)
1468 if mobj is None:
1469 self._downloader.trouble(u'ERROR: unable to extract media URL')
1470 return
1471 vardict = parse_qs(mobj.group(1))
1472 if 'mediaData' not in vardict:
1473 self._downloader.trouble(u'ERROR: unable to extract media URL')
1474 return
1475 mobj = re.search(r'"mediaURL":"(http.*?)","key":"(.*?)"', vardict['mediaData'][0])
1476 if mobj is None:
1477 self._downloader.trouble(u'ERROR: unable to extract media URL')
1478 return
6b57e8c5
RG
1479 mediaURL = mobj.group(1).replace('\\/', '/')
1480 video_extension = mediaURL[-3:]
1481 video_url = '%s?__gda__=%s' % (mediaURL, mobj.group(2))
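			# Sketch of the flashvars value this branch assumes (hypothetical):
			#   mediaData=[{"mediaURL":"http:\/\/cdn.example.com\/clip.flv","key":"abc"}]&...
			# which, after parse_qs and unescaping, yields
			#   video_url = 'http://cdn.example.com/clip.flv?__gda__=abc'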
020f7150 1482
2546e767 1483 mobj = re.search(r'(?im)<title>(.*) - Video</title>', webpage)
020f7150 1484 if mobj is None:
147753eb 1485 self._downloader.trouble(u'ERROR: unable to extract title')
6f21f686 1486 return
020f7150 1487 video_title = mobj.group(1).decode('utf-8')
490fd7ae 1488 video_title = sanitize_title(video_title)
020f7150 1489
29f07568 1490 mobj = re.search(r'(?ms)By:\s*<a .*?>(.+?)<', webpage)
020f7150 1491 if mobj is None:
147753eb 1492 self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
6f21f686 1493 return
dbccb6cd 1494 video_uploader = mobj.group(1)
020f7150 1495
42bcd27d 1496 try:
1497 # Process video information
1498 self._downloader.process_info({
1499 'id': video_id.decode('utf-8'),
1500 'url': video_url.decode('utf-8'),
1501 'uploader': video_uploader.decode('utf-8'),
138b11f3 1502 'upload_date': u'NA',
42bcd27d 1503 'title': video_title,
1504 'stitle': simple_title,
1505 'ext': video_extension.decode('utf-8'),
6ba562b0 1506 'format': u'NA',
e616ec0c 1507 'player_url': None,
42bcd27d 1508 })
73f4e7af 1509 except UnavailableVideoError:
09cc744c 1510 self._downloader.trouble(u'\nERROR: unable to download video')
020f7150 1511
25af2bce 1512
4135fa45
WB
1513class DailymotionIE(InfoExtractor):
1514 """Information Extractor for Dailymotion"""
1515
1516 _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^_/]+)_([^/]+)'
f3098c4d 1517 IE_NAME = u'dailymotion'
4135fa45
WB
1518
1519 def __init__(self, downloader=None):
1520 InfoExtractor.__init__(self, downloader)
1521
4135fa45
WB
1522 def report_download_webpage(self, video_id):
1523 """Report webpage download."""
331ce0a0 1524 self._downloader.to_screen(u'[dailymotion] %s: Downloading webpage' % video_id)
d3975459 1525
4135fa45
WB
1526 def report_extraction(self, video_id):
1527 """Report information extraction."""
331ce0a0 1528 self._downloader.to_screen(u'[dailymotion] %s: Extracting information' % video_id)
4135fa45
WB
1529
1530 def _real_initialize(self):
1531 return
1532
4135fa45
WB
1533 def _real_extract(self, url):
1534 # Extract id and simplified title from URL
1535 mobj = re.match(self._VALID_URL, url)
1536 if mobj is None:
1537 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
1538 return
1539
df372a65 1540 # At this point we have a new video
9bf7fa52 1541 self._downloader.increment_downloads()
4135fa45
WB
1542 video_id = mobj.group(1)
1543
1544 simple_title = mobj.group(2).decode('utf-8')
1545 video_extension = 'flv'
1546
1547 # Retrieve video webpage to extract further information
1548 request = urllib2.Request(url)
62a29bbf 1549 request.add_header('Cookie', 'family_filter=off')
4135fa45
WB
1550 try:
1551 self.report_download_webpage(video_id)
1552 webpage = urllib2.urlopen(request).read()
1553 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1554 self._downloader.trouble(u'ERROR: unable to retrieve video webpage: %s' % str(err))
1555 return
1556
1557 # Extract URL, uploader and title from webpage
1558 self.report_extraction(video_id)
62a29bbf 1559 mobj = re.search(r'(?i)addVariable\(\"sequence\"\s*,\s*\"([^\"]+?)\"\)', webpage)
4135fa45
WB
1560 if mobj is None:
1561 self._downloader.trouble(u'ERROR: unable to extract media URL')
1562 return
62a29bbf 1563 sequence = urllib.unquote(mobj.group(1))
1564 mobj = re.search(r',\"sdURL\"\:\"([^\"]+?)\",', sequence)
1565 if mobj is None:
1566 self._downloader.trouble(u'ERROR: unable to extract media URL')
1567 return
1568 mediaURL = urllib.unquote(mobj.group(1)).replace('\\', '')
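		# The "sequence" flashvar is assumed to be a URL-encoded, JSON-like blob
		# containing (hypothetical) ...,"sdURL":"http:\/\/cdn.example.com\/video.flv",...
		# so unquoting it and stripping backslashes leaves a plain http URL.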
4135fa45
WB
1569
1570 # if mediaURL were a relative URL, http://www.dailymotion.com/ would need to be prepended here
1571
1572 video_url = mediaURL
1573
62a29bbf 1574 mobj = re.search(r'(?im)<title>Dailymotion\s*-\s*(.+)\s*-\s*[^<]+?</title>', webpage)
4135fa45
WB
1575 if mobj is None:
1576 self._downloader.trouble(u'ERROR: unable to extract title')
1577 return
1578 video_title = mobj.group(1).decode('utf-8')
1579 video_title = sanitize_title(video_title)
1580
62a29bbf 1581 mobj = re.search(r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a></span>', webpage)
4135fa45
WB
1582 if mobj is None:
1583 self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
1584 return
1585 video_uploader = mobj.group(1)
1586
1587 try:
1588 # Process video information
1589 self._downloader.process_info({
1590 'id': video_id.decode('utf-8'),
1591 'url': video_url.decode('utf-8'),
1592 'uploader': video_uploader.decode('utf-8'),
138b11f3 1593 'upload_date': u'NA',
4135fa45
WB
1594 'title': video_title,
1595 'stitle': simple_title,
1596 'ext': video_extension.decode('utf-8'),
1597 'format': u'NA',
1598 'player_url': None,
1599 })
73f4e7af 1600 except UnavailableVideoError:
09cc744c 1601 self._downloader.trouble(u'\nERROR: unable to download video')
4135fa45 1602
c0a10ca8 1603
49c0028a 1604class GoogleIE(InfoExtractor):
1605 """Information extractor for video.google.com."""
1606
490fd7ae 1607 _VALID_URL = r'(?:http://)?video\.google\.(?:com(?:\.au)?|co\.(?:uk|jp|kr|cr)|ca|de|es|fr|it|nl|pl)/videoplay\?docid=([^\&]+).*'
f3098c4d 1608 IE_NAME = u'video.google'
49c0028a 1609
1610 def __init__(self, downloader=None):
1611 InfoExtractor.__init__(self, downloader)
1612
49c0028a 1613 def report_download_webpage(self, video_id):
1614 """Report webpage download."""
331ce0a0 1615 self._downloader.to_screen(u'[video.google] %s: Downloading webpage' % video_id)
49c0028a 1616
1617 def report_extraction(self, video_id):
1618 """Report information extraction."""
331ce0a0 1619 self._downloader.to_screen(u'[video.google] %s: Extracting information' % video_id)
49c0028a 1620
1621 def _real_initialize(self):
1622 return
1623
1624 def _real_extract(self, url):
1625 # Extract id from URL
1626 mobj = re.match(self._VALID_URL, url)
1627 if mobj is None:
1628 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1629 return
1630
df372a65 1631 # At this point we have a new video
9bf7fa52 1632 self._downloader.increment_downloads()
49c0028a 1633 video_id = mobj.group(1)
1634
1635 video_extension = 'mp4'
1636
1637 # Retrieve video webpage to extract further information
490fd7ae 1638 request = urllib2.Request('http://video.google.com/videoplay?docid=%s&hl=en&oe=utf-8' % video_id)
49c0028a 1639 try:
1640 self.report_download_webpage(video_id)
1641 webpage = urllib2.urlopen(request).read()
1642 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1643 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1644 return
1645
1646 # Extract URL, uploader, and title from webpage
1647 self.report_extraction(video_id)
490fd7ae
RG
1648 mobj = re.search(r"download_url:'([^']+)'", webpage)
1649 if mobj is None:
1650 video_extension = 'flv'
1651 mobj = re.search(r"(?i)videoUrl\\x3d(.+?)\\x26", webpage)
49c0028a 1652 if mobj is None:
1653 self._downloader.trouble(u'ERROR: unable to extract media URL')
1654 return
1655 mediaURL = urllib.unquote(mobj.group(1))
1656 mediaURL = mediaURL.replace('\\x3d', '\x3d')
1657 mediaURL = mediaURL.replace('\\x26', '\x26')
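			# '\x3d' and '\x26' are just the escaped '=' and '&' characters, so after
			# these replacements mediaURL is an ordinary query-string URL again.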
1658
1659 video_url = mediaURL
1660
1661 mobj = re.search(r'<title>(.*)</title>', webpage)
1662 if mobj is None:
1663 self._downloader.trouble(u'ERROR: unable to extract title')
1664 return
1665 video_title = mobj.group(1).decode('utf-8')
490fd7ae 1666 video_title = sanitize_title(video_title)
31cbdaaf 1667 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
49c0028a 1668
7e58d568
RG
1669 # Extract video description
1670 mobj = re.search(r'<span id=short-desc-content>([^<]*)</span>', webpage)
1671 if mobj is None:
1672 self._downloader.trouble(u'ERROR: unable to extract video description')
1673 return
1674 video_description = mobj.group(1).decode('utf-8')
1675 if not video_description:
1676 video_description = 'No description available.'
1677
1678 # Extract video thumbnail
1679 if self._downloader.params.get('forcethumbnail', False):
1680 request = urllib2.Request('http://video.google.com/videosearch?q=%s+site:video.google.com&hl=en' % abs(int(video_id)))
1681 try:
1682 webpage = urllib2.urlopen(request).read()
1683 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1684 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1685 return
1686 mobj = re.search(r'<img class=thumbnail-img (?:.* )?src=(http.*)>', webpage)
1687 if mobj is None:
1688 self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
1689 return
1690 video_thumbnail = mobj.group(1)
1691 else: # we need something to pass to process_info
1692 video_thumbnail = ''
1693
49c0028a 1694 try:
1695 # Process video information
1696 self._downloader.process_info({
1697 'id': video_id.decode('utf-8'),
1698 'url': video_url.decode('utf-8'),
6ba562b0 1699 'uploader': u'NA',
138b11f3 1700 'upload_date': u'NA',
490fd7ae 1701 'title': video_title,
31cbdaaf 1702 'stitle': simple_title,
49c0028a 1703 'ext': video_extension.decode('utf-8'),
6ba562b0 1704 'format': u'NA',
e616ec0c 1705 'player_url': None,
49c0028a 1706 })
73f4e7af 1707 except UnavailableVideoError:
09cc744c 1708 self._downloader.trouble(u'\nERROR: unable to download video')
49c0028a 1709
1710
1711class PhotobucketIE(InfoExtractor):
1712 """Information extractor for photobucket.com."""
1713
1714 _VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)'
f3098c4d 1715 IE_NAME = u'photobucket'
49c0028a 1716
1717 def __init__(self, downloader=None):
1718 InfoExtractor.__init__(self, downloader)
1719
49c0028a 1720 def report_download_webpage(self, video_id):
1721 """Report webpage download."""
331ce0a0 1722 self._downloader.to_screen(u'[photobucket] %s: Downloading webpage' % video_id)
49c0028a 1723
1724 def report_extraction(self, video_id):
1725 """Report information extraction."""
331ce0a0 1726 self._downloader.to_screen(u'[photobucket] %s: Extracting information' % video_id)
49c0028a 1727
1728 def _real_initialize(self):
1729 return
1730
1731 def _real_extract(self, url):
1732 # Extract id from URL
1733 mobj = re.match(self._VALID_URL, url)
1734 if mobj is None:
1735 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1736 return
1737
df372a65 1738 # At this point we have a new video
9bf7fa52 1739 self._downloader.increment_downloads()
49c0028a 1740 video_id = mobj.group(1)
1741
1742 video_extension = 'flv'
1743
1744 # Retrieve video webpage to extract further information
1745 request = urllib2.Request(url)
1746 try:
1747 self.report_download_webpage(video_id)
1748 webpage = urllib2.urlopen(request).read()
1749 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1750 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1751 return
1752
1753 # Extract URL, uploader, and title from webpage
1754 self.report_extraction(video_id)
1755 mobj = re.search(r'<link rel="video_src" href=".*\?file=([^"]+)" />', webpage)
1756 if mobj is None:
1757 self._downloader.trouble(u'ERROR: unable to extract media URL')
1758 return
1759 mediaURL = urllib.unquote(mobj.group(1))
1760
1761 video_url = mediaURL
1762
1763 mobj = re.search(r'<title>(.*) video by (.*) - Photobucket</title>', webpage)
1764 if mobj is None:
1765 self._downloader.trouble(u'ERROR: unable to extract title')
1766 return
1767 video_title = mobj.group(1).decode('utf-8')
490fd7ae 1768 video_title = sanitize_title(video_title)
31cbdaaf 1769 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
49c0028a 1770
1771 video_uploader = mobj.group(2).decode('utf-8')
1772
1773 try:
1774 # Process video information
1775 self._downloader.process_info({
1776 'id': video_id.decode('utf-8'),
1777 'url': video_url.decode('utf-8'),
490fd7ae 1778 'uploader': video_uploader,
138b11f3 1779 'upload_date': u'NA',
490fd7ae 1780 'title': video_title,
31cbdaaf 1781 'stitle': simple_title,
490fd7ae 1782 'ext': video_extension.decode('utf-8'),
6ba562b0 1783 'format': u'NA',
e616ec0c 1784 'player_url': None,
490fd7ae 1785 })
73f4e7af 1786 except UnavailableVideoError:
09cc744c 1787 self._downloader.trouble(u'\nERROR: unable to download video')
490fd7ae
RG
1788
1789
61945318
RG
1790class YahooIE(InfoExtractor):
1791 """Information extractor for video.yahoo.com."""
1792
1793 # _VALID_URL matches all Yahoo! Video URLs
1794 # _VPAGE_URL matches only the extractable '/watch/' URLs
1795 _VALID_URL = r'(?:http://)?(?:[a-z]+\.)?video\.yahoo\.com/(?:watch|network)/([0-9]+)(?:/|\?v=)([0-9]+)(?:[#\?].*)?'
1796 _VPAGE_URL = r'(?:http://)?video\.yahoo\.com/watch/([0-9]+)/([0-9]+)(?:[#\?].*)?'
f3098c4d 1797 IE_NAME = u'video.yahoo'
61945318
RG
1798
1799 def __init__(self, downloader=None):
1800 InfoExtractor.__init__(self, downloader)
1801
61945318
RG
1802 def report_download_webpage(self, video_id):
1803 """Report webpage download."""
331ce0a0 1804 self._downloader.to_screen(u'[video.yahoo] %s: Downloading webpage' % video_id)
61945318
RG
1805
1806 def report_extraction(self, video_id):
1807 """Report information extraction."""
331ce0a0 1808 self._downloader.to_screen(u'[video.yahoo] %s: Extracting information' % video_id)
61945318
RG
1809
1810 def _real_initialize(self):
1811 return
1812
df372a65 1813 def _real_extract(self, url, new_video=True):
61945318
RG
1814 # Extract ID from URL
1815 mobj = re.match(self._VALID_URL, url)
1816 if mobj is None:
1817 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1818 return
1819
df372a65 1820 # At this point we have a new video
9bf7fa52 1821 self._downloader.increment_downloads()
61945318
RG
1822 video_id = mobj.group(2)
1823 video_extension = 'flv'
1824
1825 # Rewrite valid but non-extractable URLs as
1826 # extractable English language /watch/ URLs
1827 if re.match(self._VPAGE_URL, url) is None:
1828 request = urllib2.Request(url)
1829 try:
1830 webpage = urllib2.urlopen(request).read()
1831 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1832 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1833 return
1834
1835 mobj = re.search(r'\("id", "([0-9]+)"\);', webpage)
1836 if mobj is None:
1837 self._downloader.trouble(u'ERROR: Unable to extract id field')
1838 return
1839 yahoo_id = mobj.group(1)
1840
1841 mobj = re.search(r'\("vid", "([0-9]+)"\);', webpage)
1842 if mobj is None:
1843 self._downloader.trouble(u'ERROR: Unable to extract vid field')
1844 return
1845 yahoo_vid = mobj.group(1)
1846
1847 url = 'http://video.yahoo.com/watch/%s/%s' % (yahoo_vid, yahoo_id)
df372a65 1848 return self._real_extract(url, new_video=False)
61945318
RG
1849
1850 # Retrieve video webpage to extract further information
1851 request = urllib2.Request(url)
1852 try:
1853 self.report_download_webpage(video_id)
1854 webpage = urllib2.urlopen(request).read()
1855 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1856 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1857 return
1858
1859 # Extract uploader and title from webpage
1860 self.report_extraction(video_id)
1861 mobj = re.search(r'<meta name="title" content="(.*)" />', webpage)
1862 if mobj is None:
1863 self._downloader.trouble(u'ERROR: unable to extract video title')
1864 return
1865 video_title = mobj.group(1).decode('utf-8')
1866 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1867
1868 mobj = re.search(r'<h2 class="ti-5"><a href="http://video\.yahoo\.com/(people|profile)/[0-9]+" beacon=".*">(.*)</a></h2>', webpage)
1869 if mobj is None:
1870 self._downloader.trouble(u'ERROR: unable to extract video uploader')
1871 return
1872 video_uploader = mobj.group(2).decode('utf-8') # group(1) is 'people'/'profile'; group(2) is the uploader name
1873
7e58d568
RG
1874 # Extract video thumbnail
1875 mobj = re.search(r'<link rel="image_src" href="(.*)" />', webpage)
1876 if mobj is None:
1877 self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
1878 return
1879 video_thumbnail = mobj.group(1).decode('utf-8')
1880
1881 # Extract video description
1882 mobj = re.search(r'<meta name="description" content="(.*)" />', webpage)
1883 if mobj is None:
1884 self._downloader.trouble(u'ERROR: unable to extract video description')
1885 return
1886 video_description = mobj.group(1).decode('utf-8')
c0a10ca8
F
1887 if not video_description:
1888 video_description = 'No description available.'
7e58d568 1889
61945318
RG
1890 # Extract video height and width
1891 mobj = re.search(r'<meta name="video_height" content="([0-9]+)" />', webpage)
1892 if mobj is None:
1893 self._downloader.trouble(u'ERROR: unable to extract video height')
1894 return
1895 yv_video_height = mobj.group(1)
1896
1897 mobj = re.search(r'<meta name="video_width" content="([0-9]+)" />', webpage)
1898 if mobj is None:
1899 self._downloader.trouble(u'ERROR: unable to extract video width')
1900 return
1901 yv_video_width = mobj.group(1)
1902
1903 # Retrieve video playlist to extract media URL
1904 # I'm not completely sure what all these options are, but we
1905 # seem to need most of them, otherwise the server sends a 401.
1906 yv_lg = 'R0xx6idZnW2zlrKP8xxAIR' # not sure what this represents
1907 yv_bitrate = '700' # according to Wikipedia this is hard-coded
1908 request = urllib2.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id +
c0a10ca8
F
1909 '&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height +
1910 '&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797')
61945318
RG
1911 try:
1912 self.report_download_webpage(video_id)
1913 webpage = urllib2.urlopen(request).read()
1914 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1915 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1916 return
1917
1918 # Extract media URL from playlist XML
1919 mobj = re.search(r'<STREAM APP="(http://.*)" FULLPATH="/?(/.*\.flv\?[^"]*)"', webpage)
1920 if mobj is None:
1921 self._downloader.trouble(u'ERROR: Unable to extract media URL')
1922 return
1923 video_url = urllib.unquote(mobj.group(1) + mobj.group(2)).decode('utf-8')
1924 video_url = re.sub(r'(?u)&(.+?);', htmlentity_transform, video_url)
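		# Hypothetical shape of the playlist XML this relies on:
		#   <STREAM APP="http://host.example.com/stream" FULLPATH="/path/clip.flv?k=v" ...>
		# video_url is APP + FULLPATH after unquoting and entity decoding.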
1925
1926 try:
1927 # Process video information
1928 self._downloader.process_info({
1929 'id': video_id.decode('utf-8'),
1930 'url': video_url,
1931 'uploader': video_uploader,
138b11f3 1932 'upload_date': u'NA',
61945318
RG
1933 'title': video_title,
1934 'stitle': simple_title,
1935 'ext': video_extension.decode('utf-8'),
7e58d568
RG
1936 'thumbnail': video_thumbnail.decode('utf-8'),
1937 'description': video_description,
e616ec0c 1939 'player_url': None,
61945318 1940 })
73f4e7af 1941 except UnavailableVideoError:
09cc744c 1942 self._downloader.trouble(u'\nERROR: unable to download video')
61945318
RG
1943
1944
92743d42
RB
1945class VimeoIE(InfoExtractor):
1946 """Information extractor for vimeo.com."""
1947
1948 # _VALID_URL matches Vimeo URLs
44c636df 1949 _VALID_URL = r'(?:https?://)?(?:(?:www|player)\.)?vimeo\.com/(?:groups/[^/]+/)?(?:videos?/)?([0-9]+)'
f3098c4d 1950 IE_NAME = u'vimeo'
92743d42
RB
1951
1952 def __init__(self, downloader=None):
1953 InfoExtractor.__init__(self, downloader)
1954
92743d42
RB
1955 def report_download_webpage(self, video_id):
1956 """Report webpage download."""
0ecedbdb 1957 self._downloader.to_screen(u'[vimeo] %s: Downloading webpage' % video_id)
92743d42
RB
1958
1959 def report_extraction(self, video_id):
1960 """Report information extraction."""
0ecedbdb 1961 self._downloader.to_screen(u'[vimeo] %s: Extracting information' % video_id)
92743d42
RB
1962
1963 def _real_initialize(self):
1964 return
1965
1966 def _real_extract(self, url, new_video=True):
1967 # Extract ID from URL
1968 mobj = re.match(self._VALID_URL, url)
1969 if mobj is None:
1970 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1971 return
1972
1973 # At this point we have a new video
1974 self._downloader.increment_downloads()
1975 video_id = mobj.group(1)
92743d42
RB
1976
1977 # Retrieve video webpage to extract further information
1978 request = urllib2.Request("http://vimeo.com/moogaloop/load/clip:%s" % video_id, None, std_headers)
1979 try:
1980 self.report_download_webpage(video_id)
1981 webpage = urllib2.urlopen(request).read()
1982 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1983 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1984 return
1985
f24c674b
RB
1986 # Now we begin extracting as much information as we can from what we
1987 # retrieved. First we extract the information common to all extractors,
1988 # and later we extract those that are Vimeo-specific.
92743d42 1989 self.report_extraction(video_id)
f24c674b
RB
1990
1991 # Extract title
c5a088d3 1992 mobj = re.search(r'<caption>(.*?)</caption>', webpage)
92743d42
RB
1993 if mobj is None:
1994 self._downloader.trouble(u'ERROR: unable to extract video title')
1995 return
1996 video_title = mobj.group(1).decode('utf-8')
1997 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1998
f24c674b 1999 # Extract uploader
c5a088d3 2000 mobj = re.search(r'<uploader_url>http://vimeo.com/(.*?)</uploader_url>', webpage)
92743d42
RB
2001 if mobj is None:
2002 self._downloader.trouble(u'ERROR: unable to extract video uploader')
2003 return
2004 video_uploader = mobj.group(1).decode('utf-8')
2005
2006 # Extract video thumbnail
c5a088d3 2007 mobj = re.search(r'<thumbnail>(.*?)</thumbnail>', webpage)
92743d42
RB
2008 if mobj is None:
2009 self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
2010 return
2011 video_thumbnail = mobj.group(1).decode('utf-8')
2012
2013 # # Extract video description
2014 # mobj = re.search(r'<meta property="og:description" content="(.*)" />', webpage)
2015 # if mobj is None:
2016 # self._downloader.trouble(u'ERROR: unable to extract video description')
2017 # return
2018 # video_description = mobj.group(1).decode('utf-8')
2019 # if not video_description: video_description = 'No description available.'
2020 video_description = 'No description available.'
2021
f24c674b 2022 # Vimeo specific: extract request signature
c5a088d3 2023 mobj = re.search(r'<request_signature>(.*?)</request_signature>', webpage)
92743d42
RB
2024 if mobj is None:
2025 self._downloader.trouble(u'ERROR: unable to extract request signature')
2026 return
2027 sig = mobj.group(1).decode('utf-8')
2028
f24c674b 2029 # Vimeo specific: Extract request signature expiration
c5a088d3 2030 mobj = re.search(r'<request_signature_expires>(.*?)</request_signature_expires>', webpage)
92743d42
RB
2031 if mobj is None:
2032 self._downloader.trouble(u'ERROR: unable to extract request signature expiration')
2033 return
2034 sig_exp = mobj.group(1).decode('utf-8')
2035
2036 video_url = "http://vimeo.com/moogaloop/play/clip:%s/%s/%s" % (video_id, sig, sig_exp)
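		# Assumed example: clip id 1234567 with signature 'abcdef' expiring at
		# 1300000000 gives
		#   http://vimeo.com/moogaloop/play/clip:1234567/abcdef/1300000000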
2037
2038 try:
2039 # Process video information
2040 self._downloader.process_info({
2041 'id': video_id.decode('utf-8'),
2042 'url': video_url,
2043 'uploader': video_uploader,
2044 'upload_date': u'NA',
2045 'title': video_title,
2046 'stitle': simple_title,
2fc31a48 2047 'ext': u'mp4',
92743d42
RB
2048 'thumbnail': video_thumbnail.decode('utf-8'),
2049 'description': video_description,
2052 'player_url': None,
2053 })
2054 except UnavailableVideoError:
2055 self._downloader.trouble(u'ERROR: unable to download video')
2056
2057
490fd7ae
RG
2058class GenericIE(InfoExtractor):
2059 """Generic last-resort information extractor."""
2060
f3098c4d
PH
2061 _VALID_URL = r'.*'
2062 IE_NAME = u'generic'
bdb3f7a7 2063
490fd7ae
RG
2064 def __init__(self, downloader=None):
2065 InfoExtractor.__init__(self, downloader)
2066
490fd7ae
RG
2067 def report_download_webpage(self, video_id):
2068 """Report webpage download."""
331ce0a0
RG
2069 self._downloader.to_screen(u'WARNING: Falling back on generic information extractor.')
2070 self._downloader.to_screen(u'[generic] %s: Downloading webpage' % video_id)
490fd7ae
RG
2071
2072 def report_extraction(self, video_id):
2073 """Report information extraction."""
331ce0a0 2074 self._downloader.to_screen(u'[generic] %s: Extracting information' % video_id)
490fd7ae
RG
2075
2076 def _real_initialize(self):
2077 return
2078
2079 def _real_extract(self, url):
df372a65 2080 # At this point we have a new video
9bf7fa52 2081 self._downloader.increment_downloads()
df372a65 2082
490fd7ae
RG
2083 video_id = url.split('/')[-1]
2084 request = urllib2.Request(url)
2085 try:
2086 self.report_download_webpage(video_id)
2087 webpage = urllib2.urlopen(request).read()
2088 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2089 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
2090 return
2091 except ValueError, err:
2092 # since this is the last-resort InfoExtractor, if
2093 # this error is thrown, it'll be thrown here
2094 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
2095 return
2096
a9806fd8 2097 self.report_extraction(video_id)
490fd7ae
RG
2098 # Start with something easy: JW Player in SWFObject
2099 mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
2100 if mobj is None:
2101 # Broaden the search a little bit
2102 mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
2103 if mobj is None:
2104 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
2105 return
2106
2107 # It's possible that one of the regexes
2108 # matched, but returned an empty group:
2109 if mobj.group(1) is None:
2110 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
2111 return
2112
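		# Hypothetical example of what the first regex targets: a page embedding
		# JW Player with
		#   flashvars: "file=http%3A%2F%2Fexample.com%2Fclip.mp4&autostart=true"
		# unquoting the captured group below yields http://example.com/clip.mp4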
2113 video_url = urllib.unquote(mobj.group(1))
c0a10ca8 2114 video_id = os.path.basename(video_url)
490fd7ae
RG
2115
2116 # here's a fun little line of code for you:
2117 video_extension = os.path.splitext(video_id)[1][1:]
c0a10ca8 2118 video_id = os.path.splitext(video_id)[0]
490fd7ae
RG
2119
2120 # it's tempting to parse this further, but you would
2121 # have to take into account all the variations like
2122 # Video Title - Site Name
2123 # Site Name | Video Title
2124 # Video Title - Tagline | Site Name
2125 # and so on and so forth; it's just not practical
2126 mobj = re.search(r'<title>(.*)</title>', webpage)
2127 if mobj is None:
2128 self._downloader.trouble(u'ERROR: unable to extract title')
2129 return
2130 video_title = mobj.group(1).decode('utf-8')
2131 video_title = sanitize_title(video_title)
31cbdaaf 2132 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
490fd7ae
RG
2133
2134 # video uploader is domain name
2135 mobj = re.match(r'(?:https?://)?([^/]*)/.*', url)
2136 if mobj is None:
2137 self._downloader.trouble(u'ERROR: unable to extract uploader (domain name) from URL')
2138 return
2139 video_uploader = mobj.group(1).decode('utf-8')
2140
2141 try:
2142 # Process video information
2143 self._downloader.process_info({
2144 'id': video_id.decode('utf-8'),
2145 'url': video_url.decode('utf-8'),
2146 'uploader': video_uploader,
138b11f3 2147 'upload_date': u'NA',
490fd7ae 2148 'title': video_title,
31cbdaaf 2149 'stitle': simple_title,
49c0028a 2150 'ext': video_extension.decode('utf-8'),
6ba562b0 2151 'format': u'NA',
e616ec0c 2152 'player_url': None,
49c0028a 2153 })
73f4e7af 2154 except UnavailableVideoError, err:
09cc744c 2155 self._downloader.trouble(u'\nERROR: unable to download video')
49c0028a 2156
2157
25af2bce
RG
2158class YoutubeSearchIE(InfoExtractor):
2159 """Information Extractor for YouTube search queries."""
bdb3f7a7 2160 _VALID_URL = r'ytsearch(\d+|all)?:[\s\S]+'
25af2bce
RG
2161 _TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en'
2162 _VIDEO_INDICATOR = r'href="/watch\?v=.+?"'
304a4d85 2163 _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
25af2bce 2164 _youtube_ie = None
fd9288c3 2165 _max_youtube_results = 1000
f3098c4d 2166 IE_NAME = u'youtube:search'
25af2bce 2167
f995f712 2168 def __init__(self, youtube_ie, downloader=None):
25af2bce
RG
2169 InfoExtractor.__init__(self, downloader)
2170 self._youtube_ie = youtube_ie
d3975459 2171
25af2bce
RG
2172 def report_download_page(self, query, pagenum):
2173 """Report attempt to download playlist page with given number."""
490fd7ae 2174 query = query.decode(preferredencoding())
331ce0a0 2175 self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
25af2bce
RG
2176
2177 def _real_initialize(self):
2178 self._youtube_ie.initialize()
d3975459 2179
25af2bce 2180 def _real_extract(self, query):
bdb3f7a7 2181 mobj = re.match(self._VALID_URL, query)
25af2bce 2182 if mobj is None:
147753eb 2183 self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
6f21f686 2184 return
25af2bce
RG
2185
2186 prefix, query = query.split(':')
2187 prefix = prefix[8:]
c0a10ca8 2188 query = query.encode('utf-8')
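		# Query forms handled below (assumed examples):
		#   'ytsearch:foo'    -> first result only
		#   'ytsearch15:foo'  -> first 15 results
		#   'ytsearchall:foo' -> up to _max_youtube_results results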
f995f712 2189 if prefix == '':
6f21f686
RG
2190 self._download_n_results(query, 1)
2191 return
f995f712 2192 elif prefix == 'all':
6f21f686
RG
2193 self._download_n_results(query, self._max_youtube_results)
2194 return
f995f712 2195 else:
25af2bce 2196 try:
e1f18b8a 2197 n = long(prefix)
25af2bce 2198 if n <= 0:
147753eb 2199 self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
6f21f686 2200 return
257453b9 2201 elif n > self._max_youtube_results:
c0a10ca8 2202 self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)' % (self._max_youtube_results, n))
257453b9 2203 n = self._max_youtube_results
6f21f686
RG
2204 self._download_n_results(query, n)
2205 return
e1f18b8a 2206 except ValueError: # parsing prefix as integer fails
6f21f686
RG
2207 self._download_n_results(query, 1)
2208 return
25af2bce
RG
2209
2210 def _download_n_results(self, query, n):
2211 """Downloads a specified number of results for a query"""
2212
2213 video_ids = []
2214 already_seen = set()
2215 pagenum = 1
2216
2217 while True:
2218 self.report_download_page(query, pagenum)
a9633f14 2219 result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1987c232 2220 request = urllib2.Request(result_url)
25af2bce
RG
2221 try:
2222 page = urllib2.urlopen(request).read()
2223 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
147753eb 2224 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
6f21f686 2225 return
25af2bce
RG
2226
2227 # Extract video identifiers
2228 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
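				# Each match looks like href="/watch?v=VIDEOID"; splitting on '=' and
				# dropping the trailing quote leaves just VIDEOID.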
2229 video_id = page[mobj.span()[0]:mobj.span()[1]].split('=')[2][:-1]
2230 if video_id not in already_seen:
2231 video_ids.append(video_id)
2232 already_seen.add(video_id)
2233 if len(video_ids) == n:
2234 # Specified n videos reached
25af2bce 2235 for id in video_ids:
6f21f686
RG
2236 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
2237 return
25af2bce 2238
304a4d85 2239 if re.search(self._MORE_PAGES_INDICATOR, page) is None:
25af2bce 2240 for id in video_ids:
6f21f686
RG
2241 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
2242 return
25af2bce
RG
2243
2244 pagenum = pagenum + 1
2245
c0a10ca8 2246
7e58d568
RG
2247class GoogleSearchIE(InfoExtractor):
2248 """Information Extractor for Google Video search queries."""
bdb3f7a7 2249 _VALID_URL = r'gvsearch(\d+|all)?:[\s\S]+'
7e58d568
RG
2250 _TEMPLATE_URL = 'http://video.google.com/videosearch?q=%s+site:video.google.com&start=%s&hl=en'
2251 _VIDEO_INDICATOR = r'videoplay\?docid=([^\&>]+)\&'
2252 _MORE_PAGES_INDICATOR = r'<span>Next</span>'
2253 _google_ie = None
2254 _max_google_results = 1000
f3098c4d 2255 IE_NAME = u'video.google:search'
7e58d568
RG
2256
2257 def __init__(self, google_ie, downloader=None):
2258 InfoExtractor.__init__(self, downloader)
2259 self._google_ie = google_ie
d3975459 2260
7e58d568
RG
2261 def report_download_page(self, query, pagenum):
2262 """Report attempt to download playlist page with given number."""
2263 query = query.decode(preferredencoding())
331ce0a0 2264 self._downloader.to_screen(u'[video.google] query "%s": Downloading page %s' % (query, pagenum))
7e58d568
RG
2265
2266 def _real_initialize(self):
2267 self._google_ie.initialize()
d3975459 2268
7e58d568 2269 def _real_extract(self, query):
bdb3f7a7 2270 mobj = re.match(self._VALID_URL, query)
7e58d568
RG
2271 if mobj is None:
2272 self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
2273 return
2274
2275 prefix, query = query.split(':')
2276 prefix = prefix[8:]
c0a10ca8 2277 query = query.encode('utf-8')
7e58d568
RG
2278 if prefix == '':
2279 self._download_n_results(query, 1)
2280 return
2281 elif prefix == 'all':
2282 self._download_n_results(query, self._max_google_results)
2283 return
2284 else:
2285 try:
2286 n = long(prefix)
2287 if n <= 0:
2288 self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
2289 return
2290 elif n > self._max_google_results:
c0a10ca8 2291 self._downloader.to_stderr(u'WARNING: gvsearch returns max %i results (you requested %i)' % (self._max_google_results, n))
7e58d568
RG
2292 n = self._max_google_results
2293 self._download_n_results(query, n)
2294 return
2295 except ValueError: # parsing prefix as integer fails
2296 self._download_n_results(query, 1)
2297 return
2298
2299 def _download_n_results(self, query, n):
2300 """Downloads a specified number of results for a query"""
2301
2302 video_ids = []
2303 already_seen = set()
2304 pagenum = 1
2305
2306 while True:
2307 self.report_download_page(query, pagenum)
2308 result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1987c232 2309 request = urllib2.Request(result_url)
7e58d568
RG
2310 try:
2311 page = urllib2.urlopen(request).read()
2312 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2313 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2314 return
2315
2316 # Extract video identifiers
2317 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2318 video_id = mobj.group(1)
2319 if video_id not in already_seen:
2320 video_ids.append(video_id)
2321 already_seen.add(video_id)
2322 if len(video_ids) == n:
2323 # Specified n videos reached
2324 for id in video_ids:
2325 self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
2326 return
2327
2328 if re.search(self._MORE_PAGES_INDICATOR, page) is None:
2329 for id in video_ids:
2330 self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
2331 return
2332
2333 pagenum = pagenum + 1
2334
c0a10ca8 2335
7e58d568
RG
2336class YahooSearchIE(InfoExtractor):
2337 """Information Extractor for Yahoo! Video search queries."""
bdb3f7a7 2338 _VALID_URL = r'yvsearch(\d+|all)?:[\s\S]+'
7e58d568
RG
2339 _TEMPLATE_URL = 'http://video.yahoo.com/search/?p=%s&o=%s'
2340 _VIDEO_INDICATOR = r'href="http://video\.yahoo\.com/watch/([0-9]+/[0-9]+)"'
2341 _MORE_PAGES_INDICATOR = r'\s*Next'
2342 _yahoo_ie = None
2343 _max_yahoo_results = 1000
f3098c4d 2344 IE_NAME = u'video.yahoo:search'
7e58d568
RG
2345
2346 def __init__(self, yahoo_ie, downloader=None):
2347 InfoExtractor.__init__(self, downloader)
2348 self._yahoo_ie = yahoo_ie
d3975459 2349
7e58d568
RG
2350 def report_download_page(self, query, pagenum):
2351 """Report attempt to download playlist page with given number."""
2352 query = query.decode(preferredencoding())
331ce0a0 2353 self._downloader.to_screen(u'[video.yahoo] query "%s": Downloading page %s' % (query, pagenum))
7e58d568
RG
2354
2355 def _real_initialize(self):
2356 self._yahoo_ie.initialize()
d3975459 2357
7e58d568 2358 def _real_extract(self, query):
bdb3f7a7 2359 mobj = re.match(self._VALID_URL, query)
7e58d568
RG
2360 if mobj is None:
2361 self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
2362 return
2363
2364 prefix, query = query.split(':')
2365 prefix = prefix[8:]
c0a10ca8 2366 query = query.encode('utf-8')
7e58d568
RG
2367 if prefix == '':
2368 self._download_n_results(query, 1)
2369 return
2370 elif prefix == 'all':
2371 self._download_n_results(query, self._max_yahoo_results)
2372 return
2373 else:
2374 try:
2375 n = long(prefix)
2376 if n <= 0:
2377 self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
2378 return
2379 elif n > self._max_yahoo_results:
c0a10ca8 2380 self._downloader.to_stderr(u'WARNING: yvsearch returns max %i results (you requested %i)' % (self._max_yahoo_results, n))
7e58d568
RG
2381 n = self._max_yahoo_results
2382 self._download_n_results(query, n)
2383 return
2384 except ValueError: # parsing prefix as integer fails
2385 self._download_n_results(query, 1)
2386 return
2387
2388 def _download_n_results(self, query, n):
2389 """Downloads a specified number of results for a query"""
2390
2391 video_ids = []
2392 already_seen = set()
2393 pagenum = 1
2394
2395 while True:
2396 self.report_download_page(query, pagenum)
2397 result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1987c232 2398 request = urllib2.Request(result_url)
7e58d568
RG
2399 try:
2400 page = urllib2.urlopen(request).read()
2401 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2402 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2403 return
2404
2405 # Extract video identifiers
2406 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2407 video_id = mobj.group(1)
2408 if video_id not in already_seen:
2409 video_ids.append(video_id)
2410 already_seen.add(video_id)
2411 if len(video_ids) == n:
2412 # Specified n videos reached
2413 for id in video_ids:
2414 self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
2415 return
2416
2417 if re.search(self._MORE_PAGES_INDICATOR, page) is None:
2418 for id in video_ids:
2419 self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
2420 return
2421
2422 pagenum = pagenum + 1
2423
c0a10ca8 2424
0c2dc87d
RG
2425class YoutubePlaylistIE(InfoExtractor):
2426 """Information Extractor for YouTube playlists."""
2427
2152ee86 2428 _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube\.com/(?:(?:view_play_list|my_playlists|artist|playlist)\?.*?(p|a|list)=|user/.*?/user/|p/|user/.*?#[pg]/c/)([0-9A-Za-z]+)(?:/.*?/([0-9A-Za-z_-]+))?.*'
f74e22ae 2429 _TEMPLATE_URL = 'http://www.youtube.com/%s?%s=%s&page=%s&gl=US&hl=en'
0c2dc87d 2430 _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
ce5cafea 2431 _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
0c2dc87d 2432 _youtube_ie = None
f3098c4d 2433 IE_NAME = u'youtube:playlist'
0c2dc87d
RG
2434
2435 def __init__(self, youtube_ie, downloader=None):
2436 InfoExtractor.__init__(self, downloader)
2437 self._youtube_ie = youtube_ie
d3975459 2438
0c2dc87d
RG
2439 def report_download_page(self, playlist_id, pagenum):
2440 """Report attempt to download playlist page with given number."""
331ce0a0 2441 self._downloader.to_screen(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum))
0c2dc87d
RG
2442
2443 def _real_initialize(self):
2444 self._youtube_ie.initialize()
d3975459 2445
0c2dc87d
RG
2446 def _real_extract(self, url):
2447 # Extract playlist id
2448 mobj = re.match(self._VALID_URL, url)
2449 if mobj is None:
147753eb 2450 self._downloader.trouble(u'ERROR: invalid url: %s' % url)
6f21f686 2451 return
0c2dc87d 2452
d119b54d
RG
2453 # Single video case
2454 if mobj.group(3) is not None:
2455 self._youtube_ie.extract(mobj.group(3))
2456 return
2457
0c2dc87d 2458 # Download playlist pages
f74e22ae
GI
2459 # the prefix defaults to 'p' for ordinary playlists, but other types (e.g. artist lists) need extra care
2460 playlist_prefix = mobj.group(1)
2461 if playlist_prefix == 'a':
2462 playlist_access = 'artist'
2463 else:
7cc3c6fd 2464 playlist_prefix = 'p'
f74e22ae
GI
2465 playlist_access = 'view_play_list'
2466 playlist_id = mobj.group(2)
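		# Assumed example: with the default prefix 'p', page requests look like
		#   http://www.youtube.com/view_play_list?p=PLAYLISTID&page=1&gl=US&hl=en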
0c2dc87d
RG
2467 video_ids = []
2468 pagenum = 1
2469
2470 while True:
2471 self.report_download_page(playlist_id, pagenum)
f74e22ae 2472 request = urllib2.Request(self._TEMPLATE_URL % (playlist_access, playlist_prefix, playlist_id, pagenum))
0c2dc87d
RG
2473 try:
2474 page = urllib2.urlopen(request).read()
2475 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
147753eb 2476 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
6f21f686 2477 return
0c2dc87d
RG
2478
2479 # Extract video identifiers
27d98b6e 2480 ids_in_page = []
0c2dc87d 2481 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
27d98b6e
RG
2482 if mobj.group(1) not in ids_in_page:
2483 ids_in_page.append(mobj.group(1))
2484 video_ids.extend(ids_in_page)
0c2dc87d 2485
ce5cafea 2486 if re.search(self._MORE_PAGES_INDICATOR, page) is None:
0c2dc87d
RG
2487 break
2488 pagenum = pagenum + 1
2489
8cc44341
RG
2490 playliststart = self._downloader.params.get('playliststart', 1) - 1
2491 playlistend = self._downloader.params.get('playlistend', -1)
2492 if playlistend == -1:
			video_ids = video_ids[playliststart:]
		else:
			video_ids = video_ids[playliststart:playlistend]
2493
0c2dc87d 2494 for id in video_ids:
6f21f686
RG
2495 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
2496 return
0c2dc87d 2497
c0a10ca8 2498
c39c05cd
A
2499class YoutubeUserIE(InfoExtractor):
2500 """Information Extractor for YouTube users."""
2501
5aba6ea4 2502 _VALID_URL = r'(?:(?:(?:http://)?(?:\w+\.)?youtube\.com/user/)|ytuser:)([A-Za-z0-9_-]+)'
c39c05cd 2503 _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
5aba6ea4
RG
2504 _GDATA_PAGE_SIZE = 50
2505 _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d'
2506 _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
c39c05cd 2507 _youtube_ie = None
f3098c4d 2508 IE_NAME = u'youtube:user'
c39c05cd
A
2509
2510 def __init__(self, youtube_ie, downloader=None):
2511 InfoExtractor.__init__(self, downloader)
2512 self._youtube_ie = youtube_ie
d3975459 2513
5aba6ea4 2514 def report_download_page(self, username, start_index):
c39c05cd 2515 """Report attempt to download user page."""
5aba6ea4 2516 self._downloader.to_screen(u'[youtube] user %s: Downloading video ids from %d to %d' %
c0a10ca8 2517 (username, start_index, start_index + self._GDATA_PAGE_SIZE))
c39c05cd
A
2518
2519 def _real_initialize(self):
2520 self._youtube_ie.initialize()
d3975459 2521
c39c05cd
A
2522 def _real_extract(self, url):
2523 # Extract username
2524 mobj = re.match(self._VALID_URL, url)
2525 if mobj is None:
2526 self._downloader.trouble(u'ERROR: invalid url: %s' % url)
2527 return
2528
c39c05cd 2529 username = mobj.group(1)
5aba6ea4
RG
2530
2531 # Download video ids using YouTube Data API. Result size per
2532 # query is limited (currently to 50 videos) so we need to query
2533 # page by page until there are no video ids - it means we got
2534 # all of them.
2535
c39c05cd 2536 video_ids = []
5aba6ea4 2537 pagenum = 0
c39c05cd 2538
5aba6ea4
RG
2539 while True:
2540 start_index = pagenum * self._GDATA_PAGE_SIZE + 1
2541 self.report_download_page(username, start_index)
c39c05cd 2542
5aba6ea4 2543 request = urllib2.Request(self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index))
c39c05cd 2544
5aba6ea4
RG
2545 try:
2546 page = urllib2.urlopen(request).read()
2547 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2548 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2549 return
c39c05cd 2550
5aba6ea4
RG
2551 # Extract video identifiers
2552 ids_in_page = []
2553
2554 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2555 if mobj.group(1) not in ids_in_page:
2556 ids_in_page.append(mobj.group(1))
2557
2558 video_ids.extend(ids_in_page)
2559
2560 # A little optimization - if current page is not
2561 # "full", ie. does not contain PAGE_SIZE video ids then
2562 # we can assume that this page is the last one - there
2563 # are no more ids on further pages - no need to query
2564 # again.
2565
2566 if len(ids_in_page) < self._GDATA_PAGE_SIZE:
2567 break
2568
2569 pagenum += 1
2570
2571 all_ids_count = len(video_ids)
8cc44341
RG
2572 playliststart = self._downloader.params.get('playliststart', 1) - 1
2573 playlistend = self._downloader.params.get('playlistend', -1)
204c9398 2574
5aba6ea4
RG
2575 if playlistend == -1:
2576 video_ids = video_ids[playliststart:]
2577 else:
2578 video_ids = video_ids[playliststart:playlistend]
7a9054ec 2579
5aba6ea4 2580 self._downloader.to_screen("[youtube] user %s: Collected %d video ids (downloading %d of them)" %
c0a10ca8 2581 (username, all_ids_count, len(video_ids)))
5aba6ea4
RG
2582
2583 for video_id in video_ids:
2584 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % video_id)
2585
c39c05cd 2586
27179cfd
VV
2587class DepositFilesIE(InfoExtractor):
2588 """Information extractor for depositfiles.com"""
2589
2590 _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles.com/(?:../(?#locale))?files/(.+)'
f3098c4d 2591 IE_NAME = u'DepositFiles'
27179cfd
VV
2592
2593 def __init__(self, downloader=None):
2594 InfoExtractor.__init__(self, downloader)
2595
27179cfd
VV
2596 def report_download_webpage(self, file_id):
2597 """Report webpage download."""
2598 self._downloader.to_screen(u'[DepositFiles] %s: Downloading webpage' % file_id)
2599
2600 def report_extraction(self, file_id):
2601 """Report information extraction."""
2602 self._downloader.to_screen(u'[DepositFiles] %s: Extracting information' % file_id)
2603
2604 def _real_initialize(self):
2605 return
2606
2607 def _real_extract(self, url):
2608 # At this point we have a new file
2609 self._downloader.increment_downloads()
2610
2611 file_id = url.split('/')[-1]
2612 # Rebuild url in english locale
2613 url = 'http://depositfiles.com/en/files/' + file_id
2614
2615 # Retrieve file webpage with 'Free download' button pressed
2616 free_download_indication = { 'gateway_result' : '1' }
1987c232 2617 request = urllib2.Request(url, urllib.urlencode(free_download_indication))
27179cfd
VV
2618 try:
2619 self.report_download_webpage(file_id)
2620 webpage = urllib2.urlopen(request).read()
2621 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2622 self._downloader.trouble(u'ERROR: Unable to retrieve file webpage: %s' % str(err))
2623 return
2624
2625 # Search for the real file URL
2626 mobj = re.search(r'<form action="(http://fileshare.+?)"', webpage)
2627 if (mobj is None) or (mobj.group(1) is None):
2628 # Try to figure out reason of the error.
2629 mobj = re.search(r'<strong>(Attention.*?)</strong>', webpage, re.DOTALL)
2630 if (mobj is not None) and (mobj.group(1) is not None):
2631 restriction_message = re.sub(r'\s+', ' ', mobj.group(1)).strip()
2632 self._downloader.trouble(u'ERROR: %s' % restriction_message)
2633 else:
2634 self._downloader.trouble(u'ERROR: unable to extract download URL from: %s' % url)
2635 return
2636
2637 file_url = mobj.group(1)
2638 file_extension = os.path.splitext(file_url)[1][1:]
2639
2640 # Search for file title
2641 mobj = re.search(r'<b title="(.*?)">', webpage)
2642 if mobj is None:
2643 self._downloader.trouble(u'ERROR: unable to extract title')
2644 return
2645 file_title = mobj.group(1).decode('utf-8')
2646
2647 try:
2648 # Process file information
2649 self._downloader.process_info({
2650 'id': file_id.decode('utf-8'),
2651 'url': file_url.decode('utf-8'),
2652 'uploader': u'NA',
2653 'upload_date': u'NA',
2654 'title': file_title,
2655 'stitle': file_title,
2656 'ext': file_extension.decode('utf-8'),
2657 'format': u'NA',
2658 'player_url': None,
2659 })
2660 except UnavailableVideoError, err:
2661 self._downloader.trouble(u'ERROR: unable to download file')
2662
c0a10ca8 2663
9f5f9602
GI
2664class FacebookIE(InfoExtractor):
2665 """Information Extractor for Facebook"""
2666
2667 _VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook.com/video/video.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)'
2668 _LOGIN_URL = 'https://login.facebook.com/login.php?m&next=http%3A%2F%2Fm.facebook.com%2Fhome.php&'
2669 _NETRC_MACHINE = 'facebook'
2670 _available_formats = ['highqual', 'lowqual']
2671 _video_extensions = {
2672 'highqual': 'mp4',
2673 'lowqual': 'mp4',
2674 }
f3098c4d 2675 IE_NAME = u'facebook'
9f5f9602
GI
2676
2677 def __init__(self, downloader=None):
2678 InfoExtractor.__init__(self, downloader)
2679
9f5f9602
GI
2680 def _reporter(self, message):
2681 """Add header and report message."""
2682 self._downloader.to_screen(u'[facebook] %s' % message)
2683
2684 def report_login(self):
2685 """Report attempt to log in."""
2686 self._reporter(u'Logging in')
2687
2688 def report_video_webpage_download(self, video_id):
2689 """Report attempt to download video webpage."""
2690 self._reporter(u'%s: Downloading video webpage' % video_id)
2691
2692 def report_information_extraction(self, video_id):
2693 """Report attempt to extract video information."""
2694 self._reporter(u'%s: Extracting video information' % video_id)
2695
2696 def _parse_page(self, video_webpage):
2697 """Extract video information from page"""
2698 # General data
2699 data = {'title': r'class="video_title datawrap">(.*?)</',
2700 'description': r'<div class="datawrap">(.*?)</div>',
2701 'owner': r'\("video_owner_name", "(.*?)"\)',
2702 'upload_date': r'data-date="(.*?)"',
2703 'thumbnail': r'\("thumb_url", "(?P<THUMB>.*?)"\)',
2704 }
2705 video_info = {}
2706 for piece in data.keys():
2707 mobj = re.search(data[piece], video_webpage)
2708 if mobj is not None:
2709 video_info[piece] = urllib.unquote_plus(mobj.group(1).decode("unicode_escape"))
2710
2711 # Video urls
2712 video_urls = {}
2713 for fmt in self._available_formats:
2714 mobj = re.search(r'\("%s_src\", "(.+?)"\)' % fmt, video_webpage)
2715 if mobj is not None:
2716 # URL is in a Javascript segment inside an escaped Unicode format within
2717 # the generally utf-8 page
2718 video_urls[fmt] = urllib.unquote_plus(mobj.group(1).decode("unicode_escape"))
2719 video_info['video_urls'] = video_urls
2720
2721 return video_info
2722
2723 def _real_initialize(self):
2724 if self._downloader is None:
2725 return
2726
2727 useremail = None
2728 password = None
2729 downloader_params = self._downloader.params
2730
2731 # Attempt to use provided username and password or .netrc data
2732 if downloader_params.get('username', None) is not None:
2733 useremail = downloader_params['username']
2734 password = downloader_params['password']
2735 elif downloader_params.get('usenetrc', False):
2736 try:
2737 info = netrc.netrc().authenticators(self._NETRC_MACHINE)
2738 if info is not None:
2739 useremail = info[0]
2740 password = info[2]
2741 else:
2742 raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
2743 except (IOError, netrc.NetrcParseError), err:
2744 self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
2745 return
2746
2747 if useremail is None:
2748 return
2749
2750 # Log in
2751 login_form = {
2752 'email': useremail,
2753 'pass': password,
2754 'login': 'Log+In'
2755 }
2756 request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form))
2757 try:
2758 self.report_login()
2759 login_results = urllib2.urlopen(request).read()
2760 if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None:
2761 self._downloader.to_stderr(u'WARNING: unable to log in: bad username/password, or exceeded login rate limit (~3/min). Check credentials or wait.')
2762 return
2763 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2764 self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
2765 return
2766
2767 def _real_extract(self, url):
2768 mobj = re.match(self._VALID_URL, url)
2769 if mobj is None:
2770 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
2771 return
2772 video_id = mobj.group('ID')
2773
2774 # Get video webpage
2775 self.report_video_webpage_download(video_id)
2776 request = urllib2.Request('https://www.facebook.com/video/video.php?v=%s' % video_id)
2777 try:
2778 page = urllib2.urlopen(request)
2779 video_webpage = page.read()
2780 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2781 self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
2782 return
2783
2784 # Start extracting information
2785 self.report_information_extraction(video_id)
2786
2787 # Extract information
2788 video_info = self._parse_page(video_webpage)
2789
2790 # uploader
2791 if 'owner' not in video_info:
2792 self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
2793 return
2794 video_uploader = video_info['owner']
2795
2796 # title
2797 if 'title' not in video_info:
2798 self._downloader.trouble(u'ERROR: unable to extract video title')
2799 return
2800 video_title = video_info['title']
2801 video_title = video_title.decode('utf-8')
2802 video_title = sanitize_title(video_title)
2803
2804 # simplified title
2805 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
2806 simple_title = simple_title.strip(ur'_')
2807
2808 # thumbnail image
2809 if 'thumbnail' not in video_info:
2810 self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
2811 video_thumbnail = ''
2812 else:
2813 video_thumbnail = video_info['thumbnail']
2814
2815 # upload date
2816 upload_date = u'NA'
2817 if 'upload_date' in video_info:
2818 upload_time = video_info['upload_date']
2819 timetuple = email.utils.parsedate_tz(upload_time)
2820 if timetuple is not None:
2821 try:
2822 upload_date = time.strftime('%Y%m%d', timetuple[0:9])
2823 except:
2824 pass
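		# Assumed example: a data-date value like 'Mon, 05 Sep 2011 13:00:00 +0000'
		# parses via email.utils.parsedate_tz and becomes upload_date '20110905'.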
2825
2826 # description
8b95c387 2827 video_description = video_info.get('description', 'No description available.')
9f5f9602
GI
2828
2829 url_map = video_info['video_urls']
2830 if len(url_map.keys()) > 0:
2831 # Decide which formats to download
2832 req_format = self._downloader.params.get('format', None)
2833 format_limit = self._downloader.params.get('format_limit', None)
2834
2835 if format_limit is not None and format_limit in self._available_formats:
2836 format_list = self._available_formats[self._available_formats.index(format_limit):]
2837 else:
2838 format_list = self._available_formats
2839 existing_formats = [x for x in format_list if x in url_map]
2840 if len(existing_formats) == 0:
2841 self._downloader.trouble(u'ERROR: no known formats available for video')
2842 return
2843 if req_format is None:
2844 video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
c52b01f3
K
2845 elif req_format == 'worst':
2846 video_url_list = [(existing_formats[len(existing_formats)-1], url_map[existing_formats[len(existing_formats)-1]])] # worst quality
9f5f9602
GI
2847 elif req_format == '-1':
2848 video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
2849 else:
2850 # Specific format
2851 if req_format not in url_map:
2852 self._downloader.trouble(u'ERROR: requested format not available')
2853 return
2854 video_url_list = [(req_format, url_map[req_format])] # Specific format
2855
2856 for format_param, video_real_url in video_url_list:
2857
2858 # At this point we have a new video
2859 self._downloader.increment_downloads()
2860
2861 # Extension
2862 video_extension = self._video_extensions.get(format_param, 'mp4')
2863
9f5f9602
GI
2864 try:
2865 # Process video information
2866 self._downloader.process_info({
2867 'id': video_id.decode('utf-8'),
2868 'url': video_real_url.decode('utf-8'),
2869 'uploader': video_uploader.decode('utf-8'),
2870 'upload_date': upload_date,
2871 'title': video_title,
2872 'stitle': simple_title,
2873 'ext': video_extension.decode('utf-8'),
2874 'format': (format_param is None and u'NA' or format_param.decode('utf-8')),
2875 'thumbnail': video_thumbnail.decode('utf-8'),
2876 'description': video_description.decode('utf-8'),
2877 'player_url': None,
2878 })
2879 except UnavailableVideoError, err:
2880 self._downloader.trouble(u'\nERROR: unable to download video')
2881
7745f5d8
PH
2882class BlipTVIE(InfoExtractor):
2883 """Information extractor for blip.tv"""
2884
1cab2c6d 2885 _VALID_URL = r'^(?:https?://)?(?:\w+\.)?blip\.tv(/.+)$'
7745f5d8 2886 _URL_EXT = r'^.*\.([a-z0-9]+)$'
f3098c4d 2887 IE_NAME = u'blip.tv'
7745f5d8 2888
7745f5d8
PH
2889 def report_extraction(self, file_id):
2890 """Report information extraction."""
aded78d9 2891 self._downloader.to_screen(u'[blip.tv] %s: Extracting information' % file_id)
7745f5d8
PH
2892
2893 def _simplify_title(self, title):
2894 res = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', title)
2895 res = res.strip(ur'_')
2896 return res
2897
2898 def _real_extract(self, url):
2899 mobj = re.match(self._VALID_URL, url)
2900 if mobj is None:
2901 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
2902 return
2903
1293ce58
PH
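# blip.tv returns the video metadata as JSON when skin=json&version=2&no_wrap=1 is appended to the page URL; that response is parsed below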
2904 if '?' in url:
2905 cchar = '&'
2906 else:
2907 cchar = '?'
2908 json_url = url + cchar + 'skin=json&version=2&no_wrap=1'
7745f5d8 2909 request = urllib2.Request(json_url)
aded78d9 2910 self.report_extraction(mobj.group(1))
7745f5d8
PH
2911 try:
2912 json_code = urllib2.urlopen(request).read()
2913 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2914 self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
2915 return
2916 try:
2917 json_data = json.loads(json_code)
1293ce58
PH
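# Some responses wrap the metadata in a 'Post' object; unwrap it when present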
2918 if 'Post' in json_data:
2919 data = json_data['Post']
2920 else:
2921 data = json_data
7745f5d8
PH
2922
2923 upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d')
2924 video_url = data['media']['url']
2925 umobj = re.match(self._URL_EXT, video_url)
2926 if umobj is None:
2927 raise ValueError('Can not determine filename extension')
2928 ext = umobj.group(1)
2929
a1cab7ce
PH
2930 self._downloader.increment_downloads()
2931
7745f5d8
PH
2932 info = {
2933 'id': data['item_id'],
2934 'url': video_url,
2935 'uploader': data['display_name'],
2936 'upload_date': upload_date,
2937 'title': data['title'],
2938 'stitle': self._simplify_title(data['title']),
2939 'ext': ext,
2940 'format': data['media']['mimeType'],
2941 'thumbnail': data['thumbnailUrl'],
2942 'description': data['description'],
2943 'player_url': data['embedUrl']
2944 }
2945 except (ValueError,KeyError), err:
aded78d9 2946 self._downloader.trouble(u'ERROR: unable to parse video information: %s' % repr(err))
7745f5d8
PH
2947 return
2948
2949 try:
2950 self._downloader.process_info(info)
2951 except UnavailableVideoError, err:
2952 self._downloader.trouble(u'\nERROR: unable to download video')
2953
2954
9b0a8bc1
PH
2955class MyVideoIE(InfoExtractor):
2956 """Information Extractor for myvideo.de."""
2957
2958 _VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/watch/([0-9]+)/([^?/]+).*'
f3098c4d 2959 IE_NAME = u'myvideo'
9b0a8bc1
PH
2960
2961 def __init__(self, downloader=None):
2962 InfoExtractor.__init__(self, downloader)
2963
9b0a8bc1
PH
2964 def report_download_webpage(self, video_id):
2965 """Report webpage download."""
2966 self._downloader.to_screen(u'[myvideo] %s: Downloading webpage' % video_id)
2967
2968 def report_extraction(self, video_id):
2969 """Report information extraction."""
2970 self._downloader.to_screen(u'[myvideo] %s: Extracting information' % video_id)
2971
2972 def _real_initialize(self):
2973 return
2974
2975 def _real_extract(self,url):
2976 mobj = re.match(self._VALID_URL, url)
2977 if mobj is None:
2978 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
2979 return
2980
2981 video_id = mobj.group(1)
2982 simple_title = mobj.group(2).decode('utf-8')
2983 # should actually not be necessary
2984 simple_title = sanitize_title(simple_title)
2985 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', simple_title)
2986
2987 # Get video webpage
2988 request = urllib2.Request('http://www.myvideo.de/watch/%s' % video_id)
2989 try:
2990 self.report_download_webpage(video_id)
2991 webpage = urllib2.urlopen(request).read()
2992 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2993 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
2994 return
2995
2996 self.report_extraction(video_id)
2997 mobj = re.search(r'<link rel=\'image_src\' href=\'(http://is[0-9].myvideo\.de/de/movie[0-9]+/[a-f0-9]+)/thumbs/[^.]+\.jpg\' />',
2998 webpage)
2999 if mobj is None:
3000 self._downloader.trouble(u'ERROR: unable to extract media URL')
3001 return
3002 video_url = mobj.group(1) + ('/%s.flv' % video_id)
3003
3004 mobj = re.search('<title>([^<]+)</title>', webpage)
3005 if mobj is None:
3006 self._downloader.trouble(u'ERROR: unable to extract title')
3007 return
3008
3009 video_title = mobj.group(1)
3010 video_title = sanitize_title(video_title)
3011
3012 try:
3014 self._downloader.process_info({
3015 'id': video_id,
3016 'url': video_url,
3017 'uploader': u'NA',
3018 'upload_date': u'NA',
3019 'title': video_title,
3020 'stitle': simple_title,
3021 'ext': u'flv',
3022 'format': u'NA',
3023 'player_url': None,
3024 })
3025 except UnavailableVideoError:
3026 self._downloader.trouble(u'\nERROR: Unable to download video')
3027
c8e30044 3028class ComedyCentralIE(InfoExtractor):
f166bccc 3029 """Information extractor for The Daily Show and Colbert Report """
c8e30044 3030
f3098c4d
PH
3031 _VALID_URL = r'^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport))|(https?://)?(www\.)?(?P<showname>thedailyshow|colbertnation)\.com/full-episodes/(?P<episode>.*)$'
3032 IE_NAME = u'comedycentral'
c8e30044 3033
c8e30044
PH
3034 def report_extraction(self, episode_id):
3035 self._downloader.to_screen(u'[comedycentral] %s: Extracting information' % episode_id)
3036
3037 def report_config_download(self, episode_id):
3038 self._downloader.to_screen(u'[comedycentral] %s: Downloading configuration' % episode_id)
3039
b487ef08
PH
3040 def report_index_download(self, episode_id):
3041 self._downloader.to_screen(u'[comedycentral] %s: Downloading show index' % episode_id)
3042
fedf9f39
PH
3043 def report_player_url(self, episode_id):
3044 self._downloader.to_screen(u'[comedycentral] %s: Determining player URL' % episode_id)
3045
c8e30044
PH
3046 def _simplify_title(self, title):
3047 res = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', title)
3048 res = res.strip(ur'_')
3049 return res
3050
3051 def _real_extract(self, url):
3052 mobj = re.match(self._VALID_URL, url)
3053 if mobj is None:
3054 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
3055 return
f166bccc
PH
3056
3057 if mobj.group('shortname'):
3058 if mobj.group('shortname') in ('tds', 'thedailyshow'):
3059 url = 'http://www.thedailyshow.com/full-episodes/'
3060 else:
3061 url = 'http://www.colbertnation.com/full-episodes/'
3062 mobj = re.match(self._VALID_URL, url)
3063 assert mobj is not None
3064
3065 dlNewest = not mobj.group('episode')
3066 if dlNewest:
3067 epTitle = mobj.group('showname')
3068 else:
3069 epTitle = mobj.group('episode')
c8e30044
PH
3070
3071 req = urllib2.Request(url)
3072 self.report_extraction(epTitle)
3073 try:
f166bccc
PH
3074 htmlHandle = urllib2.urlopen(req)
3075 html = htmlHandle.read()
c8e30044
PH
3076 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3077 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % unicode(err))
3078 return
f166bccc
PH
3079 if dlNewest:
3080 url = htmlHandle.geturl()
3081 mobj = re.match(self._VALID_URL, url)
3082 if mobj is None:
3083 self._downloader.trouble(u'ERROR: Invalid redirected URL: ' + url)
3084 return
3085 if mobj.group('episode') == '':
3086 self._downloader.trouble(u'ERROR: Redirected URL is still not specific: ' + url)
3087 return
3088 epTitle = mobj.group('episode')
c8e30044 3089
b487ef08 3090 mMovieParams = re.findall('<param name="movie" value="(http://media.mtvnservices.com/([^"]*episode.*?:.*?))"/>', html)
c8e30044
PH
3091 if len(mMovieParams) == 0:
3092 self._downloader.trouble(u'ERROR: unable to find Flash URL in webpage ' + url)
3093 return
b487ef08
PH
3094
3095 playerUrl_raw = mMovieParams[0][0]
fedf9f39
PH
3096 self.report_player_url(epTitle)
3097 try:
b487ef08
PH
3098 urlHandle = urllib2.urlopen(playerUrl_raw)
3099 playerUrl = urlHandle.geturl()
3100 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3101 self._downloader.trouble(u'ERROR: unable to find out player URL: ' + unicode(err))
3102 return
3103
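# The URI taken from the flash <param> identifies the episode; the MRSS index feed requested below lists one <item> per video segment, each handled as a separate download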
3104 uri = mMovieParams[0][1]
3105 indexUrl = 'http://shadow.comedycentral.com/feeds/video_player/mrss/?' + urllib.urlencode({'uri': uri})
3106 self.report_index_download(epTitle)
3107 try:
3108 indexXml = urllib2.urlopen(indexUrl).read()
fedf9f39 3109 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
b487ef08 3110 self._downloader.trouble(u'ERROR: unable to download episode index: ' + unicode(err))
fedf9f39 3111 return
fedf9f39 3112
b487ef08
PH
3113 idoc = xml.etree.ElementTree.fromstring(indexXml)
3114 itemEls = idoc.findall('.//item')
3115 for itemEl in itemEls:
3116 mediaId = itemEl.findall('./guid')[0].text
3117 shortMediaId = mediaId.split(':')[-1]
3118 showId = mediaId.split(':')[-2].replace('.com', '')
3119 officialTitle = itemEl.findall('./title')[0].text
3120 officialDate = itemEl.findall('./pubDate')[0].text
3121
c8e30044
PH
3122 configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' +
3123 urllib.urlencode({'uri': mediaId}))
3124 configReq = urllib2.Request(configUrl)
3125 self.report_config_download(epTitle)
3126 try:
3127 configXml = urllib2.urlopen(configReq).read()
3128 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3129 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % unicode(err))
3130 return
46c8c432 3131
c8e30044
PH
3132 cdoc = xml.etree.ElementTree.fromstring(configXml)
3133 turls = []
3134 for rendition in cdoc.findall('.//rendition'):
3135 finfo = (rendition.attrib['bitrate'], rendition.findall('./src')[0].text)
3136 turls.append(finfo)
3137
a88bc6bb 3138 if len(turls) == 0:
b487ef08 3139 self._downloader.trouble(u'\nERROR: unable to download ' + mediaId + ': No videos found')
a88bc6bb
PH
3140 continue
3141
c8e30044
PH
3142 # For now, just pick the highest bitrate
3143 format,video_url = turls[-1]
3144
3145 self._downloader.increment_downloads()
a88bc6bb 3146
b487ef08 3147 effTitle = showId + '-' + epTitle
c8e30044 3148 info = {
b487ef08 3149 'id': shortMediaId,
c8e30044 3150 'url': video_url,
b487ef08
PH
3151 'uploader': showId,
3152 'upload_date': officialDate,
a88bc6bb
PH
3153 'title': effTitle,
3154 'stitle': self._simplify_title(effTitle),
c8e30044
PH
3155 'ext': 'mp4',
3156 'format': format,
3157 'thumbnail': None,
b487ef08
PH
3158 'description': officialTitle,
3159 'player_url': playerUrl
c8e30044 3160 }
46c8c432 3161
c8e30044
PH
3162 try:
3163 self._downloader.process_info(info)
3164 except UnavailableVideoError, err:
b487ef08 3165 self._downloader.trouble(u'\nERROR: unable to download ' + mediaId)
a88bc6bb 3166 continue
c8e30044
PH
3167
3168
f9c68787
PH
3169class EscapistIE(InfoExtractor):
3170 """Information extractor for The Escapist """
3171
3172 _VALID_URL = r'^(https?://)?(www\.)?escapistmagazine\.com/videos/view/(?P<showname>[^/]+)/(?P<episode>[^/?]+)[/?].*$'
f3098c4d 3173 IE_NAME = u'escapist'
f9c68787 3174
f9c68787
PH
3175 def report_extraction(self, showName):
3176 self._downloader.to_screen(u'[escapist] %s: Extracting information' % showName)
3177
3178 def report_config_download(self, showName):
3179 self._downloader.to_screen(u'[escapist] %s: Downloading configuration' % showName)
3180
3181 def _simplify_title(self, title):
3182 res = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', title)
3183 res = res.strip(ur'_')
3184 return res
3185
3186 def _real_extract(self, url):
3187 htmlParser = HTMLParser.HTMLParser()
3188
3189 mobj = re.match(self._VALID_URL, url)
3190 if mobj is None:
3191 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
3192 return
3193 showName = mobj.group('showname')
3194 videoId = mobj.group('episode')
3195
3196 self.report_extraction(showName)
3197 try:
3198 webPage = urllib2.urlopen(url).read()
3199 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3200 self._downloader.trouble(u'ERROR: unable to download webpage: ' + unicode(err))
3201 return
3202
3203 descMatch = re.search('<meta name="description" content="([^"]*)"', webPage)
3204 description = htmlParser.unescape(descMatch.group(1))
3205 imgMatch = re.search('<meta property="og:image" content="([^"]*)"', webPage)
3206 imgUrl = htmlParser.unescape(imgMatch.group(1))
3207 playerUrlMatch = re.search('<meta property="og:video" content="([^"]*)"', webPage)
3208 playerUrl = htmlParser.unescape(playerUrlMatch.group(1))
3209 configUrlMatch = re.search('config=(.*)$', playerUrl)
3210 configUrl = urllib2.unquote(configUrlMatch.group(1))
3211
3212 self.report_config_download(showName)
3213 try:
3214 configJSON = urllib2.urlopen(configUrl).read()
3215 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3216 self._downloader.trouble(u'ERROR: unable to download configuration: ' + unicode(err))
3217 return
3218
3219 # Technically, it's JavaScript, not JSON
3220 configJSON = configJSON.replace("'", '"')
3221
3222 try:
3223 config = json.loads(configJSON)
3224 except (ValueError,), err:
3225 self._downloader.trouble(u'ERROR: Invalid JSON in configuration file: ' + unicode(err))
3226 return
3227
3228 playlist = config['playlist']
3229 videoUrl = playlist[1]['url']
3230
3231 self._downloader.increment_downloads()
3232 info = {
3233 'id': videoId,
3234 'url': videoUrl,
3235 'uploader': showName,
3236 'upload_date': None,
3237 'title': showName,
3238 'stitle': self._simplify_title(showName),
3239 'ext': 'flv',
3240 'format': 'flv',
3241 'thumbnail': imgUrl,
3242 'description': description,
3243 'player_url': playerUrl,
3244 }
3245
3246 try:
3247 self._downloader.process_info(info)
3248 except UnavailableVideoError, err:
3249 self._downloader.trouble(u'\nERROR: unable to download ' + videoId)
3250
3251
3252
65cd34c5
RG
3253class PostProcessor(object):
3254 """Post Processor class.
3255
3256 PostProcessor objects can be added to downloaders with their
3257 add_post_processor() method. When the downloader has finished a
3258 successful download, it will take its internal chain of PostProcessors
3259 and start calling the run() method on each one of them, first with
3260 an initial argument and then with the returned value of the previous
3261 PostProcessor.
3262
3263 The chain will be stopped if one of them ever returns None or the end
3264 of the chain is reached.
3265
3266 PostProcessor objects follow a "mutual registration" process similar
3267 to InfoExtractor objects.
3268 """
3269
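# Mutual registration: FileDownloader.add_post_processor() (see main() below) is expected to keep the PP in its chain and call set_downloader() back on it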
3270 _downloader = None
3271
3272 def __init__(self, downloader=None):
3273 self._downloader = downloader
3274
65cd34c5
RG
3275 def set_downloader(self, downloader):
3276 """Sets the downloader for this PP."""
3277 self._downloader = downloader
d3975459 3278
65cd34c5
RG
3279 def run(self, information):
3280 """Run the PostProcessor.
3281
3282 The "information" argument is a dictionary like the ones
2f11508a 3283 composed by InfoExtractors. The only difference is that this
65cd34c5
RG
3284 one has an extra field called "filepath" that points to the
3285 downloaded file.
3286
3287 When this method returns None, the postprocessing chain is
3288 stopped. However, this method may return an information
3289 dictionary that will be passed to the next postprocessing
3290 object in the chain. It can be the one it received after
3291 changing some fields.
3292
3293 In addition, this method may raise a PostProcessingError
3294 exception that will be taken into account by the downloader
3295 it was called from.
3296 """
3297 return information # by default, do nothing
d3975459 3298
c0a10ca8 3299
3072fab1
RG
3300class FFmpegExtractAudioPP(PostProcessor):
3301
c99dcbd2 3302 def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, keepvideo=False):
3072fab1
RG
3303 PostProcessor.__init__(self, downloader)
3304 if preferredcodec is None:
3305 preferredcodec = 'best'
3306 self._preferredcodec = preferredcodec
18b7f874 3307 self._preferredquality = preferredquality
3308 self._keepvideo = keepvideo
3072fab1
RG
3309
3310 @staticmethod
3311 def get_audio_codec(path):
da273188 3312 try:
2727dbf7
RG
3313 cmd = ['ffprobe', '-show_streams', '--', path]
3314 handle = subprocess.Popen(cmd, stderr=file(os.path.devnull, 'w'), stdout=subprocess.PIPE)
da273188
RG
3315 output = handle.communicate()[0]
3316 if handle.wait() != 0:
3317 return None
3318 except (IOError, OSError):
3072fab1
RG
3319 return None
3320 audio_codec = None
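# ffprobe prints one key=value line per stream field; remember the last codec_name seen and return it once the enclosing stream turns out to be the audio one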
3321 for line in output.split('\n'):
3322 if line.startswith('codec_name='):
3323 audio_codec = line.split('=')[1].strip()
3324 elif line.strip() == 'codec_type=audio' and audio_codec is not None:
3325 return audio_codec
3326 return None
3327
3328 @staticmethod
3329 def run_ffmpeg(path, out_path, codec, more_opts):
3330 try:
2727dbf7
RG
3331 cmd = ['ffmpeg', '-y', '-i', path, '-vn', '-acodec', codec] + more_opts + ['--', out_path]
3332 ret = subprocess.call(cmd, stdout=file(os.path.devnull, 'w'), stderr=subprocess.STDOUT)
3072fab1
RG
3333 return (ret == 0)
3334 except (IOError, OSError):
3335 return False
3336
3337 def run(self, information):
3338 path = information['filepath']
3339
3340 filecodec = self.get_audio_codec(path)
3341 if filecodec is None:
da273188 3342 self._downloader.to_stderr(u'WARNING: unable to obtain file audio codec with ffprobe')
3072fab1
RG
3343 return None
3344
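# Copy the stream untouched when the source is already aac/mp3 and matches the request; otherwise re-encode to the preferred codec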
3345 more_opts = []
3346 if self._preferredcodec == 'best' or self._preferredcodec == filecodec:
3347 if filecodec == 'aac' or filecodec == 'mp3':
3348 # Lossless if possible
3349 acodec = 'copy'
3350 extension = filecodec
3351 if filecodec == 'aac':
3352 more_opts = ['-f', 'adts']
3353 else:
3354 # MP3 otherwise.
3355 acodec = 'libmp3lame'
3356 extension = 'mp3'
c99dcbd2
PH
3357 more_opts = []
3358 if self._preferredquality is not None:
3359 more_opts += ['-ab', self._preferredquality]
3072fab1
RG
3360 else:
3361 # We convert the audio (lossy)
3362 acodec = {'mp3': 'libmp3lame', 'aac': 'aac'}[self._preferredcodec]
3363 extension = self._preferredcodec
c99dcbd2
PH
3364 more_opts = []
3365 if self._preferredquality is not None:
3366 more_opts += ['-ab', self._preferredquality]
3072fab1
RG
3367 if self._preferredcodec == 'aac':
3368 more_opts += ['-f', 'adts']
3369
3370 (prefix, ext) = os.path.splitext(path)
3371 new_path = prefix + '.' + extension
3372 self._downloader.to_screen(u'[ffmpeg] Destination: %s' % new_path)
3373 status = self.run_ffmpeg(path, new_path, acodec, more_opts)
3374
3375 if not status:
1bd92582 3376 self._downloader.to_stderr(u'WARNING: error running ffmpeg')
3072fab1
RG
3377 return None
3378
36597dc4
K
3379 # Try to update the date time for extracted audio file.
3380 if information.get('filetime') is not None:
3381 try:
3382 os.utime(new_path, (time.time(), information['filetime']))
3383 except:
3384 self._downloader.to_stderr(u'WARNING: Cannot update utime of audio file')
3385
18b7f874 3386 if not self._keepvideo:
3387 try:
3388 os.remove(path)
3389 except (IOError, OSError):
3390 self._downloader.to_stderr(u'WARNING: Unable to remove downloaded video file')
3391 return None
3072fab1
RG
3392
3393 information['filepath'] = new_path
3394 return information
3395
5fb3df4a
GV
3396
3397def updateSelf(downloader, filename):
3398 ''' Update the program file with the latest version from the repository '''
3399 # Note: downloader only used for options
3400 if not os.access(filename, os.W_OK):
3401 sys.exit('ERROR: no write permissions on %s' % filename)
3402
d207e7cf 3403 downloader.to_screen('Updating to latest version...')
5fb3df4a 3404
4fa74b52 3405 try:
d207e7cf
PH
3406 try:
3407 urlh = urllib.urlopen(UPDATE_URL)
3408 newcontent = urlh.read()
3409 finally:
3410 urlh.close()
5fb3df4a
GV
3411 except (IOError, OSError), err:
3412 sys.exit('ERROR: unable to download latest version')
f9f1e798 3413
5fb3df4a 3414 try:
d207e7cf
PH
3415 outf = open(filename, 'wb')
3416 try:
3417 outf.write(newcontent)
3418 finally:
3419 outf.close()
5fb3df4a
GV
3420 except (IOError, OSError), err:
3421 sys.exit('ERROR: unable to overwrite current version')
4bec29ef 3422
d207e7cf 3423 downloader.to_screen('Updated youtube-dl. Restart to use the new version.')
80066952 3424
4f9f96f6
GV
3425def parseOpts():
3426 # Deferred imports
3427 import getpass
3428 import optparse
e7cf18cb 3429
4f9f96f6
GV
3430 def _format_option_string(option):
3431 ''' ('-o', '--option') -> -o, --format METAVAR'''
80066952 3432
4f9f96f6
GV
3433 opts = []
3434
3435 if option._short_opts: opts.append(option._short_opts[0])
3436 if option._long_opts: opts.append(option._long_opts[0])
3437 if len(opts) > 1: opts.insert(1, ', ')
3438
3439 if option.takes_value(): opts.append(' %s' % option.metavar)
3440
3441 return "".join(opts)
3442
6a4f0a11
GV
3443 def _find_term_columns():
3444 columns = os.environ.get('COLUMNS', None)
2c8d32de
PH
3445 if columns:
3446 return int(columns)
3447
4f2a5e06
PH
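# COLUMNS is not always exported; fall back to asking the terminal with 'stty size', which prints "rows columns"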
3448 try:
3449 sp = subprocess.Popen(['stty', 'size'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
3450 out,err = sp.communicate()
eb0387a8 3451 return int(out.split()[1])
4f2a5e06
PH
3452 except:
3453 pass
2c8d32de 3454 return None
6a4f0a11 3455
51c8e53f
GV
3456 max_width = 80
3457 max_help_position = 80
3458
3459 # No need to wrap help messages if we're on a wide console
6a4f0a11 3460 columns = _find_term_columns()
51c8e53f
GV
3461 if columns: max_width = columns
3462
3463 fmt = optparse.IndentedHelpFormatter(width=max_width, max_help_position=max_help_position)
4f9f96f6
GV
3464 fmt.format_option_strings = _format_option_string
3465
3466 kw = {
3467 'version' : __version__,
3468 'formatter' : fmt,
a2f7e3a5 3469 'usage' : '%prog [options] url [url...]',
4f9f96f6
GV
3470 'conflict_handler' : 'resolve',
3471 }
3472
3473 parser = optparse.OptionParser(**kw)
3474
3475 # option groups
3476 general = optparse.OptionGroup(parser, 'General Options')
20e91e83 3477 selection = optparse.OptionGroup(parser, 'Video Selection')
4f9f96f6
GV
3478 authentication = optparse.OptionGroup(parser, 'Authentication Options')
3479 video_format = optparse.OptionGroup(parser, 'Video Format Options')
3480 postproc = optparse.OptionGroup(parser, 'Post-processing Options')
3481 filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
3482 verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
3483
3484 general.add_option('-h', '--help',
3485 action='help', help='print this help text and exit')
3486 general.add_option('-v', '--version',
3487 action='version', help='print program version and exit')
3488 general.add_option('-U', '--update',
e0e56865 3489 action='store_true', dest='update_self', help='update this program to latest version')
4f9f96f6
GV
3490 general.add_option('-i', '--ignore-errors',
3491 action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
3492 general.add_option('-r', '--rate-limit',
3493 dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)')
3494 general.add_option('-R', '--retries',
3495 dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10)
4f9f96f6
GV
3496 general.add_option('--dump-user-agent',
3497 action='store_true', dest='dump_user_agent',
3498 help='display the current browser identification', default=False)
f3098c4d
PH
3499 general.add_option('--list-extractors',
3500 action='store_true', dest='list_extractors',
3501 help='List all supported extractors and the URLs they would handle', default=False)
4f9f96f6 3502
20e91e83
ABP
3503 selection.add_option('--playlist-start',
3504 dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1)
3505 selection.add_option('--playlist-end',
3506 dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)
3507 selection.add_option('--match-title', dest='matchtitle', metavar='REGEX',help='download only matching titles (regex or caseless sub-string)')
3508 selection.add_option('--reject-title', dest='rejecttitle', metavar='REGEX',help='skip download for matching titles (regex or caseless sub-string)')
3509
4f9f96f6
GV
3510 authentication.add_option('-u', '--username',
3511 dest='username', metavar='USERNAME', help='account username')
3512 authentication.add_option('-p', '--password',
3513 dest='password', metavar='PASSWORD', help='account password')
3514 authentication.add_option('-n', '--netrc',
3515 action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
3516
3517
3518 video_format.add_option('-f', '--format',
3519 action='store', dest='format', metavar='FORMAT', help='video format code')
3520 video_format.add_option('--all-formats',
5260e68f 3521 action='store_const', dest='format', help='download all available video formats', const='all')
4f9f96f6
GV
3522 video_format.add_option('--max-quality',
3523 action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download')
3524
3525
3526 verbosity.add_option('-q', '--quiet',
3527 action='store_true', dest='quiet', help='activates quiet mode', default=False)
3528 verbosity.add_option('-s', '--simulate',
9b4556c4
PH
3529 action='store_true', dest='simulate', help='do not download the video and do not write anything to disk', default=False)
3530 verbosity.add_option('--skip-download',
3531 action='store_true', dest='skip_download', help='do not download the video', default=False)
4f9f96f6
GV
3532 verbosity.add_option('-g', '--get-url',
3533 action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
3534 verbosity.add_option('-e', '--get-title',
3535 action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
3536 verbosity.add_option('--get-thumbnail',
3537 action='store_true', dest='getthumbnail',
3538 help='simulate, quiet but print thumbnail URL', default=False)
3539 verbosity.add_option('--get-description',
3540 action='store_true', dest='getdescription',
3541 help='simulate, quiet but print video description', default=False)
3542 verbosity.add_option('--get-filename',
3543 action='store_true', dest='getfilename',
3544 help='simulate, quiet but print output filename', default=False)
da0db53a
DH
3545 verbosity.add_option('--get-format',
3546 action='store_true', dest='getformat',
3547 help='simulate, quiet but print output format', default=False)
4f9f96f6
GV
3548 verbosity.add_option('--no-progress',
3549 action='store_true', dest='noprogress', help='do not print progress bar', default=False)
3550 verbosity.add_option('--console-title',
3551 action='store_true', dest='consoletitle',
3552 help='display progress in console titlebar', default=False)
3553
3554
3555 filesystem.add_option('-t', '--title',
3556 action='store_true', dest='usetitle', help='use title in file name', default=False)
3557 filesystem.add_option('-l', '--literal',
3558 action='store_true', dest='useliteral', help='use literal title in file name', default=False)
3559 filesystem.add_option('-A', '--auto-number',
3560 action='store_true', dest='autonumber',
3561 help='number downloaded files starting from 00000', default=False)
3562 filesystem.add_option('-o', '--output',
3563 dest='outtmpl', metavar='TEMPLATE', help='output filename template')
3564 filesystem.add_option('-a', '--batch-file',
3565 dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
3566 filesystem.add_option('-w', '--no-overwrites',
3567 action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
3568 filesystem.add_option('-c', '--continue',
3569 action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False)
3570 filesystem.add_option('--cookies',
3571 dest='cookiefile', metavar='FILE', help='file to dump cookie jar to')
3572 filesystem.add_option('--no-part',
3573 action='store_true', dest='nopart', help='do not use .part files', default=False)
3574 filesystem.add_option('--no-mtime',
3575 action='store_false', dest='updatetime',
3576 help='do not use the Last-modified header to set the file modification time', default=True)
2c8d32de
PH
3577 filesystem.add_option('--write-description',
3578 action='store_true', dest='writedescription',
3579 help='write video description to a .description file', default=False)
3580 filesystem.add_option('--write-info-json',
3581 action='store_true', dest='writeinfojson',
3582 help='write video metadata to a .info.json file', default=False)
4f9f96f6
GV
3583
3584
3585 postproc.add_option('--extract-audio', action='store_true', dest='extractaudio', default=False,
3586 help='convert video files to audio-only files (requires ffmpeg and ffprobe)')
3587 postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best',
3588 help='"best", "aac" or "mp3"; best by default')
c99dcbd2
PH
3589 postproc.add_option('--audio-quality', metavar='QUALITY', dest='audioquality', default='128K',
3590 help='ffmpeg audio bitrate specification, 128k by default')
3591 postproc.add_option('-k', '--keep-video', action='store_true', dest='keepvideo', default=False,
3592 help='keeps the video file on disk after the post-processing; the video is erased by default')
4f9f96f6
GV
3593
3594
3595 parser.add_option_group(general)
20e91e83 3596 parser.add_option_group(selection)
4f9f96f6
GV
3597 parser.add_option_group(filesystem)
3598 parser.add_option_group(verbosity)
3599 parser.add_option_group(video_format)
3600 parser.add_option_group(authentication)
3601 parser.add_option_group(postproc)
3602
3603 opts, args = parser.parse_args()
3604
3605 return parser, opts, args
3606
f3098c4d
PH
3607def gen_extractors():
3608 """ Return a list of an instance of every supported extractor.
3609 The order does matter; the first extractor matched is the one handling the URL.
3610 """
3611 youtube_ie = YoutubeIE()
3612 google_ie = GoogleIE()
3613 yahoo_ie = YahooIE()
3614 return [
3615 youtube_ie,
3616 MetacafeIE(youtube_ie),
3617 DailymotionIE(),
3618 YoutubePlaylistIE(youtube_ie),
3619 YoutubeUserIE(youtube_ie),
3620 YoutubeSearchIE(youtube_ie),
3621 google_ie,
3622 GoogleSearchIE(google_ie),
3623 PhotobucketIE(),
3624 yahoo_ie,
3625 YahooSearchIE(yahoo_ie),
3626 DepositFilesIE(),
3627 FacebookIE(),
3628 BlipTVIE(),
3629 VimeoIE(),
3630 MyVideoIE(),
3631 ComedyCentralIE(),
3632 EscapistIE(),
3633
3634 GenericIE()
3635 ]
3636
5adcaa43
GV
3637def main():
3638 parser, opts, args = parseOpts()
4f9f96f6 3639
5adcaa43
GV
3640 # Open appropriate CookieJar
3641 if opts.cookiefile is None:
3642 jar = cookielib.CookieJar()
3643 else:
8cc44341 3644 try:
5adcaa43
GV
3645 jar = cookielib.MozillaCookieJar(opts.cookiefile)
3646 if os.path.isfile(opts.cookiefile) and os.access(opts.cookiefile, os.R_OK):
3647 jar.load()
3648 except (IOError, OSError), err:
3649 sys.exit(u'ERROR: unable to open cookie file')
80066952 3650
5adcaa43
GV
3651 # Dump user agent
3652 if opts.dump_user_agent:
3653 print std_headers['User-Agent']
3654 sys.exit(0)
e7cf18cb 3655
5adcaa43
GV
3656 # Batch file verification
3657 batchurls = []
3658 if opts.batchfile is not None:
8cc44341 3659 try:
5adcaa43
GV
3660 if opts.batchfile == '-':
3661 batchfd = sys.stdin
4bec29ef 3662 else:
5adcaa43
GV
3663 batchfd = open(opts.batchfile, 'r')
3664 batchurls = batchfd.readlines()
3665 batchurls = [x.strip() for x in batchurls]
3666 batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)]
3667 except IOError:
3668 sys.exit(u'ERROR: batch file could not be read')
3669 all_urls = batchurls + args
3670
f3098c4d
PH
3671 # General configuration
3672 cookie_processor = urllib2.HTTPCookieProcessor(jar)
3673 opener = urllib2.build_opener(urllib2.ProxyHandler(), cookie_processor, YoutubeDLHandler())
3674 urllib2.install_opener(opener)
3675 socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)
3676
3677 extractors = gen_extractors()
3678
3679 if opts.list_extractors:
3680 for ie in extractors:
3681 print(ie.IE_NAME)
3682 matchedUrls = filter(lambda url: ie.suitable(url), all_urls)
3683 all_urls = filter(lambda url: url not in matchedUrls, all_urls)
3684 for mu in matchedUrls:
3685 print(u' ' + mu)
3686 sys.exit(0)
3687
5adcaa43
GV
3688 # Conflicting, missing and erroneous options
3689 if opts.usenetrc and (opts.username is not None or opts.password is not None):
3690 parser.error(u'using .netrc conflicts with giving username/password')
3691 if opts.password is not None and opts.username is None:
3692 parser.error(u'account username missing')
3693 if opts.outtmpl is not None and (opts.useliteral or opts.usetitle or opts.autonumber):
3694 parser.error(u'using output template conflicts with using title, literal title or auto number')
3695 if opts.usetitle and opts.useliteral:
3696 parser.error(u'using title conflicts with using literal title')
3697 if opts.username is not None and opts.password is None:
3698 opts.password = getpass.getpass(u'Type account password and press return:')
3699 if opts.ratelimit is not None:
3700 numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
3701 if numeric_limit is None:
3702 parser.error(u'invalid rate limit specified')
3703 opts.ratelimit = numeric_limit
3704 if opts.retries is not None:
8cc44341 3705 try:
5adcaa43 3706 opts.retries = long(opts.retries)
8cc44341 3707 except (TypeError, ValueError), err:
5adcaa43
GV
3708 parser.error(u'invalid retry count specified')
3709 try:
2c8d32de 3710 opts.playliststart = int(opts.playliststart)
5adcaa43 3711 if opts.playliststart <= 0:
2c8d32de 3712 raise ValueError(u'Playlist start must be positive')
5adcaa43
GV
3713 except (TypeError, ValueError), err:
3714 parser.error(u'invalid playlist start number specified')
3715 try:
2c8d32de 3716 opts.playlistend = int(opts.playlistend)
5adcaa43 3717 if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart):
2c8d32de 3718 raise ValueError(u'Playlist end must be greater than playlist start')
5adcaa43
GV
3719 except (TypeError, ValueError), err:
3720 parser.error(u'invalid playlist end number specified')
3721 if opts.extractaudio:
3722 if opts.audioformat not in ['best', 'aac', 'mp3']:
3723 parser.error(u'invalid audio format specified')
3724
5adcaa43
GV
3725 # File downloader
3726 fd = FileDownloader({
3727 'usenetrc': opts.usenetrc,
3728 'username': opts.username,
3729 'password': opts.password,
da0db53a 3730 'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat),
5adcaa43
GV
3731 'forceurl': opts.geturl,
3732 'forcetitle': opts.gettitle,
3733 'forcethumbnail': opts.getthumbnail,
3734 'forcedescription': opts.getdescription,
3735 'forcefilename': opts.getfilename,
da0db53a 3736 'forceformat': opts.getformat,
9b4556c4 3737 'simulate': opts.simulate,
da0db53a 3738 'skip_download': (opts.skip_download or opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat),
5adcaa43
GV
3739 'format': opts.format,
3740 'format_limit': opts.format_limit,
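# Output template: an explicit -o template wins; otherwise one is derived from the --title/--literal/--auto-number flags, falling back to %(id)s.%(ext)s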
3741 'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
3742 or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s')
3743 or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s')
3744 or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s')
3745 or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(stitle)s-%(id)s.%(ext)s')
3746 or (opts.useliteral and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s')
3747 or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
3748 or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
3749 or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
3750 or u'%(id)s.%(ext)s'),
3751 'ignoreerrors': opts.ignoreerrors,
3752 'ratelimit': opts.ratelimit,
3753 'nooverwrites': opts.nooverwrites,
3754 'retries': opts.retries,
3755 'continuedl': opts.continue_dl,
3756 'noprogress': opts.noprogress,
3757 'playliststart': opts.playliststart,
3758 'playlistend': opts.playlistend,
3759 'logtostderr': opts.outtmpl == '-',
3760 'consoletitle': opts.consoletitle,
3761 'nopart': opts.nopart,
3762 'updatetime': opts.updatetime,
2c8d32de
PH
3763 'writedescription': opts.writedescription,
3764 'writeinfojson': opts.writeinfojson,
20e91e83
ABP
3765 'matchtitle': opts.matchtitle,
3766 'rejecttitle': opts.rejecttitle,
5adcaa43 3767 })
8c5dc3ad
PH
3768 for extractor in extractors:
3769 fd.add_info_extractor(extractor)
5adcaa43
GV
3770
3771 # PostProcessors
3772 if opts.extractaudio:
c99dcbd2 3773 fd.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, keepvideo=opts.keepvideo))
5adcaa43
GV
3774
3775 # Update version
3776 if opts.update_self:
3777 updateSelf(fd, sys.argv[0])
3778
3779 # Maybe do nothing
3780 if len(all_urls) < 1:
3781 if not opts.update_self:
3782 parser.error(u'you must provide at least one URL')
3783 else:
3784 sys.exit()
3785 retcode = fd.download(all_urls)
80066952 3786
5adcaa43
GV
3787 # Dump cookie jar if requested
3788 if opts.cookiefile is not None:
3789 try:
3790 jar.save()
3791 except (IOError, OSError), err:
3792 sys.exit(u'ERROR: unable to save cookie jar')
80066952 3793
5adcaa43 3794 sys.exit(retcode)
80066952 3795
4fa74b52 3796
5adcaa43
GV
3797if __name__ == '__main__':
3798 try:
3799 main()
e5bf0f55
RG
3800 except DownloadError:
3801 sys.exit(1)
3802 except SameFileError:
76a7f364 3803 sys.exit(u'ERROR: fixed output name but more than one file to download')
4fa74b52 3804 except KeyboardInterrupt:
76a7f364 3805 sys.exit(u'\nERROR: Interrupted by user')
e9cb9c28
GV
3806
3807# vim: set ts=4 sw=4 sts=4 noet ai si filetype=python: