#!/usr/bin/env python
# -*- coding: utf-8 -*-

__author__ = (
	'Ricardo Garcia Gonzalez',
	'Danny Colligan',
	'Benjamin Johnson',
	'Vasyl\' Vavrychuk',
	'Witold Baryluk',
	'Paweł Paprota',
	'Gergely Imreh',
	'Rogério Brito',
	'Philipp Hagemeister',
	'Sören Schulze',
	)

__license__ = 'Public Domain'
__version__ = '2011.09.18c'

UPDATE_URL = 'https://raw.github.com/rg3/youtube-dl/master/youtube-dl'

import cookielib
import datetime
import gzip
import htmlentitydefs
import HTMLParser
import httplib
import locale
import math
import netrc
import os
import os.path
import re
import socket
import string
import subprocess
import sys
import time
import urllib
import urllib2
import warnings
import zlib

if os.name == 'nt':
	import ctypes

try:
	import email.utils
except ImportError: # Python 2.4
	import email.Utils
try:
	import cStringIO as StringIO
except ImportError:
	import StringIO

# parse_qs was moved from the cgi module to the urlparse module recently.
try:
	from urlparse import parse_qs
except ImportError:
	from cgi import parse_qs

try:
	import lxml.etree
except ImportError:
	pass # Handled below

try:
	import xml.etree.ElementTree
except ImportError: # Python<2.5: Not officially supported, but let it slip
	warnings.warn('xml.etree.ElementTree support is missing. Consider upgrading to Python >= 2.5 if you get related errors.')

std_headers = {
	'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:5.0.1) Gecko/20100101 Firefox/5.0.1',
	'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
	'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
	'Accept-Encoding': 'gzip, deflate',
	'Accept-Language': 'en-us,en;q=0.5',
}

simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii')

try:
	import json
except ImportError: # Python <2.6, use trivialjson (https://github.com/phihag/trivialjson):
	import re
	class json(object):
		@staticmethod
		def loads(s):
			s = s.decode('UTF-8')
			def raiseError(msg, i):
				raise ValueError(msg + ' at position ' + str(i) + ' of ' + repr(s) + ': ' + repr(s[i:]))
			def skipSpace(i, expectMore=True):
				while i < len(s) and s[i] in ' \t\r\n':
					i += 1
				if expectMore:
					if i >= len(s):
						raiseError('Premature end', i)
				return i
			def decodeEscape(match):
				esc = match.group(1)
				_STATIC = {
					'"': '"',
					'\\': '\\',
					'/': '/',
					'b': unichr(0x8),
					'f': unichr(0xc),
					'n': '\n',
					'r': '\r',
					't': '\t',
				}
				if esc in _STATIC:
					return _STATIC[esc]
				if esc[0] == 'u':
					if len(esc) == 1+4:
						return unichr(int(esc[1:5], 16))
					if len(esc) == 5+6 and esc[5:7] == '\\u':
						hi = int(esc[1:5], 16)
						low = int(esc[7:11], 16)
						return unichr((hi - 0xd800) * 0x400 + low - 0xdc00 + 0x10000)
				raise ValueError('Unknown escape ' + str(esc))
			def parseString(i):
				i += 1
				e = i
				while True:
					e = s.index('"', e)
					bslashes = 0
					while s[e-bslashes-1] == '\\':
						bslashes += 1
					if bslashes % 2 == 1:
						e += 1
						continue
					break
				rexp = re.compile(r'\\(u[dD][89aAbB][0-9a-fA-F]{2}\\u[0-9a-fA-F]{4}|u[0-9a-fA-F]{4}|.|$)')
				stri = rexp.sub(decodeEscape, s[i:e])
				return (e+1,stri)
			def parseObj(i):
				i += 1
				res = {}
				i = skipSpace(i)
				if s[i] == '}': # Empty dictionary
					return (i+1,res)
				while True:
					if s[i] != '"':
						raiseError('Expected a string object key', i)
					i,key = parseString(i)
					i = skipSpace(i)
					if i >= len(s) or s[i] != ':':
						raiseError('Expected a colon', i)
					i,val = parse(i+1)
					res[key] = val
					i = skipSpace(i)
					if s[i] == '}':
						return (i+1, res)
					if s[i] != ',':
						raiseError('Expected comma or closing curly brace', i)
					i = skipSpace(i+1)
			def parseArray(i):
				res = []
				i = skipSpace(i+1)
				if s[i] == ']': # Empty array
					return (i+1,res)
				while True:
					i,val = parse(i)
					res.append(val)
					i = skipSpace(i) # Raise exception if premature end
					if s[i] == ']':
						return (i+1, res)
					if s[i] != ',':
						raiseError('Expected a comma or closing bracket', i)
					i = skipSpace(i+1)
			def parseDiscrete(i):
				for k,v in {'true': True, 'false': False, 'null': None}.items():
					if s.startswith(k, i):
						return (i+len(k), v)
				raiseError('Not a boolean (or null)', i)
			def parseNumber(i):
				mobj = re.match('^(-?(0|[1-9][0-9]*)(\.[0-9]*)?([eE][+-]?[0-9]+)?)', s[i:])
				if mobj is None:
					raiseError('Not a number', i)
				nums = mobj.group(1)
				if '.' in nums or 'e' in nums or 'E' in nums:
					return (i+len(nums), float(nums))
				return (i+len(nums), int(nums))
			CHARMAP = {'{': parseObj, '[': parseArray, '"': parseString, 't': parseDiscrete, 'f': parseDiscrete, 'n': parseDiscrete}
			def parse(i):
				i = skipSpace(i)
				i,res = CHARMAP.get(s[i], parseNumber)(i)
				i = skipSpace(i, False)
				return (i,res)
			i,res = parse(0)
			if i < len(s):
				raise ValueError('Extra data at end of input (index ' + str(i) + ' of ' + repr(s) + ': ' + repr(s[i:]) + ')')
			return res

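# Illustrative note (not part of the original source): whichever branch above
# wins, json.loads() is available from here on, e.g.
#   json.loads('{"a": [1, 2.5, true, null]}')  ->  {u'a': [1, 2.5, True, None]}
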
def preferredencoding():
	"""Get preferred encoding.

	Returns the best encoding scheme for the system, based on
	locale.getpreferredencoding() and some further tweaks.
	"""
	def yield_preferredencoding():
		try:
			pref = locale.getpreferredencoding()
			u'TEST'.encode(pref)
		except:
			pref = 'UTF-8'
		while True:
			yield pref
	return yield_preferredencoding().next()


def htmlentity_transform(matchobj):
	"""Transforms an HTML entity to a Unicode character.

	This function receives a match object and is intended to be used with
	the re.sub() function.
	"""
	entity = matchobj.group(1)

	# Known non-numeric HTML entity
	if entity in htmlentitydefs.name2codepoint:
		return unichr(htmlentitydefs.name2codepoint[entity])

	# Unicode character
	mobj = re.match(ur'(?u)#(x?\d+)', entity)
	if mobj is not None:
		numstr = mobj.group(1)
		if numstr.startswith(u'x'):
			base = 16
			numstr = u'0%s' % numstr
		else:
			base = 10
		return unichr(long(numstr, base))

	# Unknown entity in name, return its literal representation
	return (u'&%s;' % entity)


def sanitize_title(utitle):
	"""Sanitizes a video title so it could be used as part of a filename."""
	utitle = re.sub(ur'(?u)&(.+?);', htmlentity_transform, utitle)
	return utitle.replace(unicode(os.sep), u'%')

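# Illustrative example (not part of the original source): on a POSIX system,
# where os.sep is '/', sanitize_title(u'Foo &amp; Bar / Baz') returns
# u'Foo & Bar % Baz'.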

def sanitize_open(filename, open_mode):
	"""Try to open the given filename, and slightly tweak it if this fails.

	Attempts to open the given filename. If this fails, it tries to change
	the filename slightly, step by step, until it's either able to open it
	or it fails and raises a final exception, like the standard open()
	function.

	It returns the tuple (stream, definitive_file_name).
	"""
	try:
		if filename == u'-':
			if sys.platform == 'win32':
				import msvcrt
				msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
			return (sys.stdout, filename)
		stream = open(filename, open_mode)
		return (stream, filename)
	except (IOError, OSError), err:
		# In case of error, try to remove win32 forbidden chars
		filename = re.sub(ur'[/<>:"\|\?\*]', u'#', filename)

		# An exception here should be caught in the caller
		stream = open(filename, open_mode)
		return (stream, filename)


def timeconvert(timestr):
	"""Convert RFC 2822 defined time string into system timestamp"""
	timestamp = None
	timetuple = email.utils.parsedate_tz(timestr)
	if timetuple is not None:
		timestamp = email.utils.mktime_tz(timetuple)
	return timestamp

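# Illustrative example (not part of the original source):
#   timeconvert('Sun, 18 Sep 2011 12:00:00 +0000') -> 1316347200
# Unparseable strings yield None.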

class DownloadError(Exception):
	"""Download Error exception.

	This exception may be thrown by FileDownloader objects if they are not
	configured to continue on errors. They will contain the appropriate
	error message.
	"""
	pass


class SameFileError(Exception):
	"""Same File exception.

	This exception will be thrown by FileDownloader objects if they detect
	multiple files would have to be downloaded to the same file on disk.
	"""
	pass


class PostProcessingError(Exception):
	"""Post Processing exception.

	This exception may be raised by PostProcessor's .run() method to
	indicate an error in the postprocessing task.
	"""
	pass


class UnavailableVideoError(Exception):
	"""Unavailable Format exception.

	This exception will be thrown when a video is requested
	in a format that is not available for that video.
	"""
	pass


class ContentTooShortError(Exception):
	"""Content Too Short exception.

	This exception may be raised by FileDownloader objects when a file they
	download is too small for what the server announced first, indicating
	the connection was probably interrupted.
	"""
	# Both in bytes
	downloaded = None
	expected = None

	def __init__(self, downloaded, expected):
		self.downloaded = downloaded
		self.expected = expected


class YoutubeDLHandler(urllib2.HTTPHandler):
	"""Handler for HTTP requests and responses.

	This class, when installed with an OpenerDirector, automatically adds
	the standard headers to every HTTP request and handles gzipped and
	deflated responses from web servers. If compression is to be avoided in
	a particular request, the original request in the program code only has
	to include the HTTP header "Youtubedl-No-Compression", which will be
	removed before making the real request.

	Part of this code was copied from:

	http://techknack.net/python-urllib2-handlers/

	Andrew Rowls, the author of that code, agreed to release it to the
	public domain.
	"""

	@staticmethod
	def deflate(data):
		try:
			return zlib.decompress(data, -zlib.MAX_WBITS)
		except zlib.error:
			return zlib.decompress(data)

	@staticmethod
	def addinfourl_wrapper(stream, headers, url, code):
		if hasattr(urllib2.addinfourl, 'getcode'):
			return urllib2.addinfourl(stream, headers, url, code)
		ret = urllib2.addinfourl(stream, headers, url)
		ret.code = code
		return ret

	def http_request(self, req):
		for h in std_headers:
			if h in req.headers:
				del req.headers[h]
			req.add_header(h, std_headers[h])
		if 'Youtubedl-no-compression' in req.headers:
			if 'Accept-encoding' in req.headers:
				del req.headers['Accept-encoding']
			del req.headers['Youtubedl-no-compression']
		return req

	def http_response(self, req, resp):
		old_resp = resp
		# gzip
		if resp.headers.get('Content-encoding', '') == 'gzip':
			gz = gzip.GzipFile(fileobj=StringIO.StringIO(resp.read()), mode='r')
			resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
			resp.msg = old_resp.msg
		# deflate
		if resp.headers.get('Content-encoding', '') == 'deflate':
			gz = StringIO.StringIO(self.deflate(resp.read()))
			resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
			resp.msg = old_resp.msg
		return resp

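# Illustrative usage sketch (not part of the original source): the handler is
# meant to be installed into an OpenerDirector, for example
#   opener = urllib2.build_opener(YoutubeDLHandler())
#   urllib2.install_opener(opener)
# and an individual request can opt out of compressed transfers by adding the
# marker header:
#   req = urllib2.Request(url, None, {'Youtubedl-no-compression': 'True'})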

class FileDownloader(object):
	"""File Downloader class.

	File downloader objects are the ones responsible for downloading the
	actual video file and writing it to disk if the user has requested
	it, among some other tasks. In most cases there should be one per
	program. As, given a video URL, the downloader doesn't know how to
	extract all the needed information (a task that InfoExtractors do),
	it has to pass the URL to one of them.

	For this, file downloader objects have a method that allows
	InfoExtractors to be registered in a given order. When it is passed
	a URL, the file downloader hands it to the first InfoExtractor it
	finds that reports being able to handle it. The InfoExtractor extracts
	all the information about the video or videos the URL refers to, and
	asks the FileDownloader to process the video information, possibly
	downloading the video.

	File downloaders accept a lot of parameters. In order not to saturate
	the object constructor with arguments, it receives a dictionary of
	options instead. These options are available through the params
	attribute for the InfoExtractors to use. The FileDownloader also
	registers itself as the downloader in charge for the InfoExtractors
	that are added to it, so this is a "mutual registration".

	Available options:

	username:         Username for authentication purposes.
	password:         Password for authentication purposes.
	usenetrc:         Use netrc for authentication instead.
	quiet:            Do not print messages to stdout.
	forceurl:         Force printing final URL.
	forcetitle:       Force printing title.
	forcethumbnail:   Force printing thumbnail URL.
	forcedescription: Force printing description.
	forcefilename:    Force printing final filename.
	simulate:         Do not download the video files.
	format:           Video format code.
	format_limit:     Highest quality format to try.
	outtmpl:          Template for output names.
	ignoreerrors:     Do not stop on download errors.
	ratelimit:        Download speed limit, in bytes/sec.
	nooverwrites:     Prevent overwriting files.
	retries:          Number of times to retry for HTTP error 5xx.
	continuedl:       Try to continue downloads if possible.
	noprogress:       Do not print the progress bar.
	playliststart:    Playlist item to start at.
	playlistend:      Playlist item to end at.
	matchtitle:       Download only matching titles.
	rejecttitle:      Reject downloads for matching titles.
	logtostderr:      Log messages to stderr instead of stdout.
	consoletitle:     Display progress in console window's titlebar.
	nopart:           Do not use temporary .part files.
	updatetime:       Use the Last-modified header to set output file timestamps.
	writedescription: Write the video description to a .description file.
	writeinfojson:    Write the video metadata to a .info.json file.
	"""

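	# Illustrative usage sketch (not part of the original source; the option
	# values and video id are hypothetical):
	#   fd = FileDownloader({'outtmpl': u'%(stitle)s-%(id)s.%(ext)s', 'retries': 10})
	#   fd.add_info_extractor(YoutubeIE())
	#   retcode = fd.download(['http://www.youtube.com/watch?v=SOME_VIDEO_ID'])
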
	params = None
	_ies = []
	_pps = []
	_download_retcode = None
	_num_downloads = None
	_screen_file = None

	def __init__(self, params):
		"""Create a FileDownloader object with the given options."""
		self._ies = []
		self._pps = []
		self._download_retcode = 0
		self._num_downloads = 0
		self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
		self.params = params

	@staticmethod
	def format_bytes(bytes):
		if bytes is None:
			return 'N/A'
		if type(bytes) is str:
			bytes = float(bytes)
		if bytes == 0.0:
			exponent = 0
		else:
			exponent = long(math.log(bytes, 1024.0))
		suffix = 'bkMGTPEZY'[exponent]
		converted = float(bytes) / float(1024 ** exponent)
		return '%.2f%s' % (converted, suffix)

	@staticmethod
	def calc_percent(byte_counter, data_len):
		if data_len is None:
			return '---.-%'
		return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))

	@staticmethod
	def calc_eta(start, now, total, current):
		if total is None:
			return '--:--'
		dif = now - start
		if current == 0 or dif < 0.001: # One millisecond
			return '--:--'
		rate = float(current) / dif
		eta = long((float(total) - float(current)) / rate)
		(eta_mins, eta_secs) = divmod(eta, 60)
		if eta_mins > 99:
			return '--:--'
		return '%02d:%02d' % (eta_mins, eta_secs)

	@staticmethod
	def calc_speed(start, now, bytes):
		dif = now - start
		if bytes == 0 or dif < 0.001: # One millisecond
			return '%10s' % '---b/s'
		return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))

	@staticmethod
	def best_block_size(elapsed_time, bytes):
		new_min = max(bytes / 2.0, 1.0)
		new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
		if elapsed_time < 0.001:
			return long(new_max)
		rate = bytes / elapsed_time
		if rate > new_max:
			return long(new_max)
		if rate < new_min:
			return long(new_min)
		return long(rate)

	@staticmethod
	def parse_bytes(bytestr):
		"""Parse a string indicating a byte quantity into a long integer."""
		matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
		if matchobj is None:
			return None
		number = float(matchobj.group(1))
		multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
		return long(round(number * multiplier))

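	# Illustrative examples (not part of the original source):
	#   FileDownloader.format_bytes(1500000) -> '1.43M'
	#   FileDownloader.parse_bytes('50k')    -> 51200 (a long)
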
	def add_info_extractor(self, ie):
		"""Add an InfoExtractor object to the end of the list."""
		self._ies.append(ie)
		ie.set_downloader(self)

	def add_post_processor(self, pp):
		"""Add a PostProcessor object to the end of the chain."""
		self._pps.append(pp)
		pp.set_downloader(self)

	def to_screen(self, message, skip_eol=False, ignore_encoding_errors=False):
		"""Print message to stdout if not in quiet mode."""
		try:
			if not self.params.get('quiet', False):
				terminator = [u'\n', u''][skip_eol]
				print >>self._screen_file, (u'%s%s' % (message, terminator)).encode(preferredencoding()),
				self._screen_file.flush()
		except (UnicodeEncodeError), err:
			if not ignore_encoding_errors:
				raise

	def to_stderr(self, message):
		"""Print message to stderr."""
		print >>sys.stderr, message.encode(preferredencoding())

	def to_cons_title(self, message):
		"""Set console/terminal window title to message."""
		if not self.params.get('consoletitle', False):
			return
		if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
			# c_wchar_p() might not be necessary if `message` is
			# already of type unicode()
			ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
		elif 'TERM' in os.environ:
			sys.stderr.write('\033]0;%s\007' % message.encode(preferredencoding()))

	def fixed_template(self):
		"""Checks if the output template is fixed."""
		return (re.search(ur'(?u)%\(.+?\)s', self.params['outtmpl']) is None)

	def trouble(self, message=None):
		"""Determine action to take when a download problem appears.

		Depending on whether the downloader has been configured to ignore
		download errors or not, this method may throw an exception
		(after printing the message) when errors are found.
		"""
		if message is not None:
			self.to_stderr(message)
		if not self.params.get('ignoreerrors', False):
			raise DownloadError(message)
		self._download_retcode = 1

	def slow_down(self, start_time, byte_counter):
		"""Sleep if the download speed is over the rate limit."""
		rate_limit = self.params.get('ratelimit', None)
		if rate_limit is None or byte_counter == 0:
			return
		now = time.time()
		elapsed = now - start_time
		if elapsed <= 0.0:
			return
		speed = float(byte_counter) / elapsed
		if speed > rate_limit:
			time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)

	def temp_name(self, filename):
		"""Returns a temporary filename for the given filename."""
		if self.params.get('nopart', False) or filename == u'-' or \
				(os.path.exists(filename) and not os.path.isfile(filename)):
			return filename
		return filename + u'.part'

	def undo_temp_name(self, filename):
		if filename.endswith(u'.part'):
			return filename[:-len(u'.part')]
		return filename

	def try_rename(self, old_filename, new_filename):
		try:
			if old_filename == new_filename:
				return
			os.rename(old_filename, new_filename)
		except (IOError, OSError), err:
			self.trouble(u'ERROR: unable to rename file')

	def try_utime(self, filename, last_modified_hdr):
		"""Try to set the last-modified time of the given file."""
		if last_modified_hdr is None:
			return
		if not os.path.isfile(filename):
			return
		timestr = last_modified_hdr
		if timestr is None:
			return
		filetime = timeconvert(timestr)
		if filetime is None:
			return filetime
		try:
			os.utime(filename, (time.time(), filetime))
		except:
			pass
		return filetime

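	# Illustrative notes (not part of the original source):
	#   temp_name(u'video.flv') -> u'video.flv.part', unless 'nopart' is set,
	#   the target is u'-', or a non-file with that name already exists.
	#   try_utime() returns the timestamp parsed from the Last-modified header,
	#   or None if it could not be parsed.
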
	def report_writedescription(self, descfn):
		""" Report that the description file is being written """
		self.to_screen(u'[info] Writing video description to: %s' % descfn, ignore_encoding_errors=True)

	def report_writeinfojson(self, infofn):
		""" Report that the metadata file has been written """
		self.to_screen(u'[info] Video description metadata as JSON to: %s' % infofn, ignore_encoding_errors=True)

	def report_destination(self, filename):
		"""Report destination filename."""
		self.to_screen(u'[download] Destination: %s' % filename, ignore_encoding_errors=True)

	def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
		"""Report download progress."""
		if self.params.get('noprogress', False):
			return
		self.to_screen(u'\r[download] %s of %s at %s ETA %s' %
				(percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
		self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
				(percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))

	def report_resuming_byte(self, resume_len):
		"""Report attempt to resume at given byte."""
		self.to_screen(u'[download] Resuming download at byte %s' % resume_len)

	def report_retry(self, count, retries):
		"""Report retry in case of HTTP error 5xx"""
		self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))

	def report_file_already_downloaded(self, file_name):
		"""Report file has already been fully downloaded."""
		try:
			self.to_screen(u'[download] %s has already been downloaded' % file_name)
		except (UnicodeEncodeError), err:
			self.to_screen(u'[download] The file has already been downloaded')

	def report_unable_to_resume(self):
		"""Report it was impossible to resume download."""
		self.to_screen(u'[download] Unable to resume')

	def report_finish(self):
		"""Report download finished."""
		if self.params.get('noprogress', False):
			self.to_screen(u'[download] Download completed')
		else:
			self.to_screen(u'')

	def increment_downloads(self):
		"""Increment the ordinal that assigns a number to each file."""
		self._num_downloads += 1

	def prepare_filename(self, info_dict):
		"""Generate the output filename."""
		try:
			template_dict = dict(info_dict)
			template_dict['epoch'] = unicode(long(time.time()))
			template_dict['autonumber'] = unicode('%05d' % self._num_downloads)
			filename = self.params['outtmpl'] % template_dict
			return filename
		except (ValueError, KeyError), err:
			self.trouble(u'ERROR: invalid system charset or erroneous output template')
			return None

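	# Illustrative example (not part of the original source; the template is
	# hypothetical): with params['outtmpl'] = u'%(stitle)s-%(id)s.%(ext)s' and an
	# info_dict containing those keys, prepare_filename() could return something
	# like u'Some_video_title-abc123.mp4'. The keys 'epoch' and 'autonumber' are
	# also made available to the template.
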
	def process_info(self, info_dict):
		"""Process a single dictionary returned by an InfoExtractor."""
		filename = self.prepare_filename(info_dict)

		# Forced printings
		if self.params.get('forcetitle', False):
			print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')
		if self.params.get('forceurl', False):
			print info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace')
		if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
			print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace')
		if self.params.get('forcedescription', False) and 'description' in info_dict:
			print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace')
		if self.params.get('forcefilename', False) and filename is not None:
			print filename.encode(preferredencoding(), 'xmlcharrefreplace')
		if self.params.get('forceformat', False):
			print info_dict['format'].encode(preferredencoding(), 'xmlcharrefreplace')

		# Do nothing else if in simulate mode
		if self.params.get('simulate', False):
			return

		if filename is None:
			return

		matchtitle = self.params.get('matchtitle', False)
		rejecttitle = self.params.get('rejecttitle', False)
		title = info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')
		if matchtitle and not re.search(matchtitle, title, re.IGNORECASE):
			self.to_screen(u'[download] "%s" title did not match pattern "%s"' % (title, matchtitle))
			return
		if rejecttitle and re.search(rejecttitle, title, re.IGNORECASE):
			self.to_screen(u'[download] "%s" title matched reject pattern "%s"' % (title, rejecttitle))
			return

		if self.params.get('nooverwrites', False) and os.path.exists(filename):
			self.to_stderr(u'WARNING: file exists and will be skipped')
			return

		try:
			dn = os.path.dirname(filename)
			if dn != '' and not os.path.exists(dn):
				os.makedirs(dn)
		except (OSError, IOError), err:
			self.trouble(u'ERROR: unable to create directory ' + unicode(err))
			return

		if self.params.get('writedescription', False):
			try:
				descfn = filename + '.description'
				self.report_writedescription(descfn)
				descfile = open(descfn, 'wb')
				try:
					descfile.write(info_dict['description'].encode('utf-8'))
				finally:
					descfile.close()
			except (OSError, IOError):
				self.trouble(u'ERROR: Cannot write description file ' + descfn)
				return

		if self.params.get('writeinfojson', False):
			infofn = filename + '.info.json'
			self.report_writeinfojson(infofn)
			try:
				json.dump
			except (NameError, AttributeError):
				self.trouble(u'ERROR: No JSON encoder found. Update to Python 2.6+, setup a json module, or leave out --write-info-json.')
				return
			try:
				infof = open(infofn, 'wb')
				try:
					json.dump(info_dict, infof)
				finally:
					infof.close()
			except (OSError, IOError):
				self.trouble(u'ERROR: Cannot write metadata to JSON file ' + infofn)
				return

		if not self.params.get('skip_download', False):
			try:
				success = self._do_download(filename, info_dict)
			except (OSError, IOError), err:
				raise UnavailableVideoError
			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
				self.trouble(u'ERROR: unable to download video data: %s' % str(err))
				return
			except (ContentTooShortError, ), err:
				self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
				return

			if success:
				try:
					self.post_process(filename, info_dict)
				except (PostProcessingError), err:
					self.trouble(u'ERROR: postprocessing: %s' % str(err))
					return

	def download(self, url_list):
		"""Download a given list of URLs."""
		if len(url_list) > 1 and self.fixed_template():
			raise SameFileError(self.params['outtmpl'])

		for url in url_list:
			suitable_found = False
			for ie in self._ies:
				# Go to next InfoExtractor if not suitable
				if not ie.suitable(url):
					continue

				# Suitable InfoExtractor found
				suitable_found = True

				# Extract information from URL and process it
				ie.extract(url)

				# Suitable InfoExtractor had been found; go to next URL
				break

			if not suitable_found:
				self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url)

		return self._download_retcode

	def post_process(self, filename, ie_info):
		"""Run the postprocessing chain on the given file."""
		info = dict(ie_info)
		info['filepath'] = filename
		for pp in self._pps:
			info = pp.run(info)
			if info is None:
				break

	def _download_with_rtmpdump(self, filename, url, player_url):
		self.report_destination(filename)
		tmpfilename = self.temp_name(filename)

		# Check for rtmpdump first
		try:
			subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
		except (OSError, IOError):
			self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
			return False

		# Download using rtmpdump. rtmpdump returns exit code 2 when
		# the connection was interrupted and resuming appears to be
		# possible. This is part of rtmpdump's normal usage, AFAIK.
		basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename]
		retval = subprocess.call(basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)])
		while retval == 2 or retval == 1:
			prevsize = os.path.getsize(tmpfilename)
			self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
			time.sleep(5.0) # This seems to be needed
			retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
			cursize = os.path.getsize(tmpfilename)
			if prevsize == cursize and retval == 1:
				break
			# Some rtmp streams seem to abort after ~ 99.8%. Don't complain for those
			if prevsize == cursize and retval == 2 and cursize > 1024:
				self.to_screen(u'\r[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
				retval = 0
				break
		if retval == 0:
			self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(tmpfilename))
			self.try_rename(tmpfilename, filename)
			return True
		else:
			self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval)
			return False

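	# Illustrative note (not part of the original source): for a video with a
	# player URL, the method above initially runs roughly
	#   rtmpdump -q -W <player_url> -r <url> -o <tmpfilename> [-e -k 1]
	# where '-e -k 1' is only appended when resuming ('continuedl') is enabled.
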
	def _do_download(self, filename, info_dict):
		url = info_dict['url']
		player_url = info_dict.get('player_url', None)

		# Check file already present
		if self.params.get('continuedl', False) and os.path.isfile(filename) and not self.params.get('nopart', False):
			self.report_file_already_downloaded(filename)
			return True

		# Attempt to download using rtmpdump
		if url.startswith('rtmp'):
			return self._download_with_rtmpdump(filename, url, player_url)

		tmpfilename = self.temp_name(filename)
		stream = None

		# Do not include the Accept-Encoding header
		headers = {'Youtubedl-no-compression': 'True'}
		basic_request = urllib2.Request(url, None, headers)
		request = urllib2.Request(url, None, headers)

		# Establish possible resume length
		if os.path.isfile(tmpfilename):
			resume_len = os.path.getsize(tmpfilename)
		else:
			resume_len = 0

		open_mode = 'wb'
		if resume_len != 0:
			if self.params.get('continuedl', False):
				self.report_resuming_byte(resume_len)
				request.add_header('Range', 'bytes=%d-' % resume_len)
				open_mode = 'ab'
			else:
				resume_len = 0

		count = 0
		retries = self.params.get('retries', 0)
		while count <= retries:
			# Establish connection
			try:
				data = urllib2.urlopen(request)
				break
			except (urllib2.HTTPError, ), err:
				if (err.code < 500 or err.code >= 600) and err.code != 416:
					# Unexpected HTTP error
					raise
				elif err.code == 416:
					# Unable to resume (requested range not satisfiable)
					try:
						# Open the connection again without the range header
						data = urllib2.urlopen(basic_request)
						content_length = data.info()['Content-Length']
					except (urllib2.HTTPError, ), err:
						if err.code < 500 or err.code >= 600:
							raise
					else:
						# Examine the reported length
						if (content_length is not None and
								(resume_len - 100 < long(content_length) < resume_len + 100)):
							# The file had already been fully downloaded.
							# Explanation to the above condition: in issue #175 it was revealed that
							# YouTube sometimes adds or removes a few bytes from the end of the file,
							# changing the file size slightly and causing problems for some users. So
							# I decided to implement a suggested change and consider the file
							# completely downloaded if the file size differs less than 100 bytes from
							# the one in the hard drive.
							self.report_file_already_downloaded(filename)
							self.try_rename(tmpfilename, filename)
							return True
						else:
							# The length does not match, we start the download over
							self.report_unable_to_resume()
							open_mode = 'wb'
							break
			# Retry
			count += 1
			if count <= retries:
				self.report_retry(count, retries)

		if count > retries:
			self.trouble(u'ERROR: giving up after %s retries' % retries)
			return False

		data_len = data.info().get('Content-length', None)
		if data_len is not None:
			data_len = long(data_len) + resume_len
		data_len_str = self.format_bytes(data_len)
		byte_counter = 0 + resume_len
		block_size = 1024
		start = time.time()
		while True:
			# Download and write
			before = time.time()
			data_block = data.read(block_size)
			after = time.time()
			if len(data_block) == 0:
				break
			byte_counter += len(data_block)

			# Open file just in time
			if stream is None:
				try:
					(stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
					assert stream is not None
					filename = self.undo_temp_name(tmpfilename)
					self.report_destination(filename)
				except (OSError, IOError), err:
					self.trouble(u'ERROR: unable to open for writing: %s' % str(err))
					return False
			try:
				stream.write(data_block)
			except (IOError, OSError), err:
				self.trouble(u'\nERROR: unable to write data: %s' % str(err))
				return False
			block_size = self.best_block_size(after - before, len(data_block))

			# Progress message
			speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len)
			if data_len is None:
				self.report_progress('Unknown %', data_len_str, speed_str, 'Unknown ETA')
			else:
				percent_str = self.calc_percent(byte_counter, data_len)
				eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
				self.report_progress(percent_str, data_len_str, speed_str, eta_str)

			# Apply rate limit
			self.slow_down(start, byte_counter - resume_len)

		if stream is None:
			self.trouble(u'\nERROR: Did not get any data blocks')
			return False
		stream.close()
		self.report_finish()
		if data_len is not None and byte_counter != data_len:
			raise ContentTooShortError(byte_counter, long(data_len))
		self.try_rename(tmpfilename, filename)

		# Update file modification time
		if self.params.get('updatetime', True):
			info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))

		return True


class InfoExtractor(object):
	"""Information Extractor class.

	Information extractors are the classes that, given a URL, extract
	information from the video (or videos) the URL refers to. This
	information includes the real video URL, the video title and simplified
	title, author and others. The information is stored in a dictionary
	which is then passed to the FileDownloader. The FileDownloader
	processes this information, possibly downloading the video to the file
	system, among other possible outcomes. The dictionaries must include
	the following fields:

	id:          Video identifier.
	url:         Final video URL.
	uploader:    Nickname of the video uploader.
	title:       Literal title.
	stitle:      Simplified title.
	ext:         Video filename extension.
	format:      Video format.
	player_url:  SWF Player URL (may be None).

	The following fields are optional. Their primary purpose is to allow
	youtube-dl to serve as the backend for a video search function, such
	as the one in youtube2mp3. They are only used when their respective
	forced printing functions are called:

	thumbnail:   Full URL to a video thumbnail image.
	description: One-line video description.

	Subclasses of this one should re-define the _real_initialize() and
	_real_extract() methods and define a _VALID_URL regexp.
	Probably, they should also be added to the list of extractors.
	"""

	_ready = False
	_downloader = None

	def __init__(self, downloader=None):
		"""Constructor. Receives an optional downloader."""
		self._ready = False
		self.set_downloader(downloader)

	def suitable(self, url):
		"""Receives a URL and returns True if suitable for this IE."""
		return re.match(self._VALID_URL, url) is not None

	def initialize(self):
		"""Initializes an instance (authentication, etc)."""
		if not self._ready:
			self._real_initialize()
			self._ready = True

	def extract(self, url):
		"""Extracts URL information and returns it in a list of dicts."""
		self.initialize()
		return self._real_extract(url)

	def set_downloader(self, downloader):
		"""Sets the downloader for this IE."""
		self._downloader = downloader

	def _real_initialize(self):
		"""Real initialization process. Redefine in subclasses."""
		pass

	def _real_extract(self, url):
		"""Real extraction process. Redefine in subclasses."""
		pass


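# Illustrative skeleton of a new extractor (not part of the original source; the
# class name and URL pattern are hypothetical):
#   class ExampleIE(InfoExtractor):
#       _VALID_URL = r'(?:http://)?(?:www\.)?example\.com/video/(\d+)'
#
#       def _real_extract(self, url):
#           video_id = re.match(self._VALID_URL, url).group(1)
#           self._downloader.increment_downloads()
#           self._downloader.process_info({
#               'id': video_id, 'url': u'http://example.com/v.flv',
#               'uploader': u'NA', 'upload_date': u'NA',
#               'title': u'Example', 'stitle': u'Example',
#               'ext': u'flv', 'format': u'NA', 'player_url': None,
#           })
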
class YoutubeIE(InfoExtractor):
	"""Information extractor for youtube.com."""

	_VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?!view_play_list|my_playlists|artist|playlist)(?:(?:(?:v|embed|e)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=))?)?([0-9A-Za-z_-]+)(?(1).+)?$'
	_LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
	_LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en'
	_AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
	_NETRC_MACHINE = 'youtube'
	# Listed in order of quality
	_available_formats = ['38', '37', '22', '45', '35', '44', '34', '18', '43', '6', '5', '17', '13']
	_video_extensions = {
		'13': '3gp',
		'17': 'mp4',
		'18': 'mp4',
		'22': 'mp4',
		'37': 'mp4',
		'38': 'video', # You actually don't know if this will be MOV, AVI or whatever
		'43': 'webm',
		'44': 'webm',
		'45': 'webm',
	}
	IE_NAME = u'youtube'

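	# Illustrative note (not part of the original source; the video id is made up):
	#   YoutubeIE().suitable('http://www.youtube.com/watch?v=abcdefghijk') -> True
	#   YoutubeIE().suitable('http://youtu.be/abcdefghijk')                -> True
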
	def report_lang(self):
		"""Report attempt to set language."""
		self._downloader.to_screen(u'[youtube] Setting language')

	def report_login(self):
		"""Report attempt to log in."""
		self._downloader.to_screen(u'[youtube] Logging in')

	def report_age_confirmation(self):
		"""Report attempt to confirm age."""
		self._downloader.to_screen(u'[youtube] Confirming age')

	def report_video_webpage_download(self, video_id):
		"""Report attempt to download video webpage."""
		self._downloader.to_screen(u'[youtube] %s: Downloading video webpage' % video_id)

	def report_video_info_webpage_download(self, video_id):
		"""Report attempt to download video info webpage."""
		self._downloader.to_screen(u'[youtube] %s: Downloading video info webpage' % video_id)

	def report_information_extraction(self, video_id):
		"""Report attempt to extract video information."""
		self._downloader.to_screen(u'[youtube] %s: Extracting video information' % video_id)

	def report_unavailable_format(self, video_id, format):
		"""Report extracted video URL."""
		self._downloader.to_screen(u'[youtube] %s: Format %s not available' % (video_id, format))

	def report_rtmp_download(self):
		"""Indicate the download will use the RTMP protocol."""
		self._downloader.to_screen(u'[youtube] RTMP download detected')

	def _real_initialize(self):
		if self._downloader is None:
			return

		username = None
		password = None
		downloader_params = self._downloader.params

		# Attempt to use provided username and password or .netrc data
		if downloader_params.get('username', None) is not None:
			username = downloader_params['username']
			password = downloader_params['password']
		elif downloader_params.get('usenetrc', False):
			try:
				info = netrc.netrc().authenticators(self._NETRC_MACHINE)
				if info is not None:
					username = info[0]
					password = info[2]
				else:
					raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
			except (IOError, netrc.NetrcParseError), err:
				self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
				return

		# Set language
		request = urllib2.Request(self._LANG_URL)
		try:
			self.report_lang()
			urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err))
			return

		# No authentication to be performed
		if username is None:
			return

		# Log in
		login_form = {
			'current_form': 'loginForm',
			'next': '/',
			'action_login': 'Log In',
			'username': username,
			'password': password,
		}
		request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form))
		try:
			self.report_login()
			login_results = urllib2.urlopen(request).read()
			if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
				self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
				return
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
			return

		# Confirm age
		age_form = {
			'next_url': '/',
			'action_confirm': 'Confirm',
		}
		request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form))
		try:
			self.report_age_confirmation()
			age_results = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
			return

	def _real_extract(self, url):
		# Extract video id from URL
		mobj = re.match(self._VALID_URL, url)
		if mobj is None:
			self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
			return
		video_id = mobj.group(2)

		# Get video webpage
		self.report_video_webpage_download(video_id)
		request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en&amp;has_verified=1' % video_id)
		try:
			video_webpage = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
			return

		# Attempt to extract SWF player URL
		mobj = re.search(r'swfConfig.*?"(http:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
		if mobj is not None:
			player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
		else:
			player_url = None

		# Get video info
		self.report_video_info_webpage_download(video_id)
		for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
			video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
					% (video_id, el_type))
			request = urllib2.Request(video_info_url)
			try:
				video_info_webpage = urllib2.urlopen(request).read()
				video_info = parse_qs(video_info_webpage)
				if 'token' in video_info:
					break
			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
				self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
				return
		if 'token' not in video_info:
			if 'reason' in video_info:
				self._downloader.trouble(u'ERROR: YouTube said: %s' % video_info['reason'][0].decode('utf-8'))
			else:
				self._downloader.trouble(u'ERROR: "token" parameter not in video info for unknown reason')
			return

		# Start extracting information
		self.report_information_extraction(video_id)

		# uploader
		if 'author' not in video_info:
			self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
			return
		video_uploader = urllib.unquote_plus(video_info['author'][0])

		# title
		if 'title' not in video_info:
			self._downloader.trouble(u'ERROR: unable to extract video title')
			return
		video_title = urllib.unquote_plus(video_info['title'][0])
		video_title = video_title.decode('utf-8')
		video_title = sanitize_title(video_title)

		# simplified title
		simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
		simple_title = simple_title.strip(ur'_')

		# thumbnail image
		if 'thumbnail_url' not in video_info:
			self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
			video_thumbnail = ''
		else: # don't panic if we can't find it
			video_thumbnail = urllib.unquote_plus(video_info['thumbnail_url'][0])

		# upload date
		upload_date = u'NA'
		mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
		if mobj is not None:
			upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
			format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y']
			for expression in format_expressions:
				try:
					upload_date = datetime.datetime.strptime(upload_date, expression).strftime('%Y%m%d')
				except:
					pass

		# description
		try:
			lxml.etree
		except NameError:
			video_description = u'No description available.'
			if self._downloader.params.get('forcedescription', False) or self._downloader.params.get('writedescription', False):
				mobj = re.search(r'<meta name="description" content="(.*)"(?:\s*/)?>', video_webpage)
				if mobj is not None:
					video_description = mobj.group(1).decode('utf-8')
		else:
			html_parser = lxml.etree.HTMLParser(encoding='utf-8')
			vwebpage_doc = lxml.etree.parse(StringIO.StringIO(video_webpage), html_parser)
			video_description = u''.join(vwebpage_doc.xpath('id("eow-description")//text()'))
			# TODO use another parser

		# token
		video_token = urllib.unquote_plus(video_info['token'][0])

		# Decide which formats to download
		req_format = self._downloader.params.get('format', None)

		if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
			self.report_rtmp_download()
			video_url_list = [(None, video_info['conn'][0])]
		elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1:
			url_data_strs = video_info['url_encoded_fmt_stream_map'][0].split(',')
			url_data = [parse_qs(uds) for uds in url_data_strs]
			url_data = filter(lambda ud: 'itag' in ud and 'url' in ud, url_data)
			url_map = dict((ud['itag'][0], ud['url'][0]) for ud in url_data)

			format_limit = self._downloader.params.get('format_limit', None)
			if format_limit is not None and format_limit in self._available_formats:
				format_list = self._available_formats[self._available_formats.index(format_limit):]
			else:
				format_list = self._available_formats
			existing_formats = [x for x in format_list if x in url_map]
			if len(existing_formats) == 0:
				self._downloader.trouble(u'ERROR: no known formats available for video')
				return
			if req_format is None or req_format == 'best':
				video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
			elif req_format == 'worst':
				video_url_list = [(existing_formats[len(existing_formats)-1], url_map[existing_formats[len(existing_formats)-1]])] # worst quality
			elif req_format in ('-1', 'all'):
				video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
			else:
				# Specific formats. We pick the first in a slash-delimited sequence.
				# For example, if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
				req_formats = req_format.split('/')
				video_url_list = None
				for rf in req_formats:
					if rf in url_map:
						video_url_list = [(rf, url_map[rf])]
						break
				if video_url_list is None:
					self._downloader.trouble(u'ERROR: requested format not available')
					return
		else:
			self._downloader.trouble(u'ERROR: no conn or url_encoded_fmt_stream_map information found in video info')
			return

		for format_param, video_real_url in video_url_list:
			# At this point we have a new video
			self._downloader.increment_downloads()

			# Extension
			video_extension = self._video_extensions.get(format_param, 'flv')

			try:
				# Process video information
				self._downloader.process_info({
					'id': video_id.decode('utf-8'),
					'url': video_real_url.decode('utf-8'),
					'uploader': video_uploader.decode('utf-8'),
					'upload_date': upload_date,
					'title': video_title,
					'stitle': simple_title,
					'ext': video_extension.decode('utf-8'),
					'format': (format_param is None and u'NA' or format_param.decode('utf-8')),
					'thumbnail': video_thumbnail.decode('utf-8'),
					'description': video_description,
					'player_url': player_url,
				})
			except UnavailableVideoError, err:
				self._downloader.trouble(u'\nERROR: unable to download video')


020f7150
RG
1378class MetacafeIE(InfoExtractor):
1379 """Information Extractor for metacafe.com."""
1380
1381 _VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
2546e767 1382 _DISCLAIMER = 'http://www.metacafe.com/family_filter/'
dbccb6cd 1383 _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
020f7150 1384 _youtube_ie = None
f3098c4d 1385 IE_NAME = u'metacafe'
020f7150
RG
1386
1387 def __init__(self, youtube_ie, downloader=None):
1388 InfoExtractor.__init__(self, downloader)
1389 self._youtube_ie = youtube_ie
1390
020f7150
RG
1391 def report_disclaimer(self):
1392 """Report disclaimer retrieval."""
331ce0a0 1393 self._downloader.to_screen(u'[metacafe] Retrieving disclaimer')
020f7150
RG
1394
1395 def report_age_confirmation(self):
1396 """Report attempt to confirm age."""
331ce0a0 1397 self._downloader.to_screen(u'[metacafe] Confirming age')
d3975459 1398
020f7150
RG
1399 def report_download_webpage(self, video_id):
1400 """Report webpage download."""
331ce0a0 1401 self._downloader.to_screen(u'[metacafe] %s: Downloading webpage' % video_id)
d3975459 1402
020f7150
RG
1403 def report_extraction(self, video_id):
1404 """Report information extraction."""
331ce0a0 1405 self._downloader.to_screen(u'[metacafe] %s: Extracting information' % video_id)
020f7150
RG
1406
1407 def _real_initialize(self):
1408 # Retrieve disclaimer
1987c232 1409 request = urllib2.Request(self._DISCLAIMER)
020f7150
RG
1410 try:
1411 self.report_disclaimer()
1412 disclaimer = urllib2.urlopen(request).read()
1413 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
147753eb 1414 self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % str(err))
020f7150
RG
1415 return
1416
1417 # Confirm age
1418 disclaimer_form = {
2546e767 1419 'filters': '0',
020f7150
RG
1420 'submit': "Continue - I'm over 18",
1421 }
1987c232 1422 request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form))
020f7150
RG
1423 try:
1424 self.report_age_confirmation()
1425 disclaimer = urllib2.urlopen(request).read()
1426 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
147753eb 1427 self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
020f7150 1428 return
d3975459 1429
020f7150
RG
1430 def _real_extract(self, url):
1431 # Extract id and simplified title from URL
1432 mobj = re.match(self._VALID_URL, url)
1433 if mobj is None:
147753eb 1434 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
6f21f686 1435 return
020f7150
RG
1436
1437 video_id = mobj.group(1)
1438
1439 # Check if video comes from YouTube
1440 mobj2 = re.match(r'^yt-(.*)$', video_id)
1441 if mobj2 is not None:
6f21f686
RG
1442 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % mobj2.group(1))
1443 return
020f7150 1444
df372a65 1445 # At this point we have a new video
9bf7fa52 1446 self._downloader.increment_downloads()
df372a65 1447
020f7150 1448 simple_title = mobj.group(2).decode('utf-8')
020f7150
RG
1449
1450 # Retrieve video webpage to extract further information
1451 request = urllib2.Request('http://www.metacafe.com/watch/%s/' % video_id)
1452 try:
1453 self.report_download_webpage(video_id)
1454 webpage = urllib2.urlopen(request).read()
1455 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
147753eb 1456 self._downloader.trouble(u'ERROR: unable to retrieve video webpage: %s' % str(err))
6f21f686 1457 return
020f7150
RG
1458
1459 # Extract URL, uploader and title from webpage
1460 self.report_extraction(video_id)
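 # Two page layouts seem to exist (an assumption based on the regexes below): older pages expose
 # the file directly in an "&mediaURL=..." fragment, optionally guarded by a gdaKey that has to be
 # appended as a __gda__ query parameter, while newer pages pack a "mediaData" blob into the
 # player's flashvars; both paths end up producing video_url.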
18963a36 1461 mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
c6c555cf
RG
1462 if mobj is not None:
1463 mediaURL = urllib.unquote(mobj.group(1))
6b57e8c5 1464 video_extension = mediaURL[-3:]
d3975459 1465
c6c555cf
RG
1466 # Extract gdaKey if available
1467 mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage)
1468 if mobj is None:
1469 video_url = mediaURL
1470 else:
1471 gdaKey = mobj.group(1)
1472 video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)
109626fc 1473 else:
c6c555cf
RG
1474 mobj = re.search(r' name="flashvars" value="(.*?)"', webpage)
1475 if mobj is None:
1476 self._downloader.trouble(u'ERROR: unable to extract media URL')
1477 return
1478 vardict = parse_qs(mobj.group(1))
1479 if 'mediaData' not in vardict:
1480 self._downloader.trouble(u'ERROR: unable to extract media URL')
1481 return
1482 mobj = re.search(r'"mediaURL":"(http.*?)","key":"(.*?)"', vardict['mediaData'][0])
1483 if mobj is None:
1484 self._downloader.trouble(u'ERROR: unable to extract media URL')
1485 return
6b57e8c5
RG
1486 mediaURL = mobj.group(1).replace('\\/', '/')
1487 video_extension = mediaURL[-3:]
1488 video_url = '%s?__gda__=%s' % (mediaURL, mobj.group(2))
020f7150 1489
2546e767 1490 mobj = re.search(r'(?im)<title>(.*) - Video</title>', webpage)
020f7150 1491 if mobj is None:
147753eb 1492 self._downloader.trouble(u'ERROR: unable to extract title')
6f21f686 1493 return
020f7150 1494 video_title = mobj.group(1).decode('utf-8')
490fd7ae 1495 video_title = sanitize_title(video_title)
020f7150 1496
29f07568 1497 mobj = re.search(r'(?ms)By:\s*<a .*?>(.+?)<', webpage)
020f7150 1498 if mobj is None:
147753eb 1499 self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
6f21f686 1500 return
dbccb6cd 1501 video_uploader = mobj.group(1)
020f7150 1502
42bcd27d 1503 try:
1504 # Process video information
1505 self._downloader.process_info({
1506 'id': video_id.decode('utf-8'),
1507 'url': video_url.decode('utf-8'),
1508 'uploader': video_uploader.decode('utf-8'),
138b11f3 1509 'upload_date': u'NA',
42bcd27d 1510 'title': video_title,
1511 'stitle': simple_title,
1512 'ext': video_extension.decode('utf-8'),
6ba562b0 1513 'format': u'NA',
e616ec0c 1514 'player_url': None,
42bcd27d 1515 })
73f4e7af 1516 except UnavailableVideoError:
09cc744c 1517 self._downloader.trouble(u'\nERROR: unable to download video')
020f7150 1518
25af2bce 1519
4135fa45
WB
1520class DailymotionIE(InfoExtractor):
1521 """Information Extractor for Dailymotion"""
1522
1523 _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^_/]+)_([^/]+)'
f3098c4d 1524 IE_NAME = u'dailymotion'
4135fa45
WB
1525
1526 def __init__(self, downloader=None):
1527 InfoExtractor.__init__(self, downloader)
1528
4135fa45
WB
1529 def report_download_webpage(self, video_id):
1530 """Report webpage download."""
331ce0a0 1531 self._downloader.to_screen(u'[dailymotion] %s: Downloading webpage' % video_id)
d3975459 1532
4135fa45
WB
1533 def report_extraction(self, video_id):
1534 """Report information extraction."""
331ce0a0 1535 self._downloader.to_screen(u'[dailymotion] %s: Extracting information' % video_id)
4135fa45
WB
1536
1537 def _real_initialize(self):
1538 return
1539
4135fa45
WB
1540 def _real_extract(self, url):
1541 # Extract id and simplified title from URL
1542 mobj = re.match(self._VALID_URL, url)
1543 if mobj is None:
1544 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
1545 return
1546
df372a65 1547 # At this point we have a new video
9bf7fa52 1548 self._downloader.increment_downloads()
4135fa45
WB
1549 video_id = mobj.group(1)
1550
1551 simple_title = mobj.group(2).decode('utf-8')
1552 video_extension = 'flv'
1553
1554 # Retrieve video webpage to extract further information
1555 request = urllib2.Request(url)
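 # Sending family_filter=off presumably disables Dailymotion's family filter so that
 # age-restricted pages still render the player data parsed below.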
62a29bbf 1556 request.add_header('Cookie', 'family_filter=off')
4135fa45
WB
1557 try:
1558 self.report_download_webpage(video_id)
1559 webpage = urllib2.urlopen(request).read()
1560 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 1561 self._downloader.trouble(u'ERROR: unable to retrieve video webpage: %s' % str(err))
1562 return
1563
1564 # Extract URL, uploader and title from webpage
1565 self.report_extraction(video_id)
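 # The player configuration arrives URL-encoded in the "sequence" flashvar; once unquoted it is a
 # JSON-like blob, roughly of the form (illustrative only):
 #   {..., "sdURL":"http:\/\/www.dailymotion.com\/cdn\/....flv", ...}
 # from which the standard-definition "sdURL" value is pulled below.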
62a29bbf 1566 mobj = re.search(r'(?i)addVariable\(\"sequence\"\s*,\s*\"([^\"]+?)\"\)', webpage)
4135fa45
WB
1567 if mobj is None:
1568 self._downloader.trouble(u'ERROR: unable to extract media URL')
1569 return
62a29bbf 1570 sequence = urllib.unquote(mobj.group(1))
1571 mobj = re.search(r',\"sdURL\"\:\"([^\"]+?)\",', sequence)
1572 if mobj is None:
1573 self._downloader.trouble(u'ERROR: unable to extract media URL')
1574 return
1575 mediaURL = urllib.unquote(mobj.group(1)).replace('\\', '')
4135fa45
WB
1576
 1577 # TODO: prepend http://www.dailymotion.com/ if mediaURL turns out to be a relative URL
1578
1579 video_url = mediaURL
1580
62a29bbf 1581 mobj = re.search(r'(?im)<title>Dailymotion\s*-\s*(.+)\s*-\s*[^<]+?</title>', webpage)
4135fa45
WB
1582 if mobj is None:
1583 self._downloader.trouble(u'ERROR: unable to extract title')
1584 return
1585 video_title = mobj.group(1).decode('utf-8')
1586 video_title = sanitize_title(video_title)
1587
62a29bbf 1588 mobj = re.search(r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a></span>', webpage)
4135fa45
WB
1589 if mobj is None:
1590 self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
1591 return
1592 video_uploader = mobj.group(1)
1593
1594 try:
1595 # Process video information
1596 self._downloader.process_info({
1597 'id': video_id.decode('utf-8'),
1598 'url': video_url.decode('utf-8'),
1599 'uploader': video_uploader.decode('utf-8'),
138b11f3 1600 'upload_date': u'NA',
4135fa45
WB
1601 'title': video_title,
1602 'stitle': simple_title,
1603 'ext': video_extension.decode('utf-8'),
1604 'format': u'NA',
1605 'player_url': None,
1606 })
73f4e7af 1607 except UnavailableVideoError:
09cc744c 1608 self._downloader.trouble(u'\nERROR: unable to download video')
4135fa45 1609
c0a10ca8 1610
49c0028a 1611class GoogleIE(InfoExtractor):
1612 """Information extractor for video.google.com."""
1613
490fd7ae 1614 _VALID_URL = r'(?:http://)?video\.google\.(?:com(?:\.au)?|co\.(?:uk|jp|kr|cr)|ca|de|es|fr|it|nl|pl)/videoplay\?docid=([^\&]+).*'
f3098c4d 1615 IE_NAME = u'video.google'
49c0028a 1616
1617 def __init__(self, downloader=None):
1618 InfoExtractor.__init__(self, downloader)
1619
49c0028a 1620 def report_download_webpage(self, video_id):
1621 """Report webpage download."""
331ce0a0 1622 self._downloader.to_screen(u'[video.google] %s: Downloading webpage' % video_id)
49c0028a 1623
1624 def report_extraction(self, video_id):
1625 """Report information extraction."""
331ce0a0 1626 self._downloader.to_screen(u'[video.google] %s: Extracting information' % video_id)
49c0028a 1627
1628 def _real_initialize(self):
1629 return
1630
1631 def _real_extract(self, url):
1632 # Extract id from URL
1633 mobj = re.match(self._VALID_URL, url)
1634 if mobj is None:
1635 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1636 return
1637
df372a65 1638 # At this point we have a new video
9bf7fa52 1639 self._downloader.increment_downloads()
49c0028a 1640 video_id = mobj.group(1)
1641
1642 video_extension = 'mp4'
1643
1644 # Retrieve video webpage to extract further information
490fd7ae 1645 request = urllib2.Request('http://video.google.com/videoplay?docid=%s&hl=en&oe=utf-8' % video_id)
49c0028a 1646 try:
1647 self.report_download_webpage(video_id)
1648 webpage = urllib2.urlopen(request).read()
1649 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1650 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1651 return
1652
1653 # Extract URL, uploader, and title from webpage
1654 self.report_extraction(video_id)
490fd7ae
RG
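 # Prefer the direct MP4 link exposed as download_url:'...'; if it is absent, fall back to the
 # Flash player's videoUrl, which is embedded with \x3d / \x26 escapes that are undone below.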
1655 mobj = re.search(r"download_url:'([^']+)'", webpage)
1656 if mobj is None:
1657 video_extension = 'flv'
1658 mobj = re.search(r"(?i)videoUrl\\x3d(.+?)\\x26", webpage)
49c0028a 1659 if mobj is None:
1660 self._downloader.trouble(u'ERROR: unable to extract media URL')
1661 return
1662 mediaURL = urllib.unquote(mobj.group(1))
1663 mediaURL = mediaURL.replace('\\x3d', '\x3d')
1664 mediaURL = mediaURL.replace('\\x26', '\x26')
1665
1666 video_url = mediaURL
1667
1668 mobj = re.search(r'<title>(.*)</title>', webpage)
1669 if mobj is None:
1670 self._downloader.trouble(u'ERROR: unable to extract title')
1671 return
1672 video_title = mobj.group(1).decode('utf-8')
490fd7ae 1673 video_title = sanitize_title(video_title)
31cbdaaf 1674 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
49c0028a 1675
7e58d568
RG
1676 # Extract video description
1677 mobj = re.search(r'<span id=short-desc-content>([^<]*)</span>', webpage)
1678 if mobj is None:
1679 self._downloader.trouble(u'ERROR: unable to extract video description')
1680 return
1681 video_description = mobj.group(1).decode('utf-8')
1682 if not video_description:
1683 video_description = 'No description available.'
1684
1685 # Extract video thumbnail
1686 if self._downloader.params.get('forcethumbnail', False):
1687 request = urllib2.Request('http://video.google.com/videosearch?q=%s+site:video.google.com&hl=en' % abs(int(video_id)))
1688 try:
1689 webpage = urllib2.urlopen(request).read()
1690 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1691 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1692 return
1693 mobj = re.search(r'<img class=thumbnail-img (?:.* )?src=(http.*)>', webpage)
1694 if mobj is None:
1695 self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
1696 return
1697 video_thumbnail = mobj.group(1)
1698 else: # we need something to pass to process_info
1699 video_thumbnail = ''
1700
49c0028a 1701 try:
1702 # Process video information
1703 self._downloader.process_info({
1704 'id': video_id.decode('utf-8'),
1705 'url': video_url.decode('utf-8'),
6ba562b0 1706 'uploader': u'NA',
138b11f3 1707 'upload_date': u'NA',
490fd7ae 1708 'title': video_title,
31cbdaaf 1709 'stitle': simple_title,
49c0028a 1710 'ext': video_extension.decode('utf-8'),
6ba562b0 1711 'format': u'NA',
e616ec0c 1712 'player_url': None,
49c0028a 1713 })
73f4e7af 1714 except UnavailableVideoError:
09cc744c 1715 self._downloader.trouble(u'\nERROR: unable to download video')
49c0028a 1716
1717
1718class PhotobucketIE(InfoExtractor):
1719 """Information extractor for photobucket.com."""
1720
1721 _VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)'
f3098c4d 1722 IE_NAME = u'photobucket'
49c0028a 1723
1724 def __init__(self, downloader=None):
1725 InfoExtractor.__init__(self, downloader)
1726
49c0028a 1727 def report_download_webpage(self, video_id):
1728 """Report webpage download."""
331ce0a0 1729 self._downloader.to_screen(u'[photobucket] %s: Downloading webpage' % video_id)
49c0028a 1730
1731 def report_extraction(self, video_id):
1732 """Report information extraction."""
331ce0a0 1733 self._downloader.to_screen(u'[photobucket] %s: Extracting information' % video_id)
49c0028a 1734
1735 def _real_initialize(self):
1736 return
1737
1738 def _real_extract(self, url):
1739 # Extract id from URL
1740 mobj = re.match(self._VALID_URL, url)
1741 if mobj is None:
1742 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1743 return
1744
df372a65 1745 # At this point we have a new video
9bf7fa52 1746 self._downloader.increment_downloads()
49c0028a 1747 video_id = mobj.group(1)
1748
1749 video_extension = 'flv'
1750
1751 # Retrieve video webpage to extract further information
1752 request = urllib2.Request(url)
1753 try:
1754 self.report_download_webpage(video_id)
1755 webpage = urllib2.urlopen(request).read()
1756 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1757 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1758 return
1759
1760 # Extract URL, uploader, and title from webpage
1761 self.report_extraction(video_id)
1762 mobj = re.search(r'<link rel="video_src" href=".*\?file=([^"]+)" />', webpage)
1763 if mobj is None:
1764 self._downloader.trouble(u'ERROR: unable to extract media URL')
1765 return
1766 mediaURL = urllib.unquote(mobj.group(1))
1767
1768 video_url = mediaURL
1769
1770 mobj = re.search(r'<title>(.*) video by (.*) - Photobucket</title>', webpage)
1771 if mobj is None:
1772 self._downloader.trouble(u'ERROR: unable to extract title')
1773 return
1774 video_title = mobj.group(1).decode('utf-8')
490fd7ae 1775 video_title = sanitize_title(video_title)
31cbdaaf 1776 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
49c0028a 1777
1778 video_uploader = mobj.group(2).decode('utf-8')
1779
1780 try:
1781 # Process video information
1782 self._downloader.process_info({
1783 'id': video_id.decode('utf-8'),
1784 'url': video_url.decode('utf-8'),
490fd7ae 1785 'uploader': video_uploader,
138b11f3 1786 'upload_date': u'NA',
490fd7ae 1787 'title': video_title,
31cbdaaf 1788 'stitle': simple_title,
490fd7ae 1789 'ext': video_extension.decode('utf-8'),
6ba562b0 1790 'format': u'NA',
e616ec0c 1791 'player_url': None,
490fd7ae 1792 })
73f4e7af 1793 except UnavailableVideoError:
09cc744c 1794 self._downloader.trouble(u'\nERROR: unable to download video')
490fd7ae
RG
1795
1796
61945318
RG
1797class YahooIE(InfoExtractor):
1798 """Information extractor for video.yahoo.com."""
1799
1800 # _VALID_URL matches all Yahoo! Video URLs
1801 # _VPAGE_URL matches only the extractable '/watch/' URLs
1802 _VALID_URL = r'(?:http://)?(?:[a-z]+\.)?video\.yahoo\.com/(?:watch|network)/([0-9]+)(?:/|\?v=)([0-9]+)(?:[#\?].*)?'
1803 _VPAGE_URL = r'(?:http://)?video\.yahoo\.com/watch/([0-9]+)/([0-9]+)(?:[#\?].*)?'
f3098c4d 1804 IE_NAME = u'video.yahoo'
61945318
RG
1805
1806 def __init__(self, downloader=None):
1807 InfoExtractor.__init__(self, downloader)
1808
61945318
RG
1809 def report_download_webpage(self, video_id):
1810 """Report webpage download."""
331ce0a0 1811 self._downloader.to_screen(u'[video.yahoo] %s: Downloading webpage' % video_id)
61945318
RG
1812
1813 def report_extraction(self, video_id):
1814 """Report information extraction."""
331ce0a0 1815 self._downloader.to_screen(u'[video.yahoo] %s: Extracting information' % video_id)
61945318
RG
1816
1817 def _real_initialize(self):
1818 return
1819
df372a65 1820 def _real_extract(self, url, new_video=True):
61945318
RG
1821 # Extract ID from URL
1822 mobj = re.match(self._VALID_URL, url)
1823 if mobj is None:
1824 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1825 return
1826
df372a65 1827 # At this point we have a new video
9bf7fa52 1828 self._downloader.increment_downloads()
61945318
RG
1829 video_id = mobj.group(2)
1830 video_extension = 'flv'
1831
1832 # Rewrite valid but non-extractable URLs as
1833 # extractable English language /watch/ URLs
1834 if re.match(self._VPAGE_URL, url) is None:
1835 request = urllib2.Request(url)
1836 try:
1837 webpage = urllib2.urlopen(request).read()
1838 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1839 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1840 return
1841
1842 mobj = re.search(r'\("id", "([0-9]+)"\);', webpage)
1843 if mobj is None:
1844 self._downloader.trouble(u'ERROR: Unable to extract id field')
1845 return
1846 yahoo_id = mobj.group(1)
1847
1848 mobj = re.search(r'\("vid", "([0-9]+)"\);', webpage)
1849 if mobj is None:
1850 self._downloader.trouble(u'ERROR: Unable to extract vid field')
1851 return
1852 yahoo_vid = mobj.group(1)
1853
1854 url = 'http://video.yahoo.com/watch/%s/%s' % (yahoo_vid, yahoo_id)
df372a65 1855 return self._real_extract(url, new_video=False)
61945318
RG
1856
1857 # Retrieve video webpage to extract further information
1858 request = urllib2.Request(url)
1859 try:
1860 self.report_download_webpage(video_id)
1861 webpage = urllib2.urlopen(request).read()
1862 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1863 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1864 return
1865
1866 # Extract uploader and title from webpage
1867 self.report_extraction(video_id)
1868 mobj = re.search(r'<meta name="title" content="(.*)" />', webpage)
1869 if mobj is None:
1870 self._downloader.trouble(u'ERROR: unable to extract video title')
1871 return
1872 video_title = mobj.group(1).decode('utf-8')
1873 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1874
1875 mobj = re.search(r'<h2 class="ti-5"><a href="http://video\.yahoo\.com/(people|profile)/[0-9]+" beacon=".*">(.*)</a></h2>', webpage)
1876 if mobj is None:
1877 self._downloader.trouble(u'ERROR: unable to extract video uploader')
1878 return
 1879 video_uploader = mobj.group(2).decode('utf-8') # group(1) only captures 'people' or 'profile'; the nickname is group(2)
1880
7e58d568
RG
1881 # Extract video thumbnail
1882 mobj = re.search(r'<link rel="image_src" href="(.*)" />', webpage)
1883 if mobj is None:
1884 self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
1885 return
1886 video_thumbnail = mobj.group(1).decode('utf-8')
1887
1888 # Extract video description
1889 mobj = re.search(r'<meta name="description" content="(.*)" />', webpage)
1890 if mobj is None:
1891 self._downloader.trouble(u'ERROR: unable to extract video description')
1892 return
1893 video_description = mobj.group(1).decode('utf-8')
c0a10ca8
F
1894 if not video_description:
1895 video_description = 'No description available.'
7e58d568 1896
61945318
RG
1897 # Extract video height and width
1898 mobj = re.search(r'<meta name="video_height" content="([0-9]+)" />', webpage)
1899 if mobj is None:
1900 self._downloader.trouble(u'ERROR: unable to extract video height')
1901 return
1902 yv_video_height = mobj.group(1)
1903
1904 mobj = re.search(r'<meta name="video_width" content="([0-9]+)" />', webpage)
1905 if mobj is None:
1906 self._downloader.trouble(u'ERROR: unable to extract video width')
1907 return
1908 yv_video_width = mobj.group(1)
1909
1910 # Retrieve video playlist to extract media URL
1911 # I'm not completely sure what all these options are, but we
1912 # seem to need most of them, otherwise the server sends a 401.
1913 yv_lg = 'R0xx6idZnW2zlrKP8xxAIR' # not sure what this represents
1914 yv_bitrate = '700' # according to Wikipedia this is hard-coded
1915 request = urllib2.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id +
c0a10ca8
F
1916 '&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height +
1917 '&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797')
61945318
RG
1918 try:
1919 self.report_download_webpage(video_id)
1920 webpage = urllib2.urlopen(request).read()
1921 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1922 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1923 return
1924
1925 # Extract media URL from playlist XML
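 # The playlist XML contains something like (illustrative only):
 #   <STREAM APP="http://playlist.yahoo.com/" FULLPATH="/path/video.flv?StreamID=...">
 # and the final URL is APP concatenated with FULLPATH, with HTML entities decoded afterwards.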
1926 mobj = re.search(r'<STREAM APP="(http://.*)" FULLPATH="/?(/.*\.flv\?[^"]*)"', webpage)
1927 if mobj is None:
1928 self._downloader.trouble(u'ERROR: Unable to extract media URL')
1929 return
1930 video_url = urllib.unquote(mobj.group(1) + mobj.group(2)).decode('utf-8')
1931 video_url = re.sub(r'(?u)&(.+?);', htmlentity_transform, video_url)
1932
1933 try:
1934 # Process video information
1935 self._downloader.process_info({
1936 'id': video_id.decode('utf-8'),
1937 'url': video_url,
1938 'uploader': video_uploader,
138b11f3 1939 'upload_date': u'NA',
61945318
RG
1940 'title': video_title,
1941 'stitle': simple_title,
1942 'ext': video_extension.decode('utf-8'),
7e58d568
RG
 1943 'thumbnail': video_thumbnail.decode('utf-8'),
 1944 'description': video_description,
e616ec0c 1946 'player_url': None,
61945318 1947 })
73f4e7af 1948 except UnavailableVideoError:
09cc744c 1949 self._downloader.trouble(u'\nERROR: unable to download video')
61945318
RG
1950
1951
92743d42
RB
1952class VimeoIE(InfoExtractor):
1953 """Information extractor for vimeo.com."""
1954
1955 # _VALID_URL matches Vimeo URLs
44c636df 1956 _VALID_URL = r'(?:https?://)?(?:(?:www|player).)?vimeo\.com/(?:groups/[^/]+/)?(?:videos?/)?([0-9]+)'
f3098c4d 1957 IE_NAME = u'vimeo'
92743d42
RB
1958
1959 def __init__(self, downloader=None):
1960 InfoExtractor.__init__(self, downloader)
1961
92743d42
RB
1962 def report_download_webpage(self, video_id):
1963 """Report webpage download."""
0ecedbdb 1964 self._downloader.to_screen(u'[vimeo] %s: Downloading webpage' % video_id)
92743d42
RB
1965
1966 def report_extraction(self, video_id):
1967 """Report information extraction."""
0ecedbdb 1968 self._downloader.to_screen(u'[vimeo] %s: Extracting information' % video_id)
92743d42
RB
1969
1970 def _real_initialize(self):
1971 return
1972
1973 def _real_extract(self, url, new_video=True):
1974 # Extract ID from URL
1975 mobj = re.match(self._VALID_URL, url)
1976 if mobj is None:
1977 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1978 return
1979
1980 # At this point we have a new video
1981 self._downloader.increment_downloads()
1982 video_id = mobj.group(1)
92743d42
RB
1983
1984 # Retrieve video webpage to extract further information
1985 request = urllib2.Request("http://vimeo.com/moogaloop/load/clip:%s" % video_id, None, std_headers)
1986 try:
1987 self.report_download_webpage(video_id)
1988 webpage = urllib2.urlopen(request).read()
1989 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1990 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1991 return
1992
f24c674b
RB
1993 # Now we begin extracting as much information as we can from what we
1994 # retrieved. First we extract the information common to all extractors,
 1995 # and later we extract those that are Vimeo-specific.
92743d42 1996 self.report_extraction(video_id)
f24c674b
RB
1997
1998 # Extract title
c5a088d3 1999 mobj = re.search(r'<caption>(.*?)</caption>', webpage)
92743d42
RB
2000 if mobj is None:
2001 self._downloader.trouble(u'ERROR: unable to extract video title')
2002 return
2003 video_title = mobj.group(1).decode('utf-8')
2004 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
2005
f24c674b 2006 # Extract uploader
c5a088d3 2007 mobj = re.search(r'<uploader_url>http://vimeo.com/(.*?)</uploader_url>', webpage)
92743d42
RB
2008 if mobj is None:
2009 self._downloader.trouble(u'ERROR: unable to extract video uploader')
2010 return
2011 video_uploader = mobj.group(1).decode('utf-8')
2012
2013 # Extract video thumbnail
c5a088d3 2014 mobj = re.search(r'<thumbnail>(.*?)</thumbnail>', webpage)
92743d42
RB
2015 if mobj is None:
2016 self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
2017 return
2018 video_thumbnail = mobj.group(1).decode('utf-8')
2019
2020 # # Extract video description
2021 # mobj = re.search(r'<meta property="og:description" content="(.*)" />', webpage)
2022 # if mobj is None:
2023 # self._downloader.trouble(u'ERROR: unable to extract video description')
2024 # return
2025 # video_description = mobj.group(1).decode('utf-8')
2026 # if not video_description: video_description = 'No description available.'
 2027 video_description = 'No description available.' # placeholder until the extraction above is re-enabled
2028
f24c674b 2029 # Vimeo specific: extract request signature
c5a088d3 2030 mobj = re.search(r'<request_signature>(.*?)</request_signature>', webpage)
92743d42
RB
2031 if mobj is None:
2032 self._downloader.trouble(u'ERROR: unable to extract request signature')
2033 return
2034 sig = mobj.group(1).decode('utf-8')
2035
f24c674b 2036 # Vimeo specific: Extract request signature expiration
c5a088d3 2037 mobj = re.search(r'<request_signature_expires>(.*?)</request_signature_expires>', webpage)
92743d42
RB
2038 if mobj is None:
2039 self._downloader.trouble(u'ERROR: unable to extract request signature expiration')
2040 return
2041 sig_exp = mobj.group(1).decode('utf-8')
2042
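 # The play URL is assembled from the clip id plus the request signature and its expiry taken
 # from the moogaloop XML above; the server presumably validates that pair before serving the file.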
2043 video_url = "http://vimeo.com/moogaloop/play/clip:%s/%s/%s" % (video_id, sig, sig_exp)
2044
2045 try:
2046 # Process video information
2047 self._downloader.process_info({
2048 'id': video_id.decode('utf-8'),
2049 'url': video_url,
2050 'uploader': video_uploader,
2051 'upload_date': u'NA',
2052 'title': video_title,
2053 'stitle': simple_title,
2fc31a48 2054 'ext': u'mp4',
92743d42
RB
 2055 'thumbnail': video_thumbnail.decode('utf-8'),
 2056 'description': video_description,
2059 'player_url': None,
2060 })
2061 except UnavailableVideoError:
2062 self._downloader.trouble(u'ERROR: unable to download video')
2063
2064
490fd7ae
RG
2065class GenericIE(InfoExtractor):
2066 """Generic last-resort information extractor."""
2067
f3098c4d
PH
2068 _VALID_URL = r'.*'
2069 IE_NAME = u'generic'
bdb3f7a7 2070
490fd7ae
RG
2071 def __init__(self, downloader=None):
2072 InfoExtractor.__init__(self, downloader)
2073
490fd7ae
RG
2074 def report_download_webpage(self, video_id):
2075 """Report webpage download."""
331ce0a0
RG
2076 self._downloader.to_screen(u'WARNING: Falling back on generic information extractor.')
2077 self._downloader.to_screen(u'[generic] %s: Downloading webpage' % video_id)
490fd7ae
RG
2078
2079 def report_extraction(self, video_id):
2080 """Report information extraction."""
331ce0a0 2081 self._downloader.to_screen(u'[generic] %s: Extracting information' % video_id)
490fd7ae
RG
2082
2083 def _real_initialize(self):
2084 return
2085
2086 def _real_extract(self, url):
df372a65 2087 # At this point we have a new video
9bf7fa52 2088 self._downloader.increment_downloads()
df372a65 2089
490fd7ae
RG
2090 video_id = url.split('/')[-1]
2091 request = urllib2.Request(url)
2092 try:
2093 self.report_download_webpage(video_id)
2094 webpage = urllib2.urlopen(request).read()
2095 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2096 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
2097 return
2098 except ValueError, err:
2099 # since this is the last-resort InfoExtractor, if
2100 # this error is thrown, it'll be thrown here
2101 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
2102 return
2103
a9806fd8 2104 self.report_extraction(video_id)
490fd7ae
RG
2105 # Start with something easy: JW Player in SWFObject
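 # e.g. (illustrative only) a SWFObject embed such as
 #   flashvars: 'file=http%3A%2F%2Fexample.com%2Fvideos%2Fclip.mp4&autostart=true'
 # where the file parameter (URL-decoded below) points at the actual media.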
2106 mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
2107 if mobj is None:
2108 # Broaden the search a little bit
2109 mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
2110 if mobj is None:
2111 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
2112 return
2113
2114 # It's possible that one of the regexes
2115 # matched, but returned an empty group:
2116 if mobj.group(1) is None:
2117 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
2118 return
2119
2120 video_url = urllib.unquote(mobj.group(1))
c0a10ca8 2121 video_id = os.path.basename(video_url)
490fd7ae
RG
2122
2123 # here's a fun little line of code for you:
2124 video_extension = os.path.splitext(video_id)[1][1:]
c0a10ca8 2125 video_id = os.path.splitext(video_id)[0]
490fd7ae
RG
2126
2127 # it's tempting to parse this further, but you would
2128 # have to take into account all the variations like
2129 # Video Title - Site Name
2130 # Site Name | Video Title
2131 # Video Title - Tagline | Site Name
2132 # and so on and so forth; it's just not practical
2133 mobj = re.search(r'<title>(.*)</title>', webpage)
2134 if mobj is None:
2135 self._downloader.trouble(u'ERROR: unable to extract title')
2136 return
2137 video_title = mobj.group(1).decode('utf-8')
2138 video_title = sanitize_title(video_title)
31cbdaaf 2139 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
490fd7ae
RG
2140
2141 # video uploader is domain name
2142 mobj = re.match(r'(?:https?://)?([^/]*)/.*', url)
2143 if mobj is None:
 2144 self._downloader.trouble(u'ERROR: unable to extract uploader (domain name)')
2145 return
2146 video_uploader = mobj.group(1).decode('utf-8')
2147
2148 try:
2149 # Process video information
2150 self._downloader.process_info({
2151 'id': video_id.decode('utf-8'),
2152 'url': video_url.decode('utf-8'),
2153 'uploader': video_uploader,
138b11f3 2154 'upload_date': u'NA',
490fd7ae 2155 'title': video_title,
31cbdaaf 2156 'stitle': simple_title,
49c0028a 2157 'ext': video_extension.decode('utf-8'),
6ba562b0 2158 'format': u'NA',
e616ec0c 2159 'player_url': None,
49c0028a 2160 })
73f4e7af 2161 except UnavailableVideoError, err:
09cc744c 2162 self._downloader.trouble(u'\nERROR: unable to download video')
49c0028a 2163
2164
25af2bce
RG
2165class YoutubeSearchIE(InfoExtractor):
2166 """Information Extractor for YouTube search queries."""
bdb3f7a7 2167 _VALID_URL = r'ytsearch(\d+|all)?:[\s\S]+'
25af2bce
RG
2168 _TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en'
2169 _VIDEO_INDICATOR = r'href="/watch\?v=.+?"'
304a4d85 2170 _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
25af2bce 2171 _youtube_ie = None
fd9288c3 2172 _max_youtube_results = 1000
f3098c4d 2173 IE_NAME = u'youtube:search'
25af2bce 2174
f995f712 2175 def __init__(self, youtube_ie, downloader=None):
25af2bce
RG
2176 InfoExtractor.__init__(self, downloader)
2177 self._youtube_ie = youtube_ie
d3975459 2178
25af2bce
RG
2179 def report_download_page(self, query, pagenum):
2180 """Report attempt to download playlist page with given number."""
490fd7ae 2181 query = query.decode(preferredencoding())
331ce0a0 2182 self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
25af2bce
RG
2183
2184 def _real_initialize(self):
2185 self._youtube_ie.initialize()
d3975459 2186
25af2bce 2187 def _real_extract(self, query):
bdb3f7a7 2188 mobj = re.match(self._VALID_URL, query)
25af2bce 2189 if mobj is None:
147753eb 2190 self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
6f21f686 2191 return
25af2bce
RG
2192
2193 prefix, query = query.split(':')
2194 prefix = prefix[8:]
c0a10ca8 2195 query = query.encode('utf-8')
f995f712 2196 if prefix == '':
6f21f686
RG
2197 self._download_n_results(query, 1)
2198 return
f995f712 2199 elif prefix == 'all':
6f21f686
RG
2200 self._download_n_results(query, self._max_youtube_results)
2201 return
f995f712 2202 else:
25af2bce 2203 try:
e1f18b8a 2204 n = long(prefix)
25af2bce 2205 if n <= 0:
147753eb 2206 self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
6f21f686 2207 return
257453b9 2208 elif n > self._max_youtube_results:
c0a10ca8 2209 self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)' % (self._max_youtube_results, n))
257453b9 2210 n = self._max_youtube_results
6f21f686
RG
2211 self._download_n_results(query, n)
2212 return
e1f18b8a 2213 except ValueError: # parsing prefix as integer fails
6f21f686
RG
2214 self._download_n_results(query, 1)
2215 return
25af2bce
RG
2216
2217 def _download_n_results(self, query, n):
2218 """Downloads a specified number of results for a query"""
2219
2220 video_ids = []
2221 already_seen = set()
2222 pagenum = 1
2223
2224 while True:
2225 self.report_download_page(query, pagenum)
a9633f14 2226 result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1987c232 2227 request = urllib2.Request(result_url)
25af2bce
RG
2228 try:
2229 page = urllib2.urlopen(request).read()
2230 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
147753eb 2231 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
6f21f686 2232 return
25af2bce
RG
2233
2234 # Extract video identifiers
2235 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
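 # The matched fragment looks like href="/watch?v=VIDEO_ID", so take the piece after the
 # second '=' and strip the trailing quote to obtain the id.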
2236 video_id = page[mobj.span()[0]:mobj.span()[1]].split('=')[2][:-1]
2237 if video_id not in already_seen:
2238 video_ids.append(video_id)
2239 already_seen.add(video_id)
2240 if len(video_ids) == n:
2241 # Specified n videos reached
25af2bce 2242 for id in video_ids:
6f21f686
RG
2243 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
2244 return
25af2bce 2245
304a4d85 2246 if re.search(self._MORE_PAGES_INDICATOR, page) is None:
25af2bce 2247 for id in video_ids:
6f21f686
RG
2248 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
2249 return
25af2bce
RG
2250
2251 pagenum = pagenum + 1
2252
c0a10ca8 2253
7e58d568
RG
2254class GoogleSearchIE(InfoExtractor):
2255 """Information Extractor for Google Video search queries."""
bdb3f7a7 2256 _VALID_URL = r'gvsearch(\d+|all)?:[\s\S]+'
7e58d568
RG
2257 _TEMPLATE_URL = 'http://video.google.com/videosearch?q=%s+site:video.google.com&start=%s&hl=en'
2258 _VIDEO_INDICATOR = r'videoplay\?docid=([^\&>]+)\&'
2259 _MORE_PAGES_INDICATOR = r'<span>Next</span>'
2260 _google_ie = None
2261 _max_google_results = 1000
f3098c4d 2262 IE_NAME = u'video.google:search'
7e58d568
RG
2263
2264 def __init__(self, google_ie, downloader=None):
2265 InfoExtractor.__init__(self, downloader)
2266 self._google_ie = google_ie
d3975459 2267
7e58d568
RG
2268 def report_download_page(self, query, pagenum):
2269 """Report attempt to download playlist page with given number."""
2270 query = query.decode(preferredencoding())
331ce0a0 2271 self._downloader.to_screen(u'[video.google] query "%s": Downloading page %s' % (query, pagenum))
7e58d568
RG
2272
2273 def _real_initialize(self):
2274 self._google_ie.initialize()
d3975459 2275
7e58d568 2276 def _real_extract(self, query):
bdb3f7a7 2277 mobj = re.match(self._VALID_URL, query)
7e58d568
RG
2278 if mobj is None:
2279 self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
2280 return
2281
2282 prefix, query = query.split(':')
2283 prefix = prefix[8:]
c0a10ca8 2284 query = query.encode('utf-8')
7e58d568
RG
2285 if prefix == '':
2286 self._download_n_results(query, 1)
2287 return
2288 elif prefix == 'all':
2289 self._download_n_results(query, self._max_google_results)
2290 return
2291 else:
2292 try:
2293 n = long(prefix)
2294 if n <= 0:
2295 self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
2296 return
2297 elif n > self._max_google_results:
c0a10ca8 2298 self._downloader.to_stderr(u'WARNING: gvsearch returns max %i results (you requested %i)' % (self._max_google_results, n))
7e58d568
RG
2299 n = self._max_google_results
2300 self._download_n_results(query, n)
2301 return
2302 except ValueError: # parsing prefix as integer fails
2303 self._download_n_results(query, 1)
2304 return
2305
2306 def _download_n_results(self, query, n):
2307 """Downloads a specified number of results for a query"""
2308
2309 video_ids = []
2310 already_seen = set()
2311 pagenum = 1
2312
2313 while True:
2314 self.report_download_page(query, pagenum)
2315 result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1987c232 2316 request = urllib2.Request(result_url)
7e58d568
RG
2317 try:
2318 page = urllib2.urlopen(request).read()
2319 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2320 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2321 return
2322
2323 # Extract video identifiers
2324 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2325 video_id = mobj.group(1)
2326 if video_id not in already_seen:
2327 video_ids.append(video_id)
2328 already_seen.add(video_id)
2329 if len(video_ids) == n:
2330 # Specified n videos reached
2331 for id in video_ids:
2332 self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
2333 return
2334
2335 if re.search(self._MORE_PAGES_INDICATOR, page) is None:
2336 for id in video_ids:
2337 self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
2338 return
2339
2340 pagenum = pagenum + 1
2341
c0a10ca8 2342
7e58d568
RG
2343class YahooSearchIE(InfoExtractor):
2344 """Information Extractor for Yahoo! Video search queries."""
bdb3f7a7 2345 _VALID_URL = r'yvsearch(\d+|all)?:[\s\S]+'
7e58d568
RG
2346 _TEMPLATE_URL = 'http://video.yahoo.com/search/?p=%s&o=%s'
2347 _VIDEO_INDICATOR = r'href="http://video\.yahoo\.com/watch/([0-9]+/[0-9]+)"'
2348 _MORE_PAGES_INDICATOR = r'\s*Next'
2349 _yahoo_ie = None
2350 _max_yahoo_results = 1000
f3098c4d 2351 IE_NAME = u'video.yahoo:search'
7e58d568
RG
2352
2353 def __init__(self, yahoo_ie, downloader=None):
2354 InfoExtractor.__init__(self, downloader)
2355 self._yahoo_ie = yahoo_ie
d3975459 2356
7e58d568
RG
2357 def report_download_page(self, query, pagenum):
2358 """Report attempt to download playlist page with given number."""
2359 query = query.decode(preferredencoding())
331ce0a0 2360 self._downloader.to_screen(u'[video.yahoo] query "%s": Downloading page %s' % (query, pagenum))
7e58d568
RG
2361
2362 def _real_initialize(self):
2363 self._yahoo_ie.initialize()
d3975459 2364
7e58d568 2365 def _real_extract(self, query):
bdb3f7a7 2366 mobj = re.match(self._VALID_URL, query)
7e58d568
RG
2367 if mobj is None:
2368 self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
2369 return
2370
2371 prefix, query = query.split(':')
2372 prefix = prefix[8:]
c0a10ca8 2373 query = query.encode('utf-8')
7e58d568
RG
2374 if prefix == '':
2375 self._download_n_results(query, 1)
2376 return
2377 elif prefix == 'all':
2378 self._download_n_results(query, self._max_yahoo_results)
2379 return
2380 else:
2381 try:
2382 n = long(prefix)
2383 if n <= 0:
2384 self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
2385 return
2386 elif n > self._max_yahoo_results:
c0a10ca8 2387 self._downloader.to_stderr(u'WARNING: yvsearch returns max %i results (you requested %i)' % (self._max_yahoo_results, n))
7e58d568
RG
2388 n = self._max_yahoo_results
2389 self._download_n_results(query, n)
2390 return
2391 except ValueError: # parsing prefix as integer fails
2392 self._download_n_results(query, 1)
2393 return
2394
2395 def _download_n_results(self, query, n):
2396 """Downloads a specified number of results for a query"""
2397
2398 video_ids = []
2399 already_seen = set()
2400 pagenum = 1
2401
2402 while True:
2403 self.report_download_page(query, pagenum)
2404 result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1987c232 2405 request = urllib2.Request(result_url)
7e58d568
RG
2406 try:
2407 page = urllib2.urlopen(request).read()
2408 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2409 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2410 return
2411
2412 # Extract video identifiers
2413 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2414 video_id = mobj.group(1)
2415 if video_id not in already_seen:
2416 video_ids.append(video_id)
2417 already_seen.add(video_id)
2418 if len(video_ids) == n:
2419 # Specified n videos reached
2420 for id in video_ids:
2421 self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
2422 return
2423
2424 if re.search(self._MORE_PAGES_INDICATOR, page) is None:
2425 for id in video_ids:
2426 self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
2427 return
2428
2429 pagenum = pagenum + 1
2430
c0a10ca8 2431
0c2dc87d
RG
2432class YoutubePlaylistIE(InfoExtractor):
2433 """Information Extractor for YouTube playlists."""
2434
2152ee86 2435 _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists|artist|playlist)\?.*?(p|a|list)=|user/.*?/user/|p/|user/.*?#[pg]/c/)([0-9A-Za-z]+)(?:/.*?/([0-9A-Za-z_-]+))?.*'
f74e22ae 2436 _TEMPLATE_URL = 'http://www.youtube.com/%s?%s=%s&page=%s&gl=US&hl=en'
0c2dc87d 2437 _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
ce5cafea 2438 _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
0c2dc87d 2439 _youtube_ie = None
f3098c4d 2440 IE_NAME = u'youtube:playlist'
0c2dc87d
RG
2441
2442 def __init__(self, youtube_ie, downloader=None):
2443 InfoExtractor.__init__(self, downloader)
2444 self._youtube_ie = youtube_ie
d3975459 2445
0c2dc87d
RG
2446 def report_download_page(self, playlist_id, pagenum):
2447 """Report attempt to download playlist page with given number."""
331ce0a0 2448 self._downloader.to_screen(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum))
0c2dc87d
RG
2449
2450 def _real_initialize(self):
2451 self._youtube_ie.initialize()
d3975459 2452
0c2dc87d
RG
2453 def _real_extract(self, url):
2454 # Extract playlist id
2455 mobj = re.match(self._VALID_URL, url)
2456 if mobj is None:
147753eb 2457 self._downloader.trouble(u'ERROR: invalid url: %s' % url)
6f21f686 2458 return
0c2dc87d 2459
d119b54d
RG
2460 # Single video case
2461 if mobj.group(3) is not None:
2462 self._youtube_ie.extract(mobj.group(3))
2463 return
2464
0c2dc87d 2465 # Download playlist pages
f74e22ae
GI
2466 # prefix is 'p' as default for playlists but there are other types that need extra care
2467 playlist_prefix = mobj.group(1)
2468 if playlist_prefix == 'a':
2469 playlist_access = 'artist'
2470 else:
7cc3c6fd 2471 playlist_prefix = 'p'
f74e22ae
GI
2472 playlist_access = 'view_play_list'
2473 playlist_id = mobj.group(2)
0c2dc87d
RG
2474 video_ids = []
2475 pagenum = 1
2476
2477 while True:
2478 self.report_download_page(playlist_id, pagenum)
f74e22ae 2479 request = urllib2.Request(self._TEMPLATE_URL % (playlist_access, playlist_prefix, playlist_id, pagenum))
0c2dc87d
RG
2480 try:
2481 page = urllib2.urlopen(request).read()
2482 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
147753eb 2483 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
6f21f686 2484 return
0c2dc87d
RG
2485
2486 # Extract video identifiers
27d98b6e 2487 ids_in_page = []
0c2dc87d 2488 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
27d98b6e
RG
2489 if mobj.group(1) not in ids_in_page:
2490 ids_in_page.append(mobj.group(1))
2491 video_ids.extend(ids_in_page)
0c2dc87d 2492
ce5cafea 2493 if re.search(self._MORE_PAGES_INDICATOR, page) is None:
0c2dc87d
RG
2494 break
2495 pagenum = pagenum + 1
2496
8cc44341
RG
 2497 playliststart = self._downloader.params.get('playliststart', 1) - 1
 2498 playlistend = self._downloader.params.get('playlistend', -1)
 2499 if playlistend == -1: # -1 means "to the end"; a -1 slice bound would silently drop the last video
 video_ids = video_ids[playliststart:]
 else:
 video_ids = video_ids[playliststart:playlistend]
2500
0c2dc87d 2501 for id in video_ids:
6f21f686
RG
2502 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
2503 return
0c2dc87d 2504
c0a10ca8 2505
c39c05cd
A
2506class YoutubeUserIE(InfoExtractor):
2507 """Information Extractor for YouTube users."""
2508
5aba6ea4 2509 _VALID_URL = r'(?:(?:(?:http://)?(?:\w+\.)?youtube.com/user/)|ytuser:)([A-Za-z0-9_-]+)'
c39c05cd 2510 _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
5aba6ea4
RG
2511 _GDATA_PAGE_SIZE = 50
2512 _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d'
2513 _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
c39c05cd 2514 _youtube_ie = None
f3098c4d 2515 IE_NAME = u'youtube:user'
c39c05cd
A
2516
2517 def __init__(self, youtube_ie, downloader=None):
2518 InfoExtractor.__init__(self, downloader)
2519 self._youtube_ie = youtube_ie
d3975459 2520
5aba6ea4 2521 def report_download_page(self, username, start_index):
c39c05cd 2522 """Report attempt to download user page."""
5aba6ea4 2523 self._downloader.to_screen(u'[youtube] user %s: Downloading video ids from %d to %d' %
c0a10ca8 2524 (username, start_index, start_index + self._GDATA_PAGE_SIZE))
c39c05cd
A
2525
2526 def _real_initialize(self):
2527 self._youtube_ie.initialize()
d3975459 2528
c39c05cd
A
2529 def _real_extract(self, url):
2530 # Extract username
2531 mobj = re.match(self._VALID_URL, url)
2532 if mobj is None:
2533 self._downloader.trouble(u'ERROR: invalid url: %s' % url)
2534 return
2535
c39c05cd 2536 username = mobj.group(1)
5aba6ea4
RG
2537
2538 # Download video ids using YouTube Data API. Result size per
2539 # query is limited (currently to 50 videos) so we need to query
 2540 # page by page until no more video ids come back, which means
 2541 # we have got all of them.
2542
c39c05cd 2543 video_ids = []
5aba6ea4 2544 pagenum = 0
c39c05cd 2545
5aba6ea4
RG
2546 while True:
2547 start_index = pagenum * self._GDATA_PAGE_SIZE + 1
2548 self.report_download_page(username, start_index)
c39c05cd 2549
5aba6ea4 2550 request = urllib2.Request(self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index))
c39c05cd 2551
5aba6ea4
RG
2552 try:
2553 page = urllib2.urlopen(request).read()
2554 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2555 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2556 return
c39c05cd 2557
5aba6ea4
RG
2558 # Extract video identifiers
2559 ids_in_page = []
2560
2561 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2562 if mobj.group(1) not in ids_in_page:
2563 ids_in_page.append(mobj.group(1))
2564
2565 video_ids.extend(ids_in_page)
2566
2567 # A little optimization - if current page is not
 2568 # "full", i.e. does not contain _GDATA_PAGE_SIZE video ids, then
2569 # we can assume that this page is the last one - there
2570 # are no more ids on further pages - no need to query
2571 # again.
2572
2573 if len(ids_in_page) < self._GDATA_PAGE_SIZE:
2574 break
2575
2576 pagenum += 1
2577
2578 all_ids_count = len(video_ids)
8cc44341
RG
2579 playliststart = self._downloader.params.get('playliststart', 1) - 1
2580 playlistend = self._downloader.params.get('playlistend', -1)
204c9398 2581
5aba6ea4
RG
2582 if playlistend == -1:
2583 video_ids = video_ids[playliststart:]
2584 else:
2585 video_ids = video_ids[playliststart:playlistend]
7a9054ec 2586
5aba6ea4 2587 self._downloader.to_screen("[youtube] user %s: Collected %d video ids (downloading %d of them)" %
c0a10ca8 2588 (username, all_ids_count, len(video_ids)))
5aba6ea4
RG
2589
2590 for video_id in video_ids:
2591 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % video_id)
2592
c39c05cd 2593
27179cfd
VV
2594class DepositFilesIE(InfoExtractor):
2595 """Information extractor for depositfiles.com"""
2596
2597 _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles.com/(?:../(?#locale))?files/(.+)'
f3098c4d 2598 IE_NAME = u'DepositFiles'
27179cfd
VV
2599
2600 def __init__(self, downloader=None):
2601 InfoExtractor.__init__(self, downloader)
2602
27179cfd
VV
2603 def report_download_webpage(self, file_id):
2604 """Report webpage download."""
2605 self._downloader.to_screen(u'[DepositFiles] %s: Downloading webpage' % file_id)
2606
2607 def report_extraction(self, file_id):
2608 """Report information extraction."""
2609 self._downloader.to_screen(u'[DepositFiles] %s: Extracting information' % file_id)
2610
2611 def _real_initialize(self):
2612 return
2613
2614 def _real_extract(self, url):
2615 # At this point we have a new file
2616 self._downloader.increment_downloads()
2617
2618 file_id = url.split('/')[-1]
2619 # Rebuild url in english locale
2620 url = 'http://depositfiles.com/en/files/' + file_id
2621
2622 # Retrieve file webpage with 'Free download' button pressed
2623 free_download_indication = { 'gateway_result' : '1' }
1987c232 2624 request = urllib2.Request(url, urllib.urlencode(free_download_indication))
27179cfd
VV
2625 try:
2626 self.report_download_webpage(file_id)
2627 webpage = urllib2.urlopen(request).read()
2628 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2629 self._downloader.trouble(u'ERROR: Unable to retrieve file webpage: %s' % str(err))
2630 return
2631
2632 # Search for the real file URL
2633 mobj = re.search(r'<form action="(http://fileshare.+?)"', webpage)
2634 if (mobj is None) or (mobj.group(1) is None):
2635 # Try to figure out reason of the error.
2636 mobj = re.search(r'<strong>(Attention.*?)</strong>', webpage, re.DOTALL)
2637 if (mobj is not None) and (mobj.group(1) is not None):
2638 restriction_message = re.sub('\s+', ' ', mobj.group(1)).strip()
2639 self._downloader.trouble(u'ERROR: %s' % restriction_message)
2640 else:
2641 self._downloader.trouble(u'ERROR: unable to extract download URL from: %s' % url)
2642 return
2643
2644 file_url = mobj.group(1)
2645 file_extension = os.path.splitext(file_url)[1][1:]
2646
2647 # Search for file title
2648 mobj = re.search(r'<b title="(.*?)">', webpage)
2649 if mobj is None:
2650 self._downloader.trouble(u'ERROR: unable to extract title')
2651 return
2652 file_title = mobj.group(1).decode('utf-8')
2653
2654 try:
2655 # Process file information
2656 self._downloader.process_info({
2657 'id': file_id.decode('utf-8'),
2658 'url': file_url.decode('utf-8'),
2659 'uploader': u'NA',
2660 'upload_date': u'NA',
2661 'title': file_title,
2662 'stitle': file_title,
2663 'ext': file_extension.decode('utf-8'),
2664 'format': u'NA',
2665 'player_url': None,
2666 })
2667 except UnavailableVideoError, err:
2668 self._downloader.trouble(u'ERROR: unable to download file')
2669
c0a10ca8 2670
9f5f9602
GI
2671class FacebookIE(InfoExtractor):
2672 """Information Extractor for Facebook"""
2673
2674 _VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook.com/video/video.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)'
2675 _LOGIN_URL = 'https://login.facebook.com/login.php?m&next=http%3A%2F%2Fm.facebook.com%2Fhome.php&'
2676 _NETRC_MACHINE = 'facebook'
2677 _available_formats = ['highqual', 'lowqual']
2678 _video_extensions = {
2679 'highqual': 'mp4',
2680 'lowqual': 'mp4',
2681 }
f3098c4d 2682 IE_NAME = u'facebook'
9f5f9602
GI
2683
2684 def __init__(self, downloader=None):
2685 InfoExtractor.__init__(self, downloader)
2686
9f5f9602
GI
2687 def _reporter(self, message):
2688 """Add header and report message."""
2689 self._downloader.to_screen(u'[facebook] %s' % message)
2690
2691 def report_login(self):
2692 """Report attempt to log in."""
2693 self._reporter(u'Logging in')
2694
2695 def report_video_webpage_download(self, video_id):
2696 """Report attempt to download video webpage."""
2697 self._reporter(u'%s: Downloading video webpage' % video_id)
2698
2699 def report_information_extraction(self, video_id):
2700 """Report attempt to extract video information."""
2701 self._reporter(u'%s: Extracting video information' % video_id)
2702
2703 def _parse_page(self, video_webpage):
2704 """Extract video information from page"""
2705 # General data
2706 data = {'title': r'class="video_title datawrap">(.*?)</',
2707 'description': r'<div class="datawrap">(.*?)</div>',
2708 'owner': r'\("video_owner_name", "(.*?)"\)',
2709 'upload_date': r'data-date="(.*?)"',
2710 'thumbnail': r'\("thumb_url", "(?P<THUMB>.*?)"\)',
2711 }
2712 video_info = {}
2713 for piece in data.keys():
2714 mobj = re.search(data[piece], video_webpage)
2715 if mobj is not None:
2716 video_info[piece] = urllib.unquote_plus(mobj.group(1).decode("unicode_escape"))
2717
2718 # Video urls
2719 video_urls = {}
2720 for fmt in self._available_formats:
2721 mobj = re.search(r'\("%s_src\", "(.+?)"\)' % fmt, video_webpage)
2722 if mobj is not None:
 2723 # The URL is embedded in a JavaScript segment as an escaped-Unicode string
 2724 # within the (generally UTF-8) page
2725 video_urls[fmt] = urllib.unquote_plus(mobj.group(1).decode("unicode_escape"))
2726 video_info['video_urls'] = video_urls
2727
2728 return video_info
2729
2730 def _real_initialize(self):
2731 if self._downloader is None:
2732 return
2733
2734 useremail = None
2735 password = None
2736 downloader_params = self._downloader.params
2737
2738 # Attempt to use provided username and password or .netrc data
2739 if downloader_params.get('username', None) is not None:
2740 useremail = downloader_params['username']
2741 password = downloader_params['password']
2742 elif downloader_params.get('usenetrc', False):
2743 try:
2744 info = netrc.netrc().authenticators(self._NETRC_MACHINE)
2745 if info is not None:
2746 useremail = info[0]
2747 password = info[2]
2748 else:
2749 raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
2750 except (IOError, netrc.NetrcParseError), err:
2751 self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
2752 return
2753
2754 if useremail is None:
2755 return
2756
2757 # Log in
2758 login_form = {
2759 'email': useremail,
2760 'pass': password,
2761 'login': 'Log+In'
2762 }
2763 request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form))
2764 try:
2765 self.report_login()
2766 login_results = urllib2.urlopen(request).read()
2767 if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None:
 2768 self._downloader.to_stderr(u'WARNING: unable to log in: bad username/password, or exceeded login rate limit (~3/min). Check credentials or wait.')
2769 return
2770 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2771 self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
2772 return
2773
2774 def _real_extract(self, url):
2775 mobj = re.match(self._VALID_URL, url)
2776 if mobj is None:
2777 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
2778 return
2779 video_id = mobj.group('ID')
2780
2781 # Get video webpage
2782 self.report_video_webpage_download(video_id)
2783 request = urllib2.Request('https://www.facebook.com/video/video.php?v=%s' % video_id)
2784 try:
2785 page = urllib2.urlopen(request)
2786 video_webpage = page.read()
2787 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2788 self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
2789 return
2790
2791 # Start extracting information
2792 self.report_information_extraction(video_id)
2793
2794 # Extract information
2795 video_info = self._parse_page(video_webpage)
2796
2797 # uploader
2798 if 'owner' not in video_info:
2799 self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
2800 return
2801 video_uploader = video_info['owner']
2802
2803 # title
2804 if 'title' not in video_info:
2805 self._downloader.trouble(u'ERROR: unable to extract video title')
2806 return
2807 video_title = video_info['title']
2808 video_title = video_title.decode('utf-8')
2809 video_title = sanitize_title(video_title)
2810
2811 # simplified title
2812 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
2813 simple_title = simple_title.strip(ur'_')
2814
2815 # thumbnail image
2816 if 'thumbnail' not in video_info:
2817 self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
2818 video_thumbnail = ''
2819 else:
2820 video_thumbnail = video_info['thumbnail']
2821
2822 # upload date
2823 upload_date = u'NA'
2824 if 'upload_date' in video_info:
2825 upload_time = video_info['upload_date']
2826 timetuple = email.utils.parsedate_tz(upload_time)
2827 if timetuple is not None:
2828 try:
2829 upload_date = time.strftime('%Y%m%d', timetuple[0:9])
2830 except:
2831 pass
2832
2833 # description
8b95c387 2834 video_description = video_info.get('description', 'No description available.')
9f5f9602
GI
2835
2836 url_map = video_info['video_urls']
2837 if len(url_map.keys()) > 0:
2838 # Decide which formats to download
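 # format_limit caps quality by slicing _available_formats (ordered best to worst); 'worst'
 # picks the last available entry, '-1' keeps every available format, and any other value is
 # treated as a specific format key that must be present in url_map.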
2839 req_format = self._downloader.params.get('format', None)
2840 format_limit = self._downloader.params.get('format_limit', None)
2841
2842 if format_limit is not None and format_limit in self._available_formats:
2843 format_list = self._available_formats[self._available_formats.index(format_limit):]
2844 else:
2845 format_list = self._available_formats
2846 existing_formats = [x for x in format_list if x in url_map]
2847 if len(existing_formats) == 0:
2848 self._downloader.trouble(u'ERROR: no known formats available for video')
2849 return
2850 if req_format is None:
2851 video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
c52b01f3
K
2852 elif req_format == 'worst':
2853 video_url_list = [(existing_formats[len(existing_formats)-1], url_map[existing_formats[len(existing_formats)-1]])] # worst quality
9f5f9602
GI
2854 elif req_format == '-1':
2855 video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
2856 else:
2857 # Specific format
2858 if req_format not in url_map:
2859 self._downloader.trouble(u'ERROR: requested format not available')
2860 return
2861 video_url_list = [(req_format, url_map[req_format])] # Specific format
2862
2863 for format_param, video_real_url in video_url_list:
2864
2865 # At this point we have a new video
2866 self._downloader.increment_downloads()
2867
2868 # Extension
2869 video_extension = self._video_extensions.get(format_param, 'mp4')
2870
9f5f9602
GI
2871 try:
2872 # Process video information
2873 self._downloader.process_info({
2874 'id': video_id.decode('utf-8'),
2875 'url': video_real_url.decode('utf-8'),
2876 'uploader': video_uploader.decode('utf-8'),
2877 'upload_date': upload_date,
2878 'title': video_title,
2879 'stitle': simple_title,
2880 'ext': video_extension.decode('utf-8'),
2881 'format': (format_param is None and u'NA' or format_param.decode('utf-8')),
2882 'thumbnail': video_thumbnail.decode('utf-8'),
2883 'description': video_description.decode('utf-8'),
2884 'player_url': None,
2885 })
2886 except UnavailableVideoError, err:
2887 self._downloader.trouble(u'\nERROR: unable to download video')
2888
7745f5d8
PH
2889class BlipTVIE(InfoExtractor):
2890 """Information extractor for blip.tv"""
2891
1cab2c6d 2892 _VALID_URL = r'^(?:https?://)?(?:\w+\.)?blip\.tv(/.+)$'
7745f5d8 2893 _URL_EXT = r'^.*\.([a-z0-9]+)$'
f3098c4d 2894 IE_NAME = u'blip.tv'
7745f5d8 2895
7745f5d8
PH
2896 def report_extraction(self, file_id):
2897 """Report information extraction."""
aded78d9 2898 self._downloader.to_screen(u'[blip.tv] %s: Extracting information' % file_id)
7745f5d8
PH
2899
2900 def _simplify_title(self, title):
2901 res = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', title)
2902 res = res.strip(ur'_')
2903 return res
2904
2905 def _real_extract(self, url):
2906 mobj = re.match(self._VALID_URL, url)
2907 if mobj is None:
2908 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
2909 return
2910
1293ce58
PH
2911 if '?' in url:
2912 cchar = '&'
2913 else:
2914 cchar = '?'
2915 json_url = url + cchar + 'skin=json&version=2&no_wrap=1'
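# With skin=json&version=2&no_wrap=1 appended, blip.tv serves the post metadata as JSON rather than the normal HTML page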
7745f5d8 2916 request = urllib2.Request(json_url)
aded78d9 2917 self.report_extraction(mobj.group(1))
7745f5d8
PH
2918 try:
2919 json_code = urllib2.urlopen(request).read()
2920 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2921 self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
2922 return
2923 try:
2924 json_data = json.loads(json_code)
1293ce58
PH
2925 if 'Post' in json_data:
2926 data = json_data['Post']
2927 else:
2928 data = json_data
7745f5d8
PH
2929
2930 upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d')
2931 video_url = data['media']['url']
2932 umobj = re.match(self._URL_EXT, video_url)
2933 if umobj is None:
2934 raise ValueError('Can not determine filename extension')
2935 ext = umobj.group(1)
2936
a1cab7ce
PH
2937 self._downloader.increment_downloads()
2938
7745f5d8
PH
2939 info = {
2940 'id': data['item_id'],
2941 'url': video_url,
2942 'uploader': data['display_name'],
2943 'upload_date': upload_date,
2944 'title': data['title'],
2945 'stitle': self._simplify_title(data['title']),
2946 'ext': ext,
2947 'format': data['media']['mimeType'],
2948 'thumbnail': data['thumbnailUrl'],
2949 'description': data['description'],
2950 'player_url': data['embedUrl']
2951 }
2952 except (ValueError, KeyError), err:
aded78d9 2953 self._downloader.trouble(u'ERROR: unable to parse video information: %s' % repr(err))
7745f5d8
PH
2954 return
2955
2956 try:
2957 self._downloader.process_info(info)
2958 except UnavailableVideoError, err:
2959 self._downloader.trouble(u'\nERROR: unable to download video')
2960
2961
9b0a8bc1
PH
2962class MyVideoIE(InfoExtractor):
2963 """Information Extractor for myvideo.de."""
2964
2965 _VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/watch/([0-9]+)/([^?/]+).*'
f3098c4d 2966 IE_NAME = u'myvideo'
9b0a8bc1
PH
2967
2968 def __init__(self, downloader=None):
2969 InfoExtractor.__init__(self, downloader)
2970
9b0a8bc1
PH
2971 def report_download_webpage(self, video_id):
2972 """Report webpage download."""
2973 self._downloader.to_screen(u'[myvideo] %s: Downloading webpage' % video_id)
2974
2975 def report_extraction(self, video_id):
2976 """Report information extraction."""
2977 self._downloader.to_screen(u'[myvideo] %s: Extracting information' % video_id)
2978
2979 def _real_initialize(self):
2980 return
2981
2982 def _real_extract(self,url):
2983 mobj = re.match(self._VALID_URL, url)
2984 if mobj is None:
2985 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
2986 return
2987
2988 video_id = mobj.group(1)
2989 simple_title = mobj.group(2).decode('utf-8')
2990 # should actually not be necessary
2991 simple_title = sanitize_title(simple_title)
2992 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', simple_title)
2993
2994 # Get video webpage
2995 request = urllib2.Request('http://www.myvideo.de/watch/%s' % video_id)
2996 try:
2997 self.report_download_webpage(video_id)
2998 webpage = urllib2.urlopen(request).read()
2999 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3000 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
3001 return
3002
3003 self.report_extraction(video_id)
3004 mobj = re.search(r'<link rel=\'image_src\' href=\'(http://is[0-9].myvideo\.de/de/movie[0-9]+/[a-f0-9]+)/thumbs/[^.]+\.jpg\' />',
3005 webpage)
3006 if mobj is None:
3007 self._downloader.trouble(u'ERROR: unable to extract media URL')
3008 return
3009 video_url = mobj.group(1) + ('/%s.flv' % video_id)
3010
3011 mobj = re.search('<title>([^<]+)</title>', webpage)
3012 if mobj is None:
3013 self._downloader.trouble(u'ERROR: unable to extract title')
3014 return
3015
3016 video_title = mobj.group(1)
3017 video_title = sanitize_title(video_title)
3018
3019 try:
3021 self._downloader.process_info({
3022 'id': video_id,
3023 'url': video_url,
3024 'uploader': u'NA',
3025 'upload_date': u'NA',
3026 'title': video_title,
3027 'stitle': simple_title,
3028 'ext': u'flv',
3029 'format': u'NA',
3030 'player_url': None,
3031 })
3032 except UnavailableVideoError:
3033 self._downloader.trouble(u'\nERROR: Unable to download video')
3034
c8e30044 3035class ComedyCentralIE(InfoExtractor):
f166bccc 3036 """Information extractor for The Daily Show and Colbert Report """
c8e30044 3037
f3098c4d
PH
3038 _VALID_URL = r'^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport))|(https?://)?(www\.)?(?P<showname>thedailyshow|colbertnation)\.com/full-episodes/(?P<episode>.*)$'
3039 IE_NAME = u'comedycentral'
c8e30044 3040
c8e30044
PH
3041 def report_extraction(self, episode_id):
3042 self._downloader.to_screen(u'[comedycentral] %s: Extracting information' % episode_id)
3043
3044 def report_config_download(self, episode_id):
3045 self._downloader.to_screen(u'[comedycentral] %s: Downloading configuration' % episode_id)
3046
b487ef08
PH
3047 def report_index_download(self, episode_id):
3048 self._downloader.to_screen(u'[comedycentral] %s: Downloading show index' % episode_id)
3049
fedf9f39
PH
3050 def report_player_url(self, episode_id):
3051 self._downloader.to_screen(u'[comedycentral] %s: Determining player URL' % episode_id)
3052
c8e30044
PH
3053 def _simplify_title(self, title):
3054 res = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', title)
3055 res = res.strip(ur'_')
3056 return res
3057
3058 def _real_extract(self, url):
3059 mobj = re.match(self._VALID_URL, url)
3060 if mobj is None:
3061 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
3062 return
f166bccc
PH
3063
3064 if mobj.group('shortname'):
3065 if mobj.group('shortname') in ('tds', 'thedailyshow'):
3066 url = 'http://www.thedailyshow.com/full-episodes/'
3067 else:
3068 url = 'http://www.colbertnation.com/full-episodes/'
3069 mobj = re.match(self._VALID_URL, url)
3070 assert mobj is not None
3071
3072 dlNewest = not mobj.group('episode')
3073 if dlNewest:
3074 epTitle = mobj.group('showname')
3075 else:
3076 epTitle = mobj.group('episode')
c8e30044
PH
3077
3078 req = urllib2.Request(url)
3079 self.report_extraction(epTitle)
3080 try:
f166bccc
PH
3081 htmlHandle = urllib2.urlopen(req)
3082 html = htmlHandle.read()
c8e30044
PH
3083 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3084 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % unicode(err))
3085 return
f166bccc
PH
3086 if dlNewest:
3087 url = htmlHandle.geturl()
3088 mobj = re.match(self._VALID_URL, url)
3089 if mobj is None:
3090 self._downloader.trouble(u'ERROR: Invalid redirected URL: ' + url)
3091 return
3092 if mobj.group('episode') == '':
3093 self._downloader.trouble(u'ERROR: Redirected URL is still not specific: ' + url)
3094 return
3095 epTitle = mobj.group('episode')
c8e30044 3096
b487ef08 3097 mMovieParams = re.findall('<param name="movie" value="(http://media.mtvnservices.com/([^"]*episode.*?:.*?))"/>', html)
c8e30044
PH
3098 if len(mMovieParams) == 0:
3099 self._downloader.trouble(u'ERROR: unable to find Flash URL in webpage ' + url)
3100 return
b487ef08
PH
3101
3102 playerUrl_raw = mMovieParams[0][0]
fedf9f39
PH
3103 self.report_player_url(epTitle)
3104 try:
b487ef08
PH
3105 urlHandle = urllib2.urlopen(playerUrl_raw)
3106 playerUrl = urlHandle.geturl()
3107 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3108 self._downloader.trouble(u'ERROR: unable to determine player URL: ' + unicode(err))
3109 return
3110
3111 uri = mMovieParams[0][1]
3112 indexUrl = 'http://shadow.comedycentral.com/feeds/video_player/mrss/?' + urllib.urlencode({'uri': uri})
3113 self.report_index_download(epTitle)
3114 try:
3115 indexXml = urllib2.urlopen(indexUrl).read()
fedf9f39 3116 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
b487ef08 3117 self._downloader.trouble(u'ERROR: unable to download episode index: ' + unicode(err))
fedf9f39 3118 return
fedf9f39 3119
b487ef08
PH
3120 idoc = xml.etree.ElementTree.fromstring(indexXml)
3121 itemEls = idoc.findall('.//item')
3122 for itemEl in itemEls:
3123 mediaId = itemEl.findall('./guid')[0].text
3124 shortMediaId = mediaId.split(':')[-1]
3125 showId = mediaId.split(':')[-2].replace('.com', '')
3126 officialTitle = itemEl.findall('./title')[0].text
3127 officialDate = itemEl.findall('./pubDate')[0].text
3128
c8e30044
PH
3129 configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' +
3130 urllib.urlencode({'uri': mediaId}))
3131 configReq = urllib2.Request(configUrl)
3132 self.report_config_download(epTitle)
3133 try:
3134 configXml = urllib2.urlopen(configReq).read()
3135 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3136 self._downloader.trouble(u'ERROR: unable to download configuration: %s' % unicode(err))
3137 return
46c8c432 3138
c8e30044
PH
3139 cdoc = xml.etree.ElementTree.fromstring(configXml)
3140 turls = []
3141 for rendition in cdoc.findall('.//rendition'):
3142 finfo = (rendition.attrib['bitrate'], rendition.findall('./src')[0].text)
3143 turls.append(finfo)
3144
a88bc6bb 3145 if len(turls) == 0:
b487ef08 3146 self._downloader.trouble(u'\nERROR: unable to download ' + mediaId + ': No videos found')
a88bc6bb
PH
3147 continue
3148
c8e30044
PH
3149 # For now, just pick the highest bitrate
3150 format, video_url = turls[-1]
3151
3152 self._downloader.increment_downloads()
a88bc6bb 3153
b487ef08 3154 effTitle = showId + '-' + epTitle
c8e30044 3155 info = {
b487ef08 3156 'id': shortMediaId,
c8e30044 3157 'url': video_url,
b487ef08
PH
3158 'uploader': showId,
3159 'upload_date': officialDate,
a88bc6bb
PH
3160 'title': effTitle,
3161 'stitle': self._simplify_title(effTitle),
c8e30044
PH
3162 'ext': 'mp4',
3163 'format': format,
3164 'thumbnail': None,
b487ef08
PH
3165 'description': officialTitle,
3166 'player_url': playerUrl
c8e30044 3167 }
46c8c432 3168
c8e30044
PH
3169 try:
3170 self._downloader.process_info(info)
3171 except UnavailableVideoError, err:
b487ef08 3172 self._downloader.trouble(u'\nERROR: unable to download ' + mediaId)
a88bc6bb 3173 continue
c8e30044
PH
3174
3175
f9c68787
PH
3176class EscapistIE(InfoExtractor):
3177 """Information extractor for The Escapist """
3178
2d8acd80 3179 _VALID_URL = r'^(https?://)?(www\.)?escapistmagazine.com/videos/view/(?P<showname>[^/]+)/(?P<episode>[^/?]+)[/?]?.*$'
f3098c4d 3180 IE_NAME = u'escapist'
f9c68787 3181
f9c68787
PH
3182 def report_extraction(self, showName):
3183 self._downloader.to_screen(u'[escapist] %s: Extracting information' % showName)
3184
3185 def report_config_download(self, showName):
3186 self._downloader.to_screen(u'[escapist] %s: Downloading configuration' % showName)
3187
3188 def _simplify_title(self, title):
3189 res = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', title)
3190 res = res.strip(ur'_')
3191 return res
3192
3193 def _real_extract(self, url):
3194 htmlParser = HTMLParser.HTMLParser()
3195
3196 mobj = re.match(self._VALID_URL, url)
3197 if mobj is None:
3198 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
3199 return
3200 showName = mobj.group('showname')
3201 videoId = mobj.group('episode')
3202
3203 self.report_extraction(showName)
3204 try:
3205 webPage = urllib2.urlopen(url).read()
3206 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3207 self._downloader.trouble(u'ERROR: unable to download webpage: ' + unicode(err))
3208 return
3209
3210 descMatch = re.search('<meta name="description" content="([^"]*)"', webPage)
3211 description = htmlParser.unescape(descMatch.group(1))
3212 imgMatch = re.search('<meta property="og:image" content="([^"]*)"', webPage)
3213 imgUrl = htmlParser.unescape(imgMatch.group(1))
3214 playerUrlMatch = re.search('<meta property="og:video" content="([^"]*)"', webPage)
3215 playerUrl = htmlParser.unescape(playerUrlMatch.group(1))
3216 configUrlMatch = re.search('config=(.*)$', playerUrl)
3217 configUrl = urllib2.unquote(configUrlMatch.group(1))
3218
3219 self.report_config_download(showName)
3220 try:
3221 configJSON = urllib2.urlopen(configUrl).read()
3222 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3223 self._downloader.trouble(u'ERROR: unable to download configuration: ' + unicode(err))
3224 return
3225
3226 # Technically, it's JavaScript, not JSON
3227 configJSON = configJSON.replace("'", '"')
3228
3229 try:
3230 config = json.loads(configJSON)
3231 except (ValueError,), err:
3232 self._downloader.trouble(u'ERROR: Invalid JSON in configuration file: ' + unicode(err))
3233 return
3234
3235 playlist = config['playlist']
3236 videoUrl = playlist[1]['url']
3237
3238 self._downloader.increment_downloads()
3239 info = {
3240 'id': videoId,
3241 'url': videoUrl,
3242 'uploader': showName,
3243 'upload_date': None,
3244 'title': showName,
3245 'stitle': self._simplify_title(showName),
3246 'ext': 'flv',
3247 'format': 'flv',
3248 'thumbnail': imgUrl,
3249 'description': description,
3250 'player_url': playerUrl,
3251 }
3252
3253 try:
3254 self._downloader.process_info(info)
3255 except UnavailableVideoError, err:
3256 self._downloader.trouble(u'\nERROR: unable to download ' + videoId)
3257
3258
3259
65cd34c5
RG
3260class PostProcessor(object):
3261 """Post Processor class.
3262
3263 PostProcessor objects can be added to downloaders with their
3264 add_post_processor() method. When the downloader has finished a
3265 successful download, it will take its internal chain of PostProcessors
3266 and start calling the run() method on each one of them, first with
3267 an initial argument and then with the returned value of the previous
3268 PostProcessor.
3269
3270 The chain will be stopped if one of them ever returns None or the end
3271 of the chain is reached.
3272
3273 PostProcessor objects follow a "mutual registration" process similar
3274 to InfoExtractor objects.
3275 """
3276
3277 _downloader = None
3278
3279 def __init__(self, downloader=None):
3280 self._downloader = downloader
3281
65cd34c5
RG
3282 def set_downloader(self, downloader):
3283 """Sets the downloader for this PP."""
3284 self._downloader = downloader
d3975459 3285
65cd34c5
RG
3286 def run(self, information):
3287 """Run the PostProcessor.
3288
3289 The "information" argument is a dictionary like the ones
2f11508a 3290 composed by InfoExtractors. The only difference is that this
65cd34c5
RG
3291 one has an extra field called "filepath" that points to the
3292 downloaded file.
3293
3294 When this method returns None, the postprocessing chain is
3295 stopped. However, this method may return an information
3296 dictionary that will be passed to the next postprocessing
3297 object in the chain. It can be the one it received after
3298 changing some fields.
3299
3300 In addition, this method may raise a PostProcessingError
3301 exception that will be taken into account by the downloader
3302 it was called from.
3303 """
3304 return information # by default, do nothing
d3975459 3305
c0a10ca8 3306
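# Illustrative sketch (not part of the original program): a minimal custom
# PostProcessor showing how the chain described above works. The class name
# and the filename prefix it adds are assumptions made purely for
# demonstration. run() receives the info dictionary, may update 'filepath',
# and returns the dict so the next PostProcessor in the chain gets it;
# returning None stops the chain.
class ExamplePrefixPP(PostProcessor):
	def run(self, information):
		old_path = information['filepath']
		new_path = os.path.join(os.path.dirname(old_path), 'done-' + os.path.basename(old_path))
		try:
			os.rename(old_path, new_path)
		except (IOError, OSError):
			self._downloader.to_stderr(u'WARNING: unable to rename downloaded file')
			return None # stop the postprocessing chain
		information['filepath'] = new_path
		return information # hand the updated dict on to the next PostProcessor
# A downloader would attach it with fd.add_post_processor(ExamplePrefixPP()).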
3072fab1
RG
3307class FFmpegExtractAudioPP(PostProcessor):
3308
c99dcbd2 3309 def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, keepvideo=False):
3072fab1
RG
3310 PostProcessor.__init__(self, downloader)
3311 if preferredcodec is None:
3312 preferredcodec = 'best'
3313 self._preferredcodec = preferredcodec
18b7f874 3314 self._preferredquality = preferredquality
3315 self._keepvideo = keepvideo
3072fab1
RG
3316
3317 @staticmethod
3318 def get_audio_codec(path):
da273188 3319 try:
2727dbf7
RG
3320 cmd = ['ffprobe', '-show_streams', '--', path]
3321 handle = subprocess.Popen(cmd, stderr=file(os.path.devnull, 'w'), stdout=subprocess.PIPE)
da273188
RG
3322 output = handle.communicate()[0]
3323 if handle.wait() != 0:
3324 return None
3325 except (IOError, OSError):
3072fab1
RG
3326 return None
3327 audio_codec = None
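# ffprobe -show_streams prints one key=value line per stream field, e.g.
#   codec_name=aac
#   codec_type=audio
# so remember the last codec_name seen and return it once its stream turns out to be audio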
3328 for line in output.split('\n'):
3329 if line.startswith('codec_name='):
3330 audio_codec = line.split('=')[1].strip()
3331 elif line.strip() == 'codec_type=audio' and audio_codec is not None:
3332 return audio_codec
3333 return None
3334
3335 @staticmethod
3336 def run_ffmpeg(path, out_path, codec, more_opts):
3337 try:
2727dbf7
RG
3338 cmd = ['ffmpeg', '-y', '-i', path, '-vn', '-acodec', codec] + more_opts + ['--', out_path]
3339 ret = subprocess.call(cmd, stdout=file(os.path.devnull, 'w'), stderr=subprocess.STDOUT)
3072fab1
RG
3340 return (ret == 0)
3341 except (IOError, OSError):
3342 return False
3343
3344 def run(self, information):
3345 path = information['filepath']
3346
3347 filecodec = self.get_audio_codec(path)
3348 if filecodec is None:
da273188 3349 self._downloader.to_stderr(u'WARNING: unable to obtain file audio codec with ffprobe')
3072fab1
RG
3350 return None
3351
3352 more_opts = []
3353 if self._preferredcodec == 'best' or self._preferredcodec == filecodec:
efb113c7 3354 if filecodec in ['aac', 'mp3', 'vorbis']:
3072fab1
RG
3355 # Lossless if possible
3356 acodec = 'copy'
3357 extension = filecodec
3358 if filecodec == 'aac':
3359 more_opts = ['-f', 'adts']
58384838
RC
3360 if filecodec == 'vorbis':
3361 extension = 'ogg'
3072fab1
RG
3362 else:
3363 # MP3 otherwise.
3364 acodec = 'libmp3lame'
3365 extension = 'mp3'
c99dcbd2
PH
3366 more_opts = []
3367 if self._preferredquality is not None:
3368 more_opts += ['-ab', self._preferredquality]
3072fab1
RG
3369 else:
3370 # We convert the audio (lossy)
58384838 3371 acodec = {'mp3': 'libmp3lame', 'aac': 'aac', 'vorbis': 'libvorbis'}[self._preferredcodec]
3072fab1 3372 extension = self._preferredcodec
c99dcbd2
PH
3373 more_opts = []
3374 if self._preferredquality is not None:
3375 more_opts += ['-ab', self._preferredquality]
3072fab1
RG
3376 if self._preferredcodec == 'aac':
3377 more_opts += ['-f', 'adts']
58384838
RC
3378 if self._preferredcodec == 'vorbis':
3379 extension = 'ogg'
3072fab1
RG
3380
3381 (prefix, ext) = os.path.splitext(path)
3382 new_path = prefix + '.' + extension
3383 self._downloader.to_screen(u'[ffmpeg] Destination: %s' % new_path)
3384 status = self.run_ffmpeg(path, new_path, acodec, more_opts)
3385
3386 if not status:
1bd92582 3387 self._downloader.to_stderr(u'WARNING: error running ffmpeg')
3072fab1
RG
3388 return None
3389
36597dc4
K
3390 # Try to update the date time for extracted audio file.
3391 if information.get('filetime') is not None:
3392 try:
3393 os.utime(new_path, (time.time(), information['filetime']))
3394 except:
3395 self._downloader.to_stderr(u'WARNING: Cannot update utime of audio file')
3396
18b7f874 3397 if not self._keepvideo:
3398 try:
3399 os.remove(path)
3400 except (IOError, OSError):
3401 self._downloader.to_stderr(u'WARNING: Unable to remove downloaded video file')
3402 return None
3072fab1
RG
3403
3404 information['filepath'] = new_path
3405 return information
3406
5fb3df4a
GV
3407
3408def updateSelf(downloader, filename):
3409 ''' Update the program file with the latest version from the repository '''
3410 # Note: downloader only used for options
3411 if not os.access(filename, os.W_OK):
3412 sys.exit('ERROR: no write permissions on %s' % filename)
3413
d207e7cf 3414 downloader.to_screen('Updating to latest version...')
5fb3df4a 3415
4fa74b52 3416 try:
d207e7cf
PH
3417 try:
3418 urlh = urllib.urlopen(UPDATE_URL)
3419 newcontent = urlh.read()
27365956
PH
3420
3421 vmatch = re.search("__version__ = '([^']+)'", newcontent)
3422 if vmatch is not None and vmatch.group(1) == __version__:
3423 downloader.to_screen('youtube-dl is up-to-date (' + __version__ + ')')
3424 return
d207e7cf
PH
3425 finally:
3426 urlh.close()
5fb3df4a
GV
3427 except (IOError, OSError), err:
3428 sys.exit('ERROR: unable to download latest version')
f9f1e798 3429
5fb3df4a 3430 try:
d207e7cf
PH
3431 outf = open(filename, 'wb')
3432 try:
3433 outf.write(newcontent)
3434 finally:
3435 outf.close()
5fb3df4a
GV
3436 except (IOError, OSError), err:
3437 sys.exit('ERROR: unable to overwrite current version')
4bec29ef 3438
eb6c37da 3439 downloader.to_screen('Updated youtube-dl. Restart youtube-dl to use the new version.')
80066952 3440
4f9f96f6
GV
3441def parseOpts():
3442 # Deferred imports
3443 import getpass
3444 import optparse
e7cf18cb 3445
4f9f96f6
GV
3446 def _format_option_string(option):
3447 ''' ('-o', '--option') -> -o, --option METAVAR'''
80066952 3448
4f9f96f6
GV
3449 opts = []
3450
3451 if option._short_opts: opts.append(option._short_opts[0])
3452 if option._long_opts: opts.append(option._long_opts[0])
3453 if len(opts) > 1: opts.insert(1, ', ')
3454
3455 if option.takes_value(): opts.append(' %s' % option.metavar)
3456
3457 return "".join(opts)
3458
6a4f0a11
GV
3459 def _find_term_columns():
3460 columns = os.environ.get('COLUMNS', None)
2c8d32de
PH
3461 if columns:
3462 return int(columns)
3463
4f2a5e06
PH
3464 try:
3465 sp = subprocess.Popen(['stty', 'size'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
3466 out,err = sp.communicate()
eb0387a8 3467 return int(out.split()[1])
4f2a5e06
PH
3468 except:
3469 pass
2c8d32de 3470 return None
6a4f0a11 3471
51c8e53f
GV
3472 max_width = 80
3473 max_help_position = 80
3474
3475 # No need to wrap help messages if we're on a wide console
6a4f0a11 3476 columns = _find_term_columns()
51c8e53f
GV
3477 if columns: max_width = columns
3478
3479 fmt = optparse.IndentedHelpFormatter(width=max_width, max_help_position=max_help_position)
4f9f96f6
GV
3480 fmt.format_option_strings = _format_option_string
3481
3482 kw = {
3483 'version' : __version__,
3484 'formatter' : fmt,
a2f7e3a5 3485 'usage' : '%prog [options] url [url...]',
4f9f96f6
GV
3486 'conflict_handler' : 'resolve',
3487 }
3488
3489 parser = optparse.OptionParser(**kw)
3490
3491 # option groups
3492 general = optparse.OptionGroup(parser, 'General Options')
20e91e83 3493 selection = optparse.OptionGroup(parser, 'Video Selection')
4f9f96f6
GV
3494 authentication = optparse.OptionGroup(parser, 'Authentication Options')
3495 video_format = optparse.OptionGroup(parser, 'Video Format Options')
3496 postproc = optparse.OptionGroup(parser, 'Post-processing Options')
3497 filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
3498 verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
3499
3500 general.add_option('-h', '--help',
3501 action='help', help='print this help text and exit')
3502 general.add_option('-v', '--version',
3503 action='version', help='print program version and exit')
3504 general.add_option('-U', '--update',
e0e56865 3505 action='store_true', dest='update_self', help='update this program to latest version')
4f9f96f6
GV
3506 general.add_option('-i', '--ignore-errors',
3507 action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
3508 general.add_option('-r', '--rate-limit',
3509 dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)')
3510 general.add_option('-R', '--retries',
3511 dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10)
4f9f96f6
GV
3512 general.add_option('--dump-user-agent',
3513 action='store_true', dest='dump_user_agent',
3514 help='display the current browser identification', default=False)
f3098c4d
PH
3515 general.add_option('--list-extractors',
3516 action='store_true', dest='list_extractors',
3517 help='List all supported extractors and the URLs they would handle', default=False)
4f9f96f6 3518
20e91e83
ABP
3519 selection.add_option('--playlist-start',
3520 dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1)
3521 selection.add_option('--playlist-end',
3522 dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)
3523 selection.add_option('--match-title', dest='matchtitle', metavar='REGEX',help='download only matching titles (regex or caseless sub-string)')
3524 selection.add_option('--reject-title', dest='rejecttitle', metavar='REGEX',help='skip download for matching titles (regex or caseless sub-string)')
3525
4f9f96f6
GV
3526 authentication.add_option('-u', '--username',
3527 dest='username', metavar='USERNAME', help='account username')
3528 authentication.add_option('-p', '--password',
3529 dest='password', metavar='PASSWORD', help='account password')
3530 authentication.add_option('-n', '--netrc',
3531 action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
3532
3533
3534 video_format.add_option('-f', '--format',
3535 action='store', dest='format', metavar='FORMAT', help='video format code')
3536 video_format.add_option('--all-formats',
5260e68f 3537 action='store_const', dest='format', help='download all available video formats', const='all')
4f9f96f6
GV
3538 video_format.add_option('--max-quality',
3539 action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download')
3540
3541
3542 verbosity.add_option('-q', '--quiet',
3543 action='store_true', dest='quiet', help='activates quiet mode', default=False)
3544 verbosity.add_option('-s', '--simulate',
9b4556c4
PH
3545 action='store_true', dest='simulate', help='do not download the video and do not write anything to disk', default=False)
3546 verbosity.add_option('--skip-download',
3547 action='store_true', dest='skip_download', help='do not download the video', default=False)
4f9f96f6
GV
3548 verbosity.add_option('-g', '--get-url',
3549 action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
3550 verbosity.add_option('-e', '--get-title',
3551 action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
3552 verbosity.add_option('--get-thumbnail',
3553 action='store_true', dest='getthumbnail',
3554 help='simulate, quiet but print thumbnail URL', default=False)
3555 verbosity.add_option('--get-description',
3556 action='store_true', dest='getdescription',
3557 help='simulate, quiet but print video description', default=False)
3558 verbosity.add_option('--get-filename',
3559 action='store_true', dest='getfilename',
3560 help='simulate, quiet but print output filename', default=False)
da0db53a
DH
3561 verbosity.add_option('--get-format',
3562 action='store_true', dest='getformat',
3563 help='simulate, quiet but print output format', default=False)
4f9f96f6
GV
3564 verbosity.add_option('--no-progress',
3565 action='store_true', dest='noprogress', help='do not print progress bar', default=False)
3566 verbosity.add_option('--console-title',
3567 action='store_true', dest='consoletitle',
3568 help='display progress in console titlebar', default=False)
3569
3570
3571 filesystem.add_option('-t', '--title',
3572 action='store_true', dest='usetitle', help='use title in file name', default=False)
3573 filesystem.add_option('-l', '--literal',
3574 action='store_true', dest='useliteral', help='use literal title in file name', default=False)
3575 filesystem.add_option('-A', '--auto-number',
3576 action='store_true', dest='autonumber',
3577 help='number downloaded files starting from 00000', default=False)
3578 filesystem.add_option('-o', '--output',
3579 dest='outtmpl', metavar='TEMPLATE', help='output filename template')
3580 filesystem.add_option('-a', '--batch-file',
3581 dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
3582 filesystem.add_option('-w', '--no-overwrites',
3583 action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
3584 filesystem.add_option('-c', '--continue',
c25303c3 3585 action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False)
18bb3d1e
PH
3586 filesystem.add_option('--no-continue',
3587 action='store_false', dest='continue_dl',
3588 help='do not resume partially downloaded files (restart from beginning)')
4f9f96f6 3589 filesystem.add_option('--cookies',
abb870d1 3590 dest='cookiefile', metavar='FILE', help='file to read cookies from and dump cookie jar in')
4f9f96f6
GV
3591 filesystem.add_option('--no-part',
3592 action='store_true', dest='nopart', help='do not use .part files', default=False)
3593 filesystem.add_option('--no-mtime',
3594 action='store_false', dest='updatetime',
3595 help='do not use the Last-modified header to set the file modification time', default=True)
2c8d32de
PH
3596 filesystem.add_option('--write-description',
3597 action='store_true', dest='writedescription',
3598 help='write video description to a .description file', default=False)
3599 filesystem.add_option('--write-info-json',
3600 action='store_true', dest='writeinfojson',
3601 help='write video metadata to a .info.json file', default=False)
4f9f96f6
GV
3602
3603
3604 postproc.add_option('--extract-audio', action='store_true', dest='extractaudio', default=False,
3605 help='convert video files to audio-only files (requires ffmpeg and ffprobe)')
3606 postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best',
58384838 3607 help='"best", "aac", "vorbis" or "mp3"; best by default')
c99dcbd2
PH
3608 postproc.add_option('--audio-quality', metavar='QUALITY', dest='audioquality', default='128K',
3609 help='ffmpeg audio bitrate specification, 128k by default')
3610 postproc.add_option('-k', '--keep-video', action='store_true', dest='keepvideo', default=False,
3611 help='keeps the video file on disk after the post-processing; the video is erased by default')
4f9f96f6
GV
3612
3613
3614 parser.add_option_group(general)
20e91e83 3615 parser.add_option_group(selection)
4f9f96f6
GV
3616 parser.add_option_group(filesystem)
3617 parser.add_option_group(verbosity)
3618 parser.add_option_group(video_format)
3619 parser.add_option_group(authentication)
3620 parser.add_option_group(postproc)
3621
3622 opts, args = parser.parse_args()
3623
3624 return parser, opts, args
3625
f3098c4d
PH
3626def gen_extractors():
3627 """ Return a list of an instance of every supported extractor.
3628 The order does matter; the first extractor matched is the one handling the URL.
3629 """
3630 youtube_ie = YoutubeIE()
3631 google_ie = GoogleIE()
3632 yahoo_ie = YahooIE()
3633 return [
f3098c4d
PH
3634 YoutubePlaylistIE(youtube_ie),
3635 YoutubeUserIE(youtube_ie),
3636 YoutubeSearchIE(youtube_ie),
1cde6f1d
PH
3637 youtube_ie,
3638 MetacafeIE(youtube_ie),
3639 DailymotionIE(),
f3098c4d
PH
3640 google_ie,
3641 GoogleSearchIE(google_ie),
3642 PhotobucketIE(),
3643 yahoo_ie,
3644 YahooSearchIE(yahoo_ie),
3645 DepositFilesIE(),
3646 FacebookIE(),
3647 BlipTVIE(),
3648 VimeoIE(),
3649 MyVideoIE(),
3650 ComedyCentralIE(),
3651 EscapistIE(),
3652
3653 GenericIE()
3654 ]
3655
5adcaa43
GV
3656def main():
3657 parser, opts, args = parseOpts()
4f9f96f6 3658
5adcaa43
GV
3659 # Open appropriate CookieJar
3660 if opts.cookiefile is None:
3661 jar = cookielib.CookieJar()
3662 else:
8cc44341 3663 try:
5adcaa43
GV
3664 jar = cookielib.MozillaCookieJar(opts.cookiefile)
3665 if os.path.isfile(opts.cookiefile) and os.access(opts.cookiefile, os.R_OK):
3666 jar.load()
3667 except (IOError, OSError), err:
3668 sys.exit(u'ERROR: unable to open cookie file')
80066952 3669
5adcaa43
GV
3670 # Dump user agent
3671 if opts.dump_user_agent:
3672 print std_headers['User-Agent']
3673 sys.exit(0)
e7cf18cb 3674
5adcaa43
GV
3675 # Batch file verification
3676 batchurls = []
3677 if opts.batchfile is not None:
8cc44341 3678 try:
5adcaa43
GV
3679 if opts.batchfile == '-':
3680 batchfd = sys.stdin
4bec29ef 3681 else:
5adcaa43
GV
3682 batchfd = open(opts.batchfile, 'r')
3683 batchurls = batchfd.readlines()
3684 batchurls = [x.strip() for x in batchurls]
3685 batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)]
3686 except IOError:
3687 sys.exit(u'ERROR: batch file could not be read')
3688 all_urls = batchurls + args
3689
f3098c4d
PH
3690 # General configuration
3691 cookie_processor = urllib2.HTTPCookieProcessor(jar)
3692 opener = urllib2.build_opener(urllib2.ProxyHandler(), cookie_processor, YoutubeDLHandler())
3693 urllib2.install_opener(opener)
3694 socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)
3695
3696 extractors = gen_extractors()
3697
3698 if opts.list_extractors:
3699 for ie in extractors:
3700 print(ie.IE_NAME)
3701 matchedUrls = filter(lambda url: ie.suitable(url), all_urls)
3702 all_urls = filter(lambda url: url not in matchedUrls, all_urls)
3703 for mu in matchedUrls:
3704 print(u' ' + mu)
3705 sys.exit(0)
3706
5adcaa43
GV
3707 # Conflicting, missing and erroneous options
3708 if opts.usenetrc and (opts.username is not None or opts.password is not None):
3709 parser.error(u'using .netrc conflicts with giving username/password')
3710 if opts.password is not None and opts.username is None:
3711 parser.error(u'account username missing')
3712 if opts.outtmpl is not None and (opts.useliteral or opts.usetitle or opts.autonumber):
3713 parser.error(u'using output template conflicts with using title, literal title or auto number')
3714 if opts.usetitle and opts.useliteral:
3715 parser.error(u'using title conflicts with using literal title')
3716 if opts.username is not None and opts.password is None:
3717 opts.password = getpass.getpass(u'Type account password and press return:')
3718 if opts.ratelimit is not None:
3719 numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
3720 if numeric_limit is None:
3721 parser.error(u'invalid rate limit specified')
3722 opts.ratelimit = numeric_limit
3723 if opts.retries is not None:
8cc44341 3724 try:
5adcaa43 3725 opts.retries = long(opts.retries)
8cc44341 3726 except (TypeError, ValueError), err:
5adcaa43
GV
3727 parser.error(u'invalid retry count specified')
3728 try:
2c8d32de 3729 opts.playliststart = int(opts.playliststart)
5adcaa43 3730 if opts.playliststart <= 0:
2c8d32de 3731 raise ValueError(u'Playlist start must be positive')
5adcaa43
GV
3732 except (TypeError, ValueError), err:
3733 parser.error(u'invalid playlist start number specified')
3734 try:
2c8d32de 3735 opts.playlistend = int(opts.playlistend)
5adcaa43 3736 if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart):
2c8d32de 3737 raise ValueError(u'Playlist end must be greater than playlist start')
5adcaa43
GV
3738 except (TypeError, ValueError), err:
3739 parser.error(u'invalid playlist end number specified')
3740 if opts.extractaudio:
58384838 3741 if opts.audioformat not in ['best', 'aac', 'mp3', 'vorbis']:
5adcaa43
GV
3742 parser.error(u'invalid audio format specified')
3743
5adcaa43
GV
3744 # File downloader
3745 fd = FileDownloader({
3746 'usenetrc': opts.usenetrc,
3747 'username': opts.username,
3748 'password': opts.password,
da0db53a 3749 'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat),
5adcaa43
GV
3750 'forceurl': opts.geturl,
3751 'forcetitle': opts.gettitle,
3752 'forcethumbnail': opts.getthumbnail,
3753 'forcedescription': opts.getdescription,
3754 'forcefilename': opts.getfilename,
da0db53a 3755 'forceformat': opts.getformat,
9b4556c4 3756 'simulate': opts.simulate,
da0db53a 3757 'skip_download': (opts.skip_download or opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat),
5adcaa43
GV
3758 'format': opts.format,
3759 'format_limit': opts.format_limit,
3760 'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
3761 or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s')
3762 or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s')
3763 or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s')
3764 or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(stitle)s-%(id)s.%(ext)s')
3765 or (opts.useliteral and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s')
3766 or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
3767 or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
3768 or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
3769 or u'%(id)s.%(ext)s'),
3770 'ignoreerrors': opts.ignoreerrors,
3771 'ratelimit': opts.ratelimit,
3772 'nooverwrites': opts.nooverwrites,
3773 'retries': opts.retries,
3774 'continuedl': opts.continue_dl,
3775 'noprogress': opts.noprogress,
3776 'playliststart': opts.playliststart,
3777 'playlistend': opts.playlistend,
3778 'logtostderr': opts.outtmpl == '-',
3779 'consoletitle': opts.consoletitle,
3780 'nopart': opts.nopart,
3781 'updatetime': opts.updatetime,
2c8d32de
PH
3782 'writedescription': opts.writedescription,
3783 'writeinfojson': opts.writeinfojson,
20e91e83
ABP
3784 'matchtitle': opts.matchtitle,
3785 'rejecttitle': opts.rejecttitle,
5adcaa43 3786 })
8c5dc3ad
PH
3787 for extractor in extractors:
3788 fd.add_info_extractor(extractor)
5adcaa43
GV
3789
3790 # PostProcessors
3791 if opts.extractaudio:
c99dcbd2 3792 fd.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, keepvideo=opts.keepvideo))
5adcaa43
GV
3793
3794 # Update version
3795 if opts.update_self:
3796 updateSelf(fd, sys.argv[0])
3797
3798 # Maybe do nothing
3799 if len(all_urls) < 1:
3800 if not opts.update_self:
3801 parser.error(u'you must provide at least one URL')
3802 else:
3803 sys.exit()
3804 retcode = fd.download(all_urls)
80066952 3805
5adcaa43
GV
3806 # Dump cookie jar if requested
3807 if opts.cookiefile is not None:
3808 try:
3809 jar.save()
3810 except (IOError, OSError), err:
3811 sys.exit(u'ERROR: unable to save cookie jar')
80066952 3812
5adcaa43 3813 sys.exit(retcode)
80066952 3814
4fa74b52 3815
5adcaa43
GV
3816if __name__ == '__main__':
3817 try:
3818 main()
e5bf0f55
RG
3819 except DownloadError:
3820 sys.exit(1)
3821 except SameFileError:
76a7f364 3822 sys.exit(u'ERROR: fixed output name but more than one file to download')
4fa74b52 3823 except KeyboardInterrupt:
76a7f364 3824 sys.exit(u'\nERROR: Interrupted by user')
e9cb9c28
GV
3825
3826# vim: set ts=4 sw=4 sts=4 noet ai si filetype=python: