#!/usr/bin/env python
# -*- coding: utf-8 -*-

__author__ = (
    'Ricardo Garcia Gonzalez',
    'Danny Colligan',
    'Benjamin Johnson',
    'Vasyl\' Vavrychuk',
    'Witold Baryluk',
    'Paweł Paprota',
    'Gergely Imreh',
    'Rogério Brito',
    'Philipp Hagemeister',
    'Sören Schulze',
    )

__license__ = 'Public Domain'
__version__ = '2011.09.16'

UPDATE_URL = 'https://raw.github.com/rg3/youtube-dl/master/youtube-dl'

import cookielib
import datetime
import gzip
import htmlentitydefs
import HTMLParser
import httplib
import locale
import math
import netrc
import os
import os.path
import re
import socket
import string
import subprocess
import sys
import time
import urllib
import urllib2
import warnings
import zlib

if os.name == 'nt':
    import ctypes

try:
    import email.utils
except ImportError: # Python 2.4
    import email.Utils
try:
    import cStringIO as StringIO
except ImportError:
    import StringIO

# parse_qs was moved from the cgi module to the urlparse module recently.
try:
    from urlparse import parse_qs
except ImportError:
    from cgi import parse_qs

try:
    import lxml.etree
except ImportError:
    pass # Handled below

try:
    import xml.etree.ElementTree
except ImportError: # Python<2.5: Not officially supported, but let it slip
    warnings.warn('xml.etree.ElementTree support is missing. Consider upgrading to Python >= 2.5 if you get related errors.')

std_headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:5.0.1) Gecko/20100101 Firefox/5.0.1',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}

simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii')

try:
    import json
except ImportError: # Python <2.6, use trivialjson (https://github.com/phihag/trivialjson):
    import re
    class json(object):
        @staticmethod
        def loads(s):
            s = s.decode('UTF-8')
            def raiseError(msg, i):
                raise ValueError(msg + ' at position ' + str(i) + ' of ' + repr(s) + ': ' + repr(s[i:]))
            def skipSpace(i, expectMore=True):
                while i < len(s) and s[i] in ' \t\r\n':
                    i += 1
                if expectMore:
                    if i >= len(s):
                        raiseError('Premature end', i)
                return i
            def decodeEscape(match):
                esc = match.group(1)
                _STATIC = {
                    '"': '"',
                    '\\': '\\',
                    '/': '/',
                    'b': unichr(0x8),
                    'f': unichr(0xc),
                    'n': '\n',
                    'r': '\r',
                    't': '\t',
                }
                if esc in _STATIC:
                    return _STATIC[esc]
                if esc[0] == 'u':
                    if len(esc) == 1+4:
                        return unichr(int(esc[1:5], 16))
                    if len(esc) == 5+6 and esc[5:7] == '\\u':
                        hi = int(esc[1:5], 16)
                        low = int(esc[7:11], 16)
                        return unichr((hi - 0xd800) * 0x400 + low - 0xdc00 + 0x10000)
                raise ValueError('Unknown escape ' + str(esc))
            def parseString(i):
                i += 1
                e = i
                while True:
                    e = s.index('"', e)
                    bslashes = 0
                    while s[e-bslashes-1] == '\\':
                        bslashes += 1
                    if bslashes % 2 == 1:
                        e += 1
                        continue
                    break
                rexp = re.compile(r'\\(u[dD][89aAbB][0-9a-fA-F]{2}\\u[0-9a-fA-F]{4}|u[0-9a-fA-F]{4}|.|$)')
                stri = rexp.sub(decodeEscape, s[i:e])
                return (e+1,stri)
            def parseObj(i):
                i += 1
                res = {}
                i = skipSpace(i)
                if s[i] == '}': # Empty dictionary
                    return (i+1,res)
                while True:
                    if s[i] != '"':
                        raiseError('Expected a string object key', i)
                    i,key = parseString(i)
                    i = skipSpace(i)
                    if i >= len(s) or s[i] != ':':
                        raiseError('Expected a colon', i)
                    i,val = parse(i+1)
                    res[key] = val
                    i = skipSpace(i)
                    if s[i] == '}':
                        return (i+1, res)
                    if s[i] != ',':
                        raiseError('Expected comma or closing curly brace', i)
                    i = skipSpace(i+1)
            def parseArray(i):
                res = []
                i = skipSpace(i+1)
                if s[i] == ']': # Empty array
                    return (i+1,res)
                while True:
                    i,val = parse(i)
                    res.append(val)
                    i = skipSpace(i) # Raise exception if premature end
                    if s[i] == ']':
                        return (i+1, res)
                    if s[i] != ',':
                        raiseError('Expected a comma or closing bracket', i)
                    i = skipSpace(i+1)
            def parseDiscrete(i):
                for k,v in {'true': True, 'false': False, 'null': None}.items():
                    if s.startswith(k, i):
                        return (i+len(k), v)
                raiseError('Not a boolean (or null)', i)
            def parseNumber(i):
                mobj = re.match('^(-?(0|[1-9][0-9]*)(\.[0-9]*)?([eE][+-]?[0-9]+)?)', s[i:])
                if mobj is None:
                    raiseError('Not a number', i)
                nums = mobj.group(1)
                if '.' in nums or 'e' in nums or 'E' in nums:
                    return (i+len(nums), float(nums))
                return (i+len(nums), int(nums))
            CHARMAP = {'{': parseObj, '[': parseArray, '"': parseString, 't': parseDiscrete, 'f': parseDiscrete, 'n': parseDiscrete}
            def parse(i):
                i = skipSpace(i)
                i,res = CHARMAP.get(s[i], parseNumber)(i)
                i = skipSpace(i, False)
                return (i,res)
            i,res = parse(0)
            if i < len(s):
                raise ValueError('Extra data at end of input (index ' + str(i) + ' of ' + repr(s) + ': ' + repr(s[i:]) + ')')
            return res


def preferredencoding():
    """Get preferred encoding.

    Returns the best encoding scheme for the system, based on
    locale.getpreferredencoding() and some further tweaks.
    """
    def yield_preferredencoding():
        try:
            pref = locale.getpreferredencoding()
            u'TEST'.encode(pref)
        except:
            pref = 'UTF-8'
        while True:
            yield pref
    return yield_preferredencoding().next()


def htmlentity_transform(matchobj):
    """Transforms an HTML entity to a Unicode character.

    This function receives a match object and is intended to be used with
    the re.sub() function.
    """
    entity = matchobj.group(1)

    # Known non-numeric HTML entity
    if entity in htmlentitydefs.name2codepoint:
        return unichr(htmlentitydefs.name2codepoint[entity])

    # Unicode character
    mobj = re.match(ur'(?u)#(x?\d+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith(u'x'):
            base = 16
            numstr = u'0%s' % numstr
        else:
            base = 10
        return unichr(long(numstr, base))

    # Unknown entity in name, return its literal representation
    return (u'&%s;' % entity)


def sanitize_title(utitle):
    """Sanitizes a video title so it could be used as part of a filename."""
    utitle = re.sub(ur'(?u)&(.+?);', htmlentity_transform, utitle)
    return utitle.replace(unicode(os.sep), u'%')


def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == u'-':
            if sys.platform == 'win32':
                import msvcrt
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            return (sys.stdout, filename)
        stream = open(filename, open_mode)
        return (stream, filename)
    except (IOError, OSError), err:
        # In case of error, try to remove win32 forbidden chars
        filename = re.sub(ur'[/<>:"\|\?\*]', u'#', filename)

        # An exception here should be caught in the caller
        stream = open(filename, open_mode)
        return (stream, filename)


def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp"""
    timestamp = None
    timetuple = email.utils.parsedate_tz(timestr)
    if timetuple is not None:
        timestamp = email.utils.mktime_tz(timetuple)
    return timestamp


class DownloadError(Exception):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """
    pass


class SameFileError(Exception):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
    pass


class PostProcessingError(Exception):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """
    pass


class UnavailableVideoError(Exception):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
    pass


class ContentTooShortError(Exception):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """
    # Both in bytes
    downloaded = None
    expected = None

    def __init__(self, downloaded, expected):
        self.downloaded = downloaded
        self.expected = expected


class YoutubeDLHandler(urllib2.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-No-Compression", which will be
    removed before making the real request.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    @staticmethod
    def deflate(data):
        try:
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    @staticmethod
    def addinfourl_wrapper(stream, headers, url, code):
        if hasattr(urllib2.addinfourl, 'getcode'):
            return urllib2.addinfourl(stream, headers, url, code)
        ret = urllib2.addinfourl(stream, headers, url)
        ret.code = code
        return ret

    def http_request(self, req):
        for h in std_headers:
            if h in req.headers:
                del req.headers[h]
            req.add_header(h, std_headers[h])
        if 'Youtubedl-no-compression' in req.headers:
            if 'Accept-encoding' in req.headers:
                del req.headers['Accept-encoding']
            del req.headers['Youtubedl-no-compression']
        return req

    def http_response(self, req, resp):
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            gz = gzip.GzipFile(fileobj=StringIO.StringIO(resp.read()), mode='r')
            resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = StringIO.StringIO(self.deflate(resp.read()))
            resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
        return resp

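# Illustrative sketch (not part of the original script): YoutubeDLHandler is meant
# to be installed into a urllib2 OpenerDirector so that every request carries the
# standard headers and gzip/deflate responses are decoded transparently. A minimal
# wiring, assuming a plain in-memory CookieJar, could look like:
#
#   cookie_jar = cookielib.CookieJar()
#   opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie_jar), YoutubeDLHandler())
#   urllib2.install_opener(opener)
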
class FileDownloader(object):
    """File Downloader class.

    File downloader objects are the ones responsible for downloading the
    actual video file and writing it to disk if the user has requested
    it, among some other tasks. In most cases there should be one per
    program. Since, given a video URL, the downloader doesn't know how to
    extract all the needed information (a task that InfoExtractors do),
    it has to pass the URL to one of them.

    For this, file downloader objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the file downloader hands it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    asks the FileDownloader to process the video information, possibly
    downloading the video.

    File downloaders accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The FileDownloader also
    registers itself as the downloader in charge for the InfoExtractors
    that are added to it, so this is a "mutual registration".

    Available options:

    username: Username for authentication purposes.
    password: Password for authentication purposes.
    usenetrc: Use netrc for authentication instead.
    quiet: Do not print messages to stdout.
    forceurl: Force printing final URL.
    forcetitle: Force printing title.
    forcethumbnail: Force printing thumbnail URL.
    forcedescription: Force printing description.
    forcefilename: Force printing final filename.
    simulate: Do not download the video files.
    format: Video format code.
    format_limit: Highest quality format to try.
    outtmpl: Template for output names.
    ignoreerrors: Do not stop on download errors.
    ratelimit: Download speed limit, in bytes/sec.
    nooverwrites: Prevent overwriting files.
    retries: Number of times to retry for HTTP error 5xx.
    continuedl: Try to continue downloads if possible.
    noprogress: Do not print the progress bar.
    playliststart: Playlist item to start at.
    playlistend: Playlist item to end at.
    matchtitle: Download only matching titles.
    rejecttitle: Reject downloads for matching titles.
    logtostderr: Log messages to stderr instead of stdout.
    consoletitle: Display progress in console window's titlebar.
    nopart: Do not use temporary .part files.
    updatetime: Use the Last-modified header to set output file timestamps.
    writedescription: Write the video description to a .description file.
    writeinfojson: Write the video description to a .info.json file.
    """

    params = None
    _ies = []
    _pps = []
    _download_retcode = None
    _num_downloads = None
    _screen_file = None

    def __init__(self, params):
        """Create a FileDownloader object with the given options."""
        self._ies = []
        self._pps = []
        self._download_retcode = 0
        self._num_downloads = 0
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self.params = params

    @staticmethod
    def format_bytes(bytes):
        if bytes is None:
            return 'N/A'
        if type(bytes) is str:
            bytes = float(bytes)
        if bytes == 0.0:
            exponent = 0
        else:
            exponent = long(math.log(bytes, 1024.0))
        suffix = 'bkMGTPEZY'[exponent]
        converted = float(bytes) / float(1024 ** exponent)
        return '%.2f%s' % (converted, suffix)

    @staticmethod
    def calc_percent(byte_counter, data_len):
        if data_len is None:
            return '---.-%'
        return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))

    @staticmethod
    def calc_eta(start, now, total, current):
        if total is None:
            return '--:--'
        dif = now - start
        if current == 0 or dif < 0.001: # One millisecond
            return '--:--'
        rate = float(current) / dif
        eta = long((float(total) - float(current)) / rate)
        (eta_mins, eta_secs) = divmod(eta, 60)
        if eta_mins > 99:
            return '--:--'
        return '%02d:%02d' % (eta_mins, eta_secs)

    @staticmethod
    def calc_speed(start, now, bytes):
        dif = now - start
        if bytes == 0 or dif < 0.001: # One millisecond
            return '%10s' % '---b/s'
        return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))

    @staticmethod
    def best_block_size(elapsed_time, bytes):
        new_min = max(bytes / 2.0, 1.0)
        new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
        if elapsed_time < 0.001:
            return long(new_max)
        rate = bytes / elapsed_time
        if rate > new_max:
            return long(new_max)
        if rate < new_min:
            return long(new_min)
        return long(rate)

    @staticmethod
    def parse_bytes(bytestr):
        """Parse a string indicating a byte quantity into a long integer."""
        matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
        if matchobj is None:
            return None
        number = float(matchobj.group(1))
        multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
        return long(round(number * multiplier))

    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        self._ies.append(ie)
        ie.set_downloader(self)

    def add_post_processor(self, pp):
        """Add a PostProcessor object to the end of the chain."""
        self._pps.append(pp)
        pp.set_downloader(self)

    def to_screen(self, message, skip_eol=False, ignore_encoding_errors=False):
        """Print message to stdout if not in quiet mode."""
        try:
            if not self.params.get('quiet', False):
                terminator = [u'\n', u''][skip_eol]
                print >>self._screen_file, (u'%s%s' % (message, terminator)).encode(preferredencoding()),
                self._screen_file.flush()
        except (UnicodeEncodeError), err:
            if not ignore_encoding_errors:
                raise

    def to_stderr(self, message):
        """Print message to stderr."""
        print >>sys.stderr, message.encode(preferredencoding())

    def to_cons_title(self, message):
        """Set console/terminal window title to message."""
        if not self.params.get('consoletitle', False):
            return
        if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
            # c_wchar_p() might not be necessary if `message` is
            # already of type unicode()
            ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        elif 'TERM' in os.environ:
            sys.stderr.write('\033]0;%s\007' % message.encode(preferredencoding()))

    def fixed_template(self):
        """Checks if the output template is fixed."""
        return (re.search(ur'(?u)%\(.+?\)s', self.params['outtmpl']) is None)

    def trouble(self, message=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.
        """
        if message is not None:
            self.to_stderr(message)
        if not self.params.get('ignoreerrors', False):
            raise DownloadError(message)
        self._download_retcode = 1

    def slow_down(self, start_time, byte_counter):
        """Sleep if the download speed is over the rate limit."""
        rate_limit = self.params.get('ratelimit', None)
        if rate_limit is None or byte_counter == 0:
            return
        now = time.time()
        elapsed = now - start_time
        if elapsed <= 0.0:
            return
        speed = float(byte_counter) / elapsed
        if speed > rate_limit:
            time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)

    def temp_name(self, filename):
        """Returns a temporary filename for the given filename."""
        if self.params.get('nopart', False) or filename == u'-' or \
                (os.path.exists(filename) and not os.path.isfile(filename)):
            return filename
        return filename + u'.part'

    def undo_temp_name(self, filename):
        if filename.endswith(u'.part'):
            return filename[:-len(u'.part')]
        return filename

    def try_rename(self, old_filename, new_filename):
        try:
            if old_filename == new_filename:
                return
            os.rename(old_filename, new_filename)
        except (IOError, OSError), err:
            self.trouble(u'ERROR: unable to rename file')

    def try_utime(self, filename, last_modified_hdr):
        """Try to set the last-modified time of the given file."""
        if last_modified_hdr is None:
            return
        if not os.path.isfile(filename):
            return
        timestr = last_modified_hdr
        if timestr is None:
            return
        filetime = timeconvert(timestr)
        if filetime is None:
            return filetime
        try:
            os.utime(filename, (time.time(), filetime))
        except:
            pass
        return filetime

    def report_writedescription(self, descfn):
        """Report that the description file is being written."""
        self.to_screen(u'[info] Writing video description to: %s' % descfn, ignore_encoding_errors=True)

    def report_writeinfojson(self, infofn):
        """Report that the metadata file has been written."""
        self.to_screen(u'[info] Video description metadata as JSON to: %s' % infofn, ignore_encoding_errors=True)

    def report_destination(self, filename):
        """Report destination filename."""
        self.to_screen(u'[download] Destination: %s' % filename, ignore_encoding_errors=True)

    def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
        """Report download progress."""
        if self.params.get('noprogress', False):
            return
        self.to_screen(u'\r[download] %s of %s at %s ETA %s' %
                (percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
        self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
                (percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))

    def report_resuming_byte(self, resume_len):
        """Report attempt to resume at given byte."""
        self.to_screen(u'[download] Resuming download at byte %s' % resume_len)

    def report_retry(self, count, retries):
        """Report retry in case of HTTP error 5xx."""
        self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))

    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        try:
            self.to_screen(u'[download] %s has already been downloaded' % file_name)
        except (UnicodeEncodeError), err:
            self.to_screen(u'[download] The file has already been downloaded')

    def report_unable_to_resume(self):
        """Report it was impossible to resume download."""
        self.to_screen(u'[download] Unable to resume')

    def report_finish(self):
        """Report download finished."""
        if self.params.get('noprogress', False):
            self.to_screen(u'[download] Download completed')
        else:
            self.to_screen(u'')

    def increment_downloads(self):
        """Increment the ordinal that assigns a number to each file."""
        self._num_downloads += 1

    def prepare_filename(self, info_dict):
        """Generate the output filename."""
        try:
            template_dict = dict(info_dict)
            template_dict['epoch'] = unicode(long(time.time()))
            template_dict['autonumber'] = unicode('%05d' % self._num_downloads)
            filename = self.params['outtmpl'] % template_dict
            return filename
        except (ValueError, KeyError), err:
            self.trouble(u'ERROR: invalid system charset or erroneous output template')
            return None

    def process_info(self, info_dict):
        """Process a single dictionary returned by an InfoExtractor."""
        filename = self.prepare_filename(info_dict)

        # Forced printings
        if self.params.get('forcetitle', False):
            print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')
        if self.params.get('forceurl', False):
            print info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace')
        if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
            print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace')
        if self.params.get('forcedescription', False) and 'description' in info_dict:
            print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace')
        if self.params.get('forcefilename', False) and filename is not None:
            print filename.encode(preferredencoding(), 'xmlcharrefreplace')
        if self.params.get('forceformat', False):
            print info_dict['format'].encode(preferredencoding(), 'xmlcharrefreplace')

        # Do nothing else if in simulate mode
        if self.params.get('simulate', False):
            return

        if filename is None:
            return

        matchtitle = self.params.get('matchtitle', False)
        rejecttitle = self.params.get('rejecttitle', False)
        title = info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')
        if matchtitle and not re.search(matchtitle, title, re.IGNORECASE):
            self.to_screen(u'[download] "%s" title did not match pattern "%s"' % (title, matchtitle))
            return
        if rejecttitle and re.search(rejecttitle, title, re.IGNORECASE):
            self.to_screen(u'[download] "%s" title matched reject pattern "%s"' % (title, rejecttitle))
            return

        if self.params.get('nooverwrites', False) and os.path.exists(filename):
            self.to_stderr(u'WARNING: file exists and will be skipped')
            return

        try:
            dn = os.path.dirname(filename)
            if dn != '' and not os.path.exists(dn):
                os.makedirs(dn)
        except (OSError, IOError), err:
            self.trouble(u'ERROR: unable to create directory ' + unicode(err))
            return

        if self.params.get('writedescription', False):
            try:
                descfn = filename + '.description'
                self.report_writedescription(descfn)
                descfile = open(descfn, 'wb')
                try:
                    descfile.write(info_dict['description'].encode('utf-8'))
                finally:
                    descfile.close()
            except (OSError, IOError):
                self.trouble(u'ERROR: Cannot write description file ' + descfn)
                return

        if self.params.get('writeinfojson', False):
            infofn = filename + '.info.json'
            self.report_writeinfojson(infofn)
            try:
                json.dump
            except (NameError, AttributeError):
                self.trouble(u'ERROR: No JSON encoder found. Update to Python 2.6+, setup a json module, or leave out --write-info-json.')
                return
            try:
                infof = open(infofn, 'wb')
                try:
                    json.dump(info_dict, infof)
                finally:
                    infof.close()
            except (OSError, IOError):
                self.trouble(u'ERROR: Cannot write metadata to JSON file ' + infofn)
                return

        if not self.params.get('skip_download', False):
            try:
                success, add_data = self._do_download(filename, info_dict['url'].encode('utf-8'), info_dict.get('player_url', None))
                info_dict.update(add_data)
            except (OSError, IOError), err:
                raise UnavailableVideoError
            except (urllib2.URLError, httplib.HTTPException, socket.error), err:
                self.trouble(u'ERROR: unable to download video data: %s' % str(err))
                return
            except (ContentTooShortError, ), err:
                self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                return

            if success:
                try:
                    self.post_process(filename, info_dict)
                except (PostProcessingError), err:
                    self.trouble(u'ERROR: postprocessing: %s' % str(err))
                    return

    def download(self, url_list):
        """Download a given list of URLs."""
        if len(url_list) > 1 and self.fixed_template():
            raise SameFileError(self.params['outtmpl'])

        for url in url_list:
            suitable_found = False
            for ie in self._ies:
                # Go to next InfoExtractor if not suitable
                if not ie.suitable(url):
                    continue

                # Suitable InfoExtractor found
                suitable_found = True

                # Extract information from URL and process it
                ie.extract(url)

                # Suitable InfoExtractor had been found; go to next URL
                break

            if not suitable_found:
                self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url)

        return self._download_retcode

    def post_process(self, filename, ie_info):
        """Run the postprocessing chain on the given file."""
        info = dict(ie_info)
        info['filepath'] = filename
        for pp in self._pps:
            info = pp.run(info)
            if info is None:
                break

    def _download_with_rtmpdump(self, filename, url, player_url):
        self.report_destination(filename)
        tmpfilename = self.temp_name(filename)

        # Check for rtmpdump first
        try:
            subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
        except (OSError, IOError):
            self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
            return False

        # Download using rtmpdump. rtmpdump returns exit code 2 when
        # the connection was interrupted and resuming appears to be
        # possible. This is part of rtmpdump's normal usage, AFAIK.
        basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename]
        retval = subprocess.call(basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)])
        while retval == 2 or retval == 1:
            prevsize = os.path.getsize(tmpfilename)
            self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
            time.sleep(5.0) # This seems to be needed
            retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
            cursize = os.path.getsize(tmpfilename)
            if prevsize == cursize and retval == 1:
                break
            # Some rtmp streams seem to abort after ~ 99.8%. Don't complain for those
            if prevsize == cursize and retval == 2 and cursize > 1024:
                self.to_screen(u'\r[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
                retval = 0
                break
        if retval == 0:
            self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(tmpfilename))
            self.try_rename(tmpfilename, filename)
            return True
        else:
            self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval)
            return False

    def _do_download(self, filename, url, player_url):
        # Check file already present
        if self.params.get('continuedl', False) and os.path.isfile(filename) and not self.params.get('nopart', False):
            self.report_file_already_downloaded(filename)
            return True

        # Attempt to download using rtmpdump
        if url.startswith('rtmp'):
            return self._download_with_rtmpdump(filename, url, player_url)

        tmpfilename = self.temp_name(filename)
        stream = None

        # Do not include the Accept-Encoding header
        headers = {'Youtubedl-no-compression': 'True'}
        basic_request = urllib2.Request(url, None, headers)
        request = urllib2.Request(url, None, headers)

        # Establish possible resume length
        if os.path.isfile(tmpfilename):
            resume_len = os.path.getsize(tmpfilename)
        else:
            resume_len = 0

        open_mode = 'wb'
        if resume_len != 0:
            if self.params.get('continuedl', False):
                self.report_resuming_byte(resume_len)
                request.add_header('Range', 'bytes=%d-' % resume_len)
                open_mode = 'ab'
            else:
                resume_len = 0

        count = 0
        retries = self.params.get('retries', 0)
        while count <= retries:
            # Establish connection
            try:
                data = urllib2.urlopen(request)
                break
            except (urllib2.HTTPError, ), err:
                if (err.code < 500 or err.code >= 600) and err.code != 416:
                    # Unexpected HTTP error
                    raise
                elif err.code == 416:
                    # Unable to resume (requested range not satisfiable)
                    try:
                        # Open the connection again without the range header
                        data = urllib2.urlopen(basic_request)
                        content_length = data.info()['Content-Length']
                    except (urllib2.HTTPError, ), err:
                        if err.code < 500 or err.code >= 600:
                            raise
                    else:
                        # Examine the reported length
                        if (content_length is not None and
                                (resume_len - 100 < long(content_length) < resume_len + 100)):
                            # The file had already been fully downloaded.
                            # Explanation to the above condition: in issue #175 it was revealed that
                            # YouTube sometimes adds or removes a few bytes from the end of the file,
                            # changing the file size slightly and causing problems for some users. So
                            # I decided to implement a suggested change and consider the file
                            # completely downloaded if the file size differs less than 100 bytes from
                            # the one in the hard drive.
                            self.report_file_already_downloaded(filename)
                            self.try_rename(tmpfilename, filename)
                            return True
                        else:
                            # The length does not match, we start the download over
                            self.report_unable_to_resume()
                            open_mode = 'wb'
                            break
            # Retry
            count += 1
            if count <= retries:
                self.report_retry(count, retries)

        if count > retries:
            self.trouble(u'ERROR: giving up after %s retries' % retries)
            return False

        data_len = data.info().get('Content-length', None)
        if data_len is not None:
            data_len = long(data_len) + resume_len
        data_len_str = self.format_bytes(data_len)
        byte_counter = 0 + resume_len
        block_size = 1024
        start = time.time()
        while True:
            # Download and write
            before = time.time()
            data_block = data.read(block_size)
            after = time.time()
            if len(data_block) == 0:
                break
            byte_counter += len(data_block)

            # Open file just in time
            if stream is None:
                try:
                    (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
                    assert stream is not None
                    filename = self.undo_temp_name(tmpfilename)
                    self.report_destination(filename)
                except (OSError, IOError), err:
                    self.trouble(u'ERROR: unable to open for writing: %s' % str(err))
                    return False
            try:
                stream.write(data_block)
            except (IOError, OSError), err:
                self.trouble(u'\nERROR: unable to write data: %s' % str(err))
                return False
            block_size = self.best_block_size(after - before, len(data_block))

            # Progress message
            percent_str = self.calc_percent(byte_counter, data_len)
            eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
            speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len)
            self.report_progress(percent_str, data_len_str, speed_str, eta_str)

            # Apply rate limit
            self.slow_down(start, byte_counter - resume_len)

        if stream is None:
            self.trouble(u'\nERROR: Did not get any data blocks')
            return False
        stream.close()
        self.report_finish()
        if data_len is not None and byte_counter != data_len:
            raise ContentTooShortError(byte_counter, long(data_len))
        self.try_rename(tmpfilename, filename)

        # Update file modification time
        filetime = None
        if self.params.get('updatetime', True):
            filetime = self.try_utime(filename, data.info().get('last-modified', None))

        return True, {'filetime': filetime}

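# Usage sketch (illustrative, not part of the original file): a FileDownloader is
# created with an options dictionary and InfoExtractors register with it (the
# "mutual registration" described in the class docstring). The template keys come
# from the info dictionaries documented below; the URL is hypothetical.
#
#   fd = FileDownloader({'outtmpl': u'%(stitle)s-%(id)s.%(ext)s', 'continuedl': True})
#   fd.add_info_extractor(YoutubeIE())
#   retcode = fd.download([u'http://www.youtube.com/watch?v=EXAMPLE_ID'])
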
class InfoExtractor(object):
    """Information Extractor class.

    Information extractors are the classes that, given a URL, extract
    information from the video (or videos) the URL refers to. This
    information includes the real video URL, the video title and simplified
    title, author and others. The information is stored in a dictionary
    which is then passed to the FileDownloader. The FileDownloader
    processes this information possibly downloading the video to the file
    system, among other possible outcomes. The dictionaries must include
    the following fields:

    id: Video identifier.
    url: Final video URL.
    uploader: Nickname of the video uploader.
    title: Literal title.
    stitle: Simplified title.
    ext: Video filename extension.
    format: Video format.
    player_url: SWF Player URL (may be None).

    The following fields are optional. Their primary purpose is to allow
    youtube-dl to serve as the backend for a video search function, such
    as the one in youtube2mp3. They are only used when their respective
    forced printing functions are called:

    thumbnail: Full URL to a video thumbnail image.
    description: One-line video description.

    Subclasses of this one should re-define the _real_initialize() and
    _real_extract() methods and define a _VALID_URL regexp.
    Probably, they should also be added to the list of extractors.
    """

    _ready = False
    _downloader = None

    def __init__(self, downloader=None):
        """Constructor. Receives an optional downloader."""
        self._ready = False
        self.set_downloader(downloader)

    def suitable(self, url):
        """Receives a URL and returns True if suitable for this IE."""
        return re.match(self._VALID_URL, url) is not None

    def initialize(self):
        """Initializes an instance (authentication, etc)."""
        if not self._ready:
            self._real_initialize()
            self._ready = True

    def extract(self, url):
        """Extracts URL information and returns it in list of dicts."""
        self.initialize()
        return self._real_extract(url)

    def set_downloader(self, downloader):
        """Sets the downloader for this IE."""
        self._downloader = downloader

    def _real_initialize(self):
        """Real initialization process. Redefine in subclasses."""
        pass

    def _real_extract(self, url):
        """Real extraction process. Redefine in subclasses."""
        pass

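# Sketch of a minimal subclass (an illustration, not taken from the original file):
# an extractor only needs a _VALID_URL regexp and a _real_extract() that hands a
# dictionary with the documented fields to the downloader. "ExampleIE" and its URL
# pattern are made up for demonstration.
#
#   class ExampleIE(InfoExtractor):
#       _VALID_URL = r'(?:http://)?(?:www\.)?example\.com/video/(\d+)'
#
#       def _real_extract(self, url):
#           video_id = re.match(self._VALID_URL, url).group(1)
#           self._downloader.increment_downloads()
#           self._downloader.process_info({
#               'id': video_id, 'url': u'http://example.com/video.flv',
#               'uploader': u'NA', 'title': u'Example', 'stitle': u'Example',
#               'ext': u'flv', 'format': u'NA', 'player_url': None,
#           })
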
4fa74b52
RG
1078class YoutubeIE(InfoExtractor):
1079 """Information extractor for youtube.com."""
1080
86e709d3 1081 _VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?:(?:(?:v|embed|e)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=))?)?([0-9A-Za-z_-]+)(?(1).+)?$'
9715661c 1082 _LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
7df4635f 1083 _LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en'
72ac78b8 1084 _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
4fa74b52 1085 _NETRC_MACHINE = 'youtube'
497cd3e6 1086 # Listed in order of quality
e0edf1e0 1087 _available_formats = ['38', '37', '45', '22', '43', '35', '34', '18', '6', '5', '17', '13']
7b7759f5 1088 _video_extensions = {
1089 '13': '3gp',
1090 '17': 'mp4',
1091 '18': 'mp4',
1092 '22': 'mp4',
d9bc015b 1093 '37': 'mp4',
9e9647d9 1094 '38': 'video', # You actually don't know if this will be MOV, AVI or whatever
0b59bf4a
RG
1095 '43': 'webm',
1096 '45': 'webm',
7b7759f5 1097 }
f3098c4d 1098 IE_NAME = u'youtube'
4fa74b52 1099
72ac78b8
RG
1100 def report_lang(self):
1101 """Report attempt to set language."""
331ce0a0 1102 self._downloader.to_screen(u'[youtube] Setting language')
72ac78b8 1103
bafa5cd9
RG
1104 def report_login(self):
1105 """Report attempt to log in."""
331ce0a0 1106 self._downloader.to_screen(u'[youtube] Logging in')
d3975459 1107
bafa5cd9
RG
1108 def report_age_confirmation(self):
1109 """Report attempt to confirm age."""
331ce0a0 1110 self._downloader.to_screen(u'[youtube] Confirming age')
d3975459 1111
e616ec0c
RG
1112 def report_video_webpage_download(self, video_id):
1113 """Report attempt to download video webpage."""
331ce0a0 1114 self._downloader.to_screen(u'[youtube] %s: Downloading video webpage' % video_id)
d3975459 1115
71b7300e
RG
1116 def report_video_info_webpage_download(self, video_id):
1117 """Report attempt to download video info webpage."""
331ce0a0 1118 self._downloader.to_screen(u'[youtube] %s: Downloading video info webpage' % video_id)
d3975459 1119
bafa5cd9
RG
1120 def report_information_extraction(self, video_id):
1121 """Report attempt to extract video information."""
331ce0a0 1122 self._downloader.to_screen(u'[youtube] %s: Extracting video information' % video_id)
d3975459 1123
7b7759f5 1124 def report_unavailable_format(self, video_id, format):
1125 """Report extracted video URL."""
331ce0a0 1126 self._downloader.to_screen(u'[youtube] %s: Format %s not available' % (video_id, format))
d3975459 1127
0487b407
RG
1128 def report_rtmp_download(self):
1129 """Indicate the download will use the RTMP protocol."""
331ce0a0 1130 self._downloader.to_screen(u'[youtube] RTMP download detected')
d3975459 1131
4fa74b52
RG
1132 def _real_initialize(self):
1133 if self._downloader is None:
1134 return
1135
1136 username = None
1137 password = None
d0a9affb 1138 downloader_params = self._downloader.params
4fa74b52
RG
1139
1140 # Attempt to use provided username and password or .netrc data
1141 if downloader_params.get('username', None) is not None:
1142 username = downloader_params['username']
1143 password = downloader_params['password']
1144 elif downloader_params.get('usenetrc', False):
1145 try:
1146 info = netrc.netrc().authenticators(self._NETRC_MACHINE)
1147 if info is not None:
1148 username = info[0]
1149 password = info[2]
1150 else:
1151 raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
1152 except (IOError, netrc.NetrcParseError), err:
6f21f686 1153 self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
4fa74b52
RG
1154 return
1155
72ac78b8 1156 # Set language
1987c232 1157 request = urllib2.Request(self._LANG_URL)
72ac78b8
RG
1158 try:
1159 self.report_lang()
1160 urllib2.urlopen(request).read()
1161 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
6f21f686 1162 self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err))
72ac78b8
RG
1163 return
1164
cc109403
RG
1165 # No authentication to be performed
1166 if username is None:
1167 return
1168
4fa74b52 1169 # Log in
9fcd8355
RG
1170 login_form = {
1171 'current_form': 'loginForm',
4fa74b52
RG
1172 'next': '/',
1173 'action_login': 'Log In',
1174 'username': username,
9fcd8355
RG
1175 'password': password,
1176 }
1987c232 1177 request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form))
4fa74b52 1178 try:
bafa5cd9 1179 self.report_login()
4fa74b52
RG
1180 login_results = urllib2.urlopen(request).read()
1181 if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
6f21f686 1182 self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
4fa74b52
RG
1183 return
1184 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
6f21f686 1185 self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
4fa74b52 1186 return
d3975459 1187
4fa74b52 1188 # Confirm age
9fcd8355
RG
1189 age_form = {
1190 'next_url': '/',
1191 'action_confirm': 'Confirm',
1192 }
1987c232 1193 request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form))
4fa74b52 1194 try:
bafa5cd9 1195 self.report_age_confirmation()
4fa74b52
RG
1196 age_results = urllib2.urlopen(request).read()
1197 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
147753eb 1198 self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
e5bf0f55 1199 return
4fa74b52
RG
1200
1201 def _real_extract(self, url):
1202 # Extract video id from URL
020f7150 1203 mobj = re.match(self._VALID_URL, url)
4fa74b52 1204 if mobj is None:
147753eb 1205 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
6f21f686 1206 return
4fa74b52
RG
1207 video_id = mobj.group(2)
1208
497cd3e6
RG
1209 # Get video webpage
1210 self.report_video_webpage_download(video_id)
1987c232 1211 request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en&amp;has_verified=1' % video_id)
497cd3e6
RG
1212 try:
1213 video_webpage = urllib2.urlopen(request).read()
1214 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1215 self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
1216 return
968aa884 1217
497cd3e6 1218 # Attempt to extract SWF player URL
b620a5f8 1219 mobj = re.search(r'swfConfig.*?"(http:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
497cd3e6 1220 if mobj is not None:
b620a5f8 1221 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
497cd3e6
RG
1222 else:
1223 player_url = None
1224
1225 # Get video info
1226 self.report_video_info_webpage_download(video_id)
1227 for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
1228 video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
c0a10ca8 1229 % (video_id, el_type))
1987c232 1230 request = urllib2.Request(video_info_url)
e616ec0c 1231 try:
497cd3e6
RG
1232 video_info_webpage = urllib2.urlopen(request).read()
1233 video_info = parse_qs(video_info_webpage)
1234 if 'token' in video_info:
1235 break
e616ec0c 1236 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
497cd3e6 1237 self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
e616ec0c 1238 return
f95f29fd
RG
1239 if 'token' not in video_info:
1240 if 'reason' in video_info:
8e686771 1241 self._downloader.trouble(u'ERROR: YouTube said: %s' % video_info['reason'][0].decode('utf-8'))
f95f29fd
RG
1242 else:
1243 self._downloader.trouble(u'ERROR: "token" parameter not in video info for unknown reason')
1244 return
1245
1246 # Start extracting information
497cd3e6
RG
1247 self.report_information_extraction(video_id)
1248
1249 # uploader
1250 if 'author' not in video_info:
1251 self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
1252 return
1253 video_uploader = urllib.unquote_plus(video_info['author'][0])
e616ec0c 1254
497cd3e6
RG
1255 # title
1256 if 'title' not in video_info:
1257 self._downloader.trouble(u'ERROR: unable to extract video title')
1258 return
1259 video_title = urllib.unquote_plus(video_info['title'][0])
1260 video_title = video_title.decode('utf-8')
1261 video_title = sanitize_title(video_title)
1262
1263 # simplified title
1264 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1265 simple_title = simple_title.strip(ur'_')
1266
1267 # thumbnail image
1268 if 'thumbnail_url' not in video_info:
1269 self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
1270 video_thumbnail = ''
1271 else: # don't panic if we can't find it
1272 video_thumbnail = urllib.unquote_plus(video_info['thumbnail_url'][0])
1273
b3a27b52
NA
1274 # upload date
1275 upload_date = u'NA'
3efa45c3 1276 mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
b3a27b52 1277 if mobj is not None:
a1f03c7b 1278 upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
87cbd213 1279 format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y']
a1f03c7b
NA
1280 for expression in format_expressions:
1281 try:
1282 upload_date = datetime.datetime.strptime(upload_date, expression).strftime('%Y%m%d')
1283 except:
1284 pass
b3a27b52 1285
497cd3e6 1286 # description
c6b55a8d
PH
1287 try:
1288 lxml.etree
1289 except NameError:
1290 video_description = u'No description available.'
8b95c387 1291 if self._downloader.params.get('forcedescription', False) or self._downloader.params.get('writedescription', False):
c6b55a8d
PH
1292 mobj = re.search(r'<meta name="description" content="(.*)"(?:\s*/)?>', video_webpage)
1293 if mobj is not None:
1294 video_description = mobj.group(1).decode('utf-8')
1295 else:
1296 html_parser = lxml.etree.HTMLParser(encoding='utf-8')
1297 vwebpage_doc = lxml.etree.parse(StringIO.StringIO(video_webpage), html_parser)
1298 video_description = u''.join(vwebpage_doc.xpath('id("eow-description")//text()'))
91e6a385 1299 # TODO use another parser
497cd3e6 1300
5ce7d172
RG
1301 # token
1302 video_token = urllib.unquote_plus(video_info['token'][0])
1303
497cd3e6 1304 # Decide which formats to download
f83ae781 1305 req_format = self._downloader.params.get('format', None)
2e3a32e4 1306
f137bef9
PH
1307 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1308 self.report_rtmp_download()
1309 video_url_list = [(None, video_info['conn'][0])]
f137bef9 1310 elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1:
0ac22e4f 1311 url_data_strs = video_info['url_encoded_fmt_stream_map'][0].split(',')
8519c32d 1312 url_data = [parse_qs(uds) for uds in url_data_strs]
f137bef9 1313 url_data = filter(lambda ud: 'itag' in ud and 'url' in ud, url_data)
8519c32d 1314 url_map = dict((ud['itag'][0], ud['url'][0]) for ud in url_data)
2b70537d 1315
497cd3e6
RG
1316 format_limit = self._downloader.params.get('format_limit', None)
1317 if format_limit is not None and format_limit in self._available_formats:
1318 format_list = self._available_formats[self._available_formats.index(format_limit):]
e616ec0c 1319 else:
497cd3e6
RG
1320 format_list = self._available_formats
1321 existing_formats = [x for x in format_list if x in url_map]
1322 if len(existing_formats) == 0:
1323 self._downloader.trouble(u'ERROR: no known formats available for video')
968aa884 1324 return
5260e68f 1325 if req_format is None or req_format == 'best':
d157d259 1326 video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
c52b01f3
K
1327 elif req_format == 'worst':
1328 video_url_list = [(existing_formats[len(existing_formats)-1], url_map[existing_formats[len(existing_formats)-1]])] # worst quality
5260e68f 1329 elif req_format in ('-1', 'all'):
d157d259 1330 video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
497cd3e6 1331 else:
5260e68f
PH
1332 # Specific formats. We pick the first in a slash-delimeted sequence.
1333 # For example, if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
1334 req_formats = req_format.split('/')
1335 video_url_list = None
1336 for rf in req_formats:
1337 if rf in url_map:
1338 video_url_list = [(rf, url_map[rf])]
1339 break
1340 if video_url_list is None:
5c132793
RG
1341 self._downloader.trouble(u'ERROR: requested format not available')
1342 return
497cd3e6 1343 else:
f3dc18d8 1344 self._downloader.trouble(u'ERROR: no conn or url_encoded_fmt_stream_map information found in video info')
497cd3e6 1345 return
7b7759f5 1346
497cd3e6
RG
1347 for format_param, video_real_url in video_url_list:
1348 # At this point we have a new video
1349 self._downloader.increment_downloads()
1350
1351 # Extension
1352 video_extension = self._video_extensions.get(format_param, 'flv')
7e58d568 1353
968aa884 1354 try:
7b7759f5 1355 # Process video information
1356 self._downloader.process_info({
1357 'id': video_id.decode('utf-8'),
1358 'url': video_real_url.decode('utf-8'),
1359 'uploader': video_uploader.decode('utf-8'),
138b11f3 1360 'upload_date': upload_date,
7b7759f5 1361 'title': video_title,
1362 'stitle': simple_title,
1363 'ext': video_extension.decode('utf-8'),
6ba562b0 1364 'format': (format_param is None and u'NA' or format_param.decode('utf-8')),
7e58d568 1365 'thumbnail': video_thumbnail.decode('utf-8'),
c6b55a8d 1366 'description': video_description,
e616ec0c 1367 'player_url': player_url,
7b7759f5 1368 })
497cd3e6 1369 except UnavailableVideoError, err:
09cc744c 1370 self._downloader.trouble(u'\nERROR: unable to download video')
42bcd27d 1371
4fa74b52 1372
020f7150
RG
1373class MetacafeIE(InfoExtractor):
1374 """Information Extractor for metacafe.com."""
1375
1376 _VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
2546e767 1377 _DISCLAIMER = 'http://www.metacafe.com/family_filter/'
dbccb6cd 1378 _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
020f7150 1379 _youtube_ie = None
f3098c4d 1380 IE_NAME = u'metacafe'
020f7150
RG
1381
1382 def __init__(self, youtube_ie, downloader=None):
1383 InfoExtractor.__init__(self, downloader)
1384 self._youtube_ie = youtube_ie
1385
020f7150
RG
1386 def report_disclaimer(self):
1387 """Report disclaimer retrieval."""
331ce0a0 1388 self._downloader.to_screen(u'[metacafe] Retrieving disclaimer')
020f7150
RG
1389
1390 def report_age_confirmation(self):
1391 """Report attempt to confirm age."""
331ce0a0 1392 self._downloader.to_screen(u'[metacafe] Confirming age')
d3975459 1393
020f7150
RG
1394 def report_download_webpage(self, video_id):
1395 """Report webpage download."""
331ce0a0 1396 self._downloader.to_screen(u'[metacafe] %s: Downloading webpage' % video_id)
d3975459 1397
020f7150
RG
1398 def report_extraction(self, video_id):
1399 """Report information extraction."""
331ce0a0 1400 self._downloader.to_screen(u'[metacafe] %s: Extracting information' % video_id)
020f7150
RG
1401
1402 def _real_initialize(self):
1403 # Retrieve disclaimer
1987c232 1404 request = urllib2.Request(self._DISCLAIMER)
020f7150
RG
1405 try:
1406 self.report_disclaimer()
1407 disclaimer = urllib2.urlopen(request).read()
1408 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
147753eb 1409 self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % str(err))
020f7150
RG
1410 return
1411
1412 # Confirm age
1413 disclaimer_form = {
2546e767 1414 'filters': '0',
020f7150
RG
1415 'submit': "Continue - I'm over 18",
1416 }
1987c232 1417 request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form))
020f7150
RG
1418 try:
1419 self.report_age_confirmation()
1420 disclaimer = urllib2.urlopen(request).read()
1421 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
147753eb 1422 self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
020f7150 1423 return
d3975459 1424
020f7150
RG
1425 def _real_extract(self, url):
1426 # Extract id and simplified title from URL
1427 mobj = re.match(self._VALID_URL, url)
1428 if mobj is None:
147753eb 1429 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
6f21f686 1430 return
020f7150
RG
1431
1432 video_id = mobj.group(1)
1433
1434 # Check if video comes from YouTube
1435 mobj2 = re.match(r'^yt-(.*)$', video_id)
1436 if mobj2 is not None:
6f21f686
RG
1437 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % mobj2.group(1))
1438 return
020f7150 1439
df372a65 1440 # At this point we have a new video
9bf7fa52 1441 self._downloader.increment_downloads()
df372a65 1442
020f7150 1443 simple_title = mobj.group(2).decode('utf-8')
020f7150
RG
1444
1445 # Retrieve video webpage to extract further information
1446 request = urllib2.Request('http://www.metacafe.com/watch/%s/' % video_id)
1447 try:
1448 self.report_download_webpage(video_id)
1449 webpage = urllib2.urlopen(request).read()
1450 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
147753eb 1451 self._downloader.trouble(u'ERROR: unable to retrieve video webpage: %s' % str(err))
6f21f686 1452 return
020f7150
RG
1453
1454 # Extract URL, uploader and title from webpage
1455 self.report_extraction(video_id)
18963a36 1456 mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
c6c555cf
RG
1457 if mobj is not None:
1458 mediaURL = urllib.unquote(mobj.group(1))
6b57e8c5 1459 video_extension = mediaURL[-3:]
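# Note: this takes the last three characters of the URL as the extension,
# so it assumes a three-letter extension such as 'flv'.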
d3975459 1460
c6c555cf
RG
1461 # Extract gdaKey if available
1462 mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage)
1463 if mobj is None:
1464 video_url = mediaURL
1465 else:
1466 gdaKey = mobj.group(1)
1467 video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)
109626fc 1468 else:
c6c555cf
RG
1469 mobj = re.search(r' name="flashvars" value="(.*?)"', webpage)
1470 if mobj is None:
1471 self._downloader.trouble(u'ERROR: unable to extract media URL')
1472 return
1473 vardict = parse_qs(mobj.group(1))
1474 if 'mediaData' not in vardict:
1475 self._downloader.trouble(u'ERROR: unable to extract media URL')
1476 return
1477 mobj = re.search(r'"mediaURL":"(http.*?)","key":"(.*?)"', vardict['mediaData'][0])
1478 if mobj is None:
1479 self._downloader.trouble(u'ERROR: unable to extract media URL')
1480 return
6b57e8c5
RG
1481 mediaURL = mobj.group(1).replace('\\/', '/')
1482 video_extension = mediaURL[-3:]
1483 video_url = '%s?__gda__=%s' % (mediaURL, mobj.group(2))
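# Illustrative flashvars fragment (hypothetical values): the mediaData entry is
# expected to contain "mediaURL":"http:\/\/example.com\/clip.flv","key":"abc";
# the escaped slashes are undone above before the __gda__ key is appended.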
020f7150 1484
2546e767 1485 mobj = re.search(r'(?im)<title>(.*) - Video</title>', webpage)
020f7150 1486 if mobj is None:
147753eb 1487 self._downloader.trouble(u'ERROR: unable to extract title')
6f21f686 1488 return
020f7150 1489 video_title = mobj.group(1).decode('utf-8')
490fd7ae 1490 video_title = sanitize_title(video_title)
020f7150 1491
29f07568 1492 mobj = re.search(r'(?ms)By:\s*<a .*?>(.+?)<', webpage)
020f7150 1493 if mobj is None:
147753eb 1494 self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
6f21f686 1495 return
dbccb6cd 1496 video_uploader = mobj.group(1)
020f7150 1497
42bcd27d 1498 try:
1499 # Process video information
1500 self._downloader.process_info({
1501 'id': video_id.decode('utf-8'),
1502 'url': video_url.decode('utf-8'),
1503 'uploader': video_uploader.decode('utf-8'),
138b11f3 1504 'upload_date': u'NA',
42bcd27d 1505 'title': video_title,
1506 'stitle': simple_title,
1507 'ext': video_extension.decode('utf-8'),
6ba562b0 1508 'format': u'NA',
e616ec0c 1509 'player_url': None,
42bcd27d 1510 })
73f4e7af 1511 except UnavailableVideoError:
09cc744c 1512 self._downloader.trouble(u'\nERROR: unable to download video')
020f7150 1513
25af2bce 1514
4135fa45
WB
1515class DailymotionIE(InfoExtractor):
1516 """Information Extractor for Dailymotion"""
1517
1518 _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^_/]+)_([^/]+)'
f3098c4d 1519 IE_NAME = u'dailymotion'
4135fa45
WB
1520
1521 def __init__(self, downloader=None):
1522 InfoExtractor.__init__(self, downloader)
1523
4135fa45
WB
1524 def report_download_webpage(self, video_id):
1525 """Report webpage download."""
331ce0a0 1526 self._downloader.to_screen(u'[dailymotion] %s: Downloading webpage' % video_id)
d3975459 1527
4135fa45
WB
1528 def report_extraction(self, video_id):
1529 """Report information extraction."""
331ce0a0 1530 self._downloader.to_screen(u'[dailymotion] %s: Extracting information' % video_id)
4135fa45
WB
1531
1532 def _real_initialize(self):
1533 return
1534
4135fa45
WB
1535 def _real_extract(self, url):
1536 # Extract id and simplified title from URL
1537 mobj = re.match(self._VALID_URL, url)
1538 if mobj is None:
1539 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
1540 return
1541
df372a65 1542 # At this point we have a new video
9bf7fa52 1543 self._downloader.increment_downloads()
4135fa45
WB
1544 video_id = mobj.group(1)
1545
1546 simple_title = mobj.group(2).decode('utf-8')
1547 video_extension = 'flv'
1548
1549 # Retrieve video webpage to extract further information
1550 request = urllib2.Request(url)
62a29bbf 1551 request.add_header('Cookie', 'family_filter=off')
4135fa45
WB
1552 try:
1553 self.report_download_webpage(video_id)
1554 webpage = urllib2.urlopen(request).read()
1555 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 1556 self._downloader.trouble(u'ERROR: unable to retrieve video webpage: %s' % str(err))
1557 return
1558
1559 # Extract URL, uploader and title from webpage
1560 self.report_extraction(video_id)
62a29bbf 1561 mobj = re.search(r'(?i)addVariable\(\"sequence\"\s*,\s*\"([^\"]+?)\"\)', webpage)
4135fa45
WB
1562 if mobj is None:
1563 self._downloader.trouble(u'ERROR: unable to extract media URL')
1564 return
62a29bbf 1565 sequence = urllib.unquote(mobj.group(1))
1566 mobj = re.search(r',\"sdURL\"\:\"([^\"]+?)\",', sequence)
1567 if mobj is None:
1568 self._downloader.trouble(u'ERROR: unable to extract media URL')
1569 return
1570 mediaURL = urllib.unquote(mobj.group(1)).replace('\\', '')
4135fa45
WB
1571
 1572 # if needed, prepend http://www.dailymotion.com/ to relative URLs
1573
1574 video_url = mediaURL
1575
62a29bbf 1576 mobj = re.search(r'(?im)<title>Dailymotion\s*-\s*(.+)\s*-\s*[^<]+?</title>', webpage)
4135fa45
WB
1577 if mobj is None:
1578 self._downloader.trouble(u'ERROR: unable to extract title')
1579 return
1580 video_title = mobj.group(1).decode('utf-8')
1581 video_title = sanitize_title(video_title)
1582
62a29bbf 1583 mobj = re.search(r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a></span>', webpage)
4135fa45
WB
1584 if mobj is None:
1585 self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
1586 return
1587 video_uploader = mobj.group(1)
1588
1589 try:
1590 # Process video information
1591 self._downloader.process_info({
1592 'id': video_id.decode('utf-8'),
1593 'url': video_url.decode('utf-8'),
1594 'uploader': video_uploader.decode('utf-8'),
138b11f3 1595 'upload_date': u'NA',
4135fa45
WB
1596 'title': video_title,
1597 'stitle': simple_title,
1598 'ext': video_extension.decode('utf-8'),
1599 'format': u'NA',
1600 'player_url': None,
1601 })
73f4e7af 1602 except UnavailableVideoError:
09cc744c 1603 self._downloader.trouble(u'\nERROR: unable to download video')
4135fa45 1604
c0a10ca8 1605
49c0028a 1606class GoogleIE(InfoExtractor):
1607 """Information extractor for video.google.com."""
1608
490fd7ae 1609 _VALID_URL = r'(?:http://)?video\.google\.(?:com(?:\.au)?|co\.(?:uk|jp|kr|cr)|ca|de|es|fr|it|nl|pl)/videoplay\?docid=([^\&]+).*'
f3098c4d 1610 IE_NAME = u'video.google'
49c0028a 1611
1612 def __init__(self, downloader=None):
1613 InfoExtractor.__init__(self, downloader)
1614
49c0028a 1615 def report_download_webpage(self, video_id):
1616 """Report webpage download."""
331ce0a0 1617 self._downloader.to_screen(u'[video.google] %s: Downloading webpage' % video_id)
49c0028a 1618
1619 def report_extraction(self, video_id):
1620 """Report information extraction."""
331ce0a0 1621 self._downloader.to_screen(u'[video.google] %s: Extracting information' % video_id)
49c0028a 1622
1623 def _real_initialize(self):
1624 return
1625
1626 def _real_extract(self, url):
1627 # Extract id from URL
1628 mobj = re.match(self._VALID_URL, url)
1629 if mobj is None:
1630 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1631 return
1632
df372a65 1633 # At this point we have a new video
9bf7fa52 1634 self._downloader.increment_downloads()
49c0028a 1635 video_id = mobj.group(1)
1636
1637 video_extension = 'mp4'
1638
1639 # Retrieve video webpage to extract further information
490fd7ae 1640 request = urllib2.Request('http://video.google.com/videoplay?docid=%s&hl=en&oe=utf-8' % video_id)
49c0028a 1641 try:
1642 self.report_download_webpage(video_id)
1643 webpage = urllib2.urlopen(request).read()
1644 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1645 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1646 return
1647
1648 # Extract URL, uploader, and title from webpage
1649 self.report_extraction(video_id)
490fd7ae
RG
1650 mobj = re.search(r"download_url:'([^']+)'", webpage)
1651 if mobj is None:
1652 video_extension = 'flv'
1653 mobj = re.search(r"(?i)videoUrl\\x3d(.+?)\\x26", webpage)
49c0028a 1654 if mobj is None:
1655 self._downloader.trouble(u'ERROR: unable to extract media URL')
1656 return
1657 mediaURL = urllib.unquote(mobj.group(1))
1658 mediaURL = mediaURL.replace('\\x3d', '\x3d')
1659 mediaURL = mediaURL.replace('\\x26', '\x26')
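# '\x3d' and '\x26' are '=' and '&'; the two replaces above turn the literally
# escaped '\\x3d'/'\\x26' sequences found in the page back into those characters.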
1660
1661 video_url = mediaURL
1662
1663 mobj = re.search(r'<title>(.*)</title>', webpage)
1664 if mobj is None:
1665 self._downloader.trouble(u'ERROR: unable to extract title')
1666 return
1667 video_title = mobj.group(1).decode('utf-8')
490fd7ae 1668 video_title = sanitize_title(video_title)
31cbdaaf 1669 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
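# Illustration: the substitution collapses every run of characters outside
# ASCII letters and digits into a single underscore, e.g.
# u'My Video: Part 1' -> u'My_Video_Part_1'.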
49c0028a 1670
7e58d568
RG
1671 # Extract video description
1672 mobj = re.search(r'<span id=short-desc-content>([^<]*)</span>', webpage)
1673 if mobj is None:
1674 self._downloader.trouble(u'ERROR: unable to extract video description')
1675 return
1676 video_description = mobj.group(1).decode('utf-8')
1677 if not video_description:
1678 video_description = 'No description available.'
1679
1680 # Extract video thumbnail
1681 if self._downloader.params.get('forcethumbnail', False):
1682 request = urllib2.Request('http://video.google.com/videosearch?q=%s+site:video.google.com&hl=en' % abs(int(video_id)))
1683 try:
1684 webpage = urllib2.urlopen(request).read()
1685 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1686 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1687 return
1688 mobj = re.search(r'<img class=thumbnail-img (?:.* )?src=(http.*)>', webpage)
1689 if mobj is None:
1690 self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
1691 return
1692 video_thumbnail = mobj.group(1)
1693 else: # we need something to pass to process_info
1694 video_thumbnail = ''
1695
49c0028a 1696 try:
1697 # Process video information
1698 self._downloader.process_info({
1699 'id': video_id.decode('utf-8'),
1700 'url': video_url.decode('utf-8'),
6ba562b0 1701 'uploader': u'NA',
138b11f3 1702 'upload_date': u'NA',
490fd7ae 1703 'title': video_title,
31cbdaaf 1704 'stitle': simple_title,
49c0028a 1705 'ext': video_extension.decode('utf-8'),
6ba562b0 1706 'format': u'NA',
e616ec0c 1707 'player_url': None,
49c0028a 1708 })
73f4e7af 1709 except UnavailableVideoError:
09cc744c 1710 self._downloader.trouble(u'\nERROR: unable to download video')
49c0028a 1711
1712
1713class PhotobucketIE(InfoExtractor):
1714 """Information extractor for photobucket.com."""
1715
1716 _VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)'
f3098c4d 1717 IE_NAME = u'photobucket'
49c0028a 1718
1719 def __init__(self, downloader=None):
1720 InfoExtractor.__init__(self, downloader)
1721
49c0028a 1722 def report_download_webpage(self, video_id):
1723 """Report webpage download."""
331ce0a0 1724 self._downloader.to_screen(u'[photobucket] %s: Downloading webpage' % video_id)
49c0028a 1725
1726 def report_extraction(self, video_id):
1727 """Report information extraction."""
331ce0a0 1728 self._downloader.to_screen(u'[photobucket] %s: Extracting information' % video_id)
49c0028a 1729
1730 def _real_initialize(self):
1731 return
1732
1733 def _real_extract(self, url):
1734 # Extract id from URL
1735 mobj = re.match(self._VALID_URL, url)
1736 if mobj is None:
1737 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1738 return
1739
df372a65 1740 # At this point we have a new video
9bf7fa52 1741 self._downloader.increment_downloads()
49c0028a 1742 video_id = mobj.group(1)
1743
1744 video_extension = 'flv'
1745
1746 # Retrieve video webpage to extract further information
1747 request = urllib2.Request(url)
1748 try:
1749 self.report_download_webpage(video_id)
1750 webpage = urllib2.urlopen(request).read()
1751 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1752 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1753 return
1754
1755 # Extract URL, uploader, and title from webpage
1756 self.report_extraction(video_id)
1757 mobj = re.search(r'<link rel="video_src" href=".*\?file=([^"]+)" />', webpage)
1758 if mobj is None:
1759 self._downloader.trouble(u'ERROR: unable to extract media URL')
1760 return
1761 mediaURL = urllib.unquote(mobj.group(1))
1762
1763 video_url = mediaURL
1764
1765 mobj = re.search(r'<title>(.*) video by (.*) - Photobucket</title>', webpage)
1766 if mobj is None:
1767 self._downloader.trouble(u'ERROR: unable to extract title')
1768 return
1769 video_title = mobj.group(1).decode('utf-8')
490fd7ae 1770 video_title = sanitize_title(video_title)
31cbdaaf 1771 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
49c0028a 1772
1773 video_uploader = mobj.group(2).decode('utf-8')
1774
1775 try:
1776 # Process video information
1777 self._downloader.process_info({
1778 'id': video_id.decode('utf-8'),
1779 'url': video_url.decode('utf-8'),
490fd7ae 1780 'uploader': video_uploader,
138b11f3 1781 'upload_date': u'NA',
490fd7ae 1782 'title': video_title,
31cbdaaf 1783 'stitle': simple_title,
490fd7ae 1784 'ext': video_extension.decode('utf-8'),
6ba562b0 1785 'format': u'NA',
e616ec0c 1786 'player_url': None,
490fd7ae 1787 })
73f4e7af 1788 except UnavailableVideoError:
09cc744c 1789 self._downloader.trouble(u'\nERROR: unable to download video')
490fd7ae
RG
1790
1791
61945318
RG
1792class YahooIE(InfoExtractor):
1793 """Information extractor for video.yahoo.com."""
1794
1795 # _VALID_URL matches all Yahoo! Video URLs
1796 # _VPAGE_URL matches only the extractable '/watch/' URLs
1797 _VALID_URL = r'(?:http://)?(?:[a-z]+\.)?video\.yahoo\.com/(?:watch|network)/([0-9]+)(?:/|\?v=)([0-9]+)(?:[#\?].*)?'
1798 _VPAGE_URL = r'(?:http://)?video\.yahoo\.com/watch/([0-9]+)/([0-9]+)(?:[#\?].*)?'
f3098c4d 1799 IE_NAME = u'video.yahoo'
61945318
RG
1800
1801 def __init__(self, downloader=None):
1802 InfoExtractor.__init__(self, downloader)
1803
61945318
RG
1804 def report_download_webpage(self, video_id):
1805 """Report webpage download."""
331ce0a0 1806 self._downloader.to_screen(u'[video.yahoo] %s: Downloading webpage' % video_id)
61945318
RG
1807
1808 def report_extraction(self, video_id):
1809 """Report information extraction."""
331ce0a0 1810 self._downloader.to_screen(u'[video.yahoo] %s: Extracting information' % video_id)
61945318
RG
1811
1812 def _real_initialize(self):
1813 return
1814
df372a65 1815 def _real_extract(self, url, new_video=True):
61945318
RG
1816 # Extract ID from URL
1817 mobj = re.match(self._VALID_URL, url)
1818 if mobj is None:
1819 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1820 return
1821
df372a65 1822 # At this point we have a new video
9bf7fa52 1823 self._downloader.increment_downloads()
61945318
RG
1824 video_id = mobj.group(2)
1825 video_extension = 'flv'
1826
1827 # Rewrite valid but non-extractable URLs as
1828 # extractable English language /watch/ URLs
1829 if re.match(self._VPAGE_URL, url) is None:
1830 request = urllib2.Request(url)
1831 try:
1832 webpage = urllib2.urlopen(request).read()
1833 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1834 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1835 return
1836
1837 mobj = re.search(r'\("id", "([0-9]+)"\);', webpage)
1838 if mobj is None:
1839 self._downloader.trouble(u'ERROR: Unable to extract id field')
1840 return
1841 yahoo_id = mobj.group(1)
1842
1843 mobj = re.search(r'\("vid", "([0-9]+)"\);', webpage)
1844 if mobj is None:
1845 self._downloader.trouble(u'ERROR: Unable to extract vid field')
1846 return
1847 yahoo_vid = mobj.group(1)
1848
1849 url = 'http://video.yahoo.com/watch/%s/%s' % (yahoo_vid, yahoo_id)
df372a65 1850 return self._real_extract(url, new_video=False)
61945318
RG
1851
1852 # Retrieve video webpage to extract further information
1853 request = urllib2.Request(url)
1854 try:
1855 self.report_download_webpage(video_id)
1856 webpage = urllib2.urlopen(request).read()
1857 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1858 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1859 return
1860
1861 # Extract uploader and title from webpage
1862 self.report_extraction(video_id)
1863 mobj = re.search(r'<meta name="title" content="(.*)" />', webpage)
1864 if mobj is None:
1865 self._downloader.trouble(u'ERROR: unable to extract video title')
1866 return
1867 video_title = mobj.group(1).decode('utf-8')
1868 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1869
1870 mobj = re.search(r'<h2 class="ti-5"><a href="http://video\.yahoo\.com/(people|profile)/[0-9]+" beacon=".*">(.*)</a></h2>', webpage)
1871 if mobj is None:
1872 self._downloader.trouble(u'ERROR: unable to extract video uploader')
1873 return
 1874 video_uploader = mobj.group(2).decode('utf-8') # group(1) is only the 'people'/'profile' path segment
1875
7e58d568
RG
1876 # Extract video thumbnail
1877 mobj = re.search(r'<link rel="image_src" href="(.*)" />', webpage)
1878 if mobj is None:
1879 self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
1880 return
1881 video_thumbnail = mobj.group(1).decode('utf-8')
1882
1883 # Extract video description
1884 mobj = re.search(r'<meta name="description" content="(.*)" />', webpage)
1885 if mobj is None:
1886 self._downloader.trouble(u'ERROR: unable to extract video description')
1887 return
1888 video_description = mobj.group(1).decode('utf-8')
c0a10ca8
F
1889 if not video_description:
1890 video_description = 'No description available.'
7e58d568 1891
61945318
RG
1892 # Extract video height and width
1893 mobj = re.search(r'<meta name="video_height" content="([0-9]+)" />', webpage)
1894 if mobj is None:
1895 self._downloader.trouble(u'ERROR: unable to extract video height')
1896 return
1897 yv_video_height = mobj.group(1)
1898
1899 mobj = re.search(r'<meta name="video_width" content="([0-9]+)" />', webpage)
1900 if mobj is None:
1901 self._downloader.trouble(u'ERROR: unable to extract video width')
1902 return
1903 yv_video_width = mobj.group(1)
1904
1905 # Retrieve video playlist to extract media URL
1906 # I'm not completely sure what all these options are, but we
1907 # seem to need most of them, otherwise the server sends a 401.
1908 yv_lg = 'R0xx6idZnW2zlrKP8xxAIR' # not sure what this represents
1909 yv_bitrate = '700' # according to Wikipedia this is hard-coded
1910 request = urllib2.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id +
c0a10ca8
F
1911 '&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height +
1912 '&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797')
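# The playlist request carries node_id, tech, mode, lg, bitrate and the video
# dimensions (among others) as query parameters; the XML it returns contains
# the <STREAM> element that is parsed below for the media URL.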
61945318
RG
1913 try:
1914 self.report_download_webpage(video_id)
1915 webpage = urllib2.urlopen(request).read()
1916 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1917 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1918 return
1919
1920 # Extract media URL from playlist XML
1921 mobj = re.search(r'<STREAM APP="(http://.*)" FULLPATH="/?(/.*\.flv\?[^"]*)"', webpage)
1922 if mobj is None:
1923 self._downloader.trouble(u'ERROR: Unable to extract media URL')
1924 return
1925 video_url = urllib.unquote(mobj.group(1) + mobj.group(2)).decode('utf-8')
1926 video_url = re.sub(r'(?u)&(.+?);', htmlentity_transform, video_url)
1927
1928 try:
1929 # Process video information
1930 self._downloader.process_info({
1931 'id': video_id.decode('utf-8'),
1932 'url': video_url,
1933 'uploader': video_uploader,
138b11f3 1934 'upload_date': u'NA',
61945318
RG
1935 'title': video_title,
1936 'stitle': simple_title,
1937 'ext': video_extension.decode('utf-8'),
7e58d568
RG
1938 'thumbnail': video_thumbnail.decode('utf-8'),
1939 'description': video_description,
e616ec0c 1941 'player_url': None,
61945318 1942 })
73f4e7af 1943 except UnavailableVideoError:
09cc744c 1944 self._downloader.trouble(u'\nERROR: unable to download video')
61945318
RG
1945
1946
92743d42
RB
1947class VimeoIE(InfoExtractor):
1948 """Information extractor for vimeo.com."""
1949
1950 # _VALID_URL matches Vimeo URLs
44c636df 1951 _VALID_URL = r'(?:https?://)?(?:(?:www|player).)?vimeo\.com/(?:groups/[^/]+/)?(?:videos?/)?([0-9]+)'
f3098c4d 1952 IE_NAME = u'vimeo'
92743d42
RB
1953
1954 def __init__(self, downloader=None):
1955 InfoExtractor.__init__(self, downloader)
1956
92743d42
RB
1957 def report_download_webpage(self, video_id):
1958 """Report webpage download."""
0ecedbdb 1959 self._downloader.to_screen(u'[vimeo] %s: Downloading webpage' % video_id)
92743d42
RB
1960
1961 def report_extraction(self, video_id):
1962 """Report information extraction."""
0ecedbdb 1963 self._downloader.to_screen(u'[vimeo] %s: Extracting information' % video_id)
92743d42
RB
1964
1965 def _real_initialize(self):
1966 return
1967
1968 def _real_extract(self, url, new_video=True):
1969 # Extract ID from URL
1970 mobj = re.match(self._VALID_URL, url)
1971 if mobj is None:
1972 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1973 return
1974
1975 # At this point we have a new video
1976 self._downloader.increment_downloads()
1977 video_id = mobj.group(1)
92743d42
RB
1978
1979 # Retrieve video webpage to extract further information
1980 request = urllib2.Request("http://vimeo.com/moogaloop/load/clip:%s" % video_id, None, std_headers)
1981 try:
1982 self.report_download_webpage(video_id)
1983 webpage = urllib2.urlopen(request).read()
1984 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1985 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1986 return
1987
f24c674b
RB
1988 # Now we begin extracting as much information as we can from what we
1989 # retrieved. First we extract the information common to all extractors,
 1990 # and later we extract those that are Vimeo-specific.
92743d42 1991 self.report_extraction(video_id)
f24c674b
RB
1992
1993 # Extract title
c5a088d3 1994 mobj = re.search(r'<caption>(.*?)</caption>', webpage)
92743d42
RB
1995 if mobj is None:
1996 self._downloader.trouble(u'ERROR: unable to extract video title')
1997 return
1998 video_title = mobj.group(1).decode('utf-8')
1999 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
2000
f24c674b 2001 # Extract uploader
c5a088d3 2002 mobj = re.search(r'<uploader_url>http://vimeo.com/(.*?)</uploader_url>', webpage)
92743d42
RB
2003 if mobj is None:
2004 self._downloader.trouble(u'ERROR: unable to extract video uploader')
2005 return
2006 video_uploader = mobj.group(1).decode('utf-8')
2007
2008 # Extract video thumbnail
c5a088d3 2009 mobj = re.search(r'<thumbnail>(.*?)</thumbnail>', webpage)
92743d42
RB
2010 if mobj is None:
2011 self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
2012 return
2013 video_thumbnail = mobj.group(1).decode('utf-8')
2014
2015 # # Extract video description
2016 # mobj = re.search(r'<meta property="og:description" content="(.*)" />', webpage)
2017 # if mobj is None:
2018 # self._downloader.trouble(u'ERROR: unable to extract video description')
2019 # return
2020 # video_description = mobj.group(1).decode('utf-8')
2021 # if not video_description: video_description = 'No description available.'
2022 video_description = 'Foo.'
2023
f24c674b 2024 # Vimeo specific: extract request signature
c5a088d3 2025 mobj = re.search(r'<request_signature>(.*?)</request_signature>', webpage)
92743d42
RB
2026 if mobj is None:
2027 self._downloader.trouble(u'ERROR: unable to extract request signature')
2028 return
2029 sig = mobj.group(1).decode('utf-8')
2030
f24c674b 2031 # Vimeo specific: Extract request signature expiration
c5a088d3 2032 mobj = re.search(r'<request_signature_expires>(.*?)</request_signature_expires>', webpage)
92743d42
RB
2033 if mobj is None:
2034 self._downloader.trouble(u'ERROR: unable to extract request signature expiration')
2035 return
2036 sig_exp = mobj.group(1).decode('utf-8')
2037
2038 video_url = "http://vimeo.com/moogaloop/play/clip:%s/%s/%s" % (video_id, sig, sig_exp)
2039
2040 try:
2041 # Process video information
2042 self._downloader.process_info({
2043 'id': video_id.decode('utf-8'),
2044 'url': video_url,
2045 'uploader': video_uploader,
2046 'upload_date': u'NA',
2047 'title': video_title,
2048 'stitle': simple_title,
2fc31a48 2049 'ext': u'mp4',
92743d42
RB
2050 'thumbnail': video_thumbnail.decode('utf-8'),
2051 'description': video_description,
2054 'player_url': None,
2055 })
2056 except UnavailableVideoError:
2057 self._downloader.trouble(u'ERROR: unable to download video')
2058
2059
490fd7ae
RG
2060class GenericIE(InfoExtractor):
2061 """Generic last-resort information extractor."""
2062
f3098c4d
PH
2063 _VALID_URL = r'.*'
2064 IE_NAME = u'generic'
bdb3f7a7 2065
490fd7ae
RG
2066 def __init__(self, downloader=None):
2067 InfoExtractor.__init__(self, downloader)
2068
490fd7ae
RG
2069 def report_download_webpage(self, video_id):
2070 """Report webpage download."""
331ce0a0
RG
2071 self._downloader.to_screen(u'WARNING: Falling back on generic information extractor.')
2072 self._downloader.to_screen(u'[generic] %s: Downloading webpage' % video_id)
490fd7ae
RG
2073
2074 def report_extraction(self, video_id):
2075 """Report information extraction."""
331ce0a0 2076 self._downloader.to_screen(u'[generic] %s: Extracting information' % video_id)
490fd7ae
RG
2077
2078 def _real_initialize(self):
2079 return
2080
2081 def _real_extract(self, url):
df372a65 2082 # At this point we have a new video
9bf7fa52 2083 self._downloader.increment_downloads()
df372a65 2084
490fd7ae
RG
2085 video_id = url.split('/')[-1]
2086 request = urllib2.Request(url)
2087 try:
2088 self.report_download_webpage(video_id)
2089 webpage = urllib2.urlopen(request).read()
2090 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2091 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
2092 return
2093 except ValueError, err:
2094 # since this is the last-resort InfoExtractor, if
2095 # this error is thrown, it'll be thrown here
2096 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
2097 return
2098
a9806fd8 2099 self.report_extraction(video_id)
490fd7ae
RG
2100 # Start with something easy: JW Player in SWFObject
2101 mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
2102 if mobj is None:
2103 # Broaden the search a little bit
2104 mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
2105 if mobj is None:
2106 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
2107 return
2108
2109 # It's possible that one of the regexes
2110 # matched, but returned an empty group:
2111 if mobj.group(1) is None:
2112 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
2113 return
2114
2115 video_url = urllib.unquote(mobj.group(1))
c0a10ca8 2116 video_id = os.path.basename(video_url)
490fd7ae
RG
2117
2118 # here's a fun little line of code for you:
2119 video_extension = os.path.splitext(video_id)[1][1:]
c0a10ca8 2120 video_id = os.path.splitext(video_id)[0]
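# e.g. a basename of 'clip.flv' gives video_extension 'flv' and video_id 'clip'
# (os.path.splitext returns ('clip', '.flv'); [1][1:] drops the leading dot).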
490fd7ae
RG
2121
2122 # it's tempting to parse this further, but you would
2123 # have to take into account all the variations like
2124 # Video Title - Site Name
2125 # Site Name | Video Title
2126 # Video Title - Tagline | Site Name
2127 # and so on and so forth; it's just not practical
2128 mobj = re.search(r'<title>(.*)</title>', webpage)
2129 if mobj is None:
2130 self._downloader.trouble(u'ERROR: unable to extract title')
2131 return
2132 video_title = mobj.group(1).decode('utf-8')
2133 video_title = sanitize_title(video_title)
31cbdaaf 2134 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
490fd7ae
RG
2135
2136 # video uploader is domain name
2137 mobj = re.match(r'(?:https?://)?([^/]*)/.*', url)
2138 if mobj is None:
 2139 self._downloader.trouble(u'ERROR: unable to extract uploader')
2140 return
2141 video_uploader = mobj.group(1).decode('utf-8')
2142
2143 try:
2144 # Process video information
2145 self._downloader.process_info({
2146 'id': video_id.decode('utf-8'),
2147 'url': video_url.decode('utf-8'),
2148 'uploader': video_uploader,
138b11f3 2149 'upload_date': u'NA',
490fd7ae 2150 'title': video_title,
31cbdaaf 2151 'stitle': simple_title,
49c0028a 2152 'ext': video_extension.decode('utf-8'),
6ba562b0 2153 'format': u'NA',
e616ec0c 2154 'player_url': None,
49c0028a 2155 })
73f4e7af 2156 except UnavailableVideoError, err:
09cc744c 2157 self._downloader.trouble(u'\nERROR: unable to download video')
49c0028a 2158
2159
25af2bce
RG
2160class YoutubeSearchIE(InfoExtractor):
2161 """Information Extractor for YouTube search queries."""
bdb3f7a7 2162 _VALID_URL = r'ytsearch(\d+|all)?:[\s\S]+'
25af2bce
RG
2163 _TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en'
2164 _VIDEO_INDICATOR = r'href="/watch\?v=.+?"'
304a4d85 2165 _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
25af2bce 2166 _youtube_ie = None
fd9288c3 2167 _max_youtube_results = 1000
f3098c4d 2168 IE_NAME = u'youtube:search'
25af2bce 2169
f995f712 2170 def __init__(self, youtube_ie, downloader=None):
25af2bce
RG
2171 InfoExtractor.__init__(self, downloader)
2172 self._youtube_ie = youtube_ie
d3975459 2173
25af2bce
RG
2174 def report_download_page(self, query, pagenum):
2175 """Report attempt to download playlist page with given number."""
490fd7ae 2176 query = query.decode(preferredencoding())
331ce0a0 2177 self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
25af2bce
RG
2178
2179 def _real_initialize(self):
2180 self._youtube_ie.initialize()
d3975459 2181
25af2bce 2182 def _real_extract(self, query):
bdb3f7a7 2183 mobj = re.match(self._VALID_URL, query)
25af2bce 2184 if mobj is None:
147753eb 2185 self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
6f21f686 2186 return
25af2bce
RG
2187
2188 prefix, query = query.split(':')
2189 prefix = prefix[8:]
c0a10ca8 2190 query = query.encode('utf-8')
f995f712 2191 if prefix == '':
6f21f686
RG
2192 self._download_n_results(query, 1)
2193 return
f995f712 2194 elif prefix == 'all':
6f21f686
RG
2195 self._download_n_results(query, self._max_youtube_results)
2196 return
f995f712 2197 else:
25af2bce 2198 try:
e1f18b8a 2199 n = long(prefix)
25af2bce 2200 if n <= 0:
147753eb 2201 self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
6f21f686 2202 return
257453b9 2203 elif n > self._max_youtube_results:
c0a10ca8 2204 self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)' % (self._max_youtube_results, n))
257453b9 2205 n = self._max_youtube_results
6f21f686
RG
2206 self._download_n_results(query, n)
2207 return
e1f18b8a 2208 except ValueError: # parsing prefix as integer fails
6f21f686
RG
2209 self._download_n_results(query, 1)
2210 return
25af2bce
RG
2211
2212 def _download_n_results(self, query, n):
2213 """Downloads a specified number of results for a query"""
2214
2215 video_ids = []
2216 already_seen = set()
2217 pagenum = 1
2218
2219 while True:
2220 self.report_download_page(query, pagenum)
a9633f14 2221 result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1987c232 2222 request = urllib2.Request(result_url)
25af2bce
RG
2223 try:
2224 page = urllib2.urlopen(request).read()
2225 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
147753eb 2226 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
6f21f686 2227 return
25af2bce
RG
2228
2229 # Extract video identifiers
2230 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2231 video_id = page[mobj.span()[0]:mobj.span()[1]].split('=')[2][:-1]
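# Illustration: a match like 'href="/watch?v=abc123"' splits on '=' into
# ['href', '"/watch?v', 'abc123"']; element [2] minus its trailing quote is the id.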
2232 if video_id not in already_seen:
2233 video_ids.append(video_id)
2234 already_seen.add(video_id)
2235 if len(video_ids) == n:
2236 # Specified n videos reached
25af2bce 2237 for id in video_ids:
6f21f686
RG
2238 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
2239 return
25af2bce 2240
304a4d85 2241 if re.search(self._MORE_PAGES_INDICATOR, page) is None:
25af2bce 2242 for id in video_ids:
6f21f686
RG
2243 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
2244 return
25af2bce
RG
2245
2246 pagenum = pagenum + 1
2247
c0a10ca8 2248
7e58d568
RG
2249class GoogleSearchIE(InfoExtractor):
2250 """Information Extractor for Google Video search queries."""
bdb3f7a7 2251 _VALID_URL = r'gvsearch(\d+|all)?:[\s\S]+'
7e58d568
RG
2252 _TEMPLATE_URL = 'http://video.google.com/videosearch?q=%s+site:video.google.com&start=%s&hl=en'
2253 _VIDEO_INDICATOR = r'videoplay\?docid=([^\&>]+)\&'
2254 _MORE_PAGES_INDICATOR = r'<span>Next</span>'
2255 _google_ie = None
2256 _max_google_results = 1000
f3098c4d 2257 IE_NAME = u'video.google:search'
7e58d568
RG
2258
2259 def __init__(self, google_ie, downloader=None):
2260 InfoExtractor.__init__(self, downloader)
2261 self._google_ie = google_ie
d3975459 2262
7e58d568
RG
2263 def report_download_page(self, query, pagenum):
2264 """Report attempt to download playlist page with given number."""
2265 query = query.decode(preferredencoding())
331ce0a0 2266 self._downloader.to_screen(u'[video.google] query "%s": Downloading page %s' % (query, pagenum))
7e58d568
RG
2267
2268 def _real_initialize(self):
2269 self._google_ie.initialize()
d3975459 2270
7e58d568 2271 def _real_extract(self, query):
bdb3f7a7 2272 mobj = re.match(self._VALID_URL, query)
7e58d568
RG
2273 if mobj is None:
2274 self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
2275 return
2276
2277 prefix, query = query.split(':')
2278 prefix = prefix[8:]
c0a10ca8 2279 query = query.encode('utf-8')
7e58d568
RG
2280 if prefix == '':
2281 self._download_n_results(query, 1)
2282 return
2283 elif prefix == 'all':
2284 self._download_n_results(query, self._max_google_results)
2285 return
2286 else:
2287 try:
2288 n = long(prefix)
2289 if n <= 0:
2290 self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
2291 return
2292 elif n > self._max_google_results:
c0a10ca8 2293 self._downloader.to_stderr(u'WARNING: gvsearch returns max %i results (you requested %i)' % (self._max_google_results, n))
7e58d568
RG
2294 n = self._max_google_results
2295 self._download_n_results(query, n)
2296 return
2297 except ValueError: # parsing prefix as integer fails
2298 self._download_n_results(query, 1)
2299 return
2300
2301 def _download_n_results(self, query, n):
2302 """Downloads a specified number of results for a query"""
2303
2304 video_ids = []
2305 already_seen = set()
2306 pagenum = 1
2307
2308 while True:
2309 self.report_download_page(query, pagenum)
2310 result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1987c232 2311 request = urllib2.Request(result_url)
7e58d568
RG
2312 try:
2313 page = urllib2.urlopen(request).read()
2314 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2315 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2316 return
2317
2318 # Extract video identifiers
2319 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2320 video_id = mobj.group(1)
2321 if video_id not in already_seen:
2322 video_ids.append(video_id)
2323 already_seen.add(video_id)
2324 if len(video_ids) == n:
2325 # Specified n videos reached
2326 for id in video_ids:
2327 self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
2328 return
2329
2330 if re.search(self._MORE_PAGES_INDICATOR, page) is None:
2331 for id in video_ids:
2332 self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
2333 return
2334
2335 pagenum = pagenum + 1
2336
c0a10ca8 2337
7e58d568
RG
2338class YahooSearchIE(InfoExtractor):
2339 """Information Extractor for Yahoo! Video search queries."""
bdb3f7a7 2340 _VALID_URL = r'yvsearch(\d+|all)?:[\s\S]+'
7e58d568
RG
2341 _TEMPLATE_URL = 'http://video.yahoo.com/search/?p=%s&o=%s'
2342 _VIDEO_INDICATOR = r'href="http://video\.yahoo\.com/watch/([0-9]+/[0-9]+)"'
2343 _MORE_PAGES_INDICATOR = r'\s*Next'
2344 _yahoo_ie = None
2345 _max_yahoo_results = 1000
f3098c4d 2346 IE_NAME = u'video.yahoo:search'
7e58d568
RG
2347
2348 def __init__(self, yahoo_ie, downloader=None):
2349 InfoExtractor.__init__(self, downloader)
2350 self._yahoo_ie = yahoo_ie
d3975459 2351
7e58d568
RG
2352 def report_download_page(self, query, pagenum):
2353 """Report attempt to download playlist page with given number."""
2354 query = query.decode(preferredencoding())
331ce0a0 2355 self._downloader.to_screen(u'[video.yahoo] query "%s": Downloading page %s' % (query, pagenum))
7e58d568
RG
2356
2357 def _real_initialize(self):
2358 self._yahoo_ie.initialize()
d3975459 2359
7e58d568 2360 def _real_extract(self, query):
bdb3f7a7 2361 mobj = re.match(self._VALID_URL, query)
7e58d568
RG
2362 if mobj is None:
2363 self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
2364 return
2365
2366 prefix, query = query.split(':')
2367 prefix = prefix[8:]
c0a10ca8 2368 query = query.encode('utf-8')
7e58d568
RG
2369 if prefix == '':
2370 self._download_n_results(query, 1)
2371 return
2372 elif prefix == 'all':
2373 self._download_n_results(query, self._max_yahoo_results)
2374 return
2375 else:
2376 try:
2377 n = long(prefix)
2378 if n <= 0:
2379 self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
2380 return
2381 elif n > self._max_yahoo_results:
c0a10ca8 2382 self._downloader.to_stderr(u'WARNING: yvsearch returns max %i results (you requested %i)' % (self._max_yahoo_results, n))
7e58d568
RG
2383 n = self._max_yahoo_results
2384 self._download_n_results(query, n)
2385 return
2386 except ValueError: # parsing prefix as integer fails
2387 self._download_n_results(query, 1)
2388 return
2389
2390 def _download_n_results(self, query, n):
2391 """Downloads a specified number of results for a query"""
2392
2393 video_ids = []
2394 already_seen = set()
2395 pagenum = 1
2396
2397 while True:
2398 self.report_download_page(query, pagenum)
2399 result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1987c232 2400 request = urllib2.Request(result_url)
7e58d568
RG
2401 try:
2402 page = urllib2.urlopen(request).read()
2403 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2404 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2405 return
2406
2407 # Extract video identifiers
2408 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2409 video_id = mobj.group(1)
2410 if video_id not in already_seen:
2411 video_ids.append(video_id)
2412 already_seen.add(video_id)
2413 if len(video_ids) == n:
2414 # Specified n videos reached
2415 for id in video_ids:
2416 self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
2417 return
2418
2419 if re.search(self._MORE_PAGES_INDICATOR, page) is None:
2420 for id in video_ids:
2421 self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
2422 return
2423
2424 pagenum = pagenum + 1
2425
c0a10ca8 2426
0c2dc87d
RG
2427class YoutubePlaylistIE(InfoExtractor):
2428 """Information Extractor for YouTube playlists."""
2429
2152ee86 2430 _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists|artist|playlist)\?.*?(p|a|list)=|user/.*?/user/|p/|user/.*?#[pg]/c/)([0-9A-Za-z]+)(?:/.*?/([0-9A-Za-z_-]+))?.*'
f74e22ae 2431 _TEMPLATE_URL = 'http://www.youtube.com/%s?%s=%s&page=%s&gl=US&hl=en'
0c2dc87d 2432 _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
ce5cafea 2433 _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
0c2dc87d 2434 _youtube_ie = None
f3098c4d 2435 IE_NAME = u'youtube:playlist'
0c2dc87d
RG
2436
2437 def __init__(self, youtube_ie, downloader=None):
2438 InfoExtractor.__init__(self, downloader)
2439 self._youtube_ie = youtube_ie
d3975459 2440
0c2dc87d
RG
2441 def report_download_page(self, playlist_id, pagenum):
2442 """Report attempt to download playlist page with given number."""
331ce0a0 2443 self._downloader.to_screen(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum))
0c2dc87d
RG
2444
2445 def _real_initialize(self):
2446 self._youtube_ie.initialize()
d3975459 2447
0c2dc87d
RG
2448 def _real_extract(self, url):
2449 # Extract playlist id
2450 mobj = re.match(self._VALID_URL, url)
2451 if mobj is None:
147753eb 2452 self._downloader.trouble(u'ERROR: invalid url: %s' % url)
6f21f686 2453 return
0c2dc87d 2454
d119b54d
RG
2455 # Single video case
2456 if mobj.group(3) is not None:
2457 self._youtube_ie.extract(mobj.group(3))
2458 return
2459
0c2dc87d 2460 # Download playlist pages
f74e22ae
GI
 2461 # the prefix defaults to 'p' for playlists, but other playlist types need extra care
2462 playlist_prefix = mobj.group(1)
2463 if playlist_prefix == 'a':
2464 playlist_access = 'artist'
2465 else:
7cc3c6fd 2466 playlist_prefix = 'p'
f74e22ae
GI
2467 playlist_access = 'view_play_list'
2468 playlist_id = mobj.group(2)
0c2dc87d
RG
2469 video_ids = []
2470 pagenum = 1
2471
2472 while True:
2473 self.report_download_page(playlist_id, pagenum)
f74e22ae 2474 request = urllib2.Request(self._TEMPLATE_URL % (playlist_access, playlist_prefix, playlist_id, pagenum))
0c2dc87d
RG
2475 try:
2476 page = urllib2.urlopen(request).read()
2477 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
147753eb 2478 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
6f21f686 2479 return
0c2dc87d
RG
2480
2481 # Extract video identifiers
27d98b6e 2482 ids_in_page = []
0c2dc87d 2483 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
27d98b6e
RG
2484 if mobj.group(1) not in ids_in_page:
2485 ids_in_page.append(mobj.group(1))
2486 video_ids.extend(ids_in_page)
0c2dc87d 2487
ce5cafea 2488 if re.search(self._MORE_PAGES_INDICATOR, page) is None:
0c2dc87d
RG
2489 break
2490 pagenum = pagenum + 1
2491
8cc44341
RG
2492 playliststart = self._downloader.params.get('playliststart', 1) - 1
2493 playlistend = self._downloader.params.get('playlistend', -1)
2494 video_ids = video_ids[playliststart:playlistend]
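# playliststart appears to be 1-based in the params; the '- 1' above converts it
# to a 0-based slice index (e.g. the default playliststart=1 keeps the list from
# the first video onwards).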
2495
0c2dc87d 2496 for id in video_ids:
6f21f686
RG
2497 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
2498 return
0c2dc87d 2499
c0a10ca8 2500
c39c05cd
A
2501class YoutubeUserIE(InfoExtractor):
2502 """Information Extractor for YouTube users."""
2503
5aba6ea4 2504 _VALID_URL = r'(?:(?:(?:http://)?(?:\w+\.)?youtube.com/user/)|ytuser:)([A-Za-z0-9_-]+)'
c39c05cd 2505 _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
5aba6ea4
RG
2506 _GDATA_PAGE_SIZE = 50
2507 _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d'
2508 _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
c39c05cd 2509 _youtube_ie = None
f3098c4d 2510 IE_NAME = u'youtube:user'
c39c05cd
A
2511
2512 def __init__(self, youtube_ie, downloader=None):
2513 InfoExtractor.__init__(self, downloader)
2514 self._youtube_ie = youtube_ie
d3975459 2515
5aba6ea4 2516 def report_download_page(self, username, start_index):
c39c05cd 2517 """Report attempt to download user page."""
5aba6ea4 2518 self._downloader.to_screen(u'[youtube] user %s: Downloading video ids from %d to %d' %
c0a10ca8 2519 (username, start_index, start_index + self._GDATA_PAGE_SIZE))
c39c05cd
A
2520
2521 def _real_initialize(self):
2522 self._youtube_ie.initialize()
d3975459 2523
c39c05cd
A
2524 def _real_extract(self, url):
2525 # Extract username
2526 mobj = re.match(self._VALID_URL, url)
2527 if mobj is None:
2528 self._downloader.trouble(u'ERROR: invalid url: %s' % url)
2529 return
2530
c39c05cd 2531 username = mobj.group(1)
5aba6ea4
RG
2532
2533 # Download video ids using YouTube Data API. Result size per
2534 # query is limited (currently to 50 videos) so we need to query
2535 # page by page until there are no video ids - it means we got
2536 # all of them.
2537
c39c05cd 2538 video_ids = []
5aba6ea4 2539 pagenum = 0
c39c05cd 2540
5aba6ea4
RG
2541 while True:
2542 start_index = pagenum * self._GDATA_PAGE_SIZE + 1
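# e.g. with _GDATA_PAGE_SIZE = 50: pagenum 0 requests start-index 1,
# pagenum 1 requests start-index 51, and so on.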
2543 self.report_download_page(username, start_index)
c39c05cd 2544
5aba6ea4 2545 request = urllib2.Request(self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index))
c39c05cd 2546
5aba6ea4
RG
2547 try:
2548 page = urllib2.urlopen(request).read()
2549 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2550 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2551 return
c39c05cd 2552
5aba6ea4
RG
2553 # Extract video identifiers
2554 ids_in_page = []
2555
2556 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2557 if mobj.group(1) not in ids_in_page:
2558 ids_in_page.append(mobj.group(1))
2559
2560 video_ids.extend(ids_in_page)
2561
2562 # A little optimization - if current page is not
2563 # "full", ie. does not contain PAGE_SIZE video ids then
2564 # we can assume that this page is the last one - there
2565 # are no more ids on further pages - no need to query
2566 # again.
2567
2568 if len(ids_in_page) < self._GDATA_PAGE_SIZE:
2569 break
2570
2571 pagenum += 1
2572
2573 all_ids_count = len(video_ids)
8cc44341
RG
2574 playliststart = self._downloader.params.get('playliststart', 1) - 1
2575 playlistend = self._downloader.params.get('playlistend', -1)
204c9398 2576
5aba6ea4
RG
2577 if playlistend == -1:
2578 video_ids = video_ids[playliststart:]
2579 else:
2580 video_ids = video_ids[playliststart:playlistend]
7a9054ec 2581
5aba6ea4 2582 self._downloader.to_screen("[youtube] user %s: Collected %d video ids (downloading %d of them)" %
c0a10ca8 2583 (username, all_ids_count, len(video_ids)))
5aba6ea4
RG
2584
2585 for video_id in video_ids:
2586 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % video_id)
2587
c39c05cd 2588
27179cfd
VV
2589class DepositFilesIE(InfoExtractor):
2590 """Information extractor for depositfiles.com"""
2591
2592 _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles.com/(?:../(?#locale))?files/(.+)'
f3098c4d 2593 IE_NAME = u'DepositFiles'
27179cfd
VV
2594
2595 def __init__(self, downloader=None):
2596 InfoExtractor.__init__(self, downloader)
2597
27179cfd
VV
2598 def report_download_webpage(self, file_id):
2599 """Report webpage download."""
2600 self._downloader.to_screen(u'[DepositFiles] %s: Downloading webpage' % file_id)
2601
2602 def report_extraction(self, file_id):
2603 """Report information extraction."""
2604 self._downloader.to_screen(u'[DepositFiles] %s: Extracting information' % file_id)
2605
2606 def _real_initialize(self):
2607 return
2608
2609 def _real_extract(self, url):
2610 # At this point we have a new file
2611 self._downloader.increment_downloads()
2612
2613 file_id = url.split('/')[-1]
 2614 # Rebuild the URL in the English locale
2615 url = 'http://depositfiles.com/en/files/' + file_id
2616
2617 # Retrieve file webpage with 'Free download' button pressed
2618 free_download_indication = { 'gateway_result' : '1' }
1987c232 2619 request = urllib2.Request(url, urllib.urlencode(free_download_indication))
27179cfd
VV
2620 try:
2621 self.report_download_webpage(file_id)
2622 webpage = urllib2.urlopen(request).read()
2623 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2624 self._downloader.trouble(u'ERROR: Unable to retrieve file webpage: %s' % str(err))
2625 return
2626
2627 # Search for the real file URL
2628 mobj = re.search(r'<form action="(http://fileshare.+?)"', webpage)
2629 if (mobj is None) or (mobj.group(1) is None):
 2630 # Try to figure out the reason for the error.
2631 mobj = re.search(r'<strong>(Attention.*?)</strong>', webpage, re.DOTALL)
2632 if (mobj is not None) and (mobj.group(1) is not None):
2633 restriction_message = re.sub('\s+', ' ', mobj.group(1)).strip()
2634 self._downloader.trouble(u'ERROR: %s' % restriction_message)
2635 else:
2636 self._downloader.trouble(u'ERROR: unable to extract download URL from: %s' % url)
2637 return
2638
2639 file_url = mobj.group(1)
2640 file_extension = os.path.splitext(file_url)[1][1:]
2641
2642 # Search for file title
2643 mobj = re.search(r'<b title="(.*?)">', webpage)
2644 if mobj is None:
2645 self._downloader.trouble(u'ERROR: unable to extract title')
2646 return
2647 file_title = mobj.group(1).decode('utf-8')
2648
2649 try:
2650 # Process file information
2651 self._downloader.process_info({
2652 'id': file_id.decode('utf-8'),
2653 'url': file_url.decode('utf-8'),
2654 'uploader': u'NA',
2655 'upload_date': u'NA',
2656 'title': file_title,
2657 'stitle': file_title,
2658 'ext': file_extension.decode('utf-8'),
2659 'format': u'NA',
2660 'player_url': None,
2661 })
2662 except UnavailableVideoError, err:
2663 self._downloader.trouble(u'ERROR: unable to download file')
2664
c0a10ca8 2665
9f5f9602
GI
2666class FacebookIE(InfoExtractor):
2667 """Information Extractor for Facebook"""
2668
2669 _VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook.com/video/video.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)'
2670 _LOGIN_URL = 'https://login.facebook.com/login.php?m&next=http%3A%2F%2Fm.facebook.com%2Fhome.php&'
2671 _NETRC_MACHINE = 'facebook'
2672 _available_formats = ['highqual', 'lowqual']
2673 _video_extensions = {
2674 'highqual': 'mp4',
2675 'lowqual': 'mp4',
2676 }
f3098c4d 2677 IE_NAME = u'facebook'
9f5f9602
GI
2678
2679 def __init__(self, downloader=None):
2680 InfoExtractor.__init__(self, downloader)
2681
9f5f9602
GI
2682 def _reporter(self, message):
2683 """Add header and report message."""
2684 self._downloader.to_screen(u'[facebook] %s' % message)
2685
2686 def report_login(self):
2687 """Report attempt to log in."""
2688 self._reporter(u'Logging in')
2689
2690 def report_video_webpage_download(self, video_id):
2691 """Report attempt to download video webpage."""
2692 self._reporter(u'%s: Downloading video webpage' % video_id)
2693
2694 def report_information_extraction(self, video_id):
2695 """Report attempt to extract video information."""
2696 self._reporter(u'%s: Extracting video information' % video_id)
2697
2698 def _parse_page(self, video_webpage):
2699 """Extract video information from page"""
2700 # General data
2701 data = {'title': r'class="video_title datawrap">(.*?)</',
2702 'description': r'<div class="datawrap">(.*?)</div>',
2703 'owner': r'\("video_owner_name", "(.*?)"\)',
2704 'upload_date': r'data-date="(.*?)"',
2705 'thumbnail': r'\("thumb_url", "(?P<THUMB>.*?)"\)',
2706 }
2707 video_info = {}
2708 for piece in data.keys():
2709 mobj = re.search(data[piece], video_webpage)
2710 if mobj is not None:
2711 video_info[piece] = urllib.unquote_plus(mobj.group(1).decode("unicode_escape"))
2712
2713 # Video urls
2714 video_urls = {}
2715 for fmt in self._available_formats:
2716 mobj = re.search(r'\("%s_src\", "(.+?)"\)' % fmt, video_webpage)
2717 if mobj is not None:
 2718 # The URL lives in a JavaScript segment, stored in escaped Unicode form
 2719 # within the (generally UTF-8) page
2720 video_urls[fmt] = urllib.unquote_plus(mobj.group(1).decode("unicode_escape"))
2721 video_info['video_urls'] = video_urls
2722
2723 return video_info
2724
2725 def _real_initialize(self):
2726 if self._downloader is None:
2727 return
2728
2729 useremail = None
2730 password = None
2731 downloader_params = self._downloader.params
2732
2733 # Attempt to use provided username and password or .netrc data
2734 if downloader_params.get('username', None) is not None:
2735 useremail = downloader_params['username']
2736 password = downloader_params['password']
2737 elif downloader_params.get('usenetrc', False):
2738 try:
2739 info = netrc.netrc().authenticators(self._NETRC_MACHINE)
2740 if info is not None:
2741 useremail = info[0]
2742 password = info[2]
2743 else:
2744 raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
2745 except (IOError, netrc.NetrcParseError), err:
2746 self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
2747 return
2748
2749 if useremail is None:
2750 return
2751
2752 # Log in
2753 login_form = {
2754 'email': useremail,
2755 'pass': password,
2756 'login': 'Log+In'
2757 }
2758 request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form))
2759 try:
2760 self.report_login()
2761 login_results = urllib2.urlopen(request).read()
2762 if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None:
 2763 self._downloader.to_stderr(u'WARNING: unable to log in: bad username/password, or exceeded login rate limit (~3/min). Check credentials or wait.')
2764 return
2765 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2766 self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
2767 return
2768
2769 def _real_extract(self, url):
2770 mobj = re.match(self._VALID_URL, url)
2771 if mobj is None:
2772 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
2773 return
2774 video_id = mobj.group('ID')
2775
2776 # Get video webpage
2777 self.report_video_webpage_download(video_id)
2778 request = urllib2.Request('https://www.facebook.com/video/video.php?v=%s' % video_id)
2779 try:
2780 page = urllib2.urlopen(request)
2781 video_webpage = page.read()
2782 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2783 self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
2784 return
2785
2786 # Start extracting information
2787 self.report_information_extraction(video_id)
2788
2789 # Extract information
2790 video_info = self._parse_page(video_webpage)
2791
2792 # uploader
2793 if 'owner' not in video_info:
2794 self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
2795 return
2796 video_uploader = video_info['owner']
2797
2798 # title
2799 if 'title' not in video_info:
2800 self._downloader.trouble(u'ERROR: unable to extract video title')
2801 return
2802 video_title = video_info['title']
2803 video_title = video_title.decode('utf-8')
2804 video_title = sanitize_title(video_title)
2805
2806 # simplified title
2807 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
2808 simple_title = simple_title.strip(ur'_')
2809
2810 # thumbnail image
2811 if 'thumbnail' not in video_info:
2812 self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
2813 video_thumbnail = ''
2814 else:
2815 video_thumbnail = video_info['thumbnail']
2816
2817 # upload date
2818 upload_date = u'NA'
2819 if 'upload_date' in video_info:
2820 upload_time = video_info['upload_date']
2821 timetuple = email.utils.parsedate_tz(upload_time)
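# parsedate_tz returns a 10-tuple whose first nine fields form a struct_time-style
# tuple, so timetuple[0:9] below can be passed straight to time.strftime.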
2822 if timetuple is not None:
2823 try:
2824 upload_date = time.strftime('%Y%m%d', timetuple[0:9])
2825 except:
2826 pass
2827
2828 # description
8b95c387 2829 video_description = video_info.get('description', 'No description available.')
9f5f9602
GI
2830
2831 url_map = video_info['video_urls']
2832 if len(url_map.keys()) > 0:
2833 # Decide which formats to download
2834 req_format = self._downloader.params.get('format', None)
2835 format_limit = self._downloader.params.get('format_limit', None)
2836
2837 if format_limit is not None and format_limit in self._available_formats:
2838 format_list = self._available_formats[self._available_formats.index(format_limit):]
2839 else:
2840 format_list = self._available_formats
2841 existing_formats = [x for x in format_list if x in url_map]
2842 if len(existing_formats) == 0:
2843 self._downloader.trouble(u'ERROR: no known formats available for video')
2844 return
2845 if req_format is None:
2846 video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
c52b01f3
K
2847 elif req_format == 'worst':
2848 video_url_list = [(existing_formats[len(existing_formats)-1], url_map[existing_formats[len(existing_formats)-1]])] # worst quality
9f5f9602
GI
2849 elif req_format == '-1':
2850 video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
2851 else:
2852 # Specific format
2853 if req_format not in url_map:
2854 self._downloader.trouble(u'ERROR: requested format not available')
2855 return
2856 video_url_list = [(req_format, url_map[req_format])] # Specific format
2857
2858 for format_param, video_real_url in video_url_list:
2859
2860 # At this point we have a new video
2861 self._downloader.increment_downloads()
2862
2863 # Extension
2864 video_extension = self._video_extensions.get(format_param, 'mp4')
2865
9f5f9602
GI
2866 try:
2867 # Process video information
2868 self._downloader.process_info({
2869 'id': video_id.decode('utf-8'),
2870 'url': video_real_url.decode('utf-8'),
2871 'uploader': video_uploader.decode('utf-8'),
2872 'upload_date': upload_date,
2873 'title': video_title,
2874 'stitle': simple_title,
2875 'ext': video_extension.decode('utf-8'),
2876 'format': (format_param is None and u'NA' or format_param.decode('utf-8')),
2877 'thumbnail': video_thumbnail.decode('utf-8'),
2878 'description': video_description.decode('utf-8'),
2879 'player_url': None,
2880 })
2881 except UnavailableVideoError, err:
2882 self._downloader.trouble(u'\nERROR: unable to download video')
2883
7745f5d8
PH
2884class BlipTVIE(InfoExtractor):
2885 """Information extractor for blip.tv"""
2886
1cab2c6d 2887 _VALID_URL = r'^(?:https?://)?(?:\w+\.)?blip\.tv(/.+)$'
7745f5d8 2888 _URL_EXT = r'^.*\.([a-z0-9]+)$'
f3098c4d 2889 IE_NAME = u'blip.tv'
7745f5d8 2890
7745f5d8
PH
2891 def report_extraction(self, file_id):
2892 """Report information extraction."""
aded78d9 2893 self._downloader.to_screen(u'[blip.tv] %s: Extracting information' % file_id)
7745f5d8
PH
2894
2895 def _simplify_title(self, title):
2896 res = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', title)
2897 res = res.strip(ur'_')
2898 return res
2899
2900 def _real_extract(self, url):
2901 mobj = re.match(self._VALID_URL, url)
2902 if mobj is None:
2903 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
2904 return
2905
1293ce58
PH
2906 if '?' in url:
2907 cchar = '&'
2908 else:
2909 cchar = '?'
2910 json_url = url + cchar + 'skin=json&version=2&no_wrap=1'
7745f5d8 2911 request = urllib2.Request(json_url)
aded78d9 2912 self.report_extraction(mobj.group(1))
7745f5d8
PH
2913 try:
2914 json_code = urllib2.urlopen(request).read()
2915 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2916 self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
2917 return
2918 try:
2919 json_data = json.loads(json_code)
1293ce58
PH
2920 if 'Post' in json_data:
2921 data = json_data['Post']
2922 else:
2923 data = json_data
7745f5d8
PH
2924
2925 upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d')
2926 video_url = data['media']['url']
2927 umobj = re.match(self._URL_EXT, video_url)
2928 if umobj is None:
2929 raise ValueError('Can not determine filename extension')
2930 ext = umobj.group(1)
2931
a1cab7ce
PH
2932 self._downloader.increment_downloads()
2933
7745f5d8
PH
2934 info = {
2935 'id': data['item_id'],
2936 'url': video_url,
2937 'uploader': data['display_name'],
2938 'upload_date': upload_date,
2939 'title': data['title'],
2940 'stitle': self._simplify_title(data['title']),
2941 'ext': ext,
2942 'format': data['media']['mimeType'],
2943 'thumbnail': data['thumbnailUrl'],
2944 'description': data['description'],
2945 'player_url': data['embedUrl']
2946 }
2947 except (ValueError,KeyError), err:
aded78d9 2948 self._downloader.trouble(u'ERROR: unable to parse video information: %s' % repr(err))
7745f5d8
PH
2949 return
2950
2951 try:
2952 self._downloader.process_info(info)
2953 except UnavailableVideoError, err:
2954 self._downloader.trouble(u'\nERROR: unable to download video')
2955
2956
9b0a8bc1
PH
2957class MyVideoIE(InfoExtractor):
2958 """Information Extractor for myvideo.de."""
2959
2960 _VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/watch/([0-9]+)/([^?/]+).*'
f3098c4d 2961 IE_NAME = u'myvideo'
9b0a8bc1
PH
2962
2963 def __init__(self, downloader=None):
2964 InfoExtractor.__init__(self, downloader)
2965
9b0a8bc1
PH
2966 def report_download_webpage(self, video_id):
2967 """Report webpage download."""
2968 self._downloader.to_screen(u'[myvideo] %s: Downloading webpage' % video_id)
2969
2970 def report_extraction(self, video_id):
2971 """Report information extraction."""
2972 self._downloader.to_screen(u'[myvideo] %s: Extracting information' % video_id)
2973
2974 def _real_initialize(self):
2975 return
2976
2977 def _real_extract(self,url):
2978 mobj = re.match(self._VALID_URL, url)
2979 if mobj is None:
2980 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
2981 return
2982
2983 video_id = mobj.group(1)
2984 simple_title = mobj.group(2).decode('utf-8')
2985 # should actually not be necessary
2986 simple_title = sanitize_title(simple_title)
2987 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', simple_title)
2988
2989 # Get video webpage
2990 request = urllib2.Request('http://www.myvideo.de/watch/%s' % video_id)
2991 try:
2992 self.report_download_webpage(video_id)
2993 webpage = urllib2.urlopen(request).read()
2994 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2995 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
2996 return
2997
2998 self.report_extraction(video_id)
2999 mobj = re.search(r'<link rel=\'image_src\' href=\'(http://is[0-9].myvideo\.de/de/movie[0-9]+/[a-f0-9]+)/thumbs/[^.]+\.jpg\' />',
3000 webpage)
3001 if mobj is None:
3002 self._downloader.trouble(u'ERROR: unable to extract media URL')
3003 return
3004 video_url = mobj.group(1) + ('/%s.flv' % video_id)
3005
3006 mobj = re.search('<title>([^<]+)</title>', webpage)
3007 if mobj is None:
3008 self._downloader.trouble(u'ERROR: unable to extract title')
3009 return
3010
3011 video_title = mobj.group(1)
3012 video_title = sanitize_title(video_title)
3013
3014 try:
3016 self._downloader.process_info({
3017 'id': video_id,
3018 'url': video_url,
3019 'uploader': u'NA',
3020 'upload_date': u'NA',
3021 'title': video_title,
3022 'stitle': simple_title,
3023 'ext': u'flv',
3024 'format': u'NA',
3025 'player_url': None,
3026 })
3027 except UnavailableVideoError:
3028 self._downloader.trouble(u'\nERROR: Unable to download video')
3029
c8e30044 3030class ComedyCentralIE(InfoExtractor):
f166bccc 3031 """Information extractor for The Daily Show and Colbert Report """
c8e30044 3032
f3098c4d
PH
3033 _VALID_URL = r'^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport))|(https?://)?(www\.)?(?P<showname>thedailyshow|colbertnation)\.com/full-episodes/(?P<episode>.*)$'
3034 IE_NAME = u'comedycentral'
c8e30044 3035
c8e30044
PH
3036 def report_extraction(self, episode_id):
3037 self._downloader.to_screen(u'[comedycentral] %s: Extracting information' % episode_id)
3038
3039 def report_config_download(self, episode_id):
3040 self._downloader.to_screen(u'[comedycentral] %s: Downloading configuration' % episode_id)
3041
b487ef08
PH
3042 def report_index_download(self, episode_id):
3043 self._downloader.to_screen(u'[comedycentral] %s: Downloading show index' % episode_id)
3044
fedf9f39
PH
3045 def report_player_url(self, episode_id):
3046 self._downloader.to_screen(u'[comedycentral] %s: Determining player URL' % episode_id)
3047
c8e30044
PH
3048 def _simplify_title(self, title):
3049 res = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', title)
3050 res = res.strip(ur'_')
3051 return res
3052
3053 def _real_extract(self, url):
3054 mobj = re.match(self._VALID_URL, url)
3055 if mobj is None:
3056 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
3057 return
f166bccc
PH
3058
3059 if mobj.group('shortname'):
3060 if mobj.group('shortname') in ('tds', 'thedailyshow'):
3061 url = 'http://www.thedailyshow.com/full-episodes/'
3062 else:
3063 url = 'http://www.colbertnation.com/full-episodes/'
3064 mobj = re.match(self._VALID_URL, url)
3065 assert mobj is not None
3066
3067 dlNewest = not mobj.group('episode')
3068 if dlNewest:
3069 epTitle = mobj.group('showname')
3070 else:
3071 epTitle = mobj.group('episode')
c8e30044
PH
3072
3073 req = urllib2.Request(url)
3074 self.report_extraction(epTitle)
3075 try:
f166bccc
PH
3076 htmlHandle = urllib2.urlopen(req)
3077 html = htmlHandle.read()
c8e30044
PH
3078 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3079 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % unicode(err))
3080 return
f166bccc
PH
3081 if dlNewest:
3082 url = htmlHandle.geturl()
3083 mobj = re.match(self._VALID_URL, url)
3084 if mobj is None:
3085 self._downloader.trouble(u'ERROR: Invalid redirected URL: ' + url)
3086 return
3087 if mobj.group('episode') == '':
3088 self._downloader.trouble(u'ERROR: Redirected URL is still not specific: ' + url)
3089 return
3090 epTitle = mobj.group('episode')
c8e30044 3091
b487ef08 3092 mMovieParams = re.findall('<param name="movie" value="(http://media.mtvnservices.com/([^"]*episode.*?:.*?))"/>', html)
c8e30044
PH
3093 if len(mMovieParams) == 0:
3094 self._downloader.trouble(u'ERROR: unable to find Flash URL in webpage ' + url)
3095 return
b487ef08
PH
3096
3097 playerUrl_raw = mMovieParams[0][0]
fedf9f39
PH
3098 self.report_player_url(epTitle)
3099 try:
b487ef08
PH
3100 urlHandle = urllib2.urlopen(playerUrl_raw)
3101 playerUrl = urlHandle.geturl()
3102 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3103 self._downloader.trouble(u'ERROR: unable to find out player URL: ' + unicode(err))
3104 return
3105
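		# The player URI extracted above identifies the show; the MRSS index feed
		# fetched below lists the individual segments of the episode, and for each
		# segment a separate mediaGen config document is downloaded to obtain the
		# available renditions (bitrate and stream URL).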
3106 uri = mMovieParams[0][1]
3107 indexUrl = 'http://shadow.comedycentral.com/feeds/video_player/mrss/?' + urllib.urlencode({'uri': uri})
3108 self.report_index_download(epTitle)
3109 try:
3110 indexXml = urllib2.urlopen(indexUrl).read()
fedf9f39 3111 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
b487ef08 3112 self._downloader.trouble(u'ERROR: unable to download episode index: ' + unicode(err))
fedf9f39 3113 return
fedf9f39 3114
b487ef08
PH
3115 idoc = xml.etree.ElementTree.fromstring(indexXml)
3116 itemEls = idoc.findall('.//item')
3117 for itemEl in itemEls:
3118 mediaId = itemEl.findall('./guid')[0].text
3119 shortMediaId = mediaId.split(':')[-1]
3120 showId = mediaId.split(':')[-2].replace('.com', '')
3121 officialTitle = itemEl.findall('./title')[0].text
3122 officialDate = itemEl.findall('./pubDate')[0].text
3123
c8e30044
PH
3124 configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' +
3125 urllib.urlencode({'uri': mediaId}))
3126 configReq = urllib2.Request(configUrl)
3127 self.report_config_download(epTitle)
3128 try:
3129 configXml = urllib2.urlopen(configReq).read()
3130 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3131 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % unicode(err))
3132 return
46c8c432 3133
c8e30044
PH
3134 cdoc = xml.etree.ElementTree.fromstring(configXml)
3135 turls = []
3136 for rendition in cdoc.findall('.//rendition'):
3137 finfo = (rendition.attrib['bitrate'], rendition.findall('./src')[0].text)
3138 turls.append(finfo)
3139
a88bc6bb 3140 if len(turls) == 0:
b487ef08 3141 self._downloader.trouble(u'\nERROR: unable to download ' + mediaId + ': No videos found')
a88bc6bb
PH
3142 continue
3143
c8e30044
PH
3144 # For now, just pick the highest bitrate
3145 format,video_url = turls[-1]
3146
3147 self._downloader.increment_downloads()
a88bc6bb 3148
b487ef08 3149 effTitle = showId + '-' + epTitle
c8e30044 3150 info = {
b487ef08 3151 'id': shortMediaId,
c8e30044 3152 'url': video_url,
b487ef08
PH
3153 'uploader': showId,
3154 'upload_date': officialDate,
a88bc6bb
PH
3155 'title': effTitle,
3156 'stitle': self._simplify_title(effTitle),
c8e30044
PH
3157 'ext': 'mp4',
3158 'format': format,
3159 'thumbnail': None,
b487ef08
PH
3160 'description': officialTitle,
3161 'player_url': playerUrl
c8e30044 3162 }
46c8c432 3163
c8e30044
PH
3164 try:
3165 self._downloader.process_info(info)
3166 except UnavailableVideoError, err:
b487ef08 3167 self._downloader.trouble(u'\nERROR: unable to download ' + mediaId)
a88bc6bb 3168 continue
c8e30044
PH
3169
3170
f9c68787
PH
3171class EscapistIE(InfoExtractor):
3172 """Information extractor for The Escapist """
3173
3174 _VALID_URL = r'^(https?://)?(www\.)?escapistmagazine\.com/videos/view/(?P<showname>[^/]+)/(?P<episode>[^/?]+)[/?].*$'
f3098c4d 3175 IE_NAME = u'escapist'
f9c68787 3176
f9c68787
PH
3177 def report_extraction(self, showName):
3178 self._downloader.to_screen(u'[escapist] %s: Extracting information' % showName)
3179
3180 def report_config_download(self, showName):
3181 self._downloader.to_screen(u'[escapist] %s: Downloading configuration' % showName)
3182
3183 def _simplify_title(self, title):
3184 res = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', title)
3185 res = res.strip(ur'_')
3186 return res
3187
3188 def _real_extract(self, url):
3189 htmlParser = HTMLParser.HTMLParser()
3190
3191 mobj = re.match(self._VALID_URL, url)
3192 if mobj is None:
3193 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
3194 return
3195 showName = mobj.group('showname')
3196 videoId = mobj.group('episode')
3197
3198 self.report_extraction(showName)
3199 try:
3200 webPage = urllib2.urlopen(url).read()
3201 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3202 self._downloader.trouble(u'ERROR: unable to download webpage: ' + unicode(err))
3203 return
3204
3205 descMatch = re.search('<meta name="description" content="([^"]*)"', webPage)
3206 description = htmlParser.unescape(descMatch.group(1))
3207 imgMatch = re.search('<meta property="og:image" content="([^"]*)"', webPage)
3208 imgUrl = htmlParser.unescape(imgMatch.group(1))
3209 playerUrlMatch = re.search('<meta property="og:video" content="([^"]*)"', webPage)
3210 playerUrl = htmlParser.unescape(playerUrlMatch.group(1))
3211 configUrlMatch = re.search('config=(.*)$', playerUrl)
3212 configUrl = urllib2.unquote(configUrlMatch.group(1))
3213
3214 self.report_config_download(showName)
3215 try:
3216 configJSON = urllib2.urlopen(configUrl).read()
3217 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3218 self._downloader.trouble(u'ERROR: unable to download configuration: ' + unicode(err))
3219 return
3220
3221 # Technically, it's JavaScript, not JSON
3222 configJSON = configJSON.replace("'", '"')
3223
3224 try:
3225 config = json.loads(configJSON)
3226 except (ValueError,), err:
3227 self._downloader.trouble(u'ERROR: Invalid JSON in configuration file: ' + unicode(err))
3228 return
3229
3230 playlist = config['playlist']
3231 videoUrl = playlist[1]['url']
3232
3233 self._downloader.increment_downloads()
3234 info = {
3235 'id': videoId,
3236 'url': videoUrl,
3237 'uploader': showName,
3238 'upload_date': None,
3239 'title': showName,
3240 'stitle': self._simplify_title(showName),
3241 'ext': 'flv',
3242 'format': 'flv',
3243 'thumbnail': imgUrl,
3244 'description': description,
3245 'player_url': playerUrl,
3246 }
3247
3248 try:
3249 self._downloader.process_info(info)
3250 except UnavailableVideoError, err:
3251 self._downloader.trouble(u'\nERROR: unable to download ' + videoId)
3252
3253
3254
65cd34c5
RG
3255class PostProcessor(object):
3256 """Post Processor class.
3257
3258 PostProcessor objects can be added to downloaders with their
3259 add_post_processor() method. When the downloader has finished a
3260 successful download, it will take its internal chain of PostProcessors
3261 and start calling the run() method on each one of them, first with
3262 an initial argument and then with the returned value of the previous
3263 PostProcessor.
3264
3265 The chain will be stopped if one of them ever returns None or the end
3266 of the chain is reached.
3267
3268 PostProcessor objects follow a "mutual registration" process similar
3269 to InfoExtractor objects.
3270 """
3271
3272 _downloader = None
3273
3274 def __init__(self, downloader=None):
3275 self._downloader = downloader
3276
65cd34c5
RG
3277 def set_downloader(self, downloader):
3278 """Sets the downloader for this PP."""
3279 self._downloader = downloader
d3975459 3280
65cd34c5
RG
3281 def run(self, information):
3282 """Run the PostProcessor.
3283
3284 The "information" argument is a dictionary like the ones
2f11508a 3285 composed by InfoExtractors. The only difference is that this
65cd34c5
RG
3286 one has an extra field called "filepath" that points to the
3287 downloaded file.
3288
3289 When this method returns None, the postprocessing chain is
3290 stopped. However, this method may return an information
3291 dictionary that will be passed to the next postprocessing
3292 object in the chain. It can be the one it received after
3293 changing some fields.
3294
3295 In addition, this method may raise a PostProcessingError
3296 exception that will be taken into account by the downloader
3297 it was called from.
3298 """
3299 return information # by default, do nothing
d3975459 3300
c0a10ca8 3301
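# Illustrative sketch (not part of the original tool): a minimal PostProcessor
# that only touches the 'filepath' field, showing the chaining contract
# described in the docstring above. The class name and the 'prefix' argument
# are made up for the example; only the PostProcessor/add_post_processor()
# interface documented above is assumed.
class ExamplePrefixPP(PostProcessor):
	def __init__(self, downloader=None, prefix='done-'):
		PostProcessor.__init__(self, downloader)
		self._prefix = prefix

	def run(self, information):
		# Rename the downloaded file and report the new path onwards.
		old_path = information['filepath']
		new_path = os.path.join(os.path.dirname(old_path), self._prefix + os.path.basename(old_path))
		os.rename(old_path, new_path)
		# Returning the (possibly modified) dict keeps the chain going;
		# returning None would stop any further post-processing.
		information['filepath'] = new_path
		return information
# It would be attached to a downloader with something like:
#	fd.add_post_processor(ExamplePrefixPP(prefix='audio-'))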
3072fab1
RG
3302class FFmpegExtractAudioPP(PostProcessor):
3303
c99dcbd2 3304 def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, keepvideo=False):
3072fab1
RG
3305 PostProcessor.__init__(self, downloader)
3306 if preferredcodec is None:
3307 preferredcodec = 'best'
3308 self._preferredcodec = preferredcodec
18b7f874 3309 self._preferredquality = preferredquality
3310 self._keepvideo = keepvideo
3072fab1
RG
3311
3312 @staticmethod
3313 def get_audio_codec(path):
da273188 3314 try:
2727dbf7
RG
3315 cmd = ['ffprobe', '-show_streams', '--', path]
3316 handle = subprocess.Popen(cmd, stderr=file(os.path.devnull, 'w'), stdout=subprocess.PIPE)
da273188
RG
3317 output = handle.communicate()[0]
3318 if handle.wait() != 0:
3319 return None
3320 except (IOError, OSError):
3072fab1
RG
3321 return None
3322 audio_codec = None
3323 for line in output.split('\n'):
3324 if line.startswith('codec_name='):
3325 audio_codec = line.split('=')[1].strip()
3326 elif line.strip() == 'codec_type=audio' and audio_codec is not None:
3327 return audio_codec
3328 return None
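	# For reference, the parsing above only relies on `ffprobe -show_streams`
	# printing, per stream, key=value lines roughly of the form
	#	codec_name=aac
	#	...
	#	codec_type=audio
	# (the exact key set and ordering can vary between ffprobe builds, which is
	# why only codec_name followed by codec_type=audio is looked for).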
3329
3330 @staticmethod
3331 def run_ffmpeg(path, out_path, codec, more_opts):
3332 try:
2727dbf7
RG
3333 cmd = ['ffmpeg', '-y', '-i', path, '-vn', '-acodec', codec] + more_opts + ['--', out_path]
3334 ret = subprocess.call(cmd, stdout=file(os.path.devnull, 'w'), stderr=subprocess.STDOUT)
3072fab1
RG
3335 return (ret == 0)
3336 except (IOError, OSError):
3337 return False
3338
3339 def run(self, information):
3340 path = information['filepath']
3341
3342 filecodec = self.get_audio_codec(path)
3343 if filecodec is None:
da273188 3344 self._downloader.to_stderr(u'WARNING: unable to obtain file audio codec with ffprobe')
3072fab1
RG
3345 return None
3346
3347 more_opts = []
3348 if self._preferredcodec == 'best' or self._preferredcodec == filecodec:
3349 if filecodec == 'aac' or filecodec == 'mp3':
3350 # Lossless if possible
3351 acodec = 'copy'
3352 extension = filecodec
3353 if filecodec == 'aac':
3354 more_opts = ['-f', 'adts']
3355 else:
3356 # MP3 otherwise.
3357 acodec = 'libmp3lame'
3358 extension = 'mp3'
c99dcbd2
PH
3359 more_opts = []
3360 if self._preferredquality is not None:
3361 more_opts += ['-ab', self._preferredquality]
3072fab1
RG
3362 else:
3363 # We convert the audio (lossy)
3364 acodec = {'mp3': 'libmp3lame', 'aac': 'aac'}[self._preferredcodec]
3365 extension = self._preferredcodec
c99dcbd2
PH
3366 more_opts = []
3367 if self._preferredquality is not None:
3368 more_opts += ['-ab', self._preferredquality]
3072fab1
RG
3369 if self._preferredcodec == 'aac':
3370 more_opts += ['-f', 'adts']
3371
3372 (prefix, ext) = os.path.splitext(path)
3373 new_path = prefix + '.' + extension
3374 self._downloader.to_screen(u'[ffmpeg] Destination: %s' % new_path)
3375 status = self.run_ffmpeg(path, new_path, acodec, more_opts)
3376
3377 if not status:
1bd92582 3378 self._downloader.to_stderr(u'WARNING: error running ffmpeg')
3072fab1
RG
3379 return None
3380
36597dc4
K
3381 # Try to update the date time for extracted audio file.
3382 if information.get('filetime') is not None:
3383 try:
3384 os.utime(new_path, (time.time(), information['filetime']))
3385 except:
3386 self._downloader.to_stderr(u'WARNING: Cannot update utime of audio file')
3387
18b7f874 3388 if not self._keepvideo:
3389 try:
3390 os.remove(path)
3391 except (IOError, OSError):
3392 self._downloader.to_stderr(u'WARNING: Unable to remove downloaded video file')
3393 return None
3072fab1
RG
3394
3395 information['filepath'] = new_path
3396 return information
3397
5fb3df4a
GV
3398
3399def updateSelf(downloader, filename):
3400 ''' Update the program file with the latest version from the repository '''
3401 # Note: downloader only used for options
3402 if not os.access(filename, os.W_OK):
3403 sys.exit('ERROR: no write permissions on %s' % filename)
3404
d207e7cf 3405 downloader.to_screen('Updating to latest version...')
5fb3df4a 3406
4fa74b52 3407 try:
d207e7cf
PH
3408 try:
3409 urlh = urllib.urlopen(UPDATE_URL)
3410 newcontent = urlh.read()
3411 finally:
3412 urlh.close()
5fb3df4a
GV
3413 except (IOError, OSError), err:
3414 sys.exit('ERROR: unable to download latest version')
f9f1e798 3415
5fb3df4a 3416 try:
d207e7cf
PH
3417 outf = open(filename, 'wb')
3418 try:
3419 outf.write(newcontent)
3420 finally:
3421 outf.close()
5fb3df4a
GV
3422 except (IOError, OSError), err:
3423 sys.exit('ERROR: unable to overwrite current version')
4bec29ef 3424
d207e7cf 3425 downloader.to_screen('Updated youtube-dl. Restart to use the new version.')
80066952 3426
4f9f96f6
GV
3427def parseOpts():
3428 # Deferred imports
3429 import getpass
3430 import optparse
e7cf18cb 3431
4f9f96f6
GV
3432 def _format_option_string(option):
3433 ''' ('-o', '--option') -> -o, --option METAVAR'''
80066952 3434
4f9f96f6
GV
3435 opts = []
3436
3437 if option._short_opts: opts.append(option._short_opts[0])
3438 if option._long_opts: opts.append(option._long_opts[0])
3439 if len(opts) > 1: opts.insert(1, ', ')
3440
3441 if option.takes_value(): opts.append(' %s' % option.metavar)
3442
3443 return "".join(opts)
3444
6a4f0a11
GV
3445 def _find_term_columns():
3446 columns = os.environ.get('COLUMNS', None)
2c8d32de
PH
3447 if columns:
3448 return int(columns)
3449
4f2a5e06
PH
3450 try:
3451 sp = subprocess.Popen(['stty', 'size'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
3452 out,err = sp.communicate()
eb0387a8 3453 return int(out.split()[1])
4f2a5e06
PH
3454 except:
3455 pass
2c8d32de 3456 return None
6a4f0a11 3457
51c8e53f
GV
3458 max_width = 80
3459 max_help_position = 80
3460
3461 # No need to wrap help messages if we're on a wide console
6a4f0a11 3462 columns = _find_term_columns()
51c8e53f
GV
3463 if columns: max_width = columns
3464
3465 fmt = optparse.IndentedHelpFormatter(width=max_width, max_help_position=max_help_position)
4f9f96f6
GV
3466 fmt.format_option_strings = _format_option_string
3467
3468 kw = {
3469 'version' : __version__,
3470 'formatter' : fmt,
a2f7e3a5 3471 'usage' : '%prog [options] url [url...]',
4f9f96f6
GV
3472 'conflict_handler' : 'resolve',
3473 }
3474
3475 parser = optparse.OptionParser(**kw)
3476
3477 # option groups
3478 general = optparse.OptionGroup(parser, 'General Options')
20e91e83 3479 selection = optparse.OptionGroup(parser, 'Video Selection')
4f9f96f6
GV
3480 authentication = optparse.OptionGroup(parser, 'Authentication Options')
3481 video_format = optparse.OptionGroup(parser, 'Video Format Options')
3482 postproc = optparse.OptionGroup(parser, 'Post-processing Options')
3483 filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
3484 verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
3485
3486 general.add_option('-h', '--help',
3487 action='help', help='print this help text and exit')
3488 general.add_option('-v', '--version',
3489 action='version', help='print program version and exit')
3490 general.add_option('-U', '--update',
e0e56865 3491 action='store_true', dest='update_self', help='update this program to latest version')
4f9f96f6
GV
3492 general.add_option('-i', '--ignore-errors',
3493 action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
3494 general.add_option('-r', '--rate-limit',
3495 dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)')
3496 general.add_option('-R', '--retries',
3497 dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10)
4f9f96f6
GV
3498 general.add_option('--dump-user-agent',
3499 action='store_true', dest='dump_user_agent',
3500 help='display the current browser identification', default=False)
f3098c4d
PH
3501 general.add_option('--list-extractors',
3502 action='store_true', dest='list_extractors',
3503 help='List all supported extractors and the URLs they would handle', default=False)
4f9f96f6 3504
20e91e83
ABP
3505 selection.add_option('--playlist-start',
3506 dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1)
3507 selection.add_option('--playlist-end',
3508 dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)
3509 selection.add_option('--match-title', dest='matchtitle', metavar='REGEX',help='download only matching titles (regex or caseless sub-string)')
3510 selection.add_option('--reject-title', dest='rejecttitle', metavar='REGEX',help='skip download for matching titles (regex or caseless sub-string)')
3511
4f9f96f6
GV
3512 authentication.add_option('-u', '--username',
3513 dest='username', metavar='USERNAME', help='account username')
3514 authentication.add_option('-p', '--password',
3515 dest='password', metavar='PASSWORD', help='account password')
3516 authentication.add_option('-n', '--netrc',
3517 action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
3518
3519
3520 video_format.add_option('-f', '--format',
3521 action='store', dest='format', metavar='FORMAT', help='video format code')
3522 video_format.add_option('--all-formats',
5260e68f 3523 action='store_const', dest='format', help='download all available video formats', const='all')
4f9f96f6
GV
3524 video_format.add_option('--max-quality',
3525 action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download')
3526
3527
3528 verbosity.add_option('-q', '--quiet',
3529 action='store_true', dest='quiet', help='activates quiet mode', default=False)
3530 verbosity.add_option('-s', '--simulate',
9b4556c4
PH
3531 action='store_true', dest='simulate', help='do not download the video and do not write anything to disk', default=False)
3532 verbosity.add_option('--skip-download',
3533 action='store_true', dest='skip_download', help='do not download the video', default=False)
4f9f96f6
GV
3534 verbosity.add_option('-g', '--get-url',
3535 action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
3536 verbosity.add_option('-e', '--get-title',
3537 action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
3538 verbosity.add_option('--get-thumbnail',
3539 action='store_true', dest='getthumbnail',
3540 help='simulate, quiet but print thumbnail URL', default=False)
3541 verbosity.add_option('--get-description',
3542 action='store_true', dest='getdescription',
3543 help='simulate, quiet but print video description', default=False)
3544 verbosity.add_option('--get-filename',
3545 action='store_true', dest='getfilename',
3546 help='simulate, quiet but print output filename', default=False)
da0db53a
DH
3547 verbosity.add_option('--get-format',
3548 action='store_true', dest='getformat',
3549 help='simulate, quiet but print output format', default=False)
4f9f96f6
GV
3550 verbosity.add_option('--no-progress',
3551 action='store_true', dest='noprogress', help='do not print progress bar', default=False)
3552 verbosity.add_option('--console-title',
3553 action='store_true', dest='consoletitle',
3554 help='display progress in console titlebar', default=False)
3555
3556
3557 filesystem.add_option('-t', '--title',
3558 action='store_true', dest='usetitle', help='use title in file name', default=False)
3559 filesystem.add_option('-l', '--literal',
3560 action='store_true', dest='useliteral', help='use literal title in file name', default=False)
3561 filesystem.add_option('-A', '--auto-number',
3562 action='store_true', dest='autonumber',
3563 help='number downloaded files starting from 00000', default=False)
3564 filesystem.add_option('-o', '--output',
3565 dest='outtmpl', metavar='TEMPLATE', help='output filename template')
3566 filesystem.add_option('-a', '--batch-file',
3567 dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
3568 filesystem.add_option('-w', '--no-overwrites',
3569 action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
3570 filesystem.add_option('-c', '--continue',
3571 action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False)
3572 filesystem.add_option('--cookies',
3573 dest='cookiefile', metavar='FILE', help='file to dump cookie jar to')
3574 filesystem.add_option('--no-part',
3575 action='store_true', dest='nopart', help='do not use .part files', default=False)
3576 filesystem.add_option('--no-mtime',
3577 action='store_false', dest='updatetime',
3578 help='do not use the Last-modified header to set the file modification time', default=True)
2c8d32de
PH
3579 filesystem.add_option('--write-description',
3580 action='store_true', dest='writedescription',
3581 help='write video description to a .description file', default=False)
3582 filesystem.add_option('--write-info-json',
3583 action='store_true', dest='writeinfojson',
3584 help='write video metadata to a .info.json file', default=False)
4f9f96f6
GV
3585
3586
3587 postproc.add_option('--extract-audio', action='store_true', dest='extractaudio', default=False,
3588 help='convert video files to audio-only files (requires ffmpeg and ffprobe)')
3589 postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best',
3590 help='"best", "aac" or "mp3"; best by default')
c99dcbd2
PH
3591 postproc.add_option('--audio-quality', metavar='QUALITY', dest='audioquality', default='128K',
3592 help='ffmpeg audio bitrate specification, 128k by default')
3593 postproc.add_option('-k', '--keep-video', action='store_true', dest='keepvideo', default=False,
3594 help='keeps the video file on disk after the post-processing; the video is erased by default')
4f9f96f6
GV
3595
3596
3597 parser.add_option_group(general)
20e91e83 3598 parser.add_option_group(selection)
4f9f96f6
GV
3599 parser.add_option_group(filesystem)
3600 parser.add_option_group(verbosity)
3601 parser.add_option_group(video_format)
3602 parser.add_option_group(authentication)
3603 parser.add_option_group(postproc)
3604
3605 opts, args = parser.parse_args()
3606
3607 return parser, opts, args
3608
f3098c4d
PH
3609def gen_extractors():
3610 """ Return a list of an instance of every supported extractor.
3611 The order does matter; the first extractor matched is the one handling the URL.
3612 """
3613 youtube_ie = YoutubeIE()
3614 google_ie = GoogleIE()
3615 yahoo_ie = YahooIE()
3616 return [
3617 youtube_ie,
3618 MetacafeIE(youtube_ie),
3619 DailymotionIE(),
3620 YoutubePlaylistIE(youtube_ie),
3621 YoutubeUserIE(youtube_ie),
3622 YoutubeSearchIE(youtube_ie),
3623 google_ie,
3624 GoogleSearchIE(google_ie),
3625 PhotobucketIE(),
3626 yahoo_ie,
3627 YahooSearchIE(yahoo_ie),
3628 DepositFilesIE(),
3629 FacebookIE(),
3630 BlipTVIE(),
3631 VimeoIE(),
3632 MyVideoIE(),
3633 ComedyCentralIE(),
3634 EscapistIE(),
3635
3636 GenericIE()
3637 ]
3638
5adcaa43
GV
3639def main():
3640 parser, opts, args = parseOpts()
4f9f96f6 3641
5adcaa43
GV
3642 # Open appropriate CookieJar
3643 if opts.cookiefile is None:
3644 jar = cookielib.CookieJar()
3645 else:
8cc44341 3646 try:
5adcaa43
GV
3647 jar = cookielib.MozillaCookieJar(opts.cookiefile)
3648 if os.path.isfile(opts.cookiefile) and os.access(opts.cookiefile, os.R_OK):
3649 jar.load()
3650 except (IOError, OSError), err:
3651 sys.exit(u'ERROR: unable to open cookie file')
80066952 3652
5adcaa43
GV
3653 # Dump user agent
3654 if opts.dump_user_agent:
3655 print std_headers['User-Agent']
3656 sys.exit(0)
e7cf18cb 3657
5adcaa43
GV
3658 # Batch file verification
3659 batchurls = []
3660 if opts.batchfile is not None:
8cc44341 3661 try:
5adcaa43
GV
3662 if opts.batchfile == '-':
3663 batchfd = sys.stdin
4bec29ef 3664 else:
5adcaa43
GV
3665 batchfd = open(opts.batchfile, 'r')
3666 batchurls = batchfd.readlines()
3667 batchurls = [x.strip() for x in batchurls]
3668 batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)]
3669 except IOError:
3670 sys.exit(u'ERROR: batch file could not be read')
3671 all_urls = batchurls + args
3672
f3098c4d
PH
3673 # General configuration
3674 cookie_processor = urllib2.HTTPCookieProcessor(jar)
3675 opener = urllib2.build_opener(urllib2.ProxyHandler(), cookie_processor, YoutubeDLHandler())
3676 urllib2.install_opener(opener)
3677 socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)
3678
3679 extractors = gen_extractors()
3680
3681 if opts.list_extractors:
3682 for ie in extractors:
3683 print(ie.IE_NAME)
3684 matchedUrls = filter(lambda url: ie.suitable(url), all_urls)
3685 all_urls = filter(lambda url: url not in matchedUrls, all_urls)
3686 for mu in matchedUrls:
3687 print(u' ' + mu)
3688 sys.exit(0)
3689
5adcaa43
GV
3690 # Conflicting, missing and erroneous options
3691 if opts.usenetrc and (opts.username is not None or opts.password is not None):
3692 parser.error(u'using .netrc conflicts with giving username/password')
3693 if opts.password is not None and opts.username is None:
3694 parser.error(u'account username missing')
3695 if opts.outtmpl is not None and (opts.useliteral or opts.usetitle or opts.autonumber):
3696 parser.error(u'using output template conflicts with using title, literal title or auto number')
3697 if opts.usetitle and opts.useliteral:
3698 parser.error(u'using title conflicts with using literal title')
3699 if opts.username is not None and opts.password is None:
3700 opts.password = getpass.getpass(u'Type account password and press return:')
3701 if opts.ratelimit is not None:
3702 numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
3703 if numeric_limit is None:
3704 parser.error(u'invalid rate limit specified')
3705 opts.ratelimit = numeric_limit
3706 if opts.retries is not None:
8cc44341 3707 try:
5adcaa43 3708 opts.retries = long(opts.retries)
8cc44341 3709 except (TypeError, ValueError), err:
5adcaa43
GV
3710 parser.error(u'invalid retry count specified')
3711 try:
2c8d32de 3712 opts.playliststart = int(opts.playliststart)
5adcaa43 3713 if opts.playliststart <= 0:
2c8d32de 3714 raise ValueError(u'Playlist start must be positive')
5adcaa43
GV
3715 except (TypeError, ValueError), err:
3716 parser.error(u'invalid playlist start number specified')
3717 try:
2c8d32de 3718 opts.playlistend = int(opts.playlistend)
5adcaa43 3719 if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart):
2c8d32de 3720 raise ValueError(u'Playlist end must be greater than playlist start')
5adcaa43
GV
3721 except (TypeError, ValueError), err:
3722 parser.error(u'invalid playlist end number specified')
3723 if opts.extractaudio:
3724 if opts.audioformat not in ['best', 'aac', 'mp3']:
3725 parser.error(u'invalid audio format specified')
3726
5adcaa43
GV
3727 # File downloader
3728 fd = FileDownloader({
3729 'usenetrc': opts.usenetrc,
3730 'username': opts.username,
3731 'password': opts.password,
da0db53a 3732 'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat),
5adcaa43
GV
3733 'forceurl': opts.geturl,
3734 'forcetitle': opts.gettitle,
3735 'forcethumbnail': opts.getthumbnail,
3736 'forcedescription': opts.getdescription,
3737 'forcefilename': opts.getfilename,
da0db53a 3738 'forceformat': opts.getformat,
9b4556c4 3739 'simulate': opts.simulate,
da0db53a 3740 'skip_download': (opts.skip_download or opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat),
5adcaa43
GV
3741 'format': opts.format,
3742 'format_limit': opts.format_limit,
3743 'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
3744 or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s')
3745 or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s')
3746 or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s')
3747 or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(stitle)s-%(id)s.%(ext)s')
3748 or (opts.useliteral and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s')
3749 or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
3750 or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
3751 or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
3752 or u'%(id)s.%(ext)s'),
3753 'ignoreerrors': opts.ignoreerrors,
3754 'ratelimit': opts.ratelimit,
3755 'nooverwrites': opts.nooverwrites,
3756 'retries': opts.retries,
3757 'continuedl': opts.continue_dl,
3758 'noprogress': opts.noprogress,
3759 'playliststart': opts.playliststart,
3760 'playlistend': opts.playlistend,
3761 'logtostderr': opts.outtmpl == '-',
3762 'consoletitle': opts.consoletitle,
3763 'nopart': opts.nopart,
3764 'updatetime': opts.updatetime,
2c8d32de
PH
3765 'writedescription': opts.writedescription,
3766 'writeinfojson': opts.writeinfojson,
20e91e83
ABP
3767 'matchtitle': opts.matchtitle,
3768 'rejecttitle': opts.rejecttitle,
5adcaa43 3769 })
8c5dc3ad
PH
3770 for extractor in extractors:
3771 fd.add_info_extractor(extractor)
5adcaa43
GV
3772
3773 # PostProcessors
3774 if opts.extractaudio:
c99dcbd2 3775 fd.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, keepvideo=opts.keepvideo))
5adcaa43
GV
3776
3777 # Update version
3778 if opts.update_self:
3779 updateSelf(fd, sys.argv[0])
3780
3781 # Maybe do nothing
3782 if len(all_urls) < 1:
3783 if not opts.update_self:
3784 parser.error(u'you must provide at least one URL')
3785 else:
3786 sys.exit()
3787 retcode = fd.download(all_urls)
80066952 3788
5adcaa43
GV
3789 # Dump cookie jar if requested
3790 if opts.cookiefile is not None:
3791 try:
3792 jar.save()
3793 except (IOError, OSError), err:
3794 sys.exit(u'ERROR: unable to save cookie jar')
80066952 3795
5adcaa43 3796 sys.exit(retcode)
80066952 3797
4fa74b52 3798
5adcaa43
GV
3799if __name__ == '__main__':
3800 try:
3801 main()
e5bf0f55
RG
3802 except DownloadError:
3803 sys.exit(1)
3804 except SameFileError:
76a7f364 3805 sys.exit(u'ERROR: fixed output name but more than one file to download')
4fa74b52 3806 except KeyboardInterrupt:
76a7f364 3807 sys.exit(u'\nERROR: Interrupted by user')
e9cb9c28
GV
3808
3809# vim: set ts=4 sw=4 sts=4 noet ai si filetype=python: