#!/usr/bin/env python
# -*- coding: utf-8 -*-

__author__ = (
	'Ricardo Garcia Gonzalez',
	'Danny Colligan',
	'Benjamin Johnson',
	'Vasyl\' Vavrychuk',
	'Witold Baryluk',
	'Paweł Paprota',
	'Gergely Imreh',
	'Rogério Brito',
	'Philipp Hagemeister',
	'Sören Schulze',
	)

__license__ = 'Public Domain'
__version__ = '2011.09.18'

UPDATE_URL = 'https://raw.github.com/rg3/youtube-dl/master/youtube-dl'

import cookielib
import datetime
import gzip
import htmlentitydefs
import HTMLParser
import httplib
import locale
import math
import netrc
import os
import os.path
import re
import socket
import string
import subprocess
import sys
import time
import urllib
import urllib2
import warnings
import zlib

if os.name == 'nt':
	import ctypes

try:
	import email.utils
except ImportError: # Python 2.4
	import email.Utils
try:
	import cStringIO as StringIO
except ImportError:
	import StringIO

# parse_qs was moved from the cgi module to the urlparse module recently.
try:
	from urlparse import parse_qs
except ImportError:
	from cgi import parse_qs

try:
	import lxml.etree
except ImportError:
	pass # Handled below

try:
	import xml.etree.ElementTree
except ImportError: # Python<2.5: Not officially supported, but let it slip
	warnings.warn('xml.etree.ElementTree support is missing. Consider upgrading to Python >= 2.5 if you get related errors.')

std_headers = {
	'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:5.0.1) Gecko/20100101 Firefox/5.0.1',
	'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
	'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
	'Accept-Encoding': 'gzip, deflate',
	'Accept-Language': 'en-us,en;q=0.5',
}

simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii')

try:
	import json
except ImportError: # Python <2.6, use trivialjson (https://github.com/phihag/trivialjson):
	import re
	class json(object):
		@staticmethod
		def loads(s):
			s = s.decode('UTF-8')
			def raiseError(msg, i):
				raise ValueError(msg + ' at position ' + str(i) + ' of ' + repr(s) + ': ' + repr(s[i:]))
			def skipSpace(i, expectMore=True):
				while i < len(s) and s[i] in ' \t\r\n':
					i += 1
				if expectMore:
					if i >= len(s):
						raiseError('Premature end', i)
				return i
			def decodeEscape(match):
				esc = match.group(1)
				_STATIC = {
					'"': '"',
					'\\': '\\',
					'/': '/',
					'b': unichr(0x8),
					'f': unichr(0xc),
					'n': '\n',
					'r': '\r',
					't': '\t',
				}
				if esc in _STATIC:
					return _STATIC[esc]
				if esc[0] == 'u':
					if len(esc) == 1+4:
						return unichr(int(esc[1:5], 16))
					if len(esc) == 5+6 and esc[5:7] == '\\u':
						hi = int(esc[1:5], 16)
						low = int(esc[7:11], 16)
						return unichr((hi - 0xd800) * 0x400 + low - 0xdc00 + 0x10000)
				raise ValueError('Unknown escape ' + str(esc))
			def parseString(i):
				i += 1
				e = i
				while True:
					e = s.index('"', e)
					bslashes = 0
					while s[e-bslashes-1] == '\\':
						bslashes += 1
					if bslashes % 2 == 1:
						e += 1
						continue
					break
				rexp = re.compile(r'\\(u[dD][89aAbB][0-9a-fA-F]{2}\\u[0-9a-fA-F]{4}|u[0-9a-fA-F]{4}|.|$)')
				stri = rexp.sub(decodeEscape, s[i:e])
				return (e+1,stri)
			def parseObj(i):
				i += 1
				res = {}
				i = skipSpace(i)
				if s[i] == '}': # Empty dictionary
					return (i+1,res)
				while True:
					if s[i] != '"':
						raiseError('Expected a string object key', i)
					i,key = parseString(i)
					i = skipSpace(i)
					if i >= len(s) or s[i] != ':':
						raiseError('Expected a colon', i)
					i,val = parse(i+1)
					res[key] = val
					i = skipSpace(i)
					if s[i] == '}':
						return (i+1, res)
					if s[i] != ',':
						raiseError('Expected comma or closing curly brace', i)
					i = skipSpace(i+1)
			def parseArray(i):
				res = []
				i = skipSpace(i+1)
				if s[i] == ']': # Empty array
					return (i+1,res)
				while True:
					i,val = parse(i)
					res.append(val)
					i = skipSpace(i) # Raise exception if premature end
					if s[i] == ']':
						return (i+1, res)
					if s[i] != ',':
						raiseError('Expected a comma or closing bracket', i)
					i = skipSpace(i+1)
			def parseDiscrete(i):
				for k,v in {'true': True, 'false': False, 'null': None}.items():
					if s.startswith(k, i):
						return (i+len(k), v)
				raiseError('Not a boolean (or null)', i)
			def parseNumber(i):
				mobj = re.match('^(-?(0|[1-9][0-9]*)(\.[0-9]*)?([eE][+-]?[0-9]+)?)', s[i:])
				if mobj is None:
					raiseError('Not a number', i)
				nums = mobj.group(1)
				if '.' in nums or 'e' in nums or 'E' in nums:
					return (i+len(nums), float(nums))
				return (i+len(nums), int(nums))
			CHARMAP = {'{': parseObj, '[': parseArray, '"': parseString, 't': parseDiscrete, 'f': parseDiscrete, 'n': parseDiscrete}
			def parse(i):
				i = skipSpace(i)
				i,res = CHARMAP.get(s[i], parseNumber)(i)
				i = skipSpace(i, False)
				return (i,res)
			i,res = parse(0)
			if i < len(s):
				raise ValueError('Extra data at end of input (index ' + str(i) + ' of ' + repr(s) + ': ' + repr(s[i:]) + ')')
			return res

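# Illustrative note (not part of the original script): whichever branch wins above, the
# rest of the code calls json.loads(); the fallback class only provides loads(), which is
# why FileDownloader.process_info() separately guards its use of json.dump(). A rough
# sketch of the fallback parser's behaviour on a simple document:
#
#   >>> json.loads('{"a": [1, 2.5, null, "x"]}')
#   {u'a': [1, 2.5, None, u'x']}
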
def preferredencoding():
	"""Get preferred encoding.

	Returns the best encoding scheme for the system, based on
	locale.getpreferredencoding() and some further tweaks.
	"""
	def yield_preferredencoding():
		try:
			pref = locale.getpreferredencoding()
			u'TEST'.encode(pref)
		except:
			pref = 'UTF-8'
		while True:
			yield pref
	return yield_preferredencoding().next()


def htmlentity_transform(matchobj):
	"""Transforms an HTML entity to a Unicode character.

	This function receives a match object and is intended to be used with
	the re.sub() function.
	"""
	entity = matchobj.group(1)

	# Known non-numeric HTML entity
	if entity in htmlentitydefs.name2codepoint:
		return unichr(htmlentitydefs.name2codepoint[entity])

	# Unicode character
	mobj = re.match(ur'(?u)#(x?\d+)', entity)
	if mobj is not None:
		numstr = mobj.group(1)
		if numstr.startswith(u'x'):
			base = 16
			numstr = u'0%s' % numstr
		else:
			base = 10
		return unichr(long(numstr, base))

	# Unknown entity in name, return its literal representation
	return (u'&%s;' % entity)


def sanitize_title(utitle):
	"""Sanitizes a video title so it could be used as part of a filename."""
	utitle = re.sub(ur'(?u)&(.+?);', htmlentity_transform, utitle)
	return utitle.replace(unicode(os.sep), u'%')

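# Illustrative note (not part of the original script): sanitize_title() resolves HTML
# entities and replaces the platform's path separator, so on a POSIX system (os.sep == '/')
# one would expect roughly:
#
#   >>> sanitize_title(u'Foo &amp; Bar/Baz')
#   u'Foo & Bar%Baz'
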
def sanitize_open(filename, open_mode):
	"""Try to open the given filename, and slightly tweak it if this fails.

	Attempts to open the given filename. If this fails, it tries to change
	the filename slightly, step by step, until it's either able to open it
	or it fails and raises a final exception, like the standard open()
	function.

	It returns the tuple (stream, definitive_file_name).
	"""
	try:
		if filename == u'-':
			if sys.platform == 'win32':
				import msvcrt
				msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
			return (sys.stdout, filename)
		stream = open(filename, open_mode)
		return (stream, filename)
	except (IOError, OSError), err:
		# In case of error, try to remove win32 forbidden chars
		filename = re.sub(ur'[/<>:"\|\?\*]', u'#', filename)

		# An exception here should be caught in the caller
		stream = open(filename, open_mode)
		return (stream, filename)


def timeconvert(timestr):
	"""Convert RFC 2822 defined time string into system timestamp"""
	timestamp = None
	timetuple = email.utils.parsedate_tz(timestr)
	if timetuple is not None:
		timestamp = email.utils.mktime_tz(timetuple)
	return timestamp

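# Illustrative note (not part of the original script): timeconvert() feeds an RFC 2822
# date, such as the value of a Last-Modified header, through email.utils, e.g.:
#
#   >>> timeconvert('Sun, 18 Sep 2011 12:00:00 GMT')
#   1316347200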

class DownloadError(Exception):
	"""Download Error exception.

	This exception may be thrown by FileDownloader objects if they are not
	configured to continue on errors. They will contain the appropriate
	error message.
	"""
	pass


class SameFileError(Exception):
	"""Same File exception.

	This exception will be thrown by FileDownloader objects if they detect
	multiple files would have to be downloaded to the same file on disk.
	"""
	pass


class PostProcessingError(Exception):
	"""Post Processing exception.

	This exception may be raised by PostProcessor's .run() method to
	indicate an error in the postprocessing task.
	"""
	pass


class UnavailableVideoError(Exception):
	"""Unavailable Format exception.

	This exception will be thrown when a video is requested
	in a format that is not available for that video.
	"""
	pass


class ContentTooShortError(Exception):
	"""Content Too Short exception.

	This exception may be raised by FileDownloader objects when a file they
	download is too small for what the server announced first, indicating
	the connection was probably interrupted.
	"""
	# Both in bytes
	downloaded = None
	expected = None

	def __init__(self, downloaded, expected):
		self.downloaded = downloaded
		self.expected = expected


class YoutubeDLHandler(urllib2.HTTPHandler):
	"""Handler for HTTP requests and responses.

	This class, when installed with an OpenerDirector, automatically adds
	the standard headers to every HTTP request and handles gzipped and
	deflated responses from web servers. If compression is to be avoided in
	a particular request, the original request in the program code only has
	to include the HTTP header "Youtubedl-No-Compression", which will be
	removed before making the real request.

	Part of this code was copied from:

	http://techknack.net/python-urllib2-handlers/

	Andrew Rowls, the author of that code, agreed to release it to the
	public domain.
	"""

	@staticmethod
	def deflate(data):
		try:
			return zlib.decompress(data, -zlib.MAX_WBITS)
		except zlib.error:
			return zlib.decompress(data)

	@staticmethod
	def addinfourl_wrapper(stream, headers, url, code):
		if hasattr(urllib2.addinfourl, 'getcode'):
			return urllib2.addinfourl(stream, headers, url, code)
		ret = urllib2.addinfourl(stream, headers, url)
		ret.code = code
		return ret

	def http_request(self, req):
		for h in std_headers:
			if h in req.headers:
				del req.headers[h]
			req.add_header(h, std_headers[h])
		if 'Youtubedl-no-compression' in req.headers:
			if 'Accept-encoding' in req.headers:
				del req.headers['Accept-encoding']
			del req.headers['Youtubedl-no-compression']
		return req

	def http_response(self, req, resp):
		old_resp = resp
		# gzip
		if resp.headers.get('Content-encoding', '') == 'gzip':
			gz = gzip.GzipFile(fileobj=StringIO.StringIO(resp.read()), mode='r')
			resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
			resp.msg = old_resp.msg
		# deflate
		if resp.headers.get('Content-encoding', '') == 'deflate':
			gz = StringIO.StringIO(self.deflate(resp.read()))
			resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
			resp.msg = old_resp.msg
		return resp

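# Illustrative note (not part of the original script): the handler above is meant to be
# installed on an OpenerDirector. A minimal sketch would be the two lines below; the
# script's actual opener set-up lives in the main program outside this excerpt and
# typically wires in cookie and proxy handlers as well:
#
#   opener = urllib2.build_opener(YoutubeDLHandler())
#   urllib2.install_opener(opener)
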
class FileDownloader(object):
	"""File Downloader class.

	File downloader objects are the ones responsible for downloading the
	actual video file and writing it to disk if the user has requested
	it, among some other tasks. In most cases there should be one per
	program. Since, given a video URL, the downloader doesn't know how to
	extract all the needed information (a task that InfoExtractors do), it
	has to pass the URL to one of them.

	For this, file downloader objects have a method that allows
	InfoExtractors to be registered in a given order. When it is passed
	a URL, the file downloader hands it to the first InfoExtractor it
	finds that reports being able to handle it. The InfoExtractor extracts
	all the information about the video or videos the URL refers to, and
	asks the FileDownloader to process the video information, possibly
	downloading the video.

	File downloaders accept a lot of parameters. In order not to saturate
	the object constructor with arguments, it receives a dictionary of
	options instead. These options are available through the params
	attribute for the InfoExtractors to use. The FileDownloader also
	registers itself as the downloader in charge for the InfoExtractors
	that are added to it, so this is a "mutual registration".

	Available options:

	username:         Username for authentication purposes.
	password:         Password for authentication purposes.
	usenetrc:         Use netrc for authentication instead.
	quiet:            Do not print messages to stdout.
	forceurl:         Force printing final URL.
	forcetitle:       Force printing title.
	forcethumbnail:   Force printing thumbnail URL.
	forcedescription: Force printing description.
	forcefilename:    Force printing final filename.
	simulate:         Do not download the video files.
	format:           Video format code.
	format_limit:     Highest quality format to try.
	outtmpl:          Template for output names.
	ignoreerrors:     Do not stop on download errors.
	ratelimit:        Download speed limit, in bytes/sec.
	nooverwrites:     Prevent overwriting files.
	retries:          Number of times to retry for HTTP error 5xx.
	continuedl:       Try to continue downloads if possible.
	noprogress:       Do not print the progress bar.
	playliststart:    Playlist item to start at.
	playlistend:      Playlist item to end at.
	matchtitle:       Download only matching titles.
	rejecttitle:      Reject downloads for matching titles.
	logtostderr:      Log messages to stderr instead of stdout.
	consoletitle:     Display progress in console window's titlebar.
	nopart:           Do not use temporary .part files.
	updatetime:       Use the Last-modified header to set output file timestamps.
	writedescription: Write the video description to a .description file.
	writeinfojson:    Write the video metadata to a .info.json file.
	"""

	params = None
	_ies = []
	_pps = []
	_download_retcode = None
	_num_downloads = None
	_screen_file = None

	def __init__(self, params):
		"""Create a FileDownloader object with the given options."""
		self._ies = []
		self._pps = []
		self._download_retcode = 0
		self._num_downloads = 0
		self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
		self.params = params

	@staticmethod
	def format_bytes(bytes):
		if bytes is None:
			return 'N/A'
		if type(bytes) is str:
			bytes = float(bytes)
		if bytes == 0.0:
			exponent = 0
		else:
			exponent = long(math.log(bytes, 1024.0))
		suffix = 'bkMGTPEZY'[exponent]
		converted = float(bytes) / float(1024 ** exponent)
		return '%.2f%s' % (converted, suffix)

	@staticmethod
	def calc_percent(byte_counter, data_len):
		if data_len is None:
			return '---.-%'
		return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))

	@staticmethod
	def calc_eta(start, now, total, current):
		if total is None:
			return '--:--'
		dif = now - start
		if current == 0 or dif < 0.001: # One millisecond
			return '--:--'
		rate = float(current) / dif
		eta = long((float(total) - float(current)) / rate)
		(eta_mins, eta_secs) = divmod(eta, 60)
		if eta_mins > 99:
			return '--:--'
		return '%02d:%02d' % (eta_mins, eta_secs)

	@staticmethod
	def calc_speed(start, now, bytes):
		dif = now - start
		if bytes == 0 or dif < 0.001: # One millisecond
			return '%10s' % '---b/s'
		return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))

	@staticmethod
	def best_block_size(elapsed_time, bytes):
		new_min = max(bytes / 2.0, 1.0)
		new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
		if elapsed_time < 0.001:
			return long(new_max)
		rate = bytes / elapsed_time
		if rate > new_max:
			return long(new_max)
		if rate < new_min:
			return long(new_min)
		return long(rate)

	@staticmethod
	def parse_bytes(bytestr):
		"""Parse a string indicating a byte quantity into a long integer."""
		matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
		if matchobj is None:
			return None
		number = float(matchobj.group(1))
		multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
		return long(round(number * multiplier))

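	# Illustrative note (not part of the original script): parse_bytes() and
	# format_bytes() are rough inverses of each other, using 1024-based units, e.g.:
	#
	#   >>> FileDownloader.parse_bytes('50k')
	#   51200L
	#   >>> FileDownloader.format_bytes(51200)
	#   '50.00k'
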
	def add_info_extractor(self, ie):
		"""Add an InfoExtractor object to the end of the list."""
		self._ies.append(ie)
		ie.set_downloader(self)

	def add_post_processor(self, pp):
		"""Add a PostProcessor object to the end of the chain."""
		self._pps.append(pp)
		pp.set_downloader(self)

	def to_screen(self, message, skip_eol=False, ignore_encoding_errors=False):
		"""Print message to stdout if not in quiet mode."""
		try:
			if not self.params.get('quiet', False):
				terminator = [u'\n', u''][skip_eol]
				print >>self._screen_file, (u'%s%s' % (message, terminator)).encode(preferredencoding()),
				self._screen_file.flush()
		except (UnicodeEncodeError), err:
			if not ignore_encoding_errors:
				raise

	def to_stderr(self, message):
		"""Print message to stderr."""
		print >>sys.stderr, message.encode(preferredencoding())

	def to_cons_title(self, message):
		"""Set console/terminal window title to message."""
		if not self.params.get('consoletitle', False):
			return
		if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
			# c_wchar_p() might not be necessary if `message` is
			# already of type unicode()
			ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
		elif 'TERM' in os.environ:
			sys.stderr.write('\033]0;%s\007' % message.encode(preferredencoding()))

	def fixed_template(self):
		"""Checks if the output template is fixed."""
		return (re.search(ur'(?u)%\(.+?\)s', self.params['outtmpl']) is None)

	def trouble(self, message=None):
		"""Determine action to take when a download problem appears.

		Depending on whether the downloader has been configured to ignore
		download errors or not, this method may throw an exception or
		not when errors are found, after printing the message.
		"""
		if message is not None:
			self.to_stderr(message)
		if not self.params.get('ignoreerrors', False):
			raise DownloadError(message)
		self._download_retcode = 1

	def slow_down(self, start_time, byte_counter):
		"""Sleep if the download speed is over the rate limit."""
		rate_limit = self.params.get('ratelimit', None)
		if rate_limit is None or byte_counter == 0:
			return
		now = time.time()
		elapsed = now - start_time
		if elapsed <= 0.0:
			return
		speed = float(byte_counter) / elapsed
		if speed > rate_limit:
			time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)

	def temp_name(self, filename):
		"""Returns a temporary filename for the given filename."""
		if self.params.get('nopart', False) or filename == u'-' or \
				(os.path.exists(filename) and not os.path.isfile(filename)):
			return filename
		return filename + u'.part'

	def undo_temp_name(self, filename):
		if filename.endswith(u'.part'):
			return filename[:-len(u'.part')]
		return filename

	def try_rename(self, old_filename, new_filename):
		try:
			if old_filename == new_filename:
				return
			os.rename(old_filename, new_filename)
		except (IOError, OSError), err:
			self.trouble(u'ERROR: unable to rename file')

	def try_utime(self, filename, last_modified_hdr):
		"""Try to set the last-modified time of the given file."""
		if last_modified_hdr is None:
			return
		if not os.path.isfile(filename):
			return
		timestr = last_modified_hdr
		if timestr is None:
			return
		filetime = timeconvert(timestr)
		if filetime is None:
			return filetime
		try:
			os.utime(filename, (time.time(), filetime))
		except:
			pass
		return filetime

	def report_writedescription(self, descfn):
		"""Report that the description file is being written."""
		self.to_screen(u'[info] Writing video description to: %s' % descfn, ignore_encoding_errors=True)

	def report_writeinfojson(self, infofn):
		"""Report that the metadata file is being written."""
		self.to_screen(u'[info] Video description metadata as JSON to: %s' % infofn, ignore_encoding_errors=True)

	def report_destination(self, filename):
		"""Report destination filename."""
		self.to_screen(u'[download] Destination: %s' % filename, ignore_encoding_errors=True)

	def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
		"""Report download progress."""
		if self.params.get('noprogress', False):
			return
		self.to_screen(u'\r[download] %s of %s at %s ETA %s' %
				(percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
		self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
				(percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))

	def report_resuming_byte(self, resume_len):
		"""Report attempt to resume at given byte."""
		self.to_screen(u'[download] Resuming download at byte %s' % resume_len)

	def report_retry(self, count, retries):
		"""Report retry in case of HTTP error 5xx."""
		self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))

	def report_file_already_downloaded(self, file_name):
		"""Report file has already been fully downloaded."""
		try:
			self.to_screen(u'[download] %s has already been downloaded' % file_name)
		except (UnicodeEncodeError), err:
			self.to_screen(u'[download] The file has already been downloaded')

	def report_unable_to_resume(self):
		"""Report it was impossible to resume download."""
		self.to_screen(u'[download] Unable to resume')

	def report_finish(self):
		"""Report download finished."""
		if self.params.get('noprogress', False):
			self.to_screen(u'[download] Download completed')
		else:
			self.to_screen(u'')

	def increment_downloads(self):
		"""Increment the ordinal that assigns a number to each file."""
		self._num_downloads += 1

	def prepare_filename(self, info_dict):
		"""Generate the output filename."""
		try:
			template_dict = dict(info_dict)
			template_dict['epoch'] = unicode(long(time.time()))
			template_dict['autonumber'] = unicode('%05d' % self._num_downloads)
			filename = self.params['outtmpl'] % template_dict
			return filename
		except (ValueError, KeyError), err:
			self.trouble(u'ERROR: invalid system charset or erroneous output template')
			return None

	def process_info(self, info_dict):
		"""Process a single dictionary returned by an InfoExtractor."""
		filename = self.prepare_filename(info_dict)

		# Forced printings
		if self.params.get('forcetitle', False):
			print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')
		if self.params.get('forceurl', False):
			print info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace')
		if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
			print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace')
		if self.params.get('forcedescription', False) and 'description' in info_dict:
			print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace')
		if self.params.get('forcefilename', False) and filename is not None:
			print filename.encode(preferredencoding(), 'xmlcharrefreplace')
		if self.params.get('forceformat', False):
			print info_dict['format'].encode(preferredencoding(), 'xmlcharrefreplace')

		# Do nothing else if in simulate mode
		if self.params.get('simulate', False):
			return

		if filename is None:
			return

		matchtitle = self.params.get('matchtitle', False)
		rejecttitle = self.params.get('rejecttitle', False)
		title = info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')
		if matchtitle and not re.search(matchtitle, title, re.IGNORECASE):
			self.to_screen(u'[download] "%s" title did not match pattern "%s"' % (title, matchtitle))
			return
		if rejecttitle and re.search(rejecttitle, title, re.IGNORECASE):
			self.to_screen(u'[download] "%s" title matched reject pattern "%s"' % (title, rejecttitle))
			return

		if self.params.get('nooverwrites', False) and os.path.exists(filename):
			self.to_stderr(u'WARNING: file exists and will be skipped')
			return

		try:
			dn = os.path.dirname(filename)
			if dn != '' and not os.path.exists(dn):
				os.makedirs(dn)
		except (OSError, IOError), err:
			self.trouble(u'ERROR: unable to create directory ' + unicode(err))
			return

		if self.params.get('writedescription', False):
			try:
				descfn = filename + '.description'
				self.report_writedescription(descfn)
				descfile = open(descfn, 'wb')
				try:
					descfile.write(info_dict['description'].encode('utf-8'))
				finally:
					descfile.close()
			except (OSError, IOError):
				self.trouble(u'ERROR: Cannot write description file ' + descfn)
				return

		if self.params.get('writeinfojson', False):
			infofn = filename + '.info.json'
			self.report_writeinfojson(infofn)
			try:
				json.dump
			except (NameError, AttributeError):
				self.trouble(u'ERROR: No JSON encoder found. Update to Python 2.6+, setup a json module, or leave out --write-info-json.')
				return
			try:
				infof = open(infofn, 'wb')
				try:
					json.dump(info_dict, infof)
				finally:
					infof.close()
			except (OSError, IOError):
				self.trouble(u'ERROR: Cannot write metadata to JSON file ' + infofn)
				return

		if not self.params.get('skip_download', False):
			try:
				success = self._do_download(filename, info_dict)
			except (OSError, IOError), err:
				raise UnavailableVideoError
			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
				self.trouble(u'ERROR: unable to download video data: %s' % str(err))
				return
			except (ContentTooShortError, ), err:
				self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
				return

			if success:
				try:
					self.post_process(filename, info_dict)
				except (PostProcessingError), err:
					self.trouble(u'ERROR: postprocessing: %s' % str(err))
					return

	def download(self, url_list):
		"""Download a given list of URLs."""
		if len(url_list) > 1 and self.fixed_template():
			raise SameFileError(self.params['outtmpl'])

		for url in url_list:
			suitable_found = False
			for ie in self._ies:
				# Go to next InfoExtractor if not suitable
				if not ie.suitable(url):
					continue

				# Suitable InfoExtractor found
				suitable_found = True

				# Extract information from URL and process it
				ie.extract(url)

				# Suitable InfoExtractor had been found; go to next URL
				break

			if not suitable_found:
				self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url)

		return self._download_retcode

	def post_process(self, filename, ie_info):
		"""Run the postprocessing chain on the given file."""
		info = dict(ie_info)
		info['filepath'] = filename
		for pp in self._pps:
			info = pp.run(info)
			if info is None:
				break

	def _download_with_rtmpdump(self, filename, url, player_url):
		self.report_destination(filename)
		tmpfilename = self.temp_name(filename)

		# Check for rtmpdump first
		try:
			subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
		except (OSError, IOError):
			self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
			return False

		# Download using rtmpdump. rtmpdump returns exit code 2 when
		# the connection was interrupted and resuming appears to be
		# possible. This is part of rtmpdump's normal usage, AFAIK.
		basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename]
		retval = subprocess.call(basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)])
		while retval == 2 or retval == 1:
			prevsize = os.path.getsize(tmpfilename)
			self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
			time.sleep(5.0) # This seems to be needed
			retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
			cursize = os.path.getsize(tmpfilename)
			if prevsize == cursize and retval == 1:
				break
			# Some rtmp streams seem to abort after ~ 99.8%. Don't complain for those
			if prevsize == cursize and retval == 2 and cursize > 1024:
				self.to_screen(u'\r[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
				retval = 0
				break
		if retval == 0:
			self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(tmpfilename))
			self.try_rename(tmpfilename, filename)
			return True
		else:
			self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval)
			return False

	def _do_download(self, filename, info_dict):
		url = info_dict['url']
		player_url = info_dict.get('player_url', None)

		# Check file already present
		if self.params.get('continuedl', False) and os.path.isfile(filename) and not self.params.get('nopart', False):
			self.report_file_already_downloaded(filename)
			return True

		# Attempt to download using rtmpdump
		if url.startswith('rtmp'):
			return self._download_with_rtmpdump(filename, url, player_url)

		tmpfilename = self.temp_name(filename)
		stream = None

		# Do not include the Accept-Encoding header
		headers = {'Youtubedl-no-compression': 'True'}
		basic_request = urllib2.Request(url, None, headers)
		request = urllib2.Request(url, None, headers)

		# Establish possible resume length
		if os.path.isfile(tmpfilename):
			resume_len = os.path.getsize(tmpfilename)
		else:
			resume_len = 0

		open_mode = 'wb'
		if resume_len != 0:
			if self.params.get('continuedl', False):
				self.report_resuming_byte(resume_len)
				request.add_header('Range', 'bytes=%d-' % resume_len)
				open_mode = 'ab'
			else:
				resume_len = 0

		count = 0
		retries = self.params.get('retries', 0)
		while count <= retries:
			# Establish connection
			try:
				data = urllib2.urlopen(request)
				break
			except (urllib2.HTTPError, ), err:
				if (err.code < 500 or err.code >= 600) and err.code != 416:
					# Unexpected HTTP error
					raise
				elif err.code == 416:
					# Unable to resume (requested range not satisfiable)
					try:
						# Open the connection again without the range header
						data = urllib2.urlopen(basic_request)
						content_length = data.info()['Content-Length']
					except (urllib2.HTTPError, ), err:
						if err.code < 500 or err.code >= 600:
							raise
					else:
						# Examine the reported length
						if (content_length is not None and
								(resume_len - 100 < long(content_length) < resume_len + 100)):
							# The file had already been fully downloaded.
							# Explanation of the above condition: in issue #175 it was revealed that
							# YouTube sometimes adds or removes a few bytes from the end of the file,
							# changing the file size slightly and causing problems for some users. So
							# I decided to implement a suggested change and consider the file
							# completely downloaded if the file size differs less than 100 bytes from
							# the one in the hard drive.
							self.report_file_already_downloaded(filename)
							self.try_rename(tmpfilename, filename)
							return True
						else:
							# The length does not match, we start the download over
							self.report_unable_to_resume()
							open_mode = 'wb'
							break
			# Retry
			count += 1
			if count <= retries:
				self.report_retry(count, retries)

		if count > retries:
			self.trouble(u'ERROR: giving up after %s retries' % retries)
			return False

		data_len = data.info().get('Content-length', None)
		if data_len is not None:
			data_len = long(data_len) + resume_len
		data_len_str = self.format_bytes(data_len)
		byte_counter = 0 + resume_len
		block_size = 1024
		start = time.time()
		while True:
			# Download and write
			before = time.time()
			data_block = data.read(block_size)
			after = time.time()
			if len(data_block) == 0:
				break
			byte_counter += len(data_block)

			# Open file just in time
			if stream is None:
				try:
					(stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
					assert stream is not None
					filename = self.undo_temp_name(tmpfilename)
					self.report_destination(filename)
				except (OSError, IOError), err:
					self.trouble(u'ERROR: unable to open for writing: %s' % str(err))
					return False
			try:
				stream.write(data_block)
			except (IOError, OSError), err:
				self.trouble(u'\nERROR: unable to write data: %s' % str(err))
				return False
			block_size = self.best_block_size(after - before, len(data_block))

			# Progress message
			speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len)
			if data_len is None:
				self.report_progress('Unknown %', data_len_str, speed_str, 'Unknown ETA')
			else:
				percent_str = self.calc_percent(byte_counter, data_len)
				eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
				self.report_progress(percent_str, data_len_str, speed_str, eta_str)

			# Apply rate limit
			self.slow_down(start, byte_counter - resume_len)

		if stream is None:
			self.trouble(u'\nERROR: Did not get any data blocks')
			return False
		stream.close()
		self.report_finish()
		if data_len is not None and byte_counter != data_len:
			raise ContentTooShortError(byte_counter, long(data_len))
		self.try_rename(tmpfilename, filename)

		# Update file modification time
		if self.params.get('updatetime', True):
			info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))

		return True

class InfoExtractor(object):
	"""Information Extractor class.

	Information extractors are the classes that, given a URL, extract
	information from the video (or videos) the URL refers to. This
	information includes the real video URL, the video title and simplified
	title, author and others. The information is stored in a dictionary
	which is then passed to the FileDownloader. The FileDownloader
	processes this information possibly downloading the video to the file
	system, among other possible outcomes. The dictionaries must include
	the following fields:

	id:          Video identifier.
	url:         Final video URL.
	uploader:    Nickname of the video uploader.
	title:       Literal title.
	stitle:      Simplified title.
	ext:         Video filename extension.
	format:      Video format.
	player_url:  SWF Player URL (may be None).

	The following fields are optional. Their primary purpose is to allow
	youtube-dl to serve as the backend for a video search function, such
	as the one in youtube2mp3. They are only used when their respective
	forced printing functions are called:

	thumbnail:   Full URL to a video thumbnail image.
	description: One-line video description.

	Subclasses of this one should re-define the _real_initialize() and
	_real_extract() methods and define a _VALID_URL regexp.
	Probably, they should also be added to the list of extractors.
	"""

	_ready = False
	_downloader = None

	def __init__(self, downloader=None):
		"""Constructor. Receives an optional downloader."""
		self._ready = False
		self.set_downloader(downloader)

	def suitable(self, url):
		"""Receives a URL and returns True if suitable for this IE."""
		return re.match(self._VALID_URL, url) is not None

	def initialize(self):
		"""Initializes an instance (authentication, etc)."""
		if not self._ready:
			self._real_initialize()
			self._ready = True

	def extract(self, url):
		"""Extracts URL information and returns it in list of dicts."""
		self.initialize()
		return self._real_extract(url)

	def set_downloader(self, downloader):
		"""Sets the downloader for this IE."""
		self._downloader = downloader

	def _real_initialize(self):
		"""Real initialization process. Redefine in subclasses."""
		pass

	def _real_extract(self, url):
		"""Real extraction process. Redefine in subclasses."""
		pass

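# Illustrative note (not part of the original script): a minimal extractor subclass,
# following the contract described in the docstring above, would look roughly like the
# sketch below (ExampleIE and its URLs are hypothetical, for illustration only):
#
#   class ExampleIE(InfoExtractor):
#       _VALID_URL = r'(?:http://)?(?:www\.)?example\.com/watch/(\d+)'
#       IE_NAME = u'example'
#
#       def _real_extract(self, url):
#           video_id = re.match(self._VALID_URL, url).group(1)
#           self._downloader.increment_downloads()
#           self._downloader.process_info({
#               'id': video_id, 'url': u'http://example.com/%s.mp4' % video_id,
#               'uploader': u'NA', 'upload_date': u'NA', 'title': u'Example',
#               'stitle': u'Example', 'ext': u'mp4', 'format': u'NA',
#               'player_url': None,
#           })
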
class YoutubeIE(InfoExtractor):
	"""Information extractor for youtube.com."""

	_VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?:(?:(?:v|embed|e)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=))?)?([0-9A-Za-z_-]+)(?(1).+)?$'
	_LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
	_LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en'
	_AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
	_NETRC_MACHINE = 'youtube'
	# Listed in order of quality
	_available_formats = ['38', '37', '45', '22', '43', '35', '34', '18', '6', '5', '17', '13']
	_video_extensions = {
		'13': '3gp',
		'17': 'mp4',
		'18': 'mp4',
		'22': 'mp4',
		'37': 'mp4',
		'38': 'video', # You actually don't know if this will be MOV, AVI or whatever
		'43': 'webm',
		'45': 'webm',
	}
	IE_NAME = u'youtube'

	def report_lang(self):
		"""Report attempt to set language."""
		self._downloader.to_screen(u'[youtube] Setting language')

	def report_login(self):
		"""Report attempt to log in."""
		self._downloader.to_screen(u'[youtube] Logging in')

	def report_age_confirmation(self):
		"""Report attempt to confirm age."""
		self._downloader.to_screen(u'[youtube] Confirming age')

	def report_video_webpage_download(self, video_id):
		"""Report attempt to download video webpage."""
		self._downloader.to_screen(u'[youtube] %s: Downloading video webpage' % video_id)

	def report_video_info_webpage_download(self, video_id):
		"""Report attempt to download video info webpage."""
		self._downloader.to_screen(u'[youtube] %s: Downloading video info webpage' % video_id)

	def report_information_extraction(self, video_id):
		"""Report attempt to extract video information."""
		self._downloader.to_screen(u'[youtube] %s: Extracting video information' % video_id)

	def report_unavailable_format(self, video_id, format):
		"""Report that the requested format is not available."""
		self._downloader.to_screen(u'[youtube] %s: Format %s not available' % (video_id, format))

	def report_rtmp_download(self):
		"""Indicate the download will use the RTMP protocol."""
		self._downloader.to_screen(u'[youtube] RTMP download detected')

	def _real_initialize(self):
		if self._downloader is None:
			return

		username = None
		password = None
		downloader_params = self._downloader.params

		# Attempt to use provided username and password or .netrc data
		if downloader_params.get('username', None) is not None:
			username = downloader_params['username']
			password = downloader_params['password']
		elif downloader_params.get('usenetrc', False):
			try:
				info = netrc.netrc().authenticators(self._NETRC_MACHINE)
				if info is not None:
					username = info[0]
					password = info[2]
				else:
					raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
			except (IOError, netrc.NetrcParseError), err:
				self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
				return

		# Set language
		request = urllib2.Request(self._LANG_URL)
		try:
			self.report_lang()
			urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err))
			return

		# No authentication to be performed
		if username is None:
			return

		# Log in
		login_form = {
			'current_form': 'loginForm',
			'next': '/',
			'action_login': 'Log In',
			'username': username,
			'password': password,
		}
		request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form))
		try:
			self.report_login()
			login_results = urllib2.urlopen(request).read()
			if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
				self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
				return
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
			return

		# Confirm age
		age_form = {
			'next_url': '/',
			'action_confirm': 'Confirm',
		}
		request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form))
		try:
			self.report_age_confirmation()
			age_results = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
			return

	def _real_extract(self, url):
		# Extract video id from URL
		mobj = re.match(self._VALID_URL, url)
		if mobj is None:
			self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
			return
		video_id = mobj.group(2)

		# Get video webpage
		self.report_video_webpage_download(video_id)
		request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en&amp;has_verified=1' % video_id)
		try:
			video_webpage = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
			return

		# Attempt to extract SWF player URL
		mobj = re.search(r'swfConfig.*?"(http:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
		if mobj is not None:
			player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
		else:
			player_url = None

		# Get video info
		self.report_video_info_webpage_download(video_id)
		for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
			video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
					% (video_id, el_type))
			request = urllib2.Request(video_info_url)
			try:
				video_info_webpage = urllib2.urlopen(request).read()
				video_info = parse_qs(video_info_webpage)
				if 'token' in video_info:
					break
			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
				self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
				return
		if 'token' not in video_info:
			if 'reason' in video_info:
				self._downloader.trouble(u'ERROR: YouTube said: %s' % video_info['reason'][0].decode('utf-8'))
			else:
				self._downloader.trouble(u'ERROR: "token" parameter not in video info for unknown reason')
			return

		# Start extracting information
		self.report_information_extraction(video_id)

		# uploader
		if 'author' not in video_info:
			self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
			return
		video_uploader = urllib.unquote_plus(video_info['author'][0])

		# title
		if 'title' not in video_info:
			self._downloader.trouble(u'ERROR: unable to extract video title')
			return
		video_title = urllib.unquote_plus(video_info['title'][0])
		video_title = video_title.decode('utf-8')
		video_title = sanitize_title(video_title)

		# simplified title
		simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
		simple_title = simple_title.strip(ur'_')

		# thumbnail image
		if 'thumbnail_url' not in video_info:
			self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
			video_thumbnail = ''
		else: # don't panic if we can't find it
			video_thumbnail = urllib.unquote_plus(video_info['thumbnail_url'][0])

		# upload date
		upload_date = u'NA'
		mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
		if mobj is not None:
			upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
			format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y']
			for expression in format_expressions:
				try:
					upload_date = datetime.datetime.strptime(upload_date, expression).strftime('%Y%m%d')
				except:
					pass

		# description
		try:
			lxml.etree
		except NameError:
			video_description = u'No description available.'
			if self._downloader.params.get('forcedescription', False) or self._downloader.params.get('writedescription', False):
				mobj = re.search(r'<meta name="description" content="(.*)"(?:\s*/)?>', video_webpage)
				if mobj is not None:
					video_description = mobj.group(1).decode('utf-8')
		else:
			html_parser = lxml.etree.HTMLParser(encoding='utf-8')
			vwebpage_doc = lxml.etree.parse(StringIO.StringIO(video_webpage), html_parser)
			video_description = u''.join(vwebpage_doc.xpath('id("eow-description")//text()'))
			# TODO use another parser

		# token
		video_token = urllib.unquote_plus(video_info['token'][0])

		# Decide which formats to download
		req_format = self._downloader.params.get('format', None)

		if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
			self.report_rtmp_download()
			video_url_list = [(None, video_info['conn'][0])]
		elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1:
			url_data_strs = video_info['url_encoded_fmt_stream_map'][0].split(',')
			url_data = [parse_qs(uds) for uds in url_data_strs]
			url_data = filter(lambda ud: 'itag' in ud and 'url' in ud, url_data)
			url_map = dict((ud['itag'][0], ud['url'][0]) for ud in url_data)

			format_limit = self._downloader.params.get('format_limit', None)
			if format_limit is not None and format_limit in self._available_formats:
				format_list = self._available_formats[self._available_formats.index(format_limit):]
			else:
				format_list = self._available_formats
			existing_formats = [x for x in format_list if x in url_map]
			if len(existing_formats) == 0:
				self._downloader.trouble(u'ERROR: no known formats available for video')
				return
			if req_format is None or req_format == 'best':
				video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
			elif req_format == 'worst':
				video_url_list = [(existing_formats[len(existing_formats)-1], url_map[existing_formats[len(existing_formats)-1]])] # worst quality
			elif req_format in ('-1', 'all'):
				video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
			else:
				# Specific formats. We pick the first in a slash-delimited sequence.
				# For example, if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
				req_formats = req_format.split('/')
				video_url_list = None
				for rf in req_formats:
					if rf in url_map:
						video_url_list = [(rf, url_map[rf])]
						break
				if video_url_list is None:
					self._downloader.trouble(u'ERROR: requested format not available')
					return
		else:
			self._downloader.trouble(u'ERROR: no conn or url_encoded_fmt_stream_map information found in video info')
			return

		for format_param, video_real_url in video_url_list:
			# At this point we have a new video
			self._downloader.increment_downloads()

			# Extension
			video_extension = self._video_extensions.get(format_param, 'flv')

			try:
				# Process video information
				self._downloader.process_info({
					'id': video_id.decode('utf-8'),
					'url': video_real_url.decode('utf-8'),
					'uploader': video_uploader.decode('utf-8'),
					'upload_date': upload_date,
					'title': video_title,
					'stitle': simple_title,
					'ext': video_extension.decode('utf-8'),
					'format': (format_param is None and u'NA' or format_param.decode('utf-8')),
					'thumbnail': video_thumbnail.decode('utf-8'),
					'description': video_description,
					'player_url': player_url,
				})
			except UnavailableVideoError, err:
				self._downloader.trouble(u'\nERROR: unable to download video')

020f7150
RG
1377class MetacafeIE(InfoExtractor):
1378 """Information Extractor for metacafe.com."""
1379
1380 _VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
2546e767 1381 _DISCLAIMER = 'http://www.metacafe.com/family_filter/'
dbccb6cd 1382 _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
020f7150 1383 _youtube_ie = None
f3098c4d 1384 IE_NAME = u'metacafe'
020f7150
RG
1385
1386 def __init__(self, youtube_ie, downloader=None):
1387 InfoExtractor.__init__(self, downloader)
1388 self._youtube_ie = youtube_ie
1389
020f7150
RG
1390 def report_disclaimer(self):
1391 """Report disclaimer retrieval."""
331ce0a0 1392 self._downloader.to_screen(u'[metacafe] Retrieving disclaimer')
020f7150
RG
1393
1394 def report_age_confirmation(self):
1395 """Report attempt to confirm age."""
331ce0a0 1396 self._downloader.to_screen(u'[metacafe] Confirming age')
d3975459 1397
020f7150
RG
1398 def report_download_webpage(self, video_id):
1399 """Report webpage download."""
331ce0a0 1400 self._downloader.to_screen(u'[metacafe] %s: Downloading webpage' % video_id)
d3975459 1401
020f7150
RG
1402 def report_extraction(self, video_id):
1403 """Report information extraction."""
331ce0a0 1404 self._downloader.to_screen(u'[metacafe] %s: Extracting information' % video_id)
020f7150
RG
1405
1406 def _real_initialize(self):
1407 # Retrieve disclaimer
1987c232 1408 request = urllib2.Request(self._DISCLAIMER)
020f7150
RG
1409 try:
1410 self.report_disclaimer()
1411 disclaimer = urllib2.urlopen(request).read()
1412 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
147753eb 1413 self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % str(err))
020f7150
RG
1414 return
1415
1416 # Confirm age
1417 disclaimer_form = {
2546e767 1418 'filters': '0',
020f7150
RG
1419 'submit': "Continue - I'm over 18",
1420 }
1987c232 1421 request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form))
020f7150
RG
1422 try:
1423 self.report_age_confirmation()
1424 disclaimer = urllib2.urlopen(request).read()
1425 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
147753eb 1426 self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
020f7150 1427 return
d3975459 1428
020f7150
RG
1429 def _real_extract(self, url):
1430 # Extract id and simplified title from URL
1431 mobj = re.match(self._VALID_URL, url)
1432 if mobj is None:
147753eb 1433 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
6f21f686 1434 return
020f7150
RG
1435
1436 video_id = mobj.group(1)
1437
1438 # Check if video comes from YouTube
1439 mobj2 = re.match(r'^yt-(.*)$', video_id)
1440 if mobj2 is not None:
6f21f686
RG
1441 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % mobj2.group(1))
1442 return
020f7150 1443
df372a65 1444 # At this point we have a new video
9bf7fa52 1445 self._downloader.increment_downloads()
df372a65 1446
020f7150 1447 simple_title = mobj.group(2).decode('utf-8')
020f7150
RG
1448
1449 # Retrieve video webpage to extract further information
1450 request = urllib2.Request('http://www.metacafe.com/watch/%s/' % video_id)
1451 try:
1452 self.report_download_webpage(video_id)
1453 webpage = urllib2.urlopen(request).read()
1454 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
147753eb 1455 self._downloader.trouble(u'ERROR: unable to retrieve video webpage: %s' % str(err))
6f21f686 1456 return
020f7150
RG
1457
1458 # Extract URL, uploader and title from webpage
1459 self.report_extraction(video_id)
18963a36 1460 mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
c6c555cf
RG
1461 if mobj is not None:
1462 mediaURL = urllib.unquote(mobj.group(1))
6b57e8c5 1463 video_extension = mediaURL[-3:]
d3975459 1464
c6c555cf
RG
1465 # Extract gdaKey if available
1466 mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage)
1467 if mobj is None:
1468 video_url = mediaURL
1469 else:
1470 gdaKey = mobj.group(1)
1471 video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)
109626fc 1472 else:
c6c555cf
RG
1473 mobj = re.search(r' name="flashvars" value="(.*?)"', webpage)
1474 if mobj is None:
1475 self._downloader.trouble(u'ERROR: unable to extract media URL')
1476 return
1477 vardict = parse_qs(mobj.group(1))
1478 if 'mediaData' not in vardict:
1479 self._downloader.trouble(u'ERROR: unable to extract media URL')
1480 return
1481 mobj = re.search(r'"mediaURL":"(http.*?)","key":"(.*?)"', vardict['mediaData'][0])
1482 if mobj is None:
1483 self._downloader.trouble(u'ERROR: unable to extract media URL')
1484 return
6b57e8c5
RG
1485 mediaURL = mobj.group(1).replace('\\/', '/')
1486 video_extension = mediaURL[-3:]
1487 video_url = '%s?__gda__=%s' % (mediaURL, mobj.group(2))
020f7150 1488
2546e767 1489 mobj = re.search(r'(?im)<title>(.*) - Video</title>', webpage)
020f7150 1490 if mobj is None:
147753eb 1491 self._downloader.trouble(u'ERROR: unable to extract title')
6f21f686 1492 return
020f7150 1493 video_title = mobj.group(1).decode('utf-8')
490fd7ae 1494 video_title = sanitize_title(video_title)
020f7150 1495
29f07568 1496 mobj = re.search(r'(?ms)By:\s*<a .*?>(.+?)<', webpage)
020f7150 1497 if mobj is None:
147753eb 1498 self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
6f21f686 1499 return
dbccb6cd 1500 video_uploader = mobj.group(1)
020f7150 1501
42bcd27d 1502 try:
1503 # Process video information
1504 self._downloader.process_info({
1505 'id': video_id.decode('utf-8'),
1506 'url': video_url.decode('utf-8'),
1507 'uploader': video_uploader.decode('utf-8'),
138b11f3 1508 'upload_date': u'NA',
42bcd27d 1509 'title': video_title,
1510 'stitle': simple_title,
1511 'ext': video_extension.decode('utf-8'),
6ba562b0 1512 'format': u'NA',
e616ec0c 1513 'player_url': None,
42bcd27d 1514 })
73f4e7af 1515 except UnavailableVideoError:
09cc744c 1516 self._downloader.trouble(u'\nERROR: unable to download video')
020f7150 1517
25af2bce 1518
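# ----------------------------------------------------------------------
# Illustrative sketch (not part of the original extractor): the flashvars
# fallback above digs a "mediaData" JSON-ish blob out of the page and then
# applies a plain regex to it. The mediaData value below is made up; only
# the regex, the "\/" unescaping and the __gda__ URL construction are taken
# from MetacafeIE._real_extract.
def _metacafe_mediadata_example():
    import re
    # hypothetical mediaData value, as it would look after parse_qs
    media_data = '{"mediaURL":"http:\\/\\/v.example.com\\/clip.flv","key":"abc123"}'
    mobj = re.search(r'"mediaURL":"(http.*?)","key":"(.*?)"', media_data)
    mediaURL = mobj.group(1).replace('\\/', '/')        # http://v.example.com/clip.flv
    return '%s?__gda__=%s' % (mediaURL, mobj.group(2))  # ...clip.flv?__gda__=abc123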
4135fa45
WB
1519class DailymotionIE(InfoExtractor):
1520 """Information Extractor for Dailymotion"""
1521
1522 _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^_/]+)_([^/]+)'
f3098c4d 1523 IE_NAME = u'dailymotion'
4135fa45
WB
1524
1525 def __init__(self, downloader=None):
1526 InfoExtractor.__init__(self, downloader)
1527
4135fa45
WB
1528 def report_download_webpage(self, video_id):
1529 """Report webpage download."""
331ce0a0 1530 self._downloader.to_screen(u'[dailymotion] %s: Downloading webpage' % video_id)
d3975459 1531
4135fa45
WB
1532 def report_extraction(self, video_id):
1533 """Report information extraction."""
331ce0a0 1534 self._downloader.to_screen(u'[dailymotion] %s: Extracting information' % video_id)
4135fa45
WB
1535
1536 def _real_initialize(self):
1537 return
1538
4135fa45
WB
1539 def _real_extract(self, url):
1540 # Extract id and simplified title from URL
1541 mobj = re.match(self._VALID_URL, url)
1542 if mobj is None:
1543 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
1544 return
1545
df372a65 1546 # At this point we have a new video
9bf7fa52 1547 self._downloader.increment_downloads()
4135fa45
WB
1548 video_id = mobj.group(1)
1549
1550 simple_title = mobj.group(2).decode('utf-8')
1551 video_extension = 'flv'
1552
1553 # Retrieve video webpage to extract further information
1554 request = urllib2.Request(url)
62a29bbf 1555 request.add_header('Cookie', 'family_filter=off')
4135fa45
WB
1556 try:
1557 self.report_download_webpage(video_id)
1558 webpage = urllib2.urlopen(request).read()
1559 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1560 self._downloader.trouble(u'ERROR: unable to retrieve video webpage: %s' % str(err))
1561 return
1562
1563 # Extract URL, uploader and title from webpage
1564 self.report_extraction(video_id)
62a29bbf 1565 mobj = re.search(r'(?i)addVariable\(\"sequence\"\s*,\s*\"([^\"]+?)\"\)', webpage)
4135fa45
WB
1566 if mobj is None:
1567 self._downloader.trouble(u'ERROR: unable to extract media URL')
1568 return
62a29bbf 1569 sequence = urllib.unquote(mobj.group(1))
1570 mobj = re.search(r',\"sdURL\"\:\"([^\"]+?)\",', sequence)
1571 if mobj is None:
1572 self._downloader.trouble(u'ERROR: unable to extract media URL')
1573 return
1574 mediaURL = urllib.unquote(mobj.group(1)).replace('\\', '')
4135fa45
WB
1575
1576 # if needed, prepend http://www.dailymotion.com/ when the URL is relative
1577
1578 video_url = mediaURL
1579
62a29bbf 1580 mobj = re.search(r'(?im)<title>Dailymotion\s*-\s*(.+)\s*-\s*[^<]+?</title>', webpage)
4135fa45
WB
1581 if mobj is None:
1582 self._downloader.trouble(u'ERROR: unable to extract title')
1583 return
1584 video_title = mobj.group(1).decode('utf-8')
1585 video_title = sanitize_title(video_title)
1586
62a29bbf 1587 mobj = re.search(r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a></span>', webpage)
4135fa45
WB
1588 if mobj is None:
1589 self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
1590 return
1591 video_uploader = mobj.group(1)
1592
1593 try:
1594 # Process video information
1595 self._downloader.process_info({
1596 'id': video_id.decode('utf-8'),
1597 'url': video_url.decode('utf-8'),
1598 'uploader': video_uploader.decode('utf-8'),
138b11f3 1599 'upload_date': u'NA',
4135fa45
WB
1600 'title': video_title,
1601 'stitle': simple_title,
1602 'ext': video_extension.decode('utf-8'),
1603 'format': u'NA',
1604 'player_url': None,
1605 })
73f4e7af 1606 except UnavailableVideoError:
09cc744c 1607 self._downloader.trouble(u'\nERROR: unable to download video')
4135fa45 1608
c0a10ca8 1609
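# ----------------------------------------------------------------------
# Illustrative sketch (not part of the original extractor): DailymotionIE
# reads the "sequence" flashvar and pulls the "sdURL" entry out of it with
# a regex. The sequence value below is invented; the regex and the
# unquote/replace steps mirror DailymotionIE._real_extract.
def _dailymotion_sdurl_example():
    import re, urllib
    # hypothetical, already URL-decoded "sequence" flashvar
    sequence = '{"autoplay":1,"sdURL":"http:\\/\\/proxy.example.com\\/video.flv","duration":42}'
    mobj = re.search(r',\"sdURL\"\:\"([^\"]+?)\",', sequence)
    return urllib.unquote(mobj.group(1)).replace('\\', '')  # http://proxy.example.com/video.flv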
49c0028a 1610class GoogleIE(InfoExtractor):
1611 """Information extractor for video.google.com."""
1612
490fd7ae 1613 _VALID_URL = r'(?:http://)?video\.google\.(?:com(?:\.au)?|co\.(?:uk|jp|kr|cr)|ca|de|es|fr|it|nl|pl)/videoplay\?docid=([^\&]+).*'
f3098c4d 1614 IE_NAME = u'video.google'
49c0028a 1615
1616 def __init__(self, downloader=None):
1617 InfoExtractor.__init__(self, downloader)
1618
49c0028a 1619 def report_download_webpage(self, video_id):
1620 """Report webpage download."""
331ce0a0 1621 self._downloader.to_screen(u'[video.google] %s: Downloading webpage' % video_id)
49c0028a 1622
1623 def report_extraction(self, video_id):
1624 """Report information extraction."""
331ce0a0 1625 self._downloader.to_screen(u'[video.google] %s: Extracting information' % video_id)
49c0028a 1626
1627 def _real_initialize(self):
1628 return
1629
1630 def _real_extract(self, url):
1631 # Extract id from URL
1632 mobj = re.match(self._VALID_URL, url)
1633 if mobj is None:
1634 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1635 return
1636
df372a65 1637 # At this point we have a new video
9bf7fa52 1638 self._downloader.increment_downloads()
49c0028a 1639 video_id = mobj.group(1)
1640
1641 video_extension = 'mp4'
1642
1643 # Retrieve video webpage to extract further information
490fd7ae 1644 request = urllib2.Request('http://video.google.com/videoplay?docid=%s&hl=en&oe=utf-8' % video_id)
49c0028a 1645 try:
1646 self.report_download_webpage(video_id)
1647 webpage = urllib2.urlopen(request).read()
1648 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1649 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1650 return
1651
1652 # Extract URL, uploader, and title from webpage
1653 self.report_extraction(video_id)
490fd7ae
RG
1654 mobj = re.search(r"download_url:'([^']+)'", webpage)
1655 if mobj is None:
1656 video_extension = 'flv'
1657 mobj = re.search(r"(?i)videoUrl\\x3d(.+?)\\x26", webpage)
49c0028a 1658 if mobj is None:
1659 self._downloader.trouble(u'ERROR: unable to extract media URL')
1660 return
1661 mediaURL = urllib.unquote(mobj.group(1))
1662 mediaURL = mediaURL.replace('\\x3d', '\x3d')
1663 mediaURL = mediaURL.replace('\\x26', '\x26')
1664
1665 video_url = mediaURL
1666
1667 mobj = re.search(r'<title>(.*)</title>', webpage)
1668 if mobj is None:
1669 self._downloader.trouble(u'ERROR: unable to extract title')
1670 return
1671 video_title = mobj.group(1).decode('utf-8')
490fd7ae 1672 video_title = sanitize_title(video_title)
31cbdaaf 1673 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
49c0028a 1674
7e58d568
RG
1675 # Extract video description
1676 mobj = re.search(r'<span id=short-desc-content>([^<]*)</span>', webpage)
1677 if mobj is None:
1678 self._downloader.trouble(u'ERROR: unable to extract video description')
1679 return
1680 video_description = mobj.group(1).decode('utf-8')
1681 if not video_description:
1682 video_description = 'No description available.'
1683
1684 # Extract video thumbnail
1685 if self._downloader.params.get('forcethumbnail', False):
1686 request = urllib2.Request('http://video.google.com/videosearch?q=%s+site:video.google.com&hl=en' % abs(int(video_id)))
1687 try:
1688 webpage = urllib2.urlopen(request).read()
1689 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1690 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1691 return
1692 mobj = re.search(r'<img class=thumbnail-img (?:.* )?src=(http.*)>', webpage)
1693 if mobj is None:
1694 self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
1695 return
1696 video_thumbnail = mobj.group(1)
1697 else: # we need something to pass to process_info
1698 video_thumbnail = ''
1699
49c0028a 1700 try:
1701 # Process video information
1702 self._downloader.process_info({
1703 'id': video_id.decode('utf-8'),
1704 'url': video_url.decode('utf-8'),
6ba562b0 1705 'uploader': u'NA',
138b11f3 1706 'upload_date': u'NA',
490fd7ae 1707 'title': video_title,
31cbdaaf 1708 'stitle': simple_title,
49c0028a 1709 'ext': video_extension.decode('utf-8'),
6ba562b0 1710 'format': u'NA',
e616ec0c 1711 'player_url': None,
49c0028a 1712 })
73f4e7af 1713 except UnavailableVideoError:
09cc744c 1714 self._downloader.trouble(u'\nERROR: unable to download video')
49c0028a 1715
1716
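# ----------------------------------------------------------------------
# Illustrative sketch (not part of the original extractor): when GoogleIE
# falls back to the Flash player it finds a JS-escaped media URL ("\x3d"
# for '=', "\x26" for '&'). The page fragment below is made up; the regex
# and the unquote/replace calls are the ones used in GoogleIE._real_extract.
def _google_flash_url_example():
    import re, urllib
    webpage = r"videoUrl\x3dhttp://vp.video.example.com/videoplayback?id\x3d0042\x26ip\x3d0.0.0.0"
    mobj = re.search(r"(?i)videoUrl\\x3d(.+?)\\x26", webpage)
    mediaURL = urllib.unquote(mobj.group(1))
    mediaURL = mediaURL.replace('\\x3d', '\x3d')  # '=' -> ...videoplayback?id=0042
    mediaURL = mediaURL.replace('\\x26', '\x26')  # '&' (a no-op in this sample)
    return mediaURL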
1717class PhotobucketIE(InfoExtractor):
1718 """Information extractor for photobucket.com."""
1719
1720 _VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)'
f3098c4d 1721 IE_NAME = u'photobucket'
49c0028a 1722
1723 def __init__(self, downloader=None):
1724 InfoExtractor.__init__(self, downloader)
1725
49c0028a 1726 def report_download_webpage(self, video_id):
1727 """Report webpage download."""
331ce0a0 1728 self._downloader.to_screen(u'[photobucket] %s: Downloading webpage' % video_id)
49c0028a 1729
1730 def report_extraction(self, video_id):
1731 """Report information extraction."""
331ce0a0 1732 self._downloader.to_screen(u'[photobucket] %s: Extracting information' % video_id)
49c0028a 1733
1734 def _real_initialize(self):
1735 return
1736
1737 def _real_extract(self, url):
1738 # Extract id from URL
1739 mobj = re.match(self._VALID_URL, url)
1740 if mobj is None:
1741 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1742 return
1743
df372a65 1744 # At this point we have a new video
9bf7fa52 1745 self._downloader.increment_downloads()
49c0028a 1746 video_id = mobj.group(1)
1747
1748 video_extension = 'flv'
1749
1750 # Retrieve video webpage to extract further information
1751 request = urllib2.Request(url)
1752 try:
1753 self.report_download_webpage(video_id)
1754 webpage = urllib2.urlopen(request).read()
1755 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1756 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1757 return
1758
1759 # Extract URL, uploader, and title from webpage
1760 self.report_extraction(video_id)
1761 mobj = re.search(r'<link rel="video_src" href=".*\?file=([^"]+)" />', webpage)
1762 if mobj is None:
1763 self._downloader.trouble(u'ERROR: unable to extract media URL')
1764 return
1765 mediaURL = urllib.unquote(mobj.group(1))
1766
1767 video_url = mediaURL
1768
1769 mobj = re.search(r'<title>(.*) video by (.*) - Photobucket</title>', webpage)
1770 if mobj is None:
1771 self._downloader.trouble(u'ERROR: unable to extract title')
1772 return
1773 video_title = mobj.group(1).decode('utf-8')
490fd7ae 1774 video_title = sanitize_title(video_title)
31cbdaaf 1775 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
49c0028a 1776
1777 video_uploader = mobj.group(2).decode('utf-8')
1778
1779 try:
1780 # Process video information
1781 self._downloader.process_info({
1782 'id': video_id.decode('utf-8'),
1783 'url': video_url.decode('utf-8'),
490fd7ae 1784 'uploader': video_uploader,
138b11f3 1785 'upload_date': u'NA',
490fd7ae 1786 'title': video_title,
31cbdaaf 1787 'stitle': simple_title,
490fd7ae 1788 'ext': video_extension.decode('utf-8'),
6ba562b0 1789 'format': u'NA',
e616ec0c 1790 'player_url': None,
490fd7ae 1791 })
73f4e7af 1792 except UnavailableVideoError:
09cc744c 1793 self._downloader.trouble(u'\nERROR: unable to download video')
490fd7ae
RG
1794
1795
61945318
RG
1796class YahooIE(InfoExtractor):
1797 """Information extractor for video.yahoo.com."""
1798
1799 # _VALID_URL matches all Yahoo! Video URLs
1800 # _VPAGE_URL matches only the extractable '/watch/' URLs
1801 _VALID_URL = r'(?:http://)?(?:[a-z]+\.)?video\.yahoo\.com/(?:watch|network)/([0-9]+)(?:/|\?v=)([0-9]+)(?:[#\?].*)?'
1802 _VPAGE_URL = r'(?:http://)?video\.yahoo\.com/watch/([0-9]+)/([0-9]+)(?:[#\?].*)?'
f3098c4d 1803 IE_NAME = u'video.yahoo'
61945318
RG
1804
1805 def __init__(self, downloader=None):
1806 InfoExtractor.__init__(self, downloader)
1807
61945318
RG
1808 def report_download_webpage(self, video_id):
1809 """Report webpage download."""
331ce0a0 1810 self._downloader.to_screen(u'[video.yahoo] %s: Downloading webpage' % video_id)
61945318
RG
1811
1812 def report_extraction(self, video_id):
1813 """Report information extraction."""
331ce0a0 1814 self._downloader.to_screen(u'[video.yahoo] %s: Extracting information' % video_id)
61945318
RG
1815
1816 def _real_initialize(self):
1817 return
1818
df372a65 1819 def _real_extract(self, url, new_video=True):
61945318
RG
1820 # Extract ID from URL
1821 mobj = re.match(self._VALID_URL, url)
1822 if mobj is None:
1823 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1824 return
1825
df372a65 1826 # At this point we have a new video
9bf7fa52 1827 self._downloader.increment_downloads()
61945318
RG
1828 video_id = mobj.group(2)
1829 video_extension = 'flv'
1830
1831 # Rewrite valid but non-extractable URLs as
1832 # extractable English language /watch/ URLs
1833 if re.match(self._VPAGE_URL, url) is None:
1834 request = urllib2.Request(url)
1835 try:
1836 webpage = urllib2.urlopen(request).read()
1837 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1838 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1839 return
1840
1841 mobj = re.search(r'\("id", "([0-9]+)"\);', webpage)
1842 if mobj is None:
1843 self._downloader.trouble(u'ERROR: Unable to extract id field')
1844 return
1845 yahoo_id = mobj.group(1)
1846
1847 mobj = re.search(r'\("vid", "([0-9]+)"\);', webpage)
1848 if mobj is None:
1849 self._downloader.trouble(u'ERROR: Unable to extract vid field')
1850 return
1851 yahoo_vid = mobj.group(1)
1852
1853 url = 'http://video.yahoo.com/watch/%s/%s' % (yahoo_vid, yahoo_id)
df372a65 1854 return self._real_extract(url, new_video=False)
61945318
RG
1855
1856 # Retrieve video webpage to extract further information
1857 request = urllib2.Request(url)
1858 try:
1859 self.report_download_webpage(video_id)
1860 webpage = urllib2.urlopen(request).read()
1861 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1862 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1863 return
1864
1865 # Extract uploader and title from webpage
1866 self.report_extraction(video_id)
1867 mobj = re.search(r'<meta name="title" content="(.*)" />', webpage)
1868 if mobj is None:
1869 self._downloader.trouble(u'ERROR: unable to extract video title')
1870 return
1871 video_title = mobj.group(1).decode('utf-8')
1872 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1873
1874 mobj = re.search(r'<h2 class="ti-5"><a href="http://video\.yahoo\.com/(people|profile)/[0-9]+" beacon=".*">(.*)</a></h2>', webpage)
1875 if mobj is None:
1876 self._downloader.trouble(u'ERROR: unable to extract video uploader')
1877 return
1878 video_uploader = mobj.group(2).decode('utf-8')
1879
7e58d568
RG
1880 # Extract video thumbnail
1881 mobj = re.search(r'<link rel="image_src" href="(.*)" />', webpage)
1882 if mobj is None:
1883 self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
1884 return
1885 video_thumbnail = mobj.group(1).decode('utf-8')
1886
1887 # Extract video description
1888 mobj = re.search(r'<meta name="description" content="(.*)" />', webpage)
1889 if mobj is None:
1890 self._downloader.trouble(u'ERROR: unable to extract video description')
1891 return
1892 video_description = mobj.group(1).decode('utf-8')
c0a10ca8
F
1893 if not video_description:
1894 video_description = 'No description available.'
7e58d568 1895
61945318
RG
1896 # Extract video height and width
1897 mobj = re.search(r'<meta name="video_height" content="([0-9]+)" />', webpage)
1898 if mobj is None:
1899 self._downloader.trouble(u'ERROR: unable to extract video height')
1900 return
1901 yv_video_height = mobj.group(1)
1902
1903 mobj = re.search(r'<meta name="video_width" content="([0-9]+)" />', webpage)
1904 if mobj is None:
1905 self._downloader.trouble(u'ERROR: unable to extract video width')
1906 return
1907 yv_video_width = mobj.group(1)
1908
1909 # Retrieve video playlist to extract media URL
1910 # I'm not completely sure what all these options are, but we
1911 # seem to need most of them, otherwise the server sends a 401.
1912 yv_lg = 'R0xx6idZnW2zlrKP8xxAIR' # not sure what this represents
1913 yv_bitrate = '700' # according to Wikipedia this is hard-coded
1914 request = urllib2.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id +
c0a10ca8
F
1915 '&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height +
1916 '&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797')
61945318
RG
1917 try:
1918 self.report_download_webpage(video_id)
1919 webpage = urllib2.urlopen(request).read()
1920 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1921 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1922 return
1923
1924 # Extract media URL from playlist XML
1925 mobj = re.search(r'<STREAM APP="(http://.*)" FULLPATH="/?(/.*\.flv\?[^"]*)"', webpage)
1926 if mobj is None:
1927 self._downloader.trouble(u'ERROR: Unable to extract media URL')
1928 return
1929 video_url = urllib.unquote(mobj.group(1) + mobj.group(2)).decode('utf-8')
1930 video_url = re.sub(r'(?u)&(.+?);', htmlentity_transform, video_url)
1931
1932 try:
1933 # Process video information
1934 self._downloader.process_info({
1935 'id': video_id.decode('utf-8'),
1936 'url': video_url,
1937 'uploader': video_uploader,
138b11f3 1938 'upload_date': u'NA',
61945318
RG
1939 'title': video_title,
1940 'stitle': simple_title,
1941 'ext': video_extension.decode('utf-8'),
7e58d568
RG
1942 'thumbnail': video_thumbnail,
1943 'description': video_description,
e616ec0c 1945 'player_url': None,
61945318 1946 })
73f4e7af 1947 except UnavailableVideoError:
09cc744c 1948 self._downloader.trouble(u'\nERROR: unable to download video')
61945318
RG
1949
1950
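# ----------------------------------------------------------------------
# Illustrative sketch (not part of the original extractor): YahooIE parses
# the playlist XML with a single regex and joins the APP host with FULLPATH
# to get the final media URL. The XML line below is invented; the regex and
# the unquote call come from YahooIE._real_extract.
def _yahoo_stream_example():
    import re, urllib
    playlist_xml = '<STREAM APP="http://playlist.example.com" FULLPATH="/some/clip.flv?id=1&sig=xyz">'
    mobj = re.search(r'<STREAM APP="(http://.*)" FULLPATH="/?(/.*\.flv\?[^"]*)"', playlist_xml)
    return urllib.unquote(mobj.group(1) + mobj.group(2))  # http://playlist.example.com/some/clip.flv?id=1&sig=xyz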
92743d42
RB
1951class VimeoIE(InfoExtractor):
1952 """Information extractor for vimeo.com."""
1953
1954 # _VALID_URL matches Vimeo URLs
44c636df 1955 _VALID_URL = r'(?:https?://)?(?:(?:www|player).)?vimeo\.com/(?:groups/[^/]+/)?(?:videos?/)?([0-9]+)'
f3098c4d 1956 IE_NAME = u'vimeo'
92743d42
RB
1957
1958 def __init__(self, downloader=None):
1959 InfoExtractor.__init__(self, downloader)
1960
92743d42
RB
1961 def report_download_webpage(self, video_id):
1962 """Report webpage download."""
0ecedbdb 1963 self._downloader.to_screen(u'[vimeo] %s: Downloading webpage' % video_id)
92743d42
RB
1964
1965 def report_extraction(self, video_id):
1966 """Report information extraction."""
0ecedbdb 1967 self._downloader.to_screen(u'[vimeo] %s: Extracting information' % video_id)
92743d42
RB
1968
1969 def _real_initialize(self):
1970 return
1971
1972 def _real_extract(self, url, new_video=True):
1973 # Extract ID from URL
1974 mobj = re.match(self._VALID_URL, url)
1975 if mobj is None:
1976 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1977 return
1978
1979 # At this point we have a new video
1980 self._downloader.increment_downloads()
1981 video_id = mobj.group(1)
92743d42
RB
1982
1983 # Retrieve video webpage to extract further information
1984 request = urllib2.Request("http://vimeo.com/moogaloop/load/clip:%s" % video_id, None, std_headers)
1985 try:
1986 self.report_download_webpage(video_id)
1987 webpage = urllib2.urlopen(request).read()
1988 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1989 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1990 return
1991
f24c674b
RB
1992 # Now we begin extracting as much information as we can from what we
1993 # retrieved. First we extract the information common to all extractors,
1994 # and later we extract those that are Vimeo-specific.
92743d42 1995 self.report_extraction(video_id)
f24c674b
RB
1996
1997 # Extract title
c5a088d3 1998 mobj = re.search(r'<caption>(.*?)</caption>', webpage)
92743d42
RB
1999 if mobj is None:
2000 self._downloader.trouble(u'ERROR: unable to extract video title')
2001 return
2002 video_title = mobj.group(1).decode('utf-8')
2003 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
2004
f24c674b 2005 # Extract uploader
c5a088d3 2006 mobj = re.search(r'<uploader_url>http://vimeo.com/(.*?)</uploader_url>', webpage)
92743d42
RB
2007 if mobj is None:
2008 self._downloader.trouble(u'ERROR: unable to extract video uploader')
2009 return
2010 video_uploader = mobj.group(1).decode('utf-8')
2011
2012 # Extract video thumbnail
c5a088d3 2013 mobj = re.search(r'<thumbnail>(.*?)</thumbnail>', webpage)
92743d42
RB
2014 if mobj is None:
2015 self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
2016 return
2017 video_thumbnail = mobj.group(1).decode('utf-8')
2018
2019 # # Extract video description
2020 # mobj = re.search(r'<meta property="og:description" content="(.*)" />', webpage)
2021 # if mobj is None:
2022 # self._downloader.trouble(u'ERROR: unable to extract video description')
2023 # return
2024 # video_description = mobj.group(1).decode('utf-8')
2025 # if not video_description: video_description = 'No description available.'
2026 video_description = 'No description available.'
2027
f24c674b 2028 # Vimeo specific: extract request signature
c5a088d3 2029 mobj = re.search(r'<request_signature>(.*?)</request_signature>', webpage)
92743d42
RB
2030 if mobj is None:
2031 self._downloader.trouble(u'ERROR: unable to extract request signature')
2032 return
2033 sig = mobj.group(1).decode('utf-8')
2034
f24c674b 2035 # Vimeo specific: Extract request signature expiration
c5a088d3 2036 mobj = re.search(r'<request_signature_expires>(.*?)</request_signature_expires>', webpage)
92743d42
RB
2037 if mobj is None:
2038 self._downloader.trouble(u'ERROR: unable to extract request signature expiration')
2039 return
2040 sig_exp = mobj.group(1).decode('utf-8')
2041
2042 video_url = "http://vimeo.com/moogaloop/play/clip:%s/%s/%s" % (video_id, sig, sig_exp)
2043
2044 try:
2045 # Process video information
2046 self._downloader.process_info({
2047 'id': video_id.decode('utf-8'),
2048 'url': video_url,
2049 'uploader': video_uploader,
2050 'upload_date': u'NA',
2051 'title': video_title,
2052 'stitle': simple_title,
2fc31a48 2053 'ext': u'mp4',
92743d42
RB
2054 'thumbnail': video_thumbnail,
2055 'description': video_description,
2058 'player_url': None,
2059 })
2060 except UnavailableVideoError:
2061 self._downloader.trouble(u'ERROR: unable to download video')
2062
2063
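# ----------------------------------------------------------------------
# Illustrative sketch (not part of the original extractor): VimeoIE reads
# <request_signature> and <request_signature_expires> from the moogaloop
# XML and plugs them into the play URL template. The XML and the clip id
# below are made up; the regexes and the URL template come from
# VimeoIE._real_extract.
def _vimeo_play_url_example():
    import re
    xml = '<request_signature>deadbeef</request_signature><request_signature_expires>1300000000</request_signature_expires>'
    sig = re.search(r'<request_signature>(.*?)</request_signature>', xml).group(1)
    sig_exp = re.search(r'<request_signature_expires>(.*?)</request_signature_expires>', xml).group(1)
    return "http://vimeo.com/moogaloop/play/clip:%s/%s/%s" % ('12345', sig, sig_exp)  # '12345' is a made-up clip id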
490fd7ae
RG
2064class GenericIE(InfoExtractor):
2065 """Generic last-resort information extractor."""
2066
f3098c4d
PH
2067 _VALID_URL = r'.*'
2068 IE_NAME = u'generic'
bdb3f7a7 2069
490fd7ae
RG
2070 def __init__(self, downloader=None):
2071 InfoExtractor.__init__(self, downloader)
2072
490fd7ae
RG
2073 def report_download_webpage(self, video_id):
2074 """Report webpage download."""
331ce0a0
RG
2075 self._downloader.to_screen(u'WARNING: Falling back on generic information extractor.')
2076 self._downloader.to_screen(u'[generic] %s: Downloading webpage' % video_id)
490fd7ae
RG
2077
2078 def report_extraction(self, video_id):
2079 """Report information extraction."""
331ce0a0 2080 self._downloader.to_screen(u'[generic] %s: Extracting information' % video_id)
490fd7ae
RG
2081
2082 def _real_initialize(self):
2083 return
2084
2085 def _real_extract(self, url):
df372a65 2086 # At this point we have a new video
9bf7fa52 2087 self._downloader.increment_downloads()
df372a65 2088
490fd7ae
RG
2089 video_id = url.split('/')[-1]
2090 request = urllib2.Request(url)
2091 try:
2092 self.report_download_webpage(video_id)
2093 webpage = urllib2.urlopen(request).read()
2094 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2095 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
2096 return
2097 except ValueError, err:
2098 # since this is the last-resort InfoExtractor, if
2099 # this error is thrown, it'll be thrown here
2100 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
2101 return
2102
a9806fd8 2103 self.report_extraction(video_id)
490fd7ae
RG
2104 # Start with something easy: JW Player in SWFObject
2105 mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
2106 if mobj is None:
2107 # Broaden the search a little bit
2108 mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
2109 if mobj is None:
2110 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
2111 return
2112
2113 # It's possible that one of the regexes
2114 # matched, but returned an empty group:
2115 if mobj.group(1) is None:
2116 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
2117 return
2118
2119 video_url = urllib.unquote(mobj.group(1))
c0a10ca8 2120 video_id = os.path.basename(video_url)
490fd7ae
RG
2121
2122 # here's a fun little line of code for you:
2123 video_extension = os.path.splitext(video_id)[1][1:]
c0a10ca8 2124 video_id = os.path.splitext(video_id)[0]
490fd7ae
RG
2125
2126 # it's tempting to parse this further, but you would
2127 # have to take into account all the variations like
2128 # Video Title - Site Name
2129 # Site Name | Video Title
2130 # Video Title - Tagline | Site Name
2131 # and so on and so forth; it's just not practical
2132 mobj = re.search(r'<title>(.*)</title>', webpage)
2133 if mobj is None:
2134 self._downloader.trouble(u'ERROR: unable to extract title')
2135 return
2136 video_title = mobj.group(1).decode('utf-8')
2137 video_title = sanitize_title(video_title)
31cbdaaf 2138 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
490fd7ae
RG
2139
2140 # video uploader is domain name
2141 mobj = re.match(r'(?:https?://)?([^/]*)/.*', url)
2142 if mobj is None:
2143 self._downloader.trouble(u'ERROR: unable to extract title')
2144 return
2145 video_uploader = mobj.group(1).decode('utf-8')
2146
2147 try:
2148 # Process video information
2149 self._downloader.process_info({
2150 'id': video_id.decode('utf-8'),
2151 'url': video_url.decode('utf-8'),
2152 'uploader': video_uploader,
138b11f3 2153 'upload_date': u'NA',
490fd7ae 2154 'title': video_title,
31cbdaaf 2155 'stitle': simple_title,
49c0028a 2156 'ext': video_extension.decode('utf-8'),
6ba562b0 2157 'format': u'NA',
e616ec0c 2158 'player_url': None,
49c0028a 2159 })
73f4e7af 2160 except UnavailableVideoError, err:
09cc744c 2161 self._downloader.trouble(u'\nERROR: unable to download video')
49c0028a 2162
2163
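# ----------------------------------------------------------------------
# Illustrative sketch (not part of the original extractor): GenericIE first
# looks for a JW Player/SWFObject "file=" flashvar and then derives the
# video id and extension from the URL path. The page snippet is made up;
# the regex and the os.path gymnastics are those used in
# GenericIE._real_extract.
def _generic_fallback_example():
    import re, os.path, urllib
    webpage = "flashvars: 'autostart=1&file=http%3A//cdn.example.com/media/episode-01.mp4'"
    mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
    video_url = urllib.unquote(mobj.group(1))             # http://cdn.example.com/media/episode-01.mp4
    video_id = os.path.basename(video_url)                # episode-01.mp4
    video_extension = os.path.splitext(video_id)[1][1:]   # mp4
    video_id = os.path.splitext(video_id)[0]              # episode-01
    return video_id, video_extension, video_url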
25af2bce
RG
2164class YoutubeSearchIE(InfoExtractor):
2165 """Information Extractor for YouTube search queries."""
bdb3f7a7 2166 _VALID_URL = r'ytsearch(\d+|all)?:[\s\S]+'
25af2bce
RG
2167 _TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en'
2168 _VIDEO_INDICATOR = r'href="/watch\?v=.+?"'
304a4d85 2169 _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
25af2bce 2170 _youtube_ie = None
fd9288c3 2171 _max_youtube_results = 1000
f3098c4d 2172 IE_NAME = u'youtube:search'
25af2bce 2173
f995f712 2174 def __init__(self, youtube_ie, downloader=None):
25af2bce
RG
2175 InfoExtractor.__init__(self, downloader)
2176 self._youtube_ie = youtube_ie
d3975459 2177
25af2bce
RG
2178 def report_download_page(self, query, pagenum):
2179 """Report attempt to download playlist page with given number."""
490fd7ae 2180 query = query.decode(preferredencoding())
331ce0a0 2181 self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
25af2bce
RG
2182
2183 def _real_initialize(self):
2184 self._youtube_ie.initialize()
d3975459 2185
25af2bce 2186 def _real_extract(self, query):
bdb3f7a7 2187 mobj = re.match(self._VALID_URL, query)
25af2bce 2188 if mobj is None:
147753eb 2189 self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
6f21f686 2190 return
25af2bce
RG
2191
2192 prefix, query = query.split(':')
2193 prefix = prefix[8:]
c0a10ca8 2194 query = query.encode('utf-8')
f995f712 2195 if prefix == '':
6f21f686
RG
2196 self._download_n_results(query, 1)
2197 return
f995f712 2198 elif prefix == 'all':
6f21f686
RG
2199 self._download_n_results(query, self._max_youtube_results)
2200 return
f995f712 2201 else:
25af2bce 2202 try:
e1f18b8a 2203 n = long(prefix)
25af2bce 2204 if n <= 0:
147753eb 2205 self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
6f21f686 2206 return
257453b9 2207 elif n > self._max_youtube_results:
c0a10ca8 2208 self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)' % (self._max_youtube_results, n))
257453b9 2209 n = self._max_youtube_results
6f21f686
RG
2210 self._download_n_results(query, n)
2211 return
e1f18b8a 2212 except ValueError: # parsing prefix as integer fails
6f21f686
RG
2213 self._download_n_results(query, 1)
2214 return
25af2bce
RG
2215
2216 def _download_n_results(self, query, n):
2217 """Downloads a specified number of results for a query"""
2218
2219 video_ids = []
2220 already_seen = set()
2221 pagenum = 1
2222
2223 while True:
2224 self.report_download_page(query, pagenum)
a9633f14 2225 result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1987c232 2226 request = urllib2.Request(result_url)
25af2bce
RG
2227 try:
2228 page = urllib2.urlopen(request).read()
2229 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
147753eb 2230 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
6f21f686 2231 return
25af2bce
RG
2232
2233 # Extract video identifiers
2234 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2235 video_id = page[mobj.span()[0]:mobj.span()[1]].split('=')[2][:-1]
2236 if video_id not in already_seen:
2237 video_ids.append(video_id)
2238 already_seen.add(video_id)
2239 if len(video_ids) == n:
2240 # Specified n videos reached
25af2bce 2241 for id in video_ids:
6f21f686
RG
2242 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
2243 return
25af2bce 2244
304a4d85 2245 if re.search(self._MORE_PAGES_INDICATOR, page) is None:
25af2bce 2246 for id in video_ids:
6f21f686
RG
2247 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
2248 return
25af2bce
RG
2249
2250 pagenum = pagenum + 1
2251
c0a10ca8 2252
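# ----------------------------------------------------------------------
# Illustrative sketch (not part of the original extractors): the search IEs
# all split a query such as "ytsearch5:cute cats" on the first ':' and read
# the requested result count from whatever follows the fixed 8-character
# prefix. The query below is made up; the slicing and the fallbacks mirror
# YoutubeSearchIE._real_extract.
def _search_prefix_example(query='ytsearch5:cute cats'):
    prefix, terms = query.split(':')
    prefix = prefix[8:]          # '' -> 1 result, 'all' -> the maximum, '5' -> 5
    if prefix == '':
        n = 1
    elif prefix == 'all':
        n = 1000                 # _max_youtube_results in the extractor above
    else:
        try:
            n = long(prefix)
        except ValueError:       # a non-numeric prefix falls back to a single result
            n = 1
    return n, terms              # -> (5L, 'cute cats')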
7e58d568
RG
2253class GoogleSearchIE(InfoExtractor):
2254 """Information Extractor for Google Video search queries."""
bdb3f7a7 2255 _VALID_URL = r'gvsearch(\d+|all)?:[\s\S]+'
7e58d568
RG
2256 _TEMPLATE_URL = 'http://video.google.com/videosearch?q=%s+site:video.google.com&start=%s&hl=en'
2257 _VIDEO_INDICATOR = r'videoplay\?docid=([^\&>]+)\&'
2258 _MORE_PAGES_INDICATOR = r'<span>Next</span>'
2259 _google_ie = None
2260 _max_google_results = 1000
f3098c4d 2261 IE_NAME = u'video.google:search'
7e58d568
RG
2262
2263 def __init__(self, google_ie, downloader=None):
2264 InfoExtractor.__init__(self, downloader)
2265 self._google_ie = google_ie
d3975459 2266
7e58d568
RG
2267 def report_download_page(self, query, pagenum):
2268 """Report attempt to download playlist page with given number."""
2269 query = query.decode(preferredencoding())
331ce0a0 2270 self._downloader.to_screen(u'[video.google] query "%s": Downloading page %s' % (query, pagenum))
7e58d568
RG
2271
2272 def _real_initialize(self):
2273 self._google_ie.initialize()
d3975459 2274
7e58d568 2275 def _real_extract(self, query):
bdb3f7a7 2276 mobj = re.match(self._VALID_URL, query)
7e58d568
RG
2277 if mobj is None:
2278 self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
2279 return
2280
2281 prefix, query = query.split(':')
2282 prefix = prefix[8:]
c0a10ca8 2283 query = query.encode('utf-8')
7e58d568
RG
2284 if prefix == '':
2285 self._download_n_results(query, 1)
2286 return
2287 elif prefix == 'all':
2288 self._download_n_results(query, self._max_google_results)
2289 return
2290 else:
2291 try:
2292 n = long(prefix)
2293 if n <= 0:
2294 self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
2295 return
2296 elif n > self._max_google_results:
c0a10ca8 2297 self._downloader.to_stderr(u'WARNING: gvsearch returns max %i results (you requested %i)' % (self._max_google_results, n))
7e58d568
RG
2298 n = self._max_google_results
2299 self._download_n_results(query, n)
2300 return
2301 except ValueError: # parsing prefix as integer fails
2302 self._download_n_results(query, 1)
2303 return
2304
2305 def _download_n_results(self, query, n):
2306 """Downloads a specified number of results for a query"""
2307
2308 video_ids = []
2309 already_seen = set()
2310 pagenum = 1
2311
2312 while True:
2313 self.report_download_page(query, pagenum)
2314 result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1987c232 2315 request = urllib2.Request(result_url)
7e58d568
RG
2316 try:
2317 page = urllib2.urlopen(request).read()
2318 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2319 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2320 return
2321
2322 # Extract video identifiers
2323 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2324 video_id = mobj.group(1)
2325 if video_id not in already_seen:
2326 video_ids.append(video_id)
2327 already_seen.add(video_id)
2328 if len(video_ids) == n:
2329 # Specified n videos reached
2330 for id in video_ids:
2331 self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
2332 return
2333
2334 if re.search(self._MORE_PAGES_INDICATOR, page) is None:
2335 for id in video_ids:
2336 self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
2337 return
2338
2339 pagenum = pagenum + 1
2340
c0a10ca8 2341
7e58d568
RG
2342class YahooSearchIE(InfoExtractor):
2343 """Information Extractor for Yahoo! Video search queries."""
bdb3f7a7 2344 _VALID_URL = r'yvsearch(\d+|all)?:[\s\S]+'
7e58d568
RG
2345 _TEMPLATE_URL = 'http://video.yahoo.com/search/?p=%s&o=%s'
2346 _VIDEO_INDICATOR = r'href="http://video\.yahoo\.com/watch/([0-9]+/[0-9]+)"'
2347 _MORE_PAGES_INDICATOR = r'\s*Next'
2348 _yahoo_ie = None
2349 _max_yahoo_results = 1000
f3098c4d 2350 IE_NAME = u'video.yahoo:search'
7e58d568
RG
2351
2352 def __init__(self, yahoo_ie, downloader=None):
2353 InfoExtractor.__init__(self, downloader)
2354 self._yahoo_ie = yahoo_ie
d3975459 2355
7e58d568
RG
2356 def report_download_page(self, query, pagenum):
2357 """Report attempt to download playlist page with given number."""
2358 query = query.decode(preferredencoding())
331ce0a0 2359 self._downloader.to_screen(u'[video.yahoo] query "%s": Downloading page %s' % (query, pagenum))
7e58d568
RG
2360
2361 def _real_initialize(self):
2362 self._yahoo_ie.initialize()
d3975459 2363
7e58d568 2364 def _real_extract(self, query):
bdb3f7a7 2365 mobj = re.match(self._VALID_URL, query)
7e58d568
RG
2366 if mobj is None:
2367 self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
2368 return
2369
2370 prefix, query = query.split(':')
2371 prefix = prefix[8:]
c0a10ca8 2372 query = query.encode('utf-8')
7e58d568
RG
2373 if prefix == '':
2374 self._download_n_results(query, 1)
2375 return
2376 elif prefix == 'all':
2377 self._download_n_results(query, self._max_yahoo_results)
2378 return
2379 else:
2380 try:
2381 n = long(prefix)
2382 if n <= 0:
2383 self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
2384 return
2385 elif n > self._max_yahoo_results:
c0a10ca8 2386 self._downloader.to_stderr(u'WARNING: yvsearch returns max %i results (you requested %i)' % (self._max_yahoo_results, n))
7e58d568
RG
2387 n = self._max_yahoo_results
2388 self._download_n_results(query, n)
2389 return
2390 except ValueError: # parsing prefix as integer fails
2391 self._download_n_results(query, 1)
2392 return
2393
2394 def _download_n_results(self, query, n):
2395 """Downloads a specified number of results for a query"""
2396
2397 video_ids = []
2398 already_seen = set()
2399 pagenum = 1
2400
2401 while True:
2402 self.report_download_page(query, pagenum)
2403 result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1987c232 2404 request = urllib2.Request(result_url)
7e58d568
RG
2405 try:
2406 page = urllib2.urlopen(request).read()
2407 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2408 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2409 return
2410
2411 # Extract video identifiers
2412 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2413 video_id = mobj.group(1)
2414 if video_id not in already_seen:
2415 video_ids.append(video_id)
2416 already_seen.add(video_id)
2417 if len(video_ids) == n:
2418 # Specified n videos reached
2419 for id in video_ids:
2420 self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
2421 return
2422
2423 if re.search(self._MORE_PAGES_INDICATOR, page) is None:
2424 for id in video_ids:
2425 self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
2426 return
2427
2428 pagenum = pagenum + 1
2429
c0a10ca8 2430
0c2dc87d
RG
2431class YoutubePlaylistIE(InfoExtractor):
2432 """Information Extractor for YouTube playlists."""
2433
2152ee86 2434 _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists|artist|playlist)\?.*?(p|a|list)=|user/.*?/user/|p/|user/.*?#[pg]/c/)([0-9A-Za-z]+)(?:/.*?/([0-9A-Za-z_-]+))?.*'
f74e22ae 2435 _TEMPLATE_URL = 'http://www.youtube.com/%s?%s=%s&page=%s&gl=US&hl=en'
0c2dc87d 2436 _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
ce5cafea 2437 _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
0c2dc87d 2438 _youtube_ie = None
f3098c4d 2439 IE_NAME = u'youtube:playlist'
0c2dc87d
RG
2440
2441 def __init__(self, youtube_ie, downloader=None):
2442 InfoExtractor.__init__(self, downloader)
2443 self._youtube_ie = youtube_ie
d3975459 2444
0c2dc87d
RG
2445 def report_download_page(self, playlist_id, pagenum):
2446 """Report attempt to download playlist page with given number."""
331ce0a0 2447 self._downloader.to_screen(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum))
0c2dc87d
RG
2448
2449 def _real_initialize(self):
2450 self._youtube_ie.initialize()
d3975459 2451
0c2dc87d
RG
2452 def _real_extract(self, url):
2453 # Extract playlist id
2454 mobj = re.match(self._VALID_URL, url)
2455 if mobj is None:
147753eb 2456 self._downloader.trouble(u'ERROR: invalid url: %s' % url)
6f21f686 2457 return
0c2dc87d 2458
d119b54d
RG
2459 # Single video case
2460 if mobj.group(3) is not None:
2461 self._youtube_ie.extract(mobj.group(3))
2462 return
2463
0c2dc87d 2464 # Download playlist pages
f74e22ae
GI
2465 # prefix is 'p' as default for playlists but there are other types that need extra care
2466 playlist_prefix = mobj.group(1)
2467 if playlist_prefix == 'a':
2468 playlist_access = 'artist'
2469 else:
7cc3c6fd 2470 playlist_prefix = 'p'
f74e22ae
GI
2471 playlist_access = 'view_play_list'
2472 playlist_id = mobj.group(2)
0c2dc87d
RG
2473 video_ids = []
2474 pagenum = 1
2475
2476 while True:
2477 self.report_download_page(playlist_id, pagenum)
f74e22ae 2478 request = urllib2.Request(self._TEMPLATE_URL % (playlist_access, playlist_prefix, playlist_id, pagenum))
0c2dc87d
RG
2479 try:
2480 page = urllib2.urlopen(request).read()
2481 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
147753eb 2482 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
6f21f686 2483 return
0c2dc87d
RG
2484
2485 # Extract video identifiers
27d98b6e 2486 ids_in_page = []
0c2dc87d 2487 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
27d98b6e
RG
2488 if mobj.group(1) not in ids_in_page:
2489 ids_in_page.append(mobj.group(1))
2490 video_ids.extend(ids_in_page)
0c2dc87d 2491
ce5cafea 2492 if re.search(self._MORE_PAGES_INDICATOR, page) is None:
0c2dc87d
RG
2493 break
2494 pagenum = pagenum + 1
2495
8cc44341
RG
2496 playliststart = self._downloader.params.get('playliststart', 1) - 1
2497 playlistend = self._downloader.params.get('playlistend', -1)
2498 video_ids = video_ids[playliststart:playlistend]
2499
0c2dc87d 2500 for id in video_ids:
6f21f686
RG
2501 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
2502 return
0c2dc87d 2503
c0a10ca8 2504
c39c05cd
A
2505class YoutubeUserIE(InfoExtractor):
2506 """Information Extractor for YouTube users."""
2507
5aba6ea4 2508 _VALID_URL = r'(?:(?:(?:http://)?(?:\w+\.)?youtube.com/user/)|ytuser:)([A-Za-z0-9_-]+)'
c39c05cd 2509 _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
5aba6ea4
RG
2510 _GDATA_PAGE_SIZE = 50
2511 _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d'
2512 _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
c39c05cd 2513 _youtube_ie = None
f3098c4d 2514 IE_NAME = u'youtube:user'
c39c05cd
A
2515
2516 def __init__(self, youtube_ie, downloader=None):
2517 InfoExtractor.__init__(self, downloader)
2518 self._youtube_ie = youtube_ie
d3975459 2519
5aba6ea4 2520 def report_download_page(self, username, start_index):
c39c05cd 2521 """Report attempt to download user page."""
5aba6ea4 2522 self._downloader.to_screen(u'[youtube] user %s: Downloading video ids from %d to %d' %
c0a10ca8 2523 (username, start_index, start_index + self._GDATA_PAGE_SIZE))
c39c05cd
A
2524
2525 def _real_initialize(self):
2526 self._youtube_ie.initialize()
d3975459 2527
c39c05cd
A
2528 def _real_extract(self, url):
2529 # Extract username
2530 mobj = re.match(self._VALID_URL, url)
2531 if mobj is None:
2532 self._downloader.trouble(u'ERROR: invalid url: %s' % url)
2533 return
2534
c39c05cd 2535 username = mobj.group(1)
5aba6ea4
RG
2536
2537 # Download video ids using YouTube Data API. Result size per
2538 # query is limited (currently to 50 videos) so we need to query
2539 # page by page until no more video ids are returned, which means
2540 # we have got all of them.
2541
c39c05cd 2542 video_ids = []
5aba6ea4 2543 pagenum = 0
c39c05cd 2544
5aba6ea4
RG
2545 while True:
2546 start_index = pagenum * self._GDATA_PAGE_SIZE + 1
2547 self.report_download_page(username, start_index)
c39c05cd 2548
5aba6ea4 2549 request = urllib2.Request(self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index))
c39c05cd 2550
5aba6ea4
RG
2551 try:
2552 page = urllib2.urlopen(request).read()
2553 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2554 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2555 return
c39c05cd 2556
5aba6ea4
RG
2557 # Extract video identifiers
2558 ids_in_page = []
2559
2560 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2561 if mobj.group(1) not in ids_in_page:
2562 ids_in_page.append(mobj.group(1))
2563
2564 video_ids.extend(ids_in_page)
2565
2566 # A little optimization - if current page is not
2567 # "full", ie. does not contain PAGE_SIZE video ids then
2568 # we can assume that this page is the last one - there
2569 # are no more ids on further pages - no need to query
2570 # again.
2571
2572 if len(ids_in_page) < self._GDATA_PAGE_SIZE:
2573 break
2574
2575 pagenum += 1
2576
2577 all_ids_count = len(video_ids)
8cc44341
RG
2578 playliststart = self._downloader.params.get('playliststart', 1) - 1
2579 playlistend = self._downloader.params.get('playlistend', -1)
204c9398 2580
5aba6ea4
RG
2581 if playlistend == -1:
2582 video_ids = video_ids[playliststart:]
2583 else:
2584 video_ids = video_ids[playliststart:playlistend]
7a9054ec 2585
5aba6ea4 2586 self._downloader.to_screen("[youtube] user %s: Collected %d video ids (downloading %d of them)" %
c0a10ca8 2587 (username, all_ids_count, len(video_ids)))
5aba6ea4
RG
2588
2589 for video_id in video_ids:
2590 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % video_id)
2591
c39c05cd 2592
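# ----------------------------------------------------------------------
# Illustrative sketch (not part of the original extractor): YoutubeUserIE
# pages through the GData feed 50 ids at a time and stops as soon as a page
# comes back "not full". The fetch_page_ids callable below is hypothetical
# and stands in for the HTTP request and regex work; the start-index
# arithmetic and the stop condition mirror YoutubeUserIE._real_extract.
def _gdata_paging_example(fetch_page_ids, page_size=50):
    video_ids = []
    pagenum = 0
    while True:
        start_index = pagenum * page_size + 1
        ids_in_page = fetch_page_ids(start_index)
        video_ids.extend(ids_in_page)
        if len(ids_in_page) < page_size:
            break
        pagenum += 1
    return video_ids
# e.g. _gdata_paging_example(lambda i: range(i, min(i + 50, 121))) collects
# 120 ids over three requests and stops on the short third page.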
27179cfd
VV
2593class DepositFilesIE(InfoExtractor):
2594 """Information extractor for depositfiles.com"""
2595
2596 _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles.com/(?:../(?#locale))?files/(.+)'
f3098c4d 2597 IE_NAME = u'DepositFiles'
27179cfd
VV
2598
2599 def __init__(self, downloader=None):
2600 InfoExtractor.__init__(self, downloader)
2601
27179cfd
VV
2602 def report_download_webpage(self, file_id):
2603 """Report webpage download."""
2604 self._downloader.to_screen(u'[DepositFiles] %s: Downloading webpage' % file_id)
2605
2606 def report_extraction(self, file_id):
2607 """Report information extraction."""
2608 self._downloader.to_screen(u'[DepositFiles] %s: Extracting information' % file_id)
2609
2610 def _real_initialize(self):
2611 return
2612
2613 def _real_extract(self, url):
2614 # At this point we have a new file
2615 self._downloader.increment_downloads()
2616
2617 file_id = url.split('/')[-1]
2618 # Rebuild the URL with the English locale
2619 url = 'http://depositfiles.com/en/files/' + file_id
2620
2621 # Retrieve file webpage with 'Free download' button pressed
2622 free_download_indication = { 'gateway_result' : '1' }
1987c232 2623 request = urllib2.Request(url, urllib.urlencode(free_download_indication))
27179cfd
VV
2624 try:
2625 self.report_download_webpage(file_id)
2626 webpage = urllib2.urlopen(request).read()
2627 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2628 self._downloader.trouble(u'ERROR: Unable to retrieve file webpage: %s' % str(err))
2629 return
2630
2631 # Search for the real file URL
2632 mobj = re.search(r'<form action="(http://fileshare.+?)"', webpage)
2633 if (mobj is None) or (mobj.group(1) is None):
2634 # Try to figure out the reason for the error.
2635 mobj = re.search(r'<strong>(Attention.*?)</strong>', webpage, re.DOTALL)
2636 if (mobj is not None) and (mobj.group(1) is not None):
2637 restriction_message = re.sub('\s+', ' ', mobj.group(1)).strip()
2638 self._downloader.trouble(u'ERROR: %s' % restriction_message)
2639 else:
2640 self._downloader.trouble(u'ERROR: unable to extract download URL from: %s' % url)
2641 return
2642
2643 file_url = mobj.group(1)
2644 file_extension = os.path.splitext(file_url)[1][1:]
2645
2646 # Search for file title
2647 mobj = re.search(r'<b title="(.*?)">', webpage)
2648 if mobj is None:
2649 self._downloader.trouble(u'ERROR: unable to extract title')
2650 return
2651 file_title = mobj.group(1).decode('utf-8')
2652
2653 try:
2654 # Process file information
2655 self._downloader.process_info({
2656 'id': file_id.decode('utf-8'),
2657 'url': file_url.decode('utf-8'),
2658 'uploader': u'NA',
2659 'upload_date': u'NA',
2660 'title': file_title,
2661 'stitle': file_title,
2662 'ext': file_extension.decode('utf-8'),
2663 'format': u'NA',
2664 'player_url': None,
2665 })
2666 except UnavailableVideoError, err:
2667 self._downloader.trouble(u'ERROR: unable to download file')
2668
c0a10ca8 2669
9f5f9602
GI
2670class FacebookIE(InfoExtractor):
2671 """Information Extractor for Facebook"""
2672
2673 _VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook.com/video/video.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)'
2674 _LOGIN_URL = 'https://login.facebook.com/login.php?m&next=http%3A%2F%2Fm.facebook.com%2Fhome.php&'
2675 _NETRC_MACHINE = 'facebook'
2676 _available_formats = ['highqual', 'lowqual']
2677 _video_extensions = {
2678 'highqual': 'mp4',
2679 'lowqual': 'mp4',
2680 }
f3098c4d 2681 IE_NAME = u'facebook'
9f5f9602
GI
2682
2683 def __init__(self, downloader=None):
2684 InfoExtractor.__init__(self, downloader)
2685
9f5f9602
GI
2686 def _reporter(self, message):
2687 """Add header and report message."""
2688 self._downloader.to_screen(u'[facebook] %s' % message)
2689
2690 def report_login(self):
2691 """Report attempt to log in."""
2692 self._reporter(u'Logging in')
2693
2694 def report_video_webpage_download(self, video_id):
2695 """Report attempt to download video webpage."""
2696 self._reporter(u'%s: Downloading video webpage' % video_id)
2697
2698 def report_information_extraction(self, video_id):
2699 """Report attempt to extract video information."""
2700 self._reporter(u'%s: Extracting video information' % video_id)
2701
2702 def _parse_page(self, video_webpage):
2703 """Extract video information from page"""
2704 # General data
2705 data = {'title': r'class="video_title datawrap">(.*?)</',
2706 'description': r'<div class="datawrap">(.*?)</div>',
2707 'owner': r'\("video_owner_name", "(.*?)"\)',
2708 'upload_date': r'data-date="(.*?)"',
2709 'thumbnail': r'\("thumb_url", "(?P<THUMB>.*?)"\)',
2710 }
2711 video_info = {}
2712 for piece in data.keys():
2713 mobj = re.search(data[piece], video_webpage)
2714 if mobj is not None:
2715 video_info[piece] = urllib.unquote_plus(mobj.group(1).decode("unicode_escape"))
2716
2717 # Video urls
2718 video_urls = {}
2719 for fmt in self._available_formats:
2720 mobj = re.search(r'\("%s_src\", "(.+?)"\)' % fmt, video_webpage)
2721 if mobj is not None:
2722 # The URL sits in a JavaScript segment, Unicode-escaped, inside the
2723 # (generally UTF-8) page
2724 video_urls[fmt] = urllib.unquote_plus(mobj.group(1).decode("unicode_escape"))
2725 video_info['video_urls'] = video_urls
2726
2727 return video_info
2728
2729 def _real_initialize(self):
2730 if self._downloader is None:
2731 return
2732
2733 useremail = None
2734 password = None
2735 downloader_params = self._downloader.params
2736
2737 # Attempt to use provided username and password or .netrc data
2738 if downloader_params.get('username', None) is not None:
2739 useremail = downloader_params['username']
2740 password = downloader_params['password']
2741 elif downloader_params.get('usenetrc', False):
2742 try:
2743 info = netrc.netrc().authenticators(self._NETRC_MACHINE)
2744 if info is not None:
2745 useremail = info[0]
2746 password = info[2]
2747 else:
2748 raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
2749 except (IOError, netrc.NetrcParseError), err:
2750 self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
2751 return
2752
2753 if useremail is None:
2754 return
2755
2756 # Log in
2757 login_form = {
2758 'email': useremail,
2759 'pass': password,
2760 'login': 'Log+In'
2761 }
2762 request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form))
2763 try:
2764 self.report_login()
2765 login_results = urllib2.urlopen(request).read()
2766 if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None:
2767 self._downloader.to_stderr(u'WARNING: unable to log in: bad username/password, or exceeded login rate limit (~3/min). Check credentials or wait.')
2768 return
2769 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2770 self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
2771 return
2772
2773 def _real_extract(self, url):
2774 mobj = re.match(self._VALID_URL, url)
2775 if mobj is None:
2776 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
2777 return
2778 video_id = mobj.group('ID')
2779
2780 # Get video webpage
2781 self.report_video_webpage_download(video_id)
2782 request = urllib2.Request('https://www.facebook.com/video/video.php?v=%s' % video_id)
2783 try:
2784 page = urllib2.urlopen(request)
2785 video_webpage = page.read()
2786 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2787 self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
2788 return
2789
2790 # Start extracting information
2791 self.report_information_extraction(video_id)
2792
2793 # Extract information
2794 video_info = self._parse_page(video_webpage)
2795
2796 # uploader
2797 if 'owner' not in video_info:
2798 self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
2799 return
2800 video_uploader = video_info['owner']
2801
2802 # title
2803 if 'title' not in video_info:
2804 self._downloader.trouble(u'ERROR: unable to extract video title')
2805 return
2806 video_title = video_info['title']
2807 video_title = video_title.decode('utf-8')
2808 video_title = sanitize_title(video_title)
2809
2810 # simplified title
2811 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
2812 simple_title = simple_title.strip(ur'_')
2813
2814 # thumbnail image
2815 if 'thumbnail' not in video_info:
2816 self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
2817 video_thumbnail = ''
2818 else:
2819 video_thumbnail = video_info['thumbnail']
2820
2821 # upload date
2822 upload_date = u'NA'
2823 if 'upload_date' in video_info:
2824 upload_time = video_info['upload_date']
2825 timetuple = email.utils.parsedate_tz(upload_time)
2826 if timetuple is not None:
2827 try:
2828 upload_date = time.strftime('%Y%m%d', timetuple[0:9])
2829 except:
2830 pass
2831
2832 # description
8b95c387 2833 video_description = video_info.get('description', 'No description available.')
9f5f9602
GI
2834
2835 url_map = video_info['video_urls']
2836 if len(url_map.keys()) > 0:
2837 # Decide which formats to download
2838 req_format = self._downloader.params.get('format', None)
2839 format_limit = self._downloader.params.get('format_limit', None)
2840
2841 if format_limit is not None and format_limit in self._available_formats:
2842 format_list = self._available_formats[self._available_formats.index(format_limit):]
2843 else:
2844 format_list = self._available_formats
2845 existing_formats = [x for x in format_list if x in url_map]
2846 if len(existing_formats) == 0:
2847 self._downloader.trouble(u'ERROR: no known formats available for video')
2848 return
2849 if req_format is None:
2850 video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
c52b01f3
K
2851 elif req_format == 'worst':
2852 video_url_list = [(existing_formats[-1], url_map[existing_formats[-1]])] # worst quality
9f5f9602
GI
2853 elif req_format == '-1':
2854 video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
2855 else:
2856 # Specific format
2857 if req_format not in url_map:
2858 self._downloader.trouble(u'ERROR: requested format not available')
2859 return
2860 video_url_list = [(req_format, url_map[req_format])] # Specific format
2861
2862 for format_param, video_real_url in video_url_list:
2863
2864 # At this point we have a new video
2865 self._downloader.increment_downloads()
2866
2867 # Extension
2868 video_extension = self._video_extensions.get(format_param, 'mp4')
2869
9f5f9602
GI
2870 try:
2871 # Process video information
2872 self._downloader.process_info({
2873 'id': video_id.decode('utf-8'),
2874 'url': video_real_url.decode('utf-8'),
2875 'uploader': video_uploader.decode('utf-8'),
2876 'upload_date': upload_date,
2877 'title': video_title,
2878 'stitle': simple_title,
2879 'ext': video_extension.decode('utf-8'),
2880 'format': (format_param is None and u'NA' or format_param.decode('utf-8')),
2881 'thumbnail': video_thumbnail.decode('utf-8'),
2882 'description': video_description.decode('utf-8'),
2883 'player_url': None,
2884 })
2885 except UnavailableVideoError, err:
2886 self._downloader.trouble(u'\nERROR: unable to download video')
2887
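# --- Illustrative sketch (not part of the original script) ---
# The format-selection branch above (best quality by default, 'worst', '-1'
# for all formats, or a specific format code) can be read as a standalone
# helper. The function name is hypothetical.
def _select_format_urls(url_map, available_formats, req_format=None, format_limit=None):
	"""Return a list of (format, url) pairs following the rules used above."""
	if format_limit is not None and format_limit in available_formats:
		format_list = available_formats[available_formats.index(format_limit):]
	else:
		format_list = available_formats
	existing_formats = [x for x in format_list if x in url_map]
	if len(existing_formats) == 0:
		return []
	if req_format is None:
		return [(existing_formats[0], url_map[existing_formats[0]])]    # best quality
	if req_format == 'worst':
		return [(existing_formats[-1], url_map[existing_formats[-1]])]  # worst quality
	if req_format == '-1':
		return [(f, url_map[f]) for f in existing_formats]              # all formats
	if req_format in url_map:
		return [(req_format, url_map[req_format])]                      # specific format
	return []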
7745f5d8
PH
2888class BlipTVIE(InfoExtractor):
2889 """Information extractor for blip.tv"""
2890
1cab2c6d 2891 _VALID_URL = r'^(?:https?://)?(?:\w+\.)?blip\.tv(/.+)$'
7745f5d8 2892 _URL_EXT = r'^.*\.([a-z0-9]+)$'
f3098c4d 2893 IE_NAME = u'blip.tv'
7745f5d8 2894
7745f5d8
PH
2895 def report_extraction(self, file_id):
2896 """Report information extraction."""
aded78d9 2897 self._downloader.to_screen(u'[blip.tv] %s: Extracting information' % file_id)
7745f5d8
PH
2898
2899 def _simplify_title(self, title):
2900 res = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', title)
2901 res = res.strip(ur'_')
2902 return res
2903
2904 def _real_extract(self, url):
2905 mobj = re.match(self._VALID_URL, url)
2906 if mobj is None:
2907 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
2908 return
2909
1293ce58
PH
2910 if '?' in url:
2911 cchar = '&'
2912 else:
2913 cchar = '?'
2914 json_url = url + cchar + 'skin=json&version=2&no_wrap=1'
7745f5d8 2915 request = urllib2.Request(json_url)
aded78d9 2916 self.report_extraction(mobj.group(1))
7745f5d8
PH
2917 try:
2918 json_code = urllib2.urlopen(request).read()
2919 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2920 self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
2921 return
2922 try:
2923 json_data = json.loads(json_code)
1293ce58
PH
2924 if 'Post' in json_data:
2925 data = json_data['Post']
2926 else:
2927 data = json_data
7745f5d8
PH
2928
2929 upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d')
2930 video_url = data['media']['url']
2931 umobj = re.match(self._URL_EXT, video_url)
2932 if umobj is None:
2933 raise ValueError('Cannot determine filename extension')
2934 ext = umobj.group(1)
2935
a1cab7ce
PH
2936 self._downloader.increment_downloads()
2937
7745f5d8
PH
2938 info = {
2939 'id': data['item_id'],
2940 'url': video_url,
2941 'uploader': data['display_name'],
2942 'upload_date': upload_date,
2943 'title': data['title'],
2944 'stitle': self._simplify_title(data['title']),
2945 'ext': ext,
2946 'format': data['media']['mimeType'],
2947 'thumbnail': data['thumbnailUrl'],
2948 'description': data['description'],
2949 'player_url': data['embedUrl']
2950 }
2951 except (ValueError,KeyError), err:
aded78d9 2952 self._downloader.trouble(u'ERROR: unable to parse video information: %s' % repr(err))
7745f5d8
PH
2953 return
2954
2955 try:
2956 self._downloader.process_info(info)
2957 except UnavailableVideoError, err:
2958 self._downloader.trouble(u'\nERROR: unable to download video')
2959
2960
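# --- Illustrative sketch (not part of the original script) ---
# BlipTVIE does not scrape the HTML page: it appends skin=json&version=2&no_wrap=1
# to the page URL and parses the JSON document blip.tv returns for that URL.
# The helper name and the example URL below are hypothetical.
def _bliptv_json_url(page_url):
	"""Build the JSON metadata URL for a blip.tv page URL, as done above."""
	if '?' in page_url:
		cchar = '&'
	else:
		cchar = '?'
	return page_url + cchar + 'skin=json&version=2&no_wrap=1'
# e.g. _bliptv_json_url('http://blip.tv/file/12345')
#      -> 'http://blip.tv/file/12345?skin=json&version=2&no_wrap=1'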
9b0a8bc1
PH
2961class MyVideoIE(InfoExtractor):
2962 """Information Extractor for myvideo.de."""
2963
2964 _VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/watch/([0-9]+)/([^?/]+).*'
f3098c4d 2965 IE_NAME = u'myvideo'
9b0a8bc1
PH
2966
2967 def __init__(self, downloader=None):
2968 InfoExtractor.__init__(self, downloader)
2969
9b0a8bc1
PH
2970 def report_download_webpage(self, video_id):
2971 """Report webpage download."""
2972 self._downloader.to_screen(u'[myvideo] %s: Downloading webpage' % video_id)
2973
2974 def report_extraction(self, video_id):
2975 """Report information extraction."""
2976 self._downloader.to_screen(u'[myvideo] %s: Extracting information' % video_id)
2977
2978 def _real_initialize(self):
2979 return
2980
2981 def _real_extract(self,url):
2982 mobj = re.match(self._VALID_URL, url)
2983 if mobj is None:
2984 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
2985 return
2986
2987 video_id = mobj.group(1)
2988 simple_title = mobj.group(2).decode('utf-8')
2989 # should actually not be necessary
2990 simple_title = sanitize_title(simple_title)
2991 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', simple_title)
2992
2993 # Get video webpage
2994 request = urllib2.Request('http://www.myvideo.de/watch/%s' % video_id)
2995 try:
2996 self.report_download_webpage(video_id)
2997 webpage = urllib2.urlopen(request).read()
2998 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2999 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
3000 return
3001
3002 self.report_extraction(video_id)
3003 mobj = re.search(r'<link rel=\'image_src\' href=\'(http://is[0-9].myvideo\.de/de/movie[0-9]+/[a-f0-9]+)/thumbs/[^.]+\.jpg\' />',
3004 webpage)
3005 if mobj is None:
3006 self._downloader.trouble(u'ERROR: unable to extract media URL')
3007 return
3008 video_url = mobj.group(1) + ('/%s.flv' % video_id)
3009
3010 mobj = re.search('<title>([^<]+)</title>', webpage)
3011 if mobj is None:
3012 self._downloader.trouble(u'ERROR: unable to extract title')
3013 return
3014
3015 video_title = mobj.group(1)
3016 video_title = sanitize_title(video_title)
3017
3018 try:
3020 self._downloader.process_info({
3021 'id': video_id,
3022 'url': video_url,
3023 'uploader': u'NA',
3024 'upload_date': u'NA',
3025 'title': video_title,
3026 'stitle': simple_title,
3027 'ext': u'flv',
3028 'format': u'NA',
3029 'player_url': None,
3030 })
3031 except UnavailableVideoError:
3032 self._downloader.trouble(u'\nERROR: Unable to download video')
3033
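# --- Illustrative sketch (not part of the original script) ---
# MyVideoIE has no metadata API call: it locates the <link rel='image_src'>
# thumbnail URL in the page and turns its movie directory into the .flv media
# URL by appending '/<video_id>.flv'. Condensed version (the helper name is
# hypothetical; `re` is imported at the top of this script):
def _myvideo_media_url(webpage, video_id):
	mobj = re.search(r'<link rel=\'image_src\' href=\'(http://is[0-9].myvideo\.de/de/movie[0-9]+/[a-f0-9]+)/thumbs/[^.]+\.jpg\' />', webpage)
	if mobj is None:
		return None
	return mobj.group(1) + ('/%s.flv' % video_id)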
c8e30044 3034class ComedyCentralIE(InfoExtractor):
f166bccc 3035 """Information extractor for The Daily Show and Colbert Report """
c8e30044 3036
f3098c4d
PH
3037 _VALID_URL = r'^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport))|(https?://)?(www\.)?(?P<showname>thedailyshow|colbertnation)\.com/full-episodes/(?P<episode>.*)$'
3038 IE_NAME = u'comedycentral'
c8e30044 3039
c8e30044
PH
3040 def report_extraction(self, episode_id):
3041 self._downloader.to_screen(u'[comedycentral] %s: Extracting information' % episode_id)
3042
3043 def report_config_download(self, episode_id):
3044 self._downloader.to_screen(u'[comedycentral] %s: Downloading configuration' % episode_id)
3045
b487ef08
PH
3046 def report_index_download(self, episode_id):
3047 self._downloader.to_screen(u'[comedycentral] %s: Downloading show index' % episode_id)
3048
fedf9f39
PH
3049 def report_player_url(self, episode_id):
3050 self._downloader.to_screen(u'[comedycentral] %s: Determining player URL' % episode_id)
3051
c8e30044
PH
3052 def _simplify_title(self, title):
3053 res = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', title)
3054 res = res.strip(ur'_')
3055 return res
3056
3057 def _real_extract(self, url):
3058 mobj = re.match(self._VALID_URL, url)
3059 if mobj is None:
3060 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
3061 return
f166bccc
PH
3062
3063 if mobj.group('shortname'):
3064 if mobj.group('shortname') in ('tds', 'thedailyshow'):
3065 url = 'http://www.thedailyshow.com/full-episodes/'
3066 else:
3067 url = 'http://www.colbertnation.com/full-episodes/'
3068 mobj = re.match(self._VALID_URL, url)
3069 assert mobj is not None
3070
3071 dlNewest = not mobj.group('episode')
3072 if dlNewest:
3073 epTitle = mobj.group('showname')
3074 else:
3075 epTitle = mobj.group('episode')
c8e30044
PH
3076
3077 req = urllib2.Request(url)
3078 self.report_extraction(epTitle)
3079 try:
f166bccc
PH
3080 htmlHandle = urllib2.urlopen(req)
3081 html = htmlHandle.read()
c8e30044
PH
3082 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3083 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % unicode(err))
3084 return
f166bccc
PH
3085 if dlNewest:
3086 url = htmlHandle.geturl()
3087 mobj = re.match(self._VALID_URL, url)
3088 if mobj is None:
3089 self._downloader.trouble(u'ERROR: Invalid redirected URL: ' + url)
3090 return
3091 if mobj.group('episode') == '':
3092 self._downloader.trouble(u'ERROR: Redirected URL is still not specific: ' + url)
3093 return
3094 epTitle = mobj.group('episode')
c8e30044 3095
b487ef08 3096 mMovieParams = re.findall('<param name="movie" value="(http://media.mtvnservices.com/([^"]*episode.*?:.*?))"/>', html)
c8e30044
PH
3097 if len(mMovieParams) == 0:
3098 self._downloader.trouble(u'ERROR: unable to find Flash URL in webpage ' + url)
3099 return
b487ef08
PH
3100
3101 playerUrl_raw = mMovieParams[0][0]
fedf9f39
PH
3102 self.report_player_url(epTitle)
3103 try:
b487ef08
PH
3104 urlHandle = urllib2.urlopen(playerUrl_raw)
3105 playerUrl = urlHandle.geturl()
3106 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3107 self._downloader.trouble(u'ERROR: unable to find out player URL: ' + unicode(err))
3108 return
3109
3110 uri = mMovieParams[0][1]
3111 indexUrl = 'http://shadow.comedycentral.com/feeds/video_player/mrss/?' + urllib.urlencode({'uri': uri})
3112 self.report_index_download(epTitle)
3113 try:
3114 indexXml = urllib2.urlopen(indexUrl).read()
fedf9f39 3115 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
b487ef08 3116 self._downloader.trouble(u'ERROR: unable to download episode index: ' + unicode(err))
fedf9f39 3117 return
fedf9f39 3118
b487ef08
PH
3119 idoc = xml.etree.ElementTree.fromstring(indexXml)
3120 itemEls = idoc.findall('.//item')
3121 for itemEl in itemEls:
3122 mediaId = itemEl.findall('./guid')[0].text
3123 shortMediaId = mediaId.split(':')[-1]
3124 showId = mediaId.split(':')[-2].replace('.com', '')
3125 officialTitle = itemEl.findall('./title')[0].text
3126 officialDate = itemEl.findall('./pubDate')[0].text
3127
c8e30044
PH
3128 configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' +
3129 urllib.urlencode({'uri': mediaId}))
3130 configReq = urllib2.Request(configUrl)
3131 self.report_config_download(epTitle)
3132 try:
3133 configXml = urllib2.urlopen(configReq).read()
3134 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3135 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % unicode(err))
3136 return
46c8c432 3137
c8e30044
PH
3138 cdoc = xml.etree.ElementTree.fromstring(configXml)
3139 turls = []
3140 for rendition in cdoc.findall('.//rendition'):
3141 finfo = (rendition.attrib['bitrate'], rendition.findall('./src')[0].text)
3142 turls.append(finfo)
3143
a88bc6bb 3144 if len(turls) == 0:
b487ef08 3145 self._downloader.trouble(u'\nERROR: unable to download ' + mediaId + ': No videos found')
a88bc6bb
PH
3146 continue
3147
c8e30044
PH
3148 # For now, just pick the highest bitrate
3149 format,video_url = turls[-1]
3150
3151 self._downloader.increment_downloads()
a88bc6bb 3152
b487ef08 3153 effTitle = showId + '-' + epTitle
c8e30044 3154 info = {
b487ef08 3155 'id': shortMediaId,
c8e30044 3156 'url': video_url,
b487ef08
PH
3157 'uploader': showId,
3158 'upload_date': officialDate,
a88bc6bb
PH
3159 'title': effTitle,
3160 'stitle': self._simplify_title(effTitle),
c8e30044
PH
3161 'ext': 'mp4',
3162 'format': format,
3163 'thumbnail': None,
b487ef08
PH
3164 'description': officialTitle,
3165 'player_url': playerUrl
c8e30044 3166 }
46c8c432 3167
c8e30044
PH
3168 try:
3169 self._downloader.process_info(info)
3170 except UnavailableVideoError, err:
b487ef08 3171 self._downloader.trouble(u'\nERROR: unable to download ' + mediaId)
a88bc6bb 3172 continue
c8e30044
PH
3173
3174
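# --- Illustrative sketch (not part of the original script) ---
# The rendition-picking step above collects (bitrate, src) pairs from the
# mediaGen config XML and simply takes the last entry as the highest bitrate.
# A standalone version that sorts explicitly instead of relying on document
# order (the helper name is hypothetical; xml.etree.ElementTree is imported
# near the top of this script where available):
def _pick_best_rendition(configXml):
	"""Return (bitrate, url) for the highest-bitrate rendition, or None."""
	cdoc = xml.etree.ElementTree.fromstring(configXml)
	turls = []
	for rendition in cdoc.findall('.//rendition'):
		bitrate = int(rendition.attrib.get('bitrate', '0'))
		src = rendition.findall('./src')[0].text
		turls.append((bitrate, src))
	if len(turls) == 0:
		return None
	return max(turls)  # tuples compare on bitrate first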
f9c68787
PH
3175class EscapistIE(InfoExtractor):
3176 """Information extractor for The Escapist """
3177
3178 _VALID_URL = r'^(https?://)?(www\.)?escapistmagazine\.com/videos/view/(?P<showname>[^/]+)/(?P<episode>[^/?]+)[/?].*$'
f3098c4d 3179 IE_NAME = u'escapist'
f9c68787 3180
f9c68787
PH
3181 def report_extraction(self, showName):
3182 self._downloader.to_screen(u'[escapist] %s: Extracting information' % showName)
3183
3184 def report_config_download(self, showName):
3185 self._downloader.to_screen(u'[escapist] %s: Downloading configuration' % showName)
3186
3187 def _simplify_title(self, title):
3188 res = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', title)
3189 res = res.strip(ur'_')
3190 return res
3191
3192 def _real_extract(self, url):
3193 htmlParser = HTMLParser.HTMLParser()
3194
3195 mobj = re.match(self._VALID_URL, url)
3196 if mobj is None:
3197 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
3198 return
3199 showName = mobj.group('showname')
3200 videoId = mobj.group('episode')
3201
3202 self.report_extraction(showName)
3203 try:
3204 webPage = urllib2.urlopen(url).read()
3205 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3206 self._downloader.trouble(u'ERROR: unable to download webpage: ' + unicode(err))
3207 return
3208
3209 descMatch = re.search('<meta name="description" content="([^"]*)"', webPage)
3210 description = htmlParser.unescape(descMatch.group(1))
3211 imgMatch = re.search('<meta property="og:image" content="([^"]*)"', webPage)
3212 imgUrl = htmlParser.unescape(imgMatch.group(1))
3213 playerUrlMatch = re.search('<meta property="og:video" content="([^"]*)"', webPage)
3214 playerUrl = htmlParser.unescape(playerUrlMatch.group(1))
3215 configUrlMatch = re.search('config=(.*)$', playerUrl)
3216 configUrl = urllib2.unquote(configUrlMatch.group(1))
3217
3218 self.report_config_download(showName)
3219 try:
3220 configJSON = urllib2.urlopen(configUrl).read()
3221 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3222 self._downloader.trouble(u'ERROR: unable to download configuration: ' + unicode(err))
3223 return
3224
3225 # Technically, it's JavaScript, not JSON
3226 configJSON = configJSON.replace("'", '"')
3227
3228 try:
3229 config = json.loads(configJSON)
3230 except (ValueError,), err:
3231 self._downloader.trouble(u'ERROR: Invalid JSON in configuration file: ' + unicode(err))
3232 return
3233
3234 playlist = config['playlist']
3235 videoUrl = playlist[1]['url']
3236
3237 self._downloader.increment_downloads()
3238 info = {
3239 'id': videoId,
3240 'url': videoUrl,
3241 'uploader': showName,
3242 'upload_date': None,
3243 'title': showName,
3244 'stitle': self._simplify_title(showName),
3245 'ext': 'flv',
3246 'format': 'flv',
3247 'thumbnail': imgUrl,
3248 'description': description,
3249 'player_url': playerUrl,
3250 }
3251
3252 try:
3253 self._downloader.process_info(info)
3254 except UnavailableVideoError, err:
3255 self._downloader.trouble(u'\nERROR: unable to download ' + videoId)
3256
3257
3258
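# --- Illustrative sketch (not part of the original script) ---
# The Escapist flow above: read the og:video player URL, pull out its config=
# query parameter, unquote it, then coerce the single-quoted JavaScript object
# into JSON before parsing. Condensed (the helper names are hypothetical; re,
# urllib2 and json are available from the top of this script):
def _escapist_config_url(playerUrl):
	configUrlMatch = re.search('config=(.*)$', playerUrl)
	if configUrlMatch is None:
		return None
	return urllib2.unquote(configUrlMatch.group(1))

def _escapist_parse_config(configJSON):
	# single quotes -> double quotes; fragile, but mirrors the step above
	return json.loads(configJSON.replace("'", '"'))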
65cd34c5
RG
3259class PostProcessor(object):
3260 """Post Processor class.
3261
3262 PostProcessor objects can be added to downloaders with their
3263 add_post_processor() method. When the downloader has finished a
3264 successful download, it will take its internal chain of PostProcessors
3265 and start calling the run() method on each one of them, first with
3266 an initial argument and then with the returned value of the previous
3267 PostProcessor.
3268
3269 The chain will be stopped if one of them ever returns None or the end
3270 of the chain is reached.
3271
3272 PostProcessor objects follow a "mutual registration" process similar
3273 to InfoExtractor objects.
3274 """
3275
3276 _downloader = None
3277
3278 def __init__(self, downloader=None):
3279 self._downloader = downloader
3280
65cd34c5
RG
3281 def set_downloader(self, downloader):
3282 """Sets the downloader for this PP."""
3283 self._downloader = downloader
d3975459 3284
65cd34c5
RG
3285 def run(self, information):
3286 """Run the PostProcessor.
3287
3288 The "information" argument is a dictionary like the ones
2f11508a 3289 composed by InfoExtractors. The only difference is that this
65cd34c5
RG
3290 one has an extra field called "filepath" that points to the
3291 downloaded file.
3292
3293 When this method returns None, the postprocessing chain is
3294 stopped. However, this method may return an information
3295 dictionary that will be passed to the next postprocessing
3296 object in the chain. It can be the one it received after
3297 changing some fields.
3298
3299 In addition, this method may raise a PostProcessingError
3300 exception that will be taken into account by the downloader
3301 it was called from.
3302 """
3303 return information # by default, do nothing
d3975459 3304
c0a10ca8 3305
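# --- Illustrative sketch (not part of the original script) ---
# A minimal PostProcessor honouring the contract documented above: run()
# receives the info dictionary (with the extra 'filepath' key) and either
# returns it (possibly modified) to continue the chain, or returns None to
# stop it. The class name is hypothetical.
class PrintFilepathPP(PostProcessor):
	def run(self, information):
		if self._downloader is not None:
			self._downloader.to_screen(u'[postprocess] Downloaded to %s' % information['filepath'])
		return information  # keep the chain going
# It would be registered the same way FFmpegExtractAudioPP is in main():
#   fd.add_post_processor(PrintFilepathPP())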
3072fab1
RG
3306class FFmpegExtractAudioPP(PostProcessor):
3307
c99dcbd2 3308 def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, keepvideo=False):
3072fab1
RG
3309 PostProcessor.__init__(self, downloader)
3310 if preferredcodec is None:
3311 preferredcodec = 'best'
3312 self._preferredcodec = preferredcodec
18b7f874 3313 self._preferredquality = preferredquality
3314 self._keepvideo = keepvideo
3072fab1
RG
3315
3316 @staticmethod
3317 def get_audio_codec(path):
da273188 3318 try:
2727dbf7
RG
3319 cmd = ['ffprobe', '-show_streams', '--', path]
3320 handle = subprocess.Popen(cmd, stderr=file(os.path.devnull, 'w'), stdout=subprocess.PIPE)
da273188
RG
3321 output = handle.communicate()[0]
3322 if handle.wait() != 0:
3323 return None
3324 except (IOError, OSError):
3072fab1
RG
3325 return None
3326 audio_codec = None
3327 for line in output.split('\n'):
3328 if line.startswith('codec_name='):
3329 audio_codec = line.split('=')[1].strip()
3330 elif line.strip() == 'codec_type=audio' and audio_codec is not None:
3331 return audio_codec
3332 return None
3333
3334 @staticmethod
3335 def run_ffmpeg(path, out_path, codec, more_opts):
3336 try:
2727dbf7
RG
3337 cmd = ['ffmpeg', '-y', '-i', path, '-vn', '-acodec', codec] + more_opts + ['--', out_path]
3338 ret = subprocess.call(cmd, stdout=file(os.path.devnull, 'w'), stderr=subprocess.STDOUT)
3072fab1
RG
3339 return (ret == 0)
3340 except (IOError, OSError):
3341 return False
3342
3343 def run(self, information):
3344 path = information['filepath']
3345
3346 filecodec = self.get_audio_codec(path)
3347 if filecodec is None:
da273188 3348 self._downloader.to_stderr(u'WARNING: unable to obtain file audio codec with ffprobe')
3072fab1
RG
3349 return None
3350
3351 more_opts = []
3352 if self._preferredcodec == 'best' or self._preferredcodec == filecodec:
3353 if filecodec == 'aac' or filecodec == 'mp3':
3354 # Lossless if possible
3355 acodec = 'copy'
3356 extension = filecodec
3357 if filecodec == 'aac':
3358 more_opts = ['-f', 'adts']
3359 else:
3360 # MP3 otherwise.
3361 acodec = 'libmp3lame'
3362 extension = 'mp3'
c99dcbd2
PH
3363 more_opts = []
3364 if self._preferredquality is not None:
3365 more_opts += ['-ab', self._preferredquality]
3072fab1
RG
3366 else:
3367 # We convert the audio (lossy)
3368 acodec = {'mp3': 'libmp3lame', 'aac': 'aac'}[self._preferredcodec]
3369 extension = self._preferredcodec
c99dcbd2
PH
3370 more_opts = []
3371 if self._preferredquality is not None:
3372 more_opts += ['-ab', self._preferredquality]
3072fab1
RG
3373 if self._preferredcodec == 'aac':
3374 more_opts += ['-f', 'adts']
3375
3376 (prefix, ext) = os.path.splitext(path)
3377 new_path = prefix + '.' + extension
3378 self._downloader.to_screen(u'[ffmpeg] Destination: %s' % new_path)
3379 status = self.run_ffmpeg(path, new_path, acodec, more_opts)
3380
3381 if not status:
1bd92582 3382 self._downloader.to_stderr(u'WARNING: error running ffmpeg')
3072fab1
RG
3383 return None
3384
36597dc4
K
3385 # Try to update the date time for extracted audio file.
3386 if information.get('filetime') is not None:
3387 try:
3388 os.utime(new_path, (time.time(), information['filetime']))
3389 except:
3390 self._downloader.to_stderr(u'WARNING: Cannot update utime of audio file')
3391
18b7f874 3392 if not self._keepvideo:
3393 try:
3394 os.remove(path)
3395 except (IOError, OSError):
3396 self._downloader.to_stderr(u'WARNING: Unable to remove downloaded video file')
3397 return None
3072fab1
RG
3398
3399 information['filepath'] = new_path
3400 return information
3401
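# --- Illustrative sketch (not part of the original script) ---
# get_audio_codec() above works by running `ffprobe -show_streams -- <path>`
# and scanning its key=value output for the codec_name of the audio stream.
# A small helper (hypothetical name) showing how run() decides whether the
# audio can be stream-copied instead of re-encoded:
def _can_copy_audio(path):
	codec = FFmpegExtractAudioPP.get_audio_codec(path)
	return codec in ('aac', 'mp3')  # only these are copied losslessly above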
5fb3df4a
GV
3402
3403def updateSelf(downloader, filename):
3404 ''' Update the program file with the latest version from the repository '''
3405 # Note: downloader only used for options
3406 if not os.access(filename, os.W_OK):
3407 sys.exit('ERROR: no write permissions on %s' % filename)
3408
d207e7cf 3409 downloader.to_screen('Updating to latest version...')
5fb3df4a 3410
4fa74b52 3411 try:
d207e7cf
PH
3412 try:
3413 urlh = urllib.urlopen(UPDATE_URL)
3414 newcontent = urlh.read()
27365956
PH
3415
3416 vmatch = re.search("__version__ = '([^']+)'", newcontent)
3417 if vmatch is not None and vmatch.group(1) == __version__:
3418 downloader.to_screen('youtube-dl is up-to-date (' + __version__ + ')')
3419 return
d207e7cf
PH
3420 finally:
3421 urlh.close()
5fb3df4a
GV
3422 except (IOError, OSError), err:
3423 sys.exit('ERROR: unable to download latest version')
f9f1e798 3424
5fb3df4a 3425 try:
d207e7cf
PH
3426 outf = open(filename, 'wb')
3427 try:
3428 outf.write(newcontent)
3429 finally:
3430 outf.close()
5fb3df4a
GV
3431 except (IOError, OSError), err:
3432 sys.exit('ERROR: unable to overwrite current version')
4bec29ef 3433
eb6c37da 3434 downloader.to_screen('Updated youtube-dl. Restart youtube-dl to use the new version.')
80066952 3435
4f9f96f6
GV
3436def parseOpts():
3437 # Deferred imports
3438 import getpass
3439 import optparse
e7cf18cb 3440
4f9f96f6
GV
3441 def _format_option_string(option):
3442 ''' ('-o', '--option') -> -o, --option METAVAR'''
80066952 3443
4f9f96f6
GV
3444 opts = []
3445
3446 if option._short_opts: opts.append(option._short_opts[0])
3447 if option._long_opts: opts.append(option._long_opts[0])
3448 if len(opts) > 1: opts.insert(1, ', ')
3449
3450 if option.takes_value(): opts.append(' %s' % option.metavar)
3451
3452 return "".join(opts)
3453
6a4f0a11
GV
3454 def _find_term_columns():
3455 columns = os.environ.get('COLUMNS', None)
2c8d32de
PH
3456 if columns:
3457 return int(columns)
3458
4f2a5e06
PH
3459 try:
3460 sp = subprocess.Popen(['stty', 'size'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
3461 out,err = sp.communicate()
eb0387a8 3462 return int(out.split()[1])
4f2a5e06
PH
3463 except:
3464 pass
2c8d32de 3465 return None
6a4f0a11 3466
51c8e53f
GV
3467 max_width = 80
3468 max_help_position = 80
3469
3470 # No need to wrap help messages if we're on a wide console
6a4f0a11 3471 columns = _find_term_columns()
51c8e53f
GV
3472 if columns: max_width = columns
3473
3474 fmt = optparse.IndentedHelpFormatter(width=max_width, max_help_position=max_help_position)
4f9f96f6
GV
3475 fmt.format_option_strings = _format_option_string
3476
3477 kw = {
3478 'version' : __version__,
3479 'formatter' : fmt,
a2f7e3a5 3480 'usage' : '%prog [options] url [url...]',
4f9f96f6
GV
3481 'conflict_handler' : 'resolve',
3482 }
3483
3484 parser = optparse.OptionParser(**kw)
3485
3486 # option groups
3487 general = optparse.OptionGroup(parser, 'General Options')
20e91e83 3488 selection = optparse.OptionGroup(parser, 'Video Selection')
4f9f96f6
GV
3489 authentication = optparse.OptionGroup(parser, 'Authentication Options')
3490 video_format = optparse.OptionGroup(parser, 'Video Format Options')
3491 postproc = optparse.OptionGroup(parser, 'Post-processing Options')
3492 filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
3493 verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
3494
3495 general.add_option('-h', '--help',
3496 action='help', help='print this help text and exit')
3497 general.add_option('-v', '--version',
3498 action='version', help='print program version and exit')
3499 general.add_option('-U', '--update',
e0e56865 3500 action='store_true', dest='update_self', help='update this program to latest version')
4f9f96f6
GV
3501 general.add_option('-i', '--ignore-errors',
3502 action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
3503 general.add_option('-r', '--rate-limit',
3504 dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)')
3505 general.add_option('-R', '--retries',
3506 dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10)
4f9f96f6
GV
3507 general.add_option('--dump-user-agent',
3508 action='store_true', dest='dump_user_agent',
3509 help='display the current browser identification', default=False)
f3098c4d
PH
3510 general.add_option('--list-extractors',
3511 action='store_true', dest='list_extractors',
3512 help='List all supported extractors and the URLs they would handle', default=False)
4f9f96f6 3513
20e91e83
ABP
3514 selection.add_option('--playlist-start',
3515 dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1)
3516 selection.add_option('--playlist-end',
3517 dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)
3518 selection.add_option('--match-title', dest='matchtitle', metavar='REGEX',help='download only matching titles (regex or caseless sub-string)')
3519 selection.add_option('--reject-title', dest='rejecttitle', metavar='REGEX',help='skip download for matching titles (regex or caseless sub-string)')
3520
4f9f96f6
GV
3521 authentication.add_option('-u', '--username',
3522 dest='username', metavar='USERNAME', help='account username')
3523 authentication.add_option('-p', '--password',
3524 dest='password', metavar='PASSWORD', help='account password')
3525 authentication.add_option('-n', '--netrc',
3526 action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
3527
3528
3529 video_format.add_option('-f', '--format',
3530 action='store', dest='format', metavar='FORMAT', help='video format code')
3531 video_format.add_option('--all-formats',
5260e68f 3532 action='store_const', dest='format', help='download all available video formats', const='all')
4f9f96f6
GV
3533 video_format.add_option('--max-quality',
3534 action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download')
3535
3536
3537 verbosity.add_option('-q', '--quiet',
3538 action='store_true', dest='quiet', help='activates quiet mode', default=False)
3539 verbosity.add_option('-s', '--simulate',
9b4556c4
PH
3540 action='store_true', dest='simulate', help='do not download the video and do not write anything to disk', default=False)
3541 verbosity.add_option('--skip-download',
3542 action='store_true', dest='skip_download', help='do not download the video', default=False)
4f9f96f6
GV
3543 verbosity.add_option('-g', '--get-url',
3544 action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
3545 verbosity.add_option('-e', '--get-title',
3546 action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
3547 verbosity.add_option('--get-thumbnail',
3548 action='store_true', dest='getthumbnail',
3549 help='simulate, quiet but print thumbnail URL', default=False)
3550 verbosity.add_option('--get-description',
3551 action='store_true', dest='getdescription',
3552 help='simulate, quiet but print video description', default=False)
3553 verbosity.add_option('--get-filename',
3554 action='store_true', dest='getfilename',
3555 help='simulate, quiet but print output filename', default=False)
da0db53a
DH
3556 verbosity.add_option('--get-format',
3557 action='store_true', dest='getformat',
3558 help='simulate, quiet but print output format', default=False)
4f9f96f6
GV
3559 verbosity.add_option('--no-progress',
3560 action='store_true', dest='noprogress', help='do not print progress bar', default=False)
3561 verbosity.add_option('--console-title',
3562 action='store_true', dest='consoletitle',
3563 help='display progress in console titlebar', default=False)
3564
3565
3566 filesystem.add_option('-t', '--title',
3567 action='store_true', dest='usetitle', help='use title in file name', default=False)
3568 filesystem.add_option('-l', '--literal',
3569 action='store_true', dest='useliteral', help='use literal title in file name', default=False)
3570 filesystem.add_option('-A', '--auto-number',
3571 action='store_true', dest='autonumber',
3572 help='number downloaded files starting from 00000', default=False)
3573 filesystem.add_option('-o', '--output',
3574 dest='outtmpl', metavar='TEMPLATE', help='output filename template')
3575 filesystem.add_option('-a', '--batch-file',
3576 dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
3577 filesystem.add_option('-w', '--no-overwrites',
3578 action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
3579 filesystem.add_option('-c', '--continue',
c25303c3 3580 action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False)
18bb3d1e
PH
3581 filesystem.add_option('--no-continue',
3582 action='store_false', dest='continue_dl',
3583 help='do not resume partially downloaded files (restart from beginning)')
4f9f96f6
GV
3584 filesystem.add_option('--cookies',
3585 dest='cookiefile', metavar='FILE', help='file to read cookies from and dump cookie jar in')
3586 filesystem.add_option('--no-part',
3587 action='store_true', dest='nopart', help='do not use .part files', default=False)
3588 filesystem.add_option('--no-mtime',
3589 action='store_false', dest='updatetime',
3590 help='do not use the Last-modified header to set the file modification time', default=True)
2c8d32de
PH
3591 filesystem.add_option('--write-description',
3592 action='store_true', dest='writedescription',
3593 help='write video description to a .description file', default=False)
3594 filesystem.add_option('--write-info-json',
3595 action='store_true', dest='writeinfojson',
3596 help='write video metadata to a .info.json file', default=False)
4f9f96f6
GV
3597
3598
3599 postproc.add_option('--extract-audio', action='store_true', dest='extractaudio', default=False,
3600 help='convert video files to audio-only files (requires ffmpeg and ffprobe)')
3601 postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best',
3602 help='"best", "aac" or "mp3"; best by default')
c99dcbd2
PH
3603 postproc.add_option('--audio-quality', metavar='QUALITY', dest='audioquality', default='128K',
3604 help='ffmpeg audio bitrate specification, 128k by default')
3605 postproc.add_option('-k', '--keep-video', action='store_true', dest='keepvideo', default=False,
3606 help='keeps the video file on disk after the post-processing; the video is erased by default')
4f9f96f6
GV
3607
3608
3609 parser.add_option_group(general)
20e91e83 3610 parser.add_option_group(selection)
4f9f96f6
GV
3611 parser.add_option_group(filesystem)
3612 parser.add_option_group(verbosity)
3613 parser.add_option_group(video_format)
3614 parser.add_option_group(authentication)
3615 parser.add_option_group(postproc)
3616
3617 opts, args = parser.parse_args()
3618
3619 return parser, opts, args
3620
f3098c4d
PH
3621def gen_extractors():
3622 """ Return a list of an instance of every supported extractor.
3623 The order does matter; the first extractor matched is the one handling the URL.
3624 """
3625 youtube_ie = YoutubeIE()
3626 google_ie = GoogleIE()
3627 yahoo_ie = YahooIE()
3628 return [
3629 youtube_ie,
3630 MetacafeIE(youtube_ie),
3631 DailymotionIE(),
3632 YoutubePlaylistIE(youtube_ie),
3633 YoutubeUserIE(youtube_ie),
3634 YoutubeSearchIE(youtube_ie),
3635 google_ie,
3636 GoogleSearchIE(google_ie),
3637 PhotobucketIE(),
3638 yahoo_ie,
3639 YahooSearchIE(yahoo_ie),
3640 DepositFilesIE(),
3641 FacebookIE(),
3642 BlipTVIE(),
3643 VimeoIE(),
3644 MyVideoIE(),
3645 ComedyCentralIE(),
3646 EscapistIE(),
3647
3648 GenericIE()
3649 ]
3650
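# --- Illustrative sketch (not part of the original script) ---
# The order returned by gen_extractors() matters: the first extractor whose
# suitable() accepts a URL handles it, with GenericIE as the catch-all at the
# end. A helper (hypothetical name) mirroring how --list-extractors matches
# URLs in main() below:
def _find_extractor(url, extractors=None):
	"""Return the first extractor instance whose suitable() matches url."""
	if extractors is None:
		extractors = gen_extractors()
	for ie in extractors:
		if ie.suitable(url):
			return ie
	return None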
5adcaa43
GV
3651def main():
3652 parser, opts, args = parseOpts()
4f9f96f6 3653
5adcaa43
GV
3654 # Open appropriate CookieJar
3655 if opts.cookiefile is None:
3656 jar = cookielib.CookieJar()
3657 else:
8cc44341 3658 try:
5adcaa43
GV
3659 jar = cookielib.MozillaCookieJar(opts.cookiefile)
3660 if os.path.isfile(opts.cookiefile) and os.access(opts.cookiefile, os.R_OK):
3661 jar.load()
3662 except (IOError, OSError), err:
3663 sys.exit(u'ERROR: unable to open cookie file')
80066952 3664
5adcaa43
GV
3665 # Dump user agent
3666 if opts.dump_user_agent:
3667 print std_headers['User-Agent']
3668 sys.exit(0)
e7cf18cb 3669
5adcaa43
GV
3670 # Batch file verification
3671 batchurls = []
3672 if opts.batchfile is not None:
8cc44341 3673 try:
5adcaa43
GV
3674 if opts.batchfile == '-':
3675 batchfd = sys.stdin
4bec29ef 3676 else:
5adcaa43
GV
3677 batchfd = open(opts.batchfile, 'r')
3678 batchurls = batchfd.readlines()
3679 batchurls = [x.strip() for x in batchurls]
3680 batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)]
3681 except IOError:
3682 sys.exit(u'ERROR: batch file could not be read')
3683 all_urls = batchurls + args
3684
f3098c4d
PH
3685 # General configuration
3686 cookie_processor = urllib2.HTTPCookieProcessor(jar)
3687 opener = urllib2.build_opener(urllib2.ProxyHandler(), cookie_processor, YoutubeDLHandler())
3688 urllib2.install_opener(opener)
3689 socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)
3690
3691 extractors = gen_extractors()
3692
3693 if opts.list_extractors:
3694 for ie in extractors:
3695 print(ie.IE_NAME)
3696 matchedUrls = filter(lambda url: ie.suitable(url), all_urls)
3697 all_urls = filter(lambda url: url not in matchedUrls, all_urls)
3698 for mu in matchedUrls:
3699 print(u' ' + mu)
3700 sys.exit(0)
3701
5adcaa43
GV
3702 # Conflicting, missing and erroneous options
3703 if opts.usenetrc and (opts.username is not None or opts.password is not None):
3704 parser.error(u'using .netrc conflicts with giving username/password')
3705 if opts.password is not None and opts.username is None:
3706 parser.error(u'account username missing')
3707 if opts.outtmpl is not None and (opts.useliteral or opts.usetitle or opts.autonumber):
3708 parser.error(u'using output template conflicts with using title, literal title or auto number')
3709 if opts.usetitle and opts.useliteral:
3710 parser.error(u'using title conflicts with using literal title')
3711 if opts.username is not None and opts.password is None:
3712 opts.password = getpass.getpass(u'Type account password and press return:')
3713 if opts.ratelimit is not None:
3714 numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
3715 if numeric_limit is None:
3716 parser.error(u'invalid rate limit specified')
3717 opts.ratelimit = numeric_limit
3718 if opts.retries is not None:
8cc44341 3719 try:
5adcaa43 3720 opts.retries = long(opts.retries)
8cc44341 3721 except (TypeError, ValueError), err:
5adcaa43
GV
3722 parser.error(u'invalid retry count specified')
3723 try:
2c8d32de 3724 opts.playliststart = int(opts.playliststart)
5adcaa43 3725 if opts.playliststart <= 0:
2c8d32de 3726 raise ValueError(u'Playlist start must be positive')
5adcaa43
GV
3727 except (TypeError, ValueError), err:
3728 parser.error(u'invalid playlist start number specified')
3729 try:
2c8d32de 3730 opts.playlistend = int(opts.playlistend)
5adcaa43 3731 if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart):
2c8d32de 3732 raise ValueError(u'Playlist end must be greater than playlist start')
5adcaa43
GV
3733 except (TypeError, ValueError), err:
3734 parser.error(u'invalid playlist end number specified')
3735 if opts.extractaudio:
3736 if opts.audioformat not in ['best', 'aac', 'mp3']:
3737 parser.error(u'invalid audio format specified')
3738
5adcaa43
GV
3739 # File downloader
3740 fd = FileDownloader({
3741 'usenetrc': opts.usenetrc,
3742 'username': opts.username,
3743 'password': opts.password,
da0db53a 3744 'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat),
5adcaa43
GV
3745 'forceurl': opts.geturl,
3746 'forcetitle': opts.gettitle,
3747 'forcethumbnail': opts.getthumbnail,
3748 'forcedescription': opts.getdescription,
3749 'forcefilename': opts.getfilename,
da0db53a 3750 'forceformat': opts.getformat,
9b4556c4 3751 'simulate': opts.simulate,
da0db53a 3752 'skip_download': (opts.skip_download or opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat),
5adcaa43
GV
3753 'format': opts.format,
3754 'format_limit': opts.format_limit,
3755 'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
3756 or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s')
3757 or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s')
3758 or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s')
3759 or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(stitle)s-%(id)s.%(ext)s')
3760 or (opts.useliteral and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s')
3761 or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
3762 or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
3763 or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
3764 or u'%(id)s.%(ext)s'),
3765 'ignoreerrors': opts.ignoreerrors,
3766 'ratelimit': opts.ratelimit,
3767 'nooverwrites': opts.nooverwrites,
3768 'retries': opts.retries,
3769 'continuedl': opts.continue_dl,
3770 'noprogress': opts.noprogress,
3771 'playliststart': opts.playliststart,
3772 'playlistend': opts.playlistend,
3773 'logtostderr': opts.outtmpl == '-',
3774 'consoletitle': opts.consoletitle,
3775 'nopart': opts.nopart,
3776 'updatetime': opts.updatetime,
2c8d32de
PH
3777 'writedescription': opts.writedescription,
3778 'writeinfojson': opts.writeinfojson,
20e91e83
ABP
3779 'matchtitle': opts.matchtitle,
3780 'rejecttitle': opts.rejecttitle,
5adcaa43 3781 })
8c5dc3ad
PH
3782 for extractor in extractors:
3783 fd.add_info_extractor(extractor)
5adcaa43
GV
3784
3785 # PostProcessors
3786 if opts.extractaudio:
c99dcbd2 3787 fd.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, keepvideo=opts.keepvideo))
5adcaa43
GV
3788
3789 # Update version
3790 if opts.update_self:
3791 updateSelf(fd, sys.argv[0])
3792
3793 # Maybe do nothing
3794 if len(all_urls) < 1:
3795 if not opts.update_self:
3796 parser.error(u'you must provide at least one URL')
3797 else:
3798 sys.exit()
3799 retcode = fd.download(all_urls)
80066952 3800
5adcaa43
GV
3801 # Dump cookie jar if requested
3802 if opts.cookiefile is not None:
3803 try:
3804 jar.save()
3805 except (IOError, OSError), err:
3806 sys.exit(u'ERROR: unable to save cookie jar')
80066952 3807
5adcaa43 3808 sys.exit(retcode)
80066952 3809
4fa74b52 3810
5adcaa43
GV
3811if __name__ == '__main__':
3812 try:
3813 main()
e5bf0f55
RG
3814 except DownloadError:
3815 sys.exit(1)
3816 except SameFileError:
76a7f364 3817 sys.exit(u'ERROR: fixed output name but more than one file to download')
4fa74b52 3818 except KeyboardInterrupt:
76a7f364 3819 sys.exit(u'\nERROR: Interrupted by user')
e9cb9c28
GV
3820
3821# vim: set ts=4 sw=4 sts=4 noet ai si filetype=python: