# ]> jfr.im git - yt-dlp.git/blob - youtube-dl
# 080490ded626ac36b3a9a4f76a83b02f294308d6
# [yt-dlp.git] / youtube-dl
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Author: Ricardo Garcia Gonzalez
# Author: Danny Colligan
# Author: Benjamin Johnson
# Author: Vasyl' Vavrychuk
# Author: Witold Baryluk
# Author: Paweł Paprota
# Author: Gergely Imreh
# License: Public domain code
11 import cookielib
12 import ctypes
13 import datetime
14 import email.utils
15 import gzip
16 import htmlentitydefs
17 import httplib
18 import locale
19 import math
20 import netrc
21 import os
22 import os.path
23 import re
24 import socket
25 import string
26 import StringIO
27 import subprocess
28 import sys
29 import time
30 import urllib
31 import urllib2
32 import zlib
33
34 # parse_qs was moved from the cgi module to the urlparse module recently.
35 try:
36 from urlparse import parse_qs
37 except ImportError:
38 from cgi import parse_qs
39
40 std_headers = {
41 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:2.0b11) Gecko/20100101 Firefox/4.0b11',
42 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
43 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
44 'Accept-Encoding': 'gzip, deflate',
45 'Accept-Language': 'en-us,en;q=0.5',
46 }
47
48 simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii')
49
50 def preferredencoding():
51 """Get preferred encoding.
52
53 Returns the best encoding scheme for the system, based on
54 locale.getpreferredencoding() and some further tweaks.
55 """
56 def yield_preferredencoding():
57 try:
58 pref = locale.getpreferredencoding()
59 u'TEST'.encode(pref)
60 except:
61 pref = 'UTF-8'
62 while True:
63 yield pref
64 return yield_preferredencoding().next()
65
66 def htmlentity_transform(matchobj):
67 """Transforms an HTML entity to a Unicode character.
68
69 This function receives a match object and is intended to be used with
70 the re.sub() function.
71 """
72 entity = matchobj.group(1)
73
74 # Known non-numeric HTML entity
75 if entity in htmlentitydefs.name2codepoint:
76 return unichr(htmlentitydefs.name2codepoint[entity])
77
78 # Unicode character
79 mobj = re.match(ur'(?u)#(x?\d+)', entity)
80 if mobj is not None:
81 numstr = mobj.group(1)
82 if numstr.startswith(u'x'):
83 base = 16
84 numstr = u'0%s' % numstr
85 else:
86 base = 10
87 return unichr(long(numstr, base))
88
89 # Unknown entity in name, return its literal representation
90 return (u'&%s;' % entity)
91
92 def sanitize_title(utitle):
93 """Sanitizes a video title so it could be used as part of a filename."""
94 utitle = re.sub(ur'(?u)&(.+?);', htmlentity_transform, utitle)
95 return utitle.replace(unicode(os.sep), u'%')
96
97 def sanitize_open(filename, open_mode):
98 """Try to open the given filename, and slightly tweak it if this fails.
99
100 Attempts to open the given filename. If this fails, it tries to change
101 the filename slightly, step by step, until it's either able to open it
102 or it fails and raises a final exception, like the standard open()
103 function.
104
105 It returns the tuple (stream, definitive_file_name).
106 """
107 try:
108 if filename == u'-':
109 if sys.platform == 'win32':
110 import msvcrt
111 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
112 return (sys.stdout, filename)
113 stream = open(filename, open_mode)
114 return (stream, filename)
115 except (IOError, OSError), err:
116 # In case of error, try to remove win32 forbidden chars
117 filename = re.sub(ur'[/<>:"\|\?\*]', u'#', filename)
118
119 # An exception here should be caught in the caller
120 stream = open(filename, open_mode)
121 return (stream, filename)
122
123 def timeconvert(timestr):
124 """Convert RFC 2822 defined time string into system timestamp"""
125 timestamp = None
126 timetuple = email.utils.parsedate_tz(timestr)
127 if timetuple is not None:
128 timestamp = email.utils.mktime_tz(timetuple)
129 return timestamp
130
131 class DownloadError(Exception):
132 """Download Error exception.
133
134 This exception may be thrown by FileDownloader objects if they are not
135 configured to continue on errors. They will contain the appropriate
136 error message.
137 """
138 pass
139
140 class SameFileError(Exception):
141 """Same File exception.
142
143 This exception will be thrown by FileDownloader objects if they detect
144 multiple files would have to be downloaded to the same file on disk.
145 """
146 pass
147
148 class PostProcessingError(Exception):
149 """Post Processing exception.
150
151 This exception may be raised by PostProcessor's .run() method to
152 indicate an error in the postprocessing task.
153 """
154 pass
155
156 class UnavailableVideoError(Exception):
157 """Unavailable Format exception.
158
159 This exception will be thrown when a video is requested
160 in a format that is not available for that video.
161 """
162 pass
163
164 class ContentTooShortError(Exception):
165 """Content Too Short exception.
166
167 This exception may be raised by FileDownloader objects when a file they
168 download is too small for what the server announced first, indicating
169 the connection was probably interrupted.
170 """
171 # Both in bytes
172 downloaded = None
173 expected = None
174
175 def __init__(self, downloaded, expected):
176 self.downloaded = downloaded
177 self.expected = expected
178
179 class YoutubeDLHandler(urllib2.HTTPHandler):
180 """Handler for HTTP requests and responses.
181
182 This class, when installed with an OpenerDirector, automatically adds
183 the standard headers to every HTTP request and handles gzipped and
184 deflated responses from web servers. If compression is to be avoided in
185 a particular request, the original request in the program code only has
186 to include the HTTP header "Youtubedl-No-Compression", which will be
187 removed before making the real request.
188
189 Part of this code was copied from:
190
191 http://techknack.net/python-urllib2-handlers/
192
193 Andrew Rowls, the author of that code, agreed to release it to the
194 public domain.
195 """
196
197 @staticmethod
198 def deflate(data):
199 try:
200 return zlib.decompress(data, -zlib.MAX_WBITS)
201 except zlib.error:
202 return zlib.decompress(data)
203
204 @staticmethod
205 def addinfourl_wrapper(stream, headers, url, code):
206 if hasattr(urllib2.addinfourl, 'getcode'):
207 return urllib2.addinfourl(stream, headers, url, code)
208 ret = urllib2.addinfourl(stream, headers, url)
209 ret.code = code
210 return ret
211
212 def http_request(self, req):
213 for h in std_headers:
214 if h in req.headers:
215 del req.headers[h]
216 req.add_header(h, std_headers[h])
217 if 'Youtubedl-no-compression' in req.headers:
218 if 'Accept-encoding' in req.headers:
219 del req.headers['Accept-encoding']
220 del req.headers['Youtubedl-no-compression']
221 return req
222
223 def http_response(self, req, resp):
224 old_resp = resp
225 # gzip
226 if resp.headers.get('Content-encoding', '') == 'gzip':
227 gz = gzip.GzipFile(fileobj=StringIO.StringIO(resp.read()), mode='r')
228 resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
229 resp.msg = old_resp.msg
230 # deflate
231 if resp.headers.get('Content-encoding', '') == 'deflate':
232 gz = StringIO.StringIO(self.deflate(resp.read()))
233 resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
234 resp.msg = old_resp.msg
235 return resp
236
237 class FileDownloader(object):
238 """File Downloader class.
239
240 File downloader objects are the ones responsible of downloading the
241 actual video file and writing it to disk if the user has requested
242 it, among some other tasks. In most cases there should be one per
243 program. As, given a video URL, the downloader doesn't know how to
244 extract all the needed information, task that InfoExtractors do, it
245 has to pass the URL to one of them.
246
247 For this, file downloader objects have a method that allows
248 InfoExtractors to be registered in a given order. When it is passed
249 a URL, the file downloader handles it to the first InfoExtractor it
250 finds that reports being able to handle it. The InfoExtractor extracts
251 all the information about the video or videos the URL refers to, and
252 asks the FileDownloader to process the video information, possibly
253 downloading the video.
254
255 File downloaders accept a lot of parameters. In order not to saturate
256 the object constructor with arguments, it receives a dictionary of
257 options instead. These options are available through the params
258 attribute for the InfoExtractors to use. The FileDownloader also
259 registers itself as the downloader in charge for the InfoExtractors
260 that are added to it, so this is a "mutual registration".
261
262 Available options:
263
264 username: Username for authentication purposes.
265 password: Password for authentication purposes.
266 usenetrc: Use netrc for authentication instead.
267 quiet: Do not print messages to stdout.
268 forceurl: Force printing final URL.
269 forcetitle: Force printing title.
270 forcethumbnail: Force printing thumbnail URL.
271 forcedescription: Force printing description.
272 forcefilename: Force printing final filename.
273 simulate: Do not download the video files.
274 format: Video format code.
275 format_limit: Highest quality format to try.
276 outtmpl: Template for output names.
277 ignoreerrors: Do not stop on download errors.
278 ratelimit: Download speed limit, in bytes/sec.
279 nooverwrites: Prevent overwriting files.
280 retries: Number of times to retry for HTTP error 5xx
281 continuedl: Try to continue downloads if possible.
282 noprogress: Do not print the progress bar.
283 playliststart: Playlist item to start at.
284 playlistend: Playlist item to end at.
285 logtostderr: Log messages to stderr instead of stdout.
286 consoletitle: Display progress in console window's titlebar.
287 nopart: Do not use temporary .part files.
288 updatetime: Use the Last-modified header to set output file timestamps.
289 """
290
291 params = None
292 _ies = []
293 _pps = []
294 _download_retcode = None
295 _num_downloads = None
296 _screen_file = None
297
298 def __init__(self, params):
299 """Create a FileDownloader object with the given options."""
300 self._ies = []
301 self._pps = []
302 self._download_retcode = 0
303 self._num_downloads = 0
304 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
305 self.params = params
306
307 @staticmethod
308 def pmkdir(filename):
309 """Create directory components in filename. Similar to Unix "mkdir -p"."""
310 components = filename.split(os.sep)
311 aggregate = [os.sep.join(components[0:x]) for x in xrange(1, len(components))]
312 aggregate = ['%s%s' % (x, os.sep) for x in aggregate] # Finish names with separator
313 for dir in aggregate:
314 if not os.path.exists(dir):
315 os.mkdir(dir)
316
317 @staticmethod
318 def format_bytes(bytes):
319 if bytes is None:
320 return 'N/A'
321 if type(bytes) is str:
322 bytes = float(bytes)
323 if bytes == 0.0:
324 exponent = 0
325 else:
326 exponent = long(math.log(bytes, 1024.0))
327 suffix = 'bkMGTPEZY'[exponent]
328 converted = float(bytes) / float(1024**exponent)
329 return '%.2f%s' % (converted, suffix)
330
331 @staticmethod
332 def calc_percent(byte_counter, data_len):
333 if data_len is None:
334 return '---.-%'
335 return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))
336
337 @staticmethod
338 def calc_eta(start, now, total, current):
339 if total is None:
340 return '--:--'
341 dif = now - start
342 if current == 0 or dif < 0.001: # One millisecond
343 return '--:--'
344 rate = float(current) / dif
345 eta = long((float(total) - float(current)) / rate)
346 (eta_mins, eta_secs) = divmod(eta, 60)
347 if eta_mins > 99:
348 return '--:--'
349 return '%02d:%02d' % (eta_mins, eta_secs)
350
351 @staticmethod
352 def calc_speed(start, now, bytes):
353 dif = now - start
354 if bytes == 0 or dif < 0.001: # One millisecond
355 return '%10s' % '---b/s'
356 return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))
357
358 @staticmethod
359 def best_block_size(elapsed_time, bytes):
360 new_min = max(bytes / 2.0, 1.0)
361 new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
362 if elapsed_time < 0.001:
363 return long(new_max)
364 rate = bytes / elapsed_time
365 if rate > new_max:
366 return long(new_max)
367 if rate < new_min:
368 return long(new_min)
369 return long(rate)
370
371 @staticmethod
372 def parse_bytes(bytestr):
373 """Parse a string indicating a byte quantity into a long integer."""
374 matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
375 if matchobj is None:
376 return None
377 number = float(matchobj.group(1))
378 multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
379 return long(round(number * multiplier))
380
381 def add_info_extractor(self, ie):
382 """Add an InfoExtractor object to the end of the list."""
383 self._ies.append(ie)
384 ie.set_downloader(self)
385
386 def add_post_processor(self, pp):
387 """Add a PostProcessor object to the end of the chain."""
388 self._pps.append(pp)
389 pp.set_downloader(self)
390
391 def to_screen(self, message, skip_eol=False, ignore_encoding_errors=False):
392 """Print message to stdout if not in quiet mode."""
393 try:
394 if not self.params.get('quiet', False):
395 terminator = [u'\n', u''][skip_eol]
396 print >>self._screen_file, (u'%s%s' % (message, terminator)).encode(preferredencoding()),
397 self._screen_file.flush()
398 except (UnicodeEncodeError), err:
399 if not ignore_encoding_errors:
400 raise
401
402 def to_stderr(self, message):
403 """Print message to stderr."""
404 print >>sys.stderr, message.encode(preferredencoding())
405
406 def to_cons_title(self, message):
407 """Set console/terminal window title to message."""
408 if not self.params.get('consoletitle', False):
409 return
410 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
411 # c_wchar_p() might not be necessary if `message` is
412 # already of type unicode()
413 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
414 elif 'TERM' in os.environ:
415 sys.stderr.write('\033]0;%s\007' % message.encode(preferredencoding()))
416
417 def fixed_template(self):
418 """Checks if the output template is fixed."""
419 return (re.search(ur'(?u)%\(.+?\)s', self.params['outtmpl']) is None)
420
421 def trouble(self, message=None):
422 """Determine action to take when a download problem appears.
423
424 Depending on if the downloader has been configured to ignore
425 download errors or not, this method may throw an exception or
426 not when errors are found, after printing the message.
427 """
428 if message is not None:
429 self.to_stderr(message)
430 if not self.params.get('ignoreerrors', False):
431 raise DownloadError(message)
432 self._download_retcode = 1
433
434 def slow_down(self, start_time, byte_counter):
435 """Sleep if the download speed is over the rate limit."""
436 rate_limit = self.params.get('ratelimit', None)
437 if rate_limit is None or byte_counter == 0:
438 return
439 now = time.time()
440 elapsed = now - start_time
441 if elapsed <= 0.0:
442 return
443 speed = float(byte_counter) / elapsed
444 if speed > rate_limit:
445 time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
446
447 def temp_name(self, filename):
448 """Returns a temporary filename for the given filename."""
449 if self.params.get('nopart', False) or filename == u'-' or \
450 (os.path.exists(filename) and not os.path.isfile(filename)):
451 return filename
452 return filename + u'.part'
453
454 def undo_temp_name(self, filename):
455 if filename.endswith(u'.part'):
456 return filename[:-len(u'.part')]
457 return filename
458
459 def try_rename(self, old_filename, new_filename):
460 try:
461 if old_filename == new_filename:
462 return
463 os.rename(old_filename, new_filename)
464 except (IOError, OSError), err:
465 self.trouble(u'ERROR: unable to rename file')
466
467 def try_utime(self, filename, last_modified_hdr):
468 """Try to set the last-modified time of the given file."""
469 if last_modified_hdr is None:
470 return
471 if not os.path.isfile(filename):
472 return
473 timestr = last_modified_hdr
474 if timestr is None:
475 return
476 filetime = timeconvert(timestr)
477 if filetime is None:
478 return
479 try:
480 os.utime(filename,(time.time(), filetime))
481 except:
482 pass
483
484 def report_destination(self, filename):
485 """Report destination filename."""
486 self.to_screen(u'[download] Destination: %s' % filename, ignore_encoding_errors=True)
487
488 def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
489 """Report download progress."""
490 if self.params.get('noprogress', False):
491 return
492 self.to_screen(u'\r[download] %s of %s at %s ETA %s' %
493 (percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
494 self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
495 (percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
496
497 def report_resuming_byte(self, resume_len):
498 """Report attempt to resume at given byte."""
499 self.to_screen(u'[download] Resuming download at byte %s' % resume_len)
500
501 def report_retry(self, count, retries):
502 """Report retry in case of HTTP error 5xx"""
503 self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))
504
505 def report_file_already_downloaded(self, file_name):
506 """Report file has already been fully downloaded."""
507 try:
508 self.to_screen(u'[download] %s has already been downloaded' % file_name)
509 except (UnicodeEncodeError), err:
510 self.to_screen(u'[download] The file has already been downloaded')
511
512 def report_unable_to_resume(self):
513 """Report it was impossible to resume download."""
514 self.to_screen(u'[download] Unable to resume')
515
516 def report_finish(self):
517 """Report download finished."""
518 if self.params.get('noprogress', False):
519 self.to_screen(u'[download] Download completed')
520 else:
521 self.to_screen(u'')
522
523 def increment_downloads(self):
524 """Increment the ordinal that assigns a number to each file."""
525 self._num_downloads += 1
526
527 def prepare_filename(self, info_dict):
528 """Generate the output filename."""
529 try:
530 template_dict = dict(info_dict)
531 template_dict['epoch'] = unicode(long(time.time()))
532 template_dict['autonumber'] = unicode('%05d' % self._num_downloads)
533 filename = self.params['outtmpl'] % template_dict
534 return filename
535 except (ValueError, KeyError), err:
536 self.trouble(u'ERROR: invalid system charset or erroneous output template')
537 return None
538
539 def process_info(self, info_dict):
540 """Process a single dictionary returned by an InfoExtractor."""
541 filename = self.prepare_filename(info_dict)
542 # Do nothing else if in simulate mode
543 if self.params.get('simulate', False):
544 # Forced printings
545 if self.params.get('forcetitle', False):
546 print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')
547 if self.params.get('forceurl', False):
548 print info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace')
549 if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
550 print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace')
551 if self.params.get('forcedescription', False) and 'description' in info_dict:
552 print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace')
553 if self.params.get('forcefilename', False) and filename is not None:
554 print filename.encode(preferredencoding(), 'xmlcharrefreplace')
555
556 return
557
558 if filename is None:
559 return
560 if self.params.get('nooverwrites', False) and os.path.exists(filename):
561 self.to_stderr(u'WARNING: file exists and will be skipped')
562 return
563
564 try:
565 self.pmkdir(filename)
566 except (OSError, IOError), err:
567 self.trouble(u'ERROR: unable to create directories: %s' % str(err))
568 return
569
570 try:
571 success = self._do_download(filename, info_dict['url'].encode('utf-8'), info_dict.get('player_url', None))
572 except (OSError, IOError), err:
573 raise UnavailableVideoError
574 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
575 self.trouble(u'ERROR: unable to download video data: %s' % str(err))
576 return
577 except (ContentTooShortError, ), err:
578 self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
579 return
580
581 if success:
582 try:
583 self.post_process(filename, info_dict)
584 except (PostProcessingError), err:
585 self.trouble(u'ERROR: postprocessing: %s' % str(err))
586 return
587
588 def download(self, url_list):
589 """Download a given list of URLs."""
590 if len(url_list) > 1 and self.fixed_template():
591 raise SameFileError(self.params['outtmpl'])
592
593 for url in url_list:
594 suitable_found = False
595 for ie in self._ies:
596 # Go to next InfoExtractor if not suitable
597 if not ie.suitable(url):
598 continue
599
600 # Suitable InfoExtractor found
601 suitable_found = True
602
603 # Extract information from URL and process it
604 ie.extract(url)
605
606 # Suitable InfoExtractor had been found; go to next URL
607 break
608
609 if not suitable_found:
610 self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url)
611
612 return self._download_retcode
613
614 def post_process(self, filename, ie_info):
615 """Run the postprocessing chain on the given file."""
616 info = dict(ie_info)
617 info['filepath'] = filename
618 for pp in self._pps:
619 info = pp.run(info)
620 if info is None:
621 break
622
623 def _download_with_rtmpdump(self, filename, url, player_url):
624 self.report_destination(filename)
625 tmpfilename = self.temp_name(filename)
626
627 # Check for rtmpdump first
628 try:
629 subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
630 except (OSError, IOError):
631 self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
632 return False
633
634 # Download using rtmpdump. rtmpdump returns exit code 2 when
635 # the connection was interrumpted and resuming appears to be
636 # possible. This is part of rtmpdump's normal usage, AFAIK.
637 basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename]
638 retval = subprocess.call(basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)])
639 while retval == 2 or retval == 1:
640 prevsize = os.path.getsize(tmpfilename)
641 self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
642 time.sleep(5.0) # This seems to be needed
643 retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
644 cursize = os.path.getsize(tmpfilename)
645 if prevsize == cursize and retval == 1:
646 break
647 if retval == 0:
648 self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(tmpfilename))
649 self.try_rename(tmpfilename, filename)
650 return True
651 else:
652 self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval)
653 return False
654
655 def _do_download(self, filename, url, player_url):
656 # Check file already present
657 if self.params.get('continuedl', False) and os.path.isfile(filename) and not self.params.get('nopart', False):
658 self.report_file_already_downloaded(filename)
659 return True
660
661 # Attempt to download using rtmpdump
662 if url.startswith('rtmp'):
663 return self._download_with_rtmpdump(filename, url, player_url)
664
665 tmpfilename = self.temp_name(filename)
666 stream = None
667 open_mode = 'wb'
668
669 # Do not include the Accept-Encoding header
670 headers = {'Youtubedl-no-compression': 'True'}
671 basic_request = urllib2.Request(url, None, headers)
672 request = urllib2.Request(url, None, headers)
673
674 # Establish possible resume length
675 if os.path.isfile(tmpfilename):
676 resume_len = os.path.getsize(tmpfilename)
677 else:
678 resume_len = 0
679
680 # Request parameters in case of being able to resume
681 if self.params.get('continuedl', False) and resume_len != 0:
682 self.report_resuming_byte(resume_len)
683 request.add_header('Range','bytes=%d-' % resume_len)
684 open_mode = 'ab'
685
686 count = 0
687 retries = self.params.get('retries', 0)
688 while count <= retries:
689 # Establish connection
690 try:
691 data = urllib2.urlopen(request)
692 break
693 except (urllib2.HTTPError, ), err:
694 if (err.code < 500 or err.code >= 600) and err.code != 416:
695 # Unexpected HTTP error
696 raise
697 elif err.code == 416:
698 # Unable to resume (requested range not satisfiable)
699 try:
700 # Open the connection again without the range header
701 data = urllib2.urlopen(basic_request)
702 content_length = data.info()['Content-Length']
703 except (urllib2.HTTPError, ), err:
704 if err.code < 500 or err.code >= 600:
705 raise
706 else:
707 # Examine the reported length
708 if (content_length is not None and
709 (resume_len - 100 < long(content_length) < resume_len + 100)):
710 # The file had already been fully downloaded.
711 # Explanation to the above condition: in issue #175 it was revealed that
712 # YouTube sometimes adds or removes a few bytes from the end of the file,
713 # changing the file size slightly and causing problems for some users. So
714 # I decided to implement a suggested change and consider the file
715 # completely downloaded if the file size differs less than 100 bytes from
716 # the one in the hard drive.
717 self.report_file_already_downloaded(filename)
718 self.try_rename(tmpfilename, filename)
719 return True
720 else:
721 # The length does not match, we start the download over
722 self.report_unable_to_resume()
723 open_mode = 'wb'
724 break
725 # Retry
726 count += 1
727 if count <= retries:
728 self.report_retry(count, retries)
729
730 if count > retries:
731 self.trouble(u'ERROR: giving up after %s retries' % retries)
732 return False
733
734 data_len = data.info().get('Content-length', None)
735 if data_len is not None:
736 data_len = long(data_len) + resume_len
737 data_len_str = self.format_bytes(data_len)
738 byte_counter = 0 + resume_len
739 block_size = 1024
740 start = time.time()
741 while True:
742 # Download and write
743 before = time.time()
744 data_block = data.read(block_size)
745 after = time.time()
746 if len(data_block) == 0:
747 break
748 byte_counter += len(data_block)
749
750 # Open file just in time
751 if stream is None:
752 try:
753 (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
754 filename = self.undo_temp_name(tmpfilename)
755 self.report_destination(filename)
756 except (OSError, IOError), err:
757 self.trouble(u'ERROR: unable to open for writing: %s' % str(err))
758 return False
759 try:
760 stream.write(data_block)
761 except (IOError, OSError), err:
762 self.trouble(u'\nERROR: unable to write data: %s' % str(err))
763 return False
764 block_size = self.best_block_size(after - before, len(data_block))
765
766 # Progress message
767 percent_str = self.calc_percent(byte_counter, data_len)
768 eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
769 speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len)
770 self.report_progress(percent_str, data_len_str, speed_str, eta_str)
771
772 # Apply rate limit
773 self.slow_down(start, byte_counter - resume_len)
774
775 stream.close()
776 self.report_finish()
777 if data_len is not None and byte_counter != data_len:
778 raise ContentTooShortError(byte_counter, long(data_len))
779 self.try_rename(tmpfilename, filename)
780
781 # Update file modification time
782 if self.params.get('updatetime', True):
783 self.try_utime(filename, data.info().get('last-modified', None))
784
785 return True
786
787 class InfoExtractor(object):
788 """Information Extractor class.
789
790 Information extractors are the classes that, given a URL, extract
791 information from the video (or videos) the URL refers to. This
792 information includes the real video URL, the video title and simplified
793 title, author and others. The information is stored in a dictionary
794 which is then passed to the FileDownloader. The FileDownloader
795 processes this information possibly downloading the video to the file
796 system, among other possible outcomes. The dictionaries must include
797 the following fields:
798
799 id: Video identifier.
800 url: Final video URL.
801 uploader: Nickname of the video uploader.
802 title: Literal title.
803 stitle: Simplified title.
804 ext: Video filename extension.
805 format: Video format.
806 player_url: SWF Player URL (may be None).
807
808 The following fields are optional. Their primary purpose is to allow
809 youtube-dl to serve as the backend for a video search function, such
810 as the one in youtube2mp3. They are only used when their respective
811 forced printing functions are called:
812
813 thumbnail: Full URL to a video thumbnail image.
814 description: One-line video description.
815
816 Subclasses of this one should re-define the _real_initialize() and
817 _real_extract() methods, as well as the suitable() static method.
818 Probably, they should also be instantiated and added to the main
819 downloader.
820 """
821
822 _ready = False
823 _downloader = None
824
825 def __init__(self, downloader=None):
826 """Constructor. Receives an optional downloader."""
827 self._ready = False
828 self.set_downloader(downloader)
829
830 @staticmethod
831 def suitable(url):
832 """Receives a URL and returns True if suitable for this IE."""
833 return False
834
835 def initialize(self):
836 """Initializes an instance (authentication, etc)."""
837 if not self._ready:
838 self._real_initialize()
839 self._ready = True
840
841 def extract(self, url):
842 """Extracts URL information and returns it in list of dicts."""
843 self.initialize()
844 return self._real_extract(url)
845
846 def set_downloader(self, downloader):
847 """Sets the downloader for this IE."""
848 self._downloader = downloader
849
850 def _real_initialize(self):
851 """Real initialization process. Redefine in subclasses."""
852 pass
853
854 def _real_extract(self, url):
855 """Real extraction process. Redefine in subclasses."""
856 pass
857
class YoutubeIE(InfoExtractor):
    """Information extractor for youtube.com."""

    # Matches bare video IDs as well as youtu.be/, /v/, /embed/, /e/ and
    # watch?v= URL forms; the video ID is captured in group 2.
    _VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?:(?:(?:v|embed|e)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$'
    _LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
    _LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en'
    _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
    _NETRC_MACHINE = 'youtube'
    # Listed in order of quality (best first)
    _available_formats = ['38', '37', '22', '45', '35', '34', '43', '18', '6', '5', '17', '13']
    # Maps a format code to its container extension; codes not listed here
    # fall back to 'flv' in _real_extract.
    _video_extensions = {
        '13': '3gp',
        '17': 'mp4',
        '18': 'mp4',
        '22': 'mp4',
        '37': 'mp4',
        '38': 'video', # You actually don't know if this will be MOV, AVI or whatever
        '43': 'webm',
        '45': 'webm',
    }

    @staticmethod
    def suitable(url):
        """Return True if this IE can handle the given URL."""
        return (re.match(YoutubeIE._VALID_URL, url) is not None)

    def report_lang(self):
        """Report attempt to set language."""
        self._downloader.to_screen(u'[youtube] Setting language')

    def report_login(self):
        """Report attempt to log in."""
        self._downloader.to_screen(u'[youtube] Logging in')

    def report_age_confirmation(self):
        """Report attempt to confirm age."""
        self._downloader.to_screen(u'[youtube] Confirming age')

    def report_video_webpage_download(self, video_id):
        """Report attempt to download video webpage."""
        self._downloader.to_screen(u'[youtube] %s: Downloading video webpage' % video_id)

    def report_video_info_webpage_download(self, video_id):
        """Report attempt to download video info webpage."""
        self._downloader.to_screen(u'[youtube] %s: Downloading video info webpage' % video_id)

    def report_information_extraction(self, video_id):
        """Report attempt to extract video information."""
        self._downloader.to_screen(u'[youtube] %s: Extracting video information' % video_id)

    def report_unavailable_format(self, video_id, format):
        """Report that the requested format is not available for the video."""
        self._downloader.to_screen(u'[youtube] %s: Format %s not available' % (video_id, format))

    def report_rtmp_download(self):
        """Indicate the download will use the RTMP protocol."""
        self._downloader.to_screen(u'[youtube] RTMP download detected')

    def _real_initialize(self):
        """Set the site language and, if credentials are available, log in
        and confirm age.  Failures are reported as warnings and abort the
        initialization early without raising."""
        if self._downloader is None:
            return

        username = None
        password = None
        downloader_params = self._downloader.params

        # Attempt to use provided username and password or .netrc data
        if downloader_params.get('username', None) is not None:
            username = downloader_params['username']
            password = downloader_params['password']
        elif downloader_params.get('usenetrc', False):
            try:
                info = netrc.netrc().authenticators(self._NETRC_MACHINE)
                if info is not None:
                    username = info[0]
                    password = info[2]
                else:
                    # Raised and caught below so a missing machine entry is
                    # reported the same way as a parse error.
                    raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
            except (IOError, netrc.NetrcParseError), err:
                self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
                return

        # Set language
        request = urllib2.Request(self._LANG_URL)
        try:
            self.report_lang()
            urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err))
            return

        # No authentication to be performed
        if username is None:
            return

        # Log in
        login_form = {
            'current_form': 'loginForm',
            'next': '/',
            'action_login': 'Log In',
            'username': username,
            'password': password,
        }
        request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form))
        try:
            self.report_login()
            login_results = urllib2.urlopen(request).read()
            # If the login form is still present, the credentials were rejected
            if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
                self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
                return
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
            return

        # Confirm age
        age_form = {
            'next_url': '/',
            'action_confirm': 'Confirm',
        }
        request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form))
        try:
            self.report_age_confirmation()
            age_results = urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
            return

    def _real_extract(self, url):
        """Download the watch page and get_video_info data for the video in
        *url*, pick the format(s) to download, and hand the resulting
        information dict(s) to the downloader via process_info()."""
        # Extract video id from URL
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
            return
        video_id = mobj.group(2)

        # Get video webpage
        self.report_video_webpage_download(video_id)
        # NOTE(review): the literal '&amp;' before has_verified looks like an
        # HTML-escape slip, but it is preserved here byte-for-byte.
        request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en&amp;has_verified=1' % video_id)
        try:
            video_webpage = urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
            return

        # Attempt to extract SWF player URL
        mobj = re.search(r'swfConfig.*?"(http:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
        if mobj is not None:
            # Undo the JSON-style backslash escaping in the matched URL
            player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
        else:
            player_url = None

        # Get video info
        self.report_video_info_webpage_download(video_id)
        # Try several 'el' query variants in turn until one of them returns a
        # response containing a 'token' parameter.
        for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
            video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
                    % (video_id, el_type))
            request = urllib2.Request(video_info_url)
            try:
                video_info_webpage = urllib2.urlopen(request).read()
                video_info = parse_qs(video_info_webpage)
                if 'token' in video_info:
                    break
            except (urllib2.URLError, httplib.HTTPException, socket.error), err:
                self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
                return
        if 'token' not in video_info:
            if 'reason' in video_info:
                self._downloader.trouble(u'ERROR: YouTube said: %s' % video_info['reason'][0].decode('utf-8'))
            else:
                self._downloader.trouble(u'ERROR: "token" parameter not in video info for unknown reason')
            return

        # Start extracting information
        self.report_information_extraction(video_id)

        # uploader
        if 'author' not in video_info:
            self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
            return
        video_uploader = urllib.unquote_plus(video_info['author'][0])

        # title
        if 'title' not in video_info:
            self._downloader.trouble(u'ERROR: unable to extract video title')
            return
        video_title = urllib.unquote_plus(video_info['title'][0])
        video_title = video_title.decode('utf-8')
        video_title = sanitize_title(video_title)

        # simplified title: collapse every run of non-alphanumeric characters
        # to a single underscore and trim leading/trailing underscores
        simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
        simple_title = simple_title.strip(ur'_')

        # thumbnail image
        if 'thumbnail_url' not in video_info:
            self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
            video_thumbnail = ''
        else: # don't panic if we can't find it
            video_thumbnail = urllib.unquote_plus(video_info['thumbnail_url'][0])

        # upload date
        upload_date = u'NA'
        mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
        if mobj is not None:
            # Normalize separators to spaces before trying the known formats.
            # After the first successful strptime the value is already in
            # YYYYMMDD form, so the remaining attempts fail and are ignored.
            upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
            format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y']
            for expression in format_expressions:
                try:
                    upload_date = datetime.datetime.strptime(upload_date, expression).strftime('%Y%m%d')
                except:
                    pass

        # description (only fetched when the user asked to print it)
        video_description = 'No description available.'
        if self._downloader.params.get('forcedescription', False):
            mobj = re.search(r'<meta name="description" content="(.*)"(?:\s*/)?>', video_webpage)
            if mobj is not None:
                video_description = mobj.group(1)

        # token
        video_token = urllib.unquote_plus(video_info['token'][0])

        # Decide which formats to download
        req_format = self._downloader.params.get('format', None)

        if 'fmt_url_map' in video_info and len(video_info['fmt_url_map']) >= 1 and ',' in video_info['fmt_url_map'][0]:
            # fmt_url_map is a comma-separated list of 'format|url' pairs
            url_map = dict(tuple(pair.split('|')) for pair in video_info['fmt_url_map'][0].split(','))
            format_limit = self._downloader.params.get('format_limit', None)
            if format_limit is not None and format_limit in self._available_formats:
                # Only consider formats at or below the requested quality cap
                format_list = self._available_formats[self._available_formats.index(format_limit):]
            else:
                format_list = self._available_formats
            existing_formats = [x for x in format_list if x in url_map]
            if len(existing_formats) == 0:
                self._downloader.trouble(u'ERROR: no known formats available for video')
                return
            if req_format is None:
                video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
            elif req_format == '-1':
                video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
            else:
                # Specific format
                if req_format not in url_map:
                    self._downloader.trouble(u'ERROR: requested format not available')
                    return
                video_url_list = [(req_format, url_map[req_format])] # Specific format

        elif 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
            self.report_rtmp_download()
            video_url_list = [(None, video_info['conn'][0])]

        else:
            self._downloader.trouble(u'ERROR: no fmt_url_map or conn information found in video info')
            return

        for format_param, video_real_url in video_url_list:
            # At this point we have a new video
            self._downloader.increment_downloads()

            # Extension
            video_extension = self._video_extensions.get(format_param, 'flv')

            # Find the video URL in fmt_url_map or conn paramters
            try:
                # Process video information
                self._downloader.process_info({
                    'id': video_id.decode('utf-8'),
                    'url': video_real_url.decode('utf-8'),
                    'uploader': video_uploader.decode('utf-8'),
                    'upload_date': upload_date,
                    'title': video_title,
                    'stitle': simple_title,
                    'ext': video_extension.decode('utf-8'),
                    'format': (format_param is None and u'NA' or format_param.decode('utf-8')),
                    'thumbnail': video_thumbnail.decode('utf-8'),
                    'description': video_description.decode('utf-8'),
                    'player_url': player_url,
                })
            except UnavailableVideoError, err:
                self._downloader.trouble(u'\nERROR: unable to download video')
1137
1138
class MetacafeIE(InfoExtractor):
    """Information Extractor for metacafe.com."""

    _VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
    _DISCLAIMER = 'http://www.metacafe.com/family_filter/'
    _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
    # YoutubeIE instance used to delegate 'yt-' prefixed Metacafe IDs
    _youtube_ie = None

    def __init__(self, youtube_ie, downloader=None):
        InfoExtractor.__init__(self, downloader)
        self._youtube_ie = youtube_ie

    @staticmethod
    def suitable(url):
        """Return True if this IE can handle the given URL."""
        return (re.match(MetacafeIE._VALID_URL, url) is not None)

    def report_disclaimer(self):
        """Report disclaimer retrieval."""
        self._downloader.to_screen(u'[metacafe] Retrieving disclaimer')

    def report_age_confirmation(self):
        """Report attempt to confirm age."""
        self._downloader.to_screen(u'[metacafe] Confirming age')

    def report_download_webpage(self, video_id):
        """Report webpage download."""
        self._downloader.to_screen(u'[metacafe] %s: Downloading webpage' % video_id)

    def report_extraction(self, video_id):
        """Report information extraction."""
        self._downloader.to_screen(u'[metacafe] %s: Extracting information' % video_id)

    def _real_initialize(self):
        """Fetch the family-filter disclaimer page and POST the age
        confirmation so that filtered videos become accessible."""
        # Retrieve disclaimer
        request = urllib2.Request(self._DISCLAIMER)
        try:
            self.report_disclaimer()
            disclaimer = urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % str(err))
            return

        # Confirm age
        disclaimer_form = {
            'filters': '0',
            'submit': "Continue - I'm over 18",
        }
        request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form))
        try:
            self.report_age_confirmation()
            disclaimer = urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
            return

    def _real_extract(self, url):
        """Extract the media URL, uploader and title for a Metacafe video and
        hand the information dict to the downloader."""
        # Extract id and simplified title from URL
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
            return

        video_id = mobj.group(1)

        # Check if video comes from YouTube; 'yt-<id>' IDs are mirrors that
        # are delegated to the YouTube extractor.
        mobj2 = re.match(r'^yt-(.*)$', video_id)
        if mobj2 is not None:
            self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % mobj2.group(1))
            return

        # At this point we have a new video
        self._downloader.increment_downloads()

        simple_title = mobj.group(2).decode('utf-8')

        # Retrieve video webpage to extract further information
        request = urllib2.Request('http://www.metacafe.com/watch/%s/' % video_id)
        try:
            self.report_download_webpage(video_id)
            webpage = urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
            return

        # Extract URL, uploader and title from webpage
        self.report_extraction(video_id)
        mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
        if mobj is not None:
            # Older page layout: media URL is a plain query parameter
            mediaURL = urllib.unquote(mobj.group(1))
            video_extension = mediaURL[-3:]

            # Extract gdaKey if available
            mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage)
            if mobj is None:
                video_url = mediaURL
            else:
                gdaKey = mobj.group(1)
                video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)
        else:
            # Newer page layout: media URL is inside the flashvars JSON blob
            mobj = re.search(r' name="flashvars" value="(.*?)"', webpage)
            if mobj is None:
                self._downloader.trouble(u'ERROR: unable to extract media URL')
                return
            vardict = parse_qs(mobj.group(1))
            if 'mediaData' not in vardict:
                self._downloader.trouble(u'ERROR: unable to extract media URL')
                return
            mobj = re.search(r'"mediaURL":"(http.*?)","key":"(.*?)"', vardict['mediaData'][0])
            if mobj is None:
                self._downloader.trouble(u'ERROR: unable to extract media URL')
                return
            mediaURL = mobj.group(1).replace('\\/', '/')
            video_extension = mediaURL[-3:]
            video_url = '%s?__gda__=%s' % (mediaURL, mobj.group(2))

        mobj = re.search(r'(?im)<title>(.*) - Video</title>', webpage)
        if mobj is None:
            self._downloader.trouble(u'ERROR: unable to extract title')
            return
        video_title = mobj.group(1).decode('utf-8')
        video_title = sanitize_title(video_title)

        mobj = re.search(r'(?ms)By:\s*<a .*?>(.+?)<', webpage)
        if mobj is None:
            self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
            return
        video_uploader = mobj.group(1)

        try:
            # Process video information
            self._downloader.process_info({
                'id': video_id.decode('utf-8'),
                'url': video_url.decode('utf-8'),
                'uploader': video_uploader.decode('utf-8'),
                'upload_date': u'NA',
                'title': video_title,
                'stitle': simple_title,
                'ext': video_extension.decode('utf-8'),
                'format': u'NA',
                'player_url': None,
            })
        except UnavailableVideoError:
            self._downloader.trouble(u'\nERROR: unable to download video')
1282
1283
class DailymotionIE(InfoExtractor):
    """Information Extractor for Dailymotion"""

    # Group 1 is the video ID, group 2 the slug used as simplified title
    _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^_/]+)_([^/]+)'

    def __init__(self, downloader=None):
        InfoExtractor.__init__(self, downloader)

    @staticmethod
    def suitable(url):
        """Return True if this IE can handle the given URL."""
        return (re.match(DailymotionIE._VALID_URL, url) is not None)

    def report_download_webpage(self, video_id):
        """Report webpage download."""
        self._downloader.to_screen(u'[dailymotion] %s: Downloading webpage' % video_id)

    def report_extraction(self, video_id):
        """Report information extraction."""
        self._downloader.to_screen(u'[dailymotion] %s: Extracting information' % video_id)

    def _real_initialize(self):
        # No site-wide setup needed for Dailymotion
        return

    def _real_extract(self, url):
        """Extract the media URL, uploader and title for a Dailymotion video
        and hand the information dict to the downloader."""
        # Extract id and simplified title from URL
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
            return

        # At this point we have a new video
        self._downloader.increment_downloads()
        video_id = mobj.group(1)

        simple_title = mobj.group(2).decode('utf-8')
        video_extension = 'flv'

        # Retrieve video webpage to extract further information
        request = urllib2.Request(url)
        try:
            self.report_download_webpage(video_id)
            webpage = urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
            return

        # Extract URL, uploader and title from webpage
        self.report_extraction(video_id)
        mobj = re.search(r'(?i)addVariable\(\"video\"\s*,\s*\"([^\"]*)\"\)', webpage)
        if mobj is None:
            self._downloader.trouble(u'ERROR: unable to extract media URL')
            return
        mediaURL = urllib.unquote(mobj.group(1))

        # if needed add http://www.dailymotion.com/ if relative URL

        video_url = mediaURL

        # '<meta\s+name="title"\s+content="Dailymotion\s*[:\-]\s*(.*?)"\s*\/\s*>'
        mobj = re.search(r'(?im)<title>Dailymotion\s*[\-:]\s*(.+?)</title>', webpage)
        if mobj is None:
            self._downloader.trouble(u'ERROR: unable to extract title')
            return
        video_title = mobj.group(1).decode('utf-8')
        video_title = sanitize_title(video_title)

        mobj = re.search(r'(?im)<Attribute name="owner">(.+?)</Attribute>', webpage)
        if mobj is None:
            self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
            return
        video_uploader = mobj.group(1)

        try:
            # Process video information
            self._downloader.process_info({
                'id': video_id.decode('utf-8'),
                'url': video_url.decode('utf-8'),
                'uploader': video_uploader.decode('utf-8'),
                'upload_date': u'NA',
                'title': video_title,
                'stitle': simple_title,
                'ext': video_extension.decode('utf-8'),
                'format': u'NA',
                'player_url': None,
            })
        except UnavailableVideoError:
            self._downloader.trouble(u'\nERROR: unable to download video')
1371
class GoogleIE(InfoExtractor):
    """Information extractor for video.google.com."""

    _VALID_URL = r'(?:http://)?video\.google\.(?:com(?:\.au)?|co\.(?:uk|jp|kr|cr)|ca|de|es|fr|it|nl|pl)/videoplay\?docid=([^\&]+).*'

    def __init__(self, downloader=None):
        InfoExtractor.__init__(self, downloader)

    @staticmethod
    def suitable(url):
        """Return True if this IE can handle the given URL."""
        return (re.match(GoogleIE._VALID_URL, url) is not None)

    def report_download_webpage(self, video_id):
        """Report webpage download."""
        self._downloader.to_screen(u'[video.google] %s: Downloading webpage' % video_id)

    def report_extraction(self, video_id):
        """Report information extraction."""
        self._downloader.to_screen(u'[video.google] %s: Extracting information' % video_id)

    def _real_initialize(self):
        # No site-wide setup needed for Google Video
        return

    def _real_extract(self, url):
        """Extract the media URL, title and optional thumbnail for a Google
        Video page and hand the information dict to the downloader."""
        # Extract id from URL
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
            return

        # At this point we have a new video
        self._downloader.increment_downloads()
        video_id = mobj.group(1)

        video_extension = 'mp4'

        # Retrieve video webpage to extract further information
        request = urllib2.Request('http://video.google.com/videoplay?docid=%s&hl=en&oe=utf-8' % video_id)
        try:
            self.report_download_webpage(video_id)
            webpage = urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
            return

        # Extract URL, uploader, and title from webpage
        self.report_extraction(video_id)
        mobj = re.search(r"download_url:'([^']+)'", webpage)
        if mobj is None:
            # No direct mp4 download link; fall back to the flv stream URL,
            # which is \x-escaped inside the page's JavaScript.
            video_extension = 'flv'
            mobj = re.search(r"(?i)videoUrl\\x3d(.+?)\\x26", webpage)
            if mobj is None:
                self._downloader.trouble(u'ERROR: unable to extract media URL')
                return
            mediaURL = urllib.unquote(mobj.group(1))
            mediaURL = mediaURL.replace('\\x3d', '\x3d')
            mediaURL = mediaURL.replace('\\x26', '\x26')

        video_url = mediaURL

        mobj = re.search(r'<title>(.*)</title>', webpage)
        if mobj is None:
            self._downloader.trouble(u'ERROR: unable to extract title')
            return
        video_title = mobj.group(1).decode('utf-8')
        video_title = sanitize_title(video_title)
        simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)

        # Extract video description
        mobj = re.search(r'<span id=short-desc-content>([^<]*)</span>', webpage)
        if mobj is None:
            self._downloader.trouble(u'ERROR: unable to extract video description')
            return
        video_description = mobj.group(1).decode('utf-8')
        if not video_description:
            video_description = 'No description available.'

        # Extract video thumbnail (requires an extra search-page request, so
        # only done when the user asked for it)
        if self._downloader.params.get('forcethumbnail', False):
            request = urllib2.Request('http://video.google.com/videosearch?q=%s+site:video.google.com&hl=en' % abs(int(video_id)))
            try:
                webpage = urllib2.urlopen(request).read()
            except (urllib2.URLError, httplib.HTTPException, socket.error), err:
                self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
                return
            mobj = re.search(r'<img class=thumbnail-img (?:.* )?src=(http.*)>', webpage)
            if mobj is None:
                self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
                return
            video_thumbnail = mobj.group(1)
        else: # we need something to pass to process_info
            video_thumbnail = ''

        try:
            # Process video information
            self._downloader.process_info({
                'id': video_id.decode('utf-8'),
                'url': video_url.decode('utf-8'),
                'uploader': u'NA',
                'upload_date': u'NA',
                'title': video_title,
                'stitle': simple_title,
                'ext': video_extension.decode('utf-8'),
                'format': u'NA',
                'player_url': None,
            })
        except UnavailableVideoError:
            self._downloader.trouble(u'\nERROR: unable to download video')
1481
1482
class PhotobucketIE(InfoExtractor):
    """Information extractor for photobucket.com."""

    _VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)'

    def __init__(self, downloader=None):
        InfoExtractor.__init__(self, downloader)

    @staticmethod
    def suitable(url):
        """Return True if this IE can handle the given URL."""
        return (re.match(PhotobucketIE._VALID_URL, url) is not None)

    def report_download_webpage(self, video_id):
        """Report webpage download."""
        self._downloader.to_screen(u'[photobucket] %s: Downloading webpage' % video_id)

    def report_extraction(self, video_id):
        """Report information extraction."""
        self._downloader.to_screen(u'[photobucket] %s: Extracting information' % video_id)

    def _real_initialize(self):
        # No site-wide setup needed for Photobucket
        return

    def _real_extract(self, url):
        """Extract the media URL, uploader and title for a Photobucket video
        and hand the information dict to the downloader."""
        # Extract id from URL
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
            return

        # At this point we have a new video
        self._downloader.increment_downloads()
        video_id = mobj.group(1)

        video_extension = 'flv'

        # Retrieve video webpage to extract further information
        request = urllib2.Request(url)
        try:
            self.report_download_webpage(video_id)
            webpage = urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
            return

        # Extract URL, uploader, and title from webpage
        self.report_extraction(video_id)
        mobj = re.search(r'<link rel="video_src" href=".*\?file=([^"]+)" />', webpage)
        if mobj is None:
            self._downloader.trouble(u'ERROR: unable to extract media URL')
            return
        mediaURL = urllib.unquote(mobj.group(1))

        video_url = mediaURL

        # The <title> element carries both the video title (group 1) and the
        # uploader name (group 2)
        mobj = re.search(r'<title>(.*) video by (.*) - Photobucket</title>', webpage)
        if mobj is None:
            self._downloader.trouble(u'ERROR: unable to extract title')
            return
        video_title = mobj.group(1).decode('utf-8')
        video_title = sanitize_title(video_title)
        simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)

        video_uploader = mobj.group(2).decode('utf-8')

        try:
            # Process video information
            self._downloader.process_info({
                'id': video_id.decode('utf-8'),
                'url': video_url.decode('utf-8'),
                'uploader': video_uploader,
                'upload_date': u'NA',
                'title': video_title,
                'stitle': simple_title,
                'ext': video_extension.decode('utf-8'),
                'format': u'NA',
                'player_url': None,
            })
        except UnavailableVideoError:
            self._downloader.trouble(u'\nERROR: unable to download video')
1563
1564
1565 class YahooIE(InfoExtractor):
1566 """Information extractor for video.yahoo.com."""
1567
1568 # _VALID_URL matches all Yahoo! Video URLs
1569 # _VPAGE_URL matches only the extractable '/watch/' URLs
1570 _VALID_URL = r'(?:http://)?(?:[a-z]+\.)?video\.yahoo\.com/(?:watch|network)/([0-9]+)(?:/|\?v=)([0-9]+)(?:[#\?].*)?'
1571 _VPAGE_URL = r'(?:http://)?video\.yahoo\.com/watch/([0-9]+)/([0-9]+)(?:[#\?].*)?'
1572
1573 def __init__(self, downloader=None):
1574 InfoExtractor.__init__(self, downloader)
1575
1576 @staticmethod
1577 def suitable(url):
1578 return (re.match(YahooIE._VALID_URL, url) is not None)
1579
1580 def report_download_webpage(self, video_id):
1581 """Report webpage download."""
1582 self._downloader.to_screen(u'[video.yahoo] %s: Downloading webpage' % video_id)
1583
1584 def report_extraction(self, video_id):
1585 """Report information extraction."""
1586 self._downloader.to_screen(u'[video.yahoo] %s: Extracting information' % video_id)
1587
1588 def _real_initialize(self):
1589 return
1590
1591 def _real_extract(self, url, new_video=True):
1592 # Extract ID from URL
1593 mobj = re.match(self._VALID_URL, url)
1594 if mobj is None:
1595 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1596 return
1597
1598 # At this point we have a new video
1599 self._downloader.increment_downloads()
1600 video_id = mobj.group(2)
1601 video_extension = 'flv'
1602
1603 # Rewrite valid but non-extractable URLs as
1604 # extractable English language /watch/ URLs
1605 if re.match(self._VPAGE_URL, url) is None:
1606 request = urllib2.Request(url)
1607 try:
1608 webpage = urllib2.urlopen(request).read()
1609 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1610 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1611 return
1612
1613 mobj = re.search(r'\("id", "([0-9]+)"\);', webpage)
1614 if mobj is None:
1615 self._downloader.trouble(u'ERROR: Unable to extract id field')
1616 return
1617 yahoo_id = mobj.group(1)
1618
1619 mobj = re.search(r'\("vid", "([0-9]+)"\);', webpage)
1620 if mobj is None:
1621 self._downloader.trouble(u'ERROR: Unable to extract vid field')
1622 return
1623 yahoo_vid = mobj.group(1)
1624
1625 url = 'http://video.yahoo.com/watch/%s/%s' % (yahoo_vid, yahoo_id)
1626 return self._real_extract(url, new_video=False)
1627
1628 # Retrieve video webpage to extract further information
1629 request = urllib2.Request(url)
1630 try:
1631 self.report_download_webpage(video_id)
1632 webpage = urllib2.urlopen(request).read()
1633 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1634 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1635 return
1636
1637 # Extract uploader and title from webpage
1638 self.report_extraction(video_id)
1639 mobj = re.search(r'<meta name="title" content="(.*)" />', webpage)
1640 if mobj is None:
1641 self._downloader.trouble(u'ERROR: unable to extract video title')
1642 return
1643 video_title = mobj.group(1).decode('utf-8')
1644 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1645
1646 mobj = re.search(r'<h2 class="ti-5"><a href="http://video\.yahoo\.com/(people|profile)/[0-9]+" beacon=".*">(.*)</a></h2>', webpage)
1647 if mobj is None:
1648 self._downloader.trouble(u'ERROR: unable to extract video uploader')
1649 return
1650 video_uploader = mobj.group(1).decode('utf-8')
1651
1652 # Extract video thumbnail
1653 mobj = re.search(r'<link rel="image_src" href="(.*)" />', webpage)
1654 if mobj is None:
1655 self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
1656 return
1657 video_thumbnail = mobj.group(1).decode('utf-8')
1658
1659 # Extract video description
1660 mobj = re.search(r'<meta name="description" content="(.*)" />', webpage)
1661 if mobj is None:
1662 self._downloader.trouble(u'ERROR: unable to extract video description')
1663 return
1664 video_description = mobj.group(1).decode('utf-8')
1665 if not video_description: video_description = 'No description available.'
1666
1667 # Extract video height and width
1668 mobj = re.search(r'<meta name="video_height" content="([0-9]+)" />', webpage)
1669 if mobj is None:
1670 self._downloader.trouble(u'ERROR: unable to extract video height')
1671 return
1672 yv_video_height = mobj.group(1)
1673
1674 mobj = re.search(r'<meta name="video_width" content="([0-9]+)" />', webpage)
1675 if mobj is None:
1676 self._downloader.trouble(u'ERROR: unable to extract video width')
1677 return
1678 yv_video_width = mobj.group(1)
1679
1680 # Retrieve video playlist to extract media URL
1681 # I'm not completely sure what all these options are, but we
1682 # seem to need most of them, otherwise the server sends a 401.
1683 yv_lg = 'R0xx6idZnW2zlrKP8xxAIR' # not sure what this represents
1684 yv_bitrate = '700' # according to Wikipedia this is hard-coded
1685 request = urllib2.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id +
1686 '&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height +
1687 '&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797')
1688 try:
1689 self.report_download_webpage(video_id)
1690 webpage = urllib2.urlopen(request).read()
1691 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1692 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1693 return
1694
1695 # Extract media URL from playlist XML
1696 mobj = re.search(r'<STREAM APP="(http://.*)" FULLPATH="/?(/.*\.flv\?[^"]*)"', webpage)
1697 if mobj is None:
1698 self._downloader.trouble(u'ERROR: Unable to extract media URL')
1699 return
1700 video_url = urllib.unquote(mobj.group(1) + mobj.group(2)).decode('utf-8')
1701 video_url = re.sub(r'(?u)&(.+?);', htmlentity_transform, video_url)
1702
1703 try:
1704 # Process video information
1705 self._downloader.process_info({
1706 'id': video_id.decode('utf-8'),
1707 'url': video_url,
1708 'uploader': video_uploader,
1709 'upload_date': u'NA',
1710 'title': video_title,
1711 'stitle': simple_title,
1712 'ext': video_extension.decode('utf-8'),
1713 'thumbnail': video_thumbnail.decode('utf-8'),
1714 'description': video_description,
1715 'thumbnail': video_thumbnail,
1716 'description': video_description,
1717 'player_url': None,
1718 })
1719 except UnavailableVideoError:
1720 self._downloader.trouble(u'\nERROR: unable to download video')
1721
1722
1723 class VimeoIE(InfoExtractor):
1724 """Information extractor for vimeo.com."""
1725
1726 # _VALID_URL matches Vimeo URLs
1727 _VALID_URL = r'(?:http://)?(?:(?:www|player).)?vimeo\.com/(?:video/)?([0-9]+)'
1728
1729 def __init__(self, downloader=None):
1730 InfoExtractor.__init__(self, downloader)
1731
1732 @staticmethod
1733 def suitable(url):
1734 return (re.match(VimeoIE._VALID_URL, url) is not None)
1735
1736 def report_download_webpage(self, video_id):
1737 """Report webpage download."""
1738 self._downloader.to_screen(u'[vimeo] %s: Downloading webpage' % video_id)
1739
1740 def report_extraction(self, video_id):
1741 """Report information extraction."""
1742 self._downloader.to_screen(u'[vimeo] %s: Extracting information' % video_id)
1743
1744 def _real_initialize(self):
1745 return
1746
1747 def _real_extract(self, url, new_video=True):
1748 # Extract ID from URL
1749 mobj = re.match(self._VALID_URL, url)
1750 if mobj is None:
1751 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1752 return
1753
1754 # At this point we have a new video
1755 self._downloader.increment_downloads()
1756 video_id = mobj.group(1)
1757 video_extension = 'flv' # FIXME
1758
1759 # Retrieve video webpage to extract further information
1760 request = urllib2.Request("http://vimeo.com/moogaloop/load/clip:%s" % video_id, None, std_headers)
1761 try:
1762 self.report_download_webpage(video_id)
1763 webpage = urllib2.urlopen(request).read()
1764 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1765 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1766 return
1767
1768 # Now we begin extracting as much information as we can from what we
1769 # retrieved. First we extract the information common to all extractors,
1770 # and latter we extract those that are Vimeo specific.
1771 self.report_extraction(video_id)
1772
1773 # Extract title
1774 mobj = re.search(r'<caption>(.*?)</caption>', webpage)
1775 if mobj is None:
1776 self._downloader.trouble(u'ERROR: unable to extract video title')
1777 return
1778 video_title = mobj.group(1).decode('utf-8')
1779 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1780
1781 # Extract uploader
1782 mobj = re.search(r'<uploader_url>http://vimeo.com/(.*?)</uploader_url>', webpage)
1783 if mobj is None:
1784 self._downloader.trouble(u'ERROR: unable to extract video uploader')
1785 return
1786 video_uploader = mobj.group(1).decode('utf-8')
1787
1788 # Extract video thumbnail
1789 mobj = re.search(r'<thumbnail>(.*?)</thumbnail>', webpage)
1790 if mobj is None:
1791 self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
1792 return
1793 video_thumbnail = mobj.group(1).decode('utf-8')
1794
1795 # # Extract video description
1796 # mobj = re.search(r'<meta property="og:description" content="(.*)" />', webpage)
1797 # if mobj is None:
1798 # self._downloader.trouble(u'ERROR: unable to extract video description')
1799 # return
1800 # video_description = mobj.group(1).decode('utf-8')
1801 # if not video_description: video_description = 'No description available.'
1802 video_description = 'Foo.'
1803
1804 # Vimeo specific: extract request signature
1805 mobj = re.search(r'<request_signature>(.*?)</request_signature>', webpage)
1806 if mobj is None:
1807 self._downloader.trouble(u'ERROR: unable to extract request signature')
1808 return
1809 sig = mobj.group(1).decode('utf-8')
1810
1811 # Vimeo specific: Extract request signature expiration
1812 mobj = re.search(r'<request_signature_expires>(.*?)</request_signature_expires>', webpage)
1813 if mobj is None:
1814 self._downloader.trouble(u'ERROR: unable to extract request signature expiration')
1815 return
1816 sig_exp = mobj.group(1).decode('utf-8')
1817
1818 video_url = "http://vimeo.com/moogaloop/play/clip:%s/%s/%s" % (video_id, sig, sig_exp)
1819
1820 try:
1821 # Process video information
1822 self._downloader.process_info({
1823 'id': video_id.decode('utf-8'),
1824 'url': video_url,
1825 'uploader': video_uploader,
1826 'upload_date': u'NA',
1827 'title': video_title,
1828 'stitle': simple_title,
1829 'ext': video_extension.decode('utf-8'),
1830 'thumbnail': video_thumbnail.decode('utf-8'),
1831 'description': video_description,
1832 'thumbnail': video_thumbnail,
1833 'description': video_description,
1834 'player_url': None,
1835 })
1836 except UnavailableVideoError:
1837 self._downloader.trouble(u'ERROR: unable to download video')
1838
1839
1840 class GenericIE(InfoExtractor):
1841 """Generic last-resort information extractor."""
1842
1843 def __init__(self, downloader=None):
1844 InfoExtractor.__init__(self, downloader)
1845
1846 @staticmethod
1847 def suitable(url):
1848 return True
1849
1850 def report_download_webpage(self, video_id):
1851 """Report webpage download."""
1852 self._downloader.to_screen(u'WARNING: Falling back on generic information extractor.')
1853 self._downloader.to_screen(u'[generic] %s: Downloading webpage' % video_id)
1854
1855 def report_extraction(self, video_id):
1856 """Report information extraction."""
1857 self._downloader.to_screen(u'[generic] %s: Extracting information' % video_id)
1858
1859 def _real_initialize(self):
1860 return
1861
1862 def _real_extract(self, url):
1863 # At this point we have a new video
1864 self._downloader.increment_downloads()
1865
1866 video_id = url.split('/')[-1]
1867 request = urllib2.Request(url)
1868 try:
1869 self.report_download_webpage(video_id)
1870 webpage = urllib2.urlopen(request).read()
1871 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1872 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1873 return
1874 except ValueError, err:
1875 # since this is the last-resort InfoExtractor, if
1876 # this error is thrown, it'll be thrown here
1877 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1878 return
1879
1880 self.report_extraction(video_id)
1881 # Start with something easy: JW Player in SWFObject
1882 mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
1883 if mobj is None:
1884 # Broaden the search a little bit
1885 mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
1886 if mobj is None:
1887 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1888 return
1889
1890 # It's possible that one of the regexes
1891 # matched, but returned an empty group:
1892 if mobj.group(1) is None:
1893 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1894 return
1895
1896 video_url = urllib.unquote(mobj.group(1))
1897 video_id = os.path.basename(video_url)
1898
1899 # here's a fun little line of code for you:
1900 video_extension = os.path.splitext(video_id)[1][1:]
1901 video_id = os.path.splitext(video_id)[0]
1902
1903 # it's tempting to parse this further, but you would
1904 # have to take into account all the variations like
1905 # Video Title - Site Name
1906 # Site Name | Video Title
1907 # Video Title - Tagline | Site Name
1908 # and so on and so forth; it's just not practical
1909 mobj = re.search(r'<title>(.*)</title>', webpage)
1910 if mobj is None:
1911 self._downloader.trouble(u'ERROR: unable to extract title')
1912 return
1913 video_title = mobj.group(1).decode('utf-8')
1914 video_title = sanitize_title(video_title)
1915 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1916
1917 # video uploader is domain name
1918 mobj = re.match(r'(?:https?://)?([^/]*)/.*', url)
1919 if mobj is None:
1920 self._downloader.trouble(u'ERROR: unable to extract title')
1921 return
1922 video_uploader = mobj.group(1).decode('utf-8')
1923
1924 try:
1925 # Process video information
1926 self._downloader.process_info({
1927 'id': video_id.decode('utf-8'),
1928 'url': video_url.decode('utf-8'),
1929 'uploader': video_uploader,
1930 'upload_date': u'NA',
1931 'title': video_title,
1932 'stitle': simple_title,
1933 'ext': video_extension.decode('utf-8'),
1934 'format': u'NA',
1935 'player_url': None,
1936 })
1937 except UnavailableVideoError, err:
1938 self._downloader.trouble(u'\nERROR: unable to download video')
1939
1940
1941 class YoutubeSearchIE(InfoExtractor):
1942 """Information Extractor for YouTube search queries."""
1943 _VALID_QUERY = r'ytsearch(\d+|all)?:[\s\S]+'
1944 _TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en'
1945 _VIDEO_INDICATOR = r'href="/watch\?v=.+?"'
1946 _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
1947 _youtube_ie = None
1948 _max_youtube_results = 1000
1949
1950 def __init__(self, youtube_ie, downloader=None):
1951 InfoExtractor.__init__(self, downloader)
1952 self._youtube_ie = youtube_ie
1953
1954 @staticmethod
1955 def suitable(url):
1956 return (re.match(YoutubeSearchIE._VALID_QUERY, url) is not None)
1957
1958 def report_download_page(self, query, pagenum):
1959 """Report attempt to download playlist page with given number."""
1960 query = query.decode(preferredencoding())
1961 self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
1962
1963 def _real_initialize(self):
1964 self._youtube_ie.initialize()
1965
1966 def _real_extract(self, query):
1967 mobj = re.match(self._VALID_QUERY, query)
1968 if mobj is None:
1969 self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
1970 return
1971
1972 prefix, query = query.split(':')
1973 prefix = prefix[8:]
1974 query = query.encode('utf-8')
1975 if prefix == '':
1976 self._download_n_results(query, 1)
1977 return
1978 elif prefix == 'all':
1979 self._download_n_results(query, self._max_youtube_results)
1980 return
1981 else:
1982 try:
1983 n = long(prefix)
1984 if n <= 0:
1985 self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
1986 return
1987 elif n > self._max_youtube_results:
1988 self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)' % (self._max_youtube_results, n))
1989 n = self._max_youtube_results
1990 self._download_n_results(query, n)
1991 return
1992 except ValueError: # parsing prefix as integer fails
1993 self._download_n_results(query, 1)
1994 return
1995
1996 def _download_n_results(self, query, n):
1997 """Downloads a specified number of results for a query"""
1998
1999 video_ids = []
2000 already_seen = set()
2001 pagenum = 1
2002
2003 while True:
2004 self.report_download_page(query, pagenum)
2005 result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
2006 request = urllib2.Request(result_url)
2007 try:
2008 page = urllib2.urlopen(request).read()
2009 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2010 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2011 return
2012
2013 # Extract video identifiers
2014 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2015 video_id = page[mobj.span()[0]:mobj.span()[1]].split('=')[2][:-1]
2016 if video_id not in already_seen:
2017 video_ids.append(video_id)
2018 already_seen.add(video_id)
2019 if len(video_ids) == n:
2020 # Specified n videos reached
2021 for id in video_ids:
2022 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
2023 return
2024
2025 if re.search(self._MORE_PAGES_INDICATOR, page) is None:
2026 for id in video_ids:
2027 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
2028 return
2029
2030 pagenum = pagenum + 1
2031
2032 class GoogleSearchIE(InfoExtractor):
2033 """Information Extractor for Google Video search queries."""
2034 _VALID_QUERY = r'gvsearch(\d+|all)?:[\s\S]+'
2035 _TEMPLATE_URL = 'http://video.google.com/videosearch?q=%s+site:video.google.com&start=%s&hl=en'
2036 _VIDEO_INDICATOR = r'videoplay\?docid=([^\&>]+)\&'
2037 _MORE_PAGES_INDICATOR = r'<span>Next</span>'
2038 _google_ie = None
2039 _max_google_results = 1000
2040
2041 def __init__(self, google_ie, downloader=None):
2042 InfoExtractor.__init__(self, downloader)
2043 self._google_ie = google_ie
2044
2045 @staticmethod
2046 def suitable(url):
2047 return (re.match(GoogleSearchIE._VALID_QUERY, url) is not None)
2048
2049 def report_download_page(self, query, pagenum):
2050 """Report attempt to download playlist page with given number."""
2051 query = query.decode(preferredencoding())
2052 self._downloader.to_screen(u'[video.google] query "%s": Downloading page %s' % (query, pagenum))
2053
2054 def _real_initialize(self):
2055 self._google_ie.initialize()
2056
2057 def _real_extract(self, query):
2058 mobj = re.match(self._VALID_QUERY, query)
2059 if mobj is None:
2060 self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
2061 return
2062
2063 prefix, query = query.split(':')
2064 prefix = prefix[8:]
2065 query = query.encode('utf-8')
2066 if prefix == '':
2067 self._download_n_results(query, 1)
2068 return
2069 elif prefix == 'all':
2070 self._download_n_results(query, self._max_google_results)
2071 return
2072 else:
2073 try:
2074 n = long(prefix)
2075 if n <= 0:
2076 self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
2077 return
2078 elif n > self._max_google_results:
2079 self._downloader.to_stderr(u'WARNING: gvsearch returns max %i results (you requested %i)' % (self._max_google_results, n))
2080 n = self._max_google_results
2081 self._download_n_results(query, n)
2082 return
2083 except ValueError: # parsing prefix as integer fails
2084 self._download_n_results(query, 1)
2085 return
2086
2087 def _download_n_results(self, query, n):
2088 """Downloads a specified number of results for a query"""
2089
2090 video_ids = []
2091 already_seen = set()
2092 pagenum = 1
2093
2094 while True:
2095 self.report_download_page(query, pagenum)
2096 result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
2097 request = urllib2.Request(result_url)
2098 try:
2099 page = urllib2.urlopen(request).read()
2100 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2101 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2102 return
2103
2104 # Extract video identifiers
2105 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2106 video_id = mobj.group(1)
2107 if video_id not in already_seen:
2108 video_ids.append(video_id)
2109 already_seen.add(video_id)
2110 if len(video_ids) == n:
2111 # Specified n videos reached
2112 for id in video_ids:
2113 self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
2114 return
2115
2116 if re.search(self._MORE_PAGES_INDICATOR, page) is None:
2117 for id in video_ids:
2118 self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
2119 return
2120
2121 pagenum = pagenum + 1
2122
class YahooSearchIE(InfoExtractor):
	"""Information Extractor for Yahoo! Video search queries."""
	# Query scheme: "yvsearch<N>:<terms>", "yvsearchall:<terms>" or
	# plain "yvsearch:<terms>".
	_VALID_QUERY = r'yvsearch(\d+|all)?:[\s\S]+'
	# %s slots: URL-quoted query, result page/offset number.
	_TEMPLATE_URL = 'http://video.yahoo.com/search/?p=%s&o=%s'
	# Group 1 captures the "<id>/<id>" part of a watch URL on a results page.
	_VIDEO_INDICATOR = r'href="http://video\.yahoo\.com/watch/([0-9]+/[0-9]+)"'
	# Presence of this pattern on a page means more result pages exist.
	_MORE_PAGES_INDICATOR = r'\s*Next'
	# Delegate extractor used to download each individual result.
	_yahoo_ie = None
	_max_yahoo_results = 1000

	def __init__(self, yahoo_ie, downloader=None):
		InfoExtractor.__init__(self, downloader)
		self._yahoo_ie = yahoo_ie

	@staticmethod
	def suitable(url):
		# True when the string is a yvsearch query this extractor handles.
		return (re.match(YahooSearchIE._VALID_QUERY, url) is not None)

	def report_download_page(self, query, pagenum):
		"""Report attempt to download playlist page with given number."""
		query = query.decode(preferredencoding())
		self._downloader.to_screen(u'[video.yahoo] query "%s": Downloading page %s' % (query, pagenum))

	def _real_initialize(self):
		self._yahoo_ie.initialize()

	def _real_extract(self, query):
		# Parse the "yvsearch..." prefix to decide how many results to fetch,
		# then delegate the actual work to _download_n_results().
		mobj = re.match(self._VALID_QUERY, query)
		if mobj is None:
			self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
			return

		prefix, query = query.split(':')
		# Strip the fixed 8-character "yvsearch" part; what remains is
		# '', 'all' or a number.
		prefix = prefix[8:]
		query = query.encode('utf-8')
		if prefix == '':
			# Bare "yvsearch:" downloads only the first result.
			self._download_n_results(query, 1)
			return
		elif prefix == 'all':
			self._download_n_results(query, self._max_yahoo_results)
			return
		else:
			try:
				n = long(prefix)
				if n <= 0:
					self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
					return
				elif n > self._max_yahoo_results:
					# Clamp oversized requests to the service maximum.
					self._downloader.to_stderr(u'WARNING: yvsearch returns max %i results (you requested %i)' % (self._max_yahoo_results, n))
					n = self._max_yahoo_results
				self._download_n_results(query, n)
				return
			except ValueError: # parsing prefix as integer fails
				self._download_n_results(query, 1)
				return

	def _download_n_results(self, query, n):
		"""Downloads a specified number of results for a query"""

		video_ids = []
		already_seen = set()
		pagenum = 1

		while True:
			self.report_download_page(query, pagenum)
			result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
			request = urllib2.Request(result_url)
			try:
				page = urllib2.urlopen(request).read()
			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
				self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
				return

			# Extract video identifiers
			for mobj in re.finditer(self._VIDEO_INDICATOR, page):
				video_id = mobj.group(1)
				if video_id not in already_seen:
					video_ids.append(video_id)
					already_seen.add(video_id)
				if len(video_ids) == n:
					# Specified n videos reached
					for id in video_ids:
						self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
					return

			# No "Next" marker: the result set is exhausted.
			if re.search(self._MORE_PAGES_INDICATOR, page) is None:
				for id in video_ids:
					self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
				return

			pagenum = pagenum + 1
2213
2214 class YoutubePlaylistIE(InfoExtractor):
2215 """Information Extractor for YouTube playlists."""
2216
2217 _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists|artist)\?.*?(p|a)=|user/.*?/user/|p/|user/.*?#[pg]/c/)([0-9A-Za-z]+)(?:/.*?/([0-9A-Za-z_-]+))?.*'
2218 _TEMPLATE_URL = 'http://www.youtube.com/%s?%s=%s&page=%s&gl=US&hl=en'
2219 _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
2220 _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
2221 _youtube_ie = None
2222
2223 def __init__(self, youtube_ie, downloader=None):
2224 InfoExtractor.__init__(self, downloader)
2225 self._youtube_ie = youtube_ie
2226
2227 @staticmethod
2228 def suitable(url):
2229 return (re.match(YoutubePlaylistIE._VALID_URL, url) is not None)
2230
2231 def report_download_page(self, playlist_id, pagenum):
2232 """Report attempt to download playlist page with given number."""
2233 self._downloader.to_screen(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum))
2234
2235 def _real_initialize(self):
2236 self._youtube_ie.initialize()
2237
2238 def _real_extract(self, url):
2239 # Extract playlist id
2240 mobj = re.match(self._VALID_URL, url)
2241 if mobj is None:
2242 self._downloader.trouble(u'ERROR: invalid url: %s' % url)
2243 return
2244
2245 # Single video case
2246 if mobj.group(3) is not None:
2247 self._youtube_ie.extract(mobj.group(3))
2248 return
2249
2250 # Download playlist pages
2251 # prefix is 'p' as default for playlists but there are other types that need extra care
2252 playlist_prefix = mobj.group(1)
2253 if playlist_prefix == 'a':
2254 playlist_access = 'artist'
2255 else:
2256 playlist_prefix = 'p'
2257 playlist_access = 'view_play_list'
2258 playlist_id = mobj.group(2)
2259 video_ids = []
2260 pagenum = 1
2261
2262 while True:
2263 self.report_download_page(playlist_id, pagenum)
2264 request = urllib2.Request(self._TEMPLATE_URL % (playlist_access, playlist_prefix, playlist_id, pagenum))
2265 try:
2266 page = urllib2.urlopen(request).read()
2267 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2268 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2269 return
2270
2271 # Extract video identifiers
2272 ids_in_page = []
2273 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2274 if mobj.group(1) not in ids_in_page:
2275 ids_in_page.append(mobj.group(1))
2276 video_ids.extend(ids_in_page)
2277
2278 if re.search(self._MORE_PAGES_INDICATOR, page) is None:
2279 break
2280 pagenum = pagenum + 1
2281
2282 playliststart = self._downloader.params.get('playliststart', 1) - 1
2283 playlistend = self._downloader.params.get('playlistend', -1)
2284 video_ids = video_ids[playliststart:playlistend]
2285
2286 for id in video_ids:
2287 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
2288 return
2289
2290 class YoutubeUserIE(InfoExtractor):
2291 """Information Extractor for YouTube users."""
2292
2293 _VALID_URL = r'(?:(?:(?:http://)?(?:\w+\.)?youtube.com/user/)|ytuser:)([A-Za-z0-9_-]+)'
2294 _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
2295 _GDATA_PAGE_SIZE = 50
2296 _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d'
2297 _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
2298 _youtube_ie = None
2299
2300 def __init__(self, youtube_ie, downloader=None):
2301 InfoExtractor.__init__(self, downloader)
2302 self._youtube_ie = youtube_ie
2303
2304 @staticmethod
2305 def suitable(url):
2306 return (re.match(YoutubeUserIE._VALID_URL, url) is not None)
2307
2308 def report_download_page(self, username, start_index):
2309 """Report attempt to download user page."""
2310 self._downloader.to_screen(u'[youtube] user %s: Downloading video ids from %d to %d' %
2311 (username, start_index, start_index + self._GDATA_PAGE_SIZE))
2312
2313 def _real_initialize(self):
2314 self._youtube_ie.initialize()
2315
2316 def _real_extract(self, url):
2317 # Extract username
2318 mobj = re.match(self._VALID_URL, url)
2319 if mobj is None:
2320 self._downloader.trouble(u'ERROR: invalid url: %s' % url)
2321 return
2322
2323 username = mobj.group(1)
2324
2325 # Download video ids using YouTube Data API. Result size per
2326 # query is limited (currently to 50 videos) so we need to query
2327 # page by page until there are no video ids - it means we got
2328 # all of them.
2329
2330 video_ids = []
2331 pagenum = 0
2332
2333 while True:
2334 start_index = pagenum * self._GDATA_PAGE_SIZE + 1
2335 self.report_download_page(username, start_index)
2336
2337 request = urllib2.Request(self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index))
2338
2339 try:
2340 page = urllib2.urlopen(request).read()
2341 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2342 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2343 return
2344
2345 # Extract video identifiers
2346 ids_in_page = []
2347
2348 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2349 if mobj.group(1) not in ids_in_page:
2350 ids_in_page.append(mobj.group(1))
2351
2352 video_ids.extend(ids_in_page)
2353
2354 # A little optimization - if current page is not
2355 # "full", ie. does not contain PAGE_SIZE video ids then
2356 # we can assume that this page is the last one - there
2357 # are no more ids on further pages - no need to query
2358 # again.
2359
2360 if len(ids_in_page) < self._GDATA_PAGE_SIZE:
2361 break
2362
2363 pagenum += 1
2364
2365 all_ids_count = len(video_ids)
2366 playliststart = self._downloader.params.get('playliststart', 1) - 1
2367 playlistend = self._downloader.params.get('playlistend', -1)
2368
2369 if playlistend == -1:
2370 video_ids = video_ids[playliststart:]
2371 else:
2372 video_ids = video_ids[playliststart:playlistend]
2373
2374 self._downloader.to_screen("[youtube] user %s: Collected %d video ids (downloading %d of them)" %
2375 (username, all_ids_count, len(video_ids)))
2376
2377 for video_id in video_ids:
2378 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % video_id)
2379
2380
2381 class DepositFilesIE(InfoExtractor):
2382 """Information extractor for depositfiles.com"""
2383
2384 _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles.com/(?:../(?#locale))?files/(.+)'
2385
2386 def __init__(self, downloader=None):
2387 InfoExtractor.__init__(self, downloader)
2388
2389 @staticmethod
2390 def suitable(url):
2391 return (re.match(DepositFilesIE._VALID_URL, url) is not None)
2392
2393 def report_download_webpage(self, file_id):
2394 """Report webpage download."""
2395 self._downloader.to_screen(u'[DepositFiles] %s: Downloading webpage' % file_id)
2396
2397 def report_extraction(self, file_id):
2398 """Report information extraction."""
2399 self._downloader.to_screen(u'[DepositFiles] %s: Extracting information' % file_id)
2400
2401 def _real_initialize(self):
2402 return
2403
2404 def _real_extract(self, url):
2405 # At this point we have a new file
2406 self._downloader.increment_downloads()
2407
2408 file_id = url.split('/')[-1]
2409 # Rebuild url in english locale
2410 url = 'http://depositfiles.com/en/files/' + file_id
2411
2412 # Retrieve file webpage with 'Free download' button pressed
2413 free_download_indication = { 'gateway_result' : '1' }
2414 request = urllib2.Request(url, urllib.urlencode(free_download_indication))
2415 try:
2416 self.report_download_webpage(file_id)
2417 webpage = urllib2.urlopen(request).read()
2418 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2419 self._downloader.trouble(u'ERROR: Unable to retrieve file webpage: %s' % str(err))
2420 return
2421
2422 # Search for the real file URL
2423 mobj = re.search(r'<form action="(http://fileshare.+?)"', webpage)
2424 if (mobj is None) or (mobj.group(1) is None):
2425 # Try to figure out reason of the error.
2426 mobj = re.search(r'<strong>(Attention.*?)</strong>', webpage, re.DOTALL)
2427 if (mobj is not None) and (mobj.group(1) is not None):
2428 restriction_message = re.sub('\s+', ' ', mobj.group(1)).strip()
2429 self._downloader.trouble(u'ERROR: %s' % restriction_message)
2430 else:
2431 self._downloader.trouble(u'ERROR: unable to extract download URL from: %s' % url)
2432 return
2433
2434 file_url = mobj.group(1)
2435 file_extension = os.path.splitext(file_url)[1][1:]
2436
2437 # Search for file title
2438 mobj = re.search(r'<b title="(.*?)">', webpage)
2439 if mobj is None:
2440 self._downloader.trouble(u'ERROR: unable to extract title')
2441 return
2442 file_title = mobj.group(1).decode('utf-8')
2443
2444 try:
2445 # Process file information
2446 self._downloader.process_info({
2447 'id': file_id.decode('utf-8'),
2448 'url': file_url.decode('utf-8'),
2449 'uploader': u'NA',
2450 'upload_date': u'NA',
2451 'title': file_title,
2452 'stitle': file_title,
2453 'ext': file_extension.decode('utf-8'),
2454 'format': u'NA',
2455 'player_url': None,
2456 })
2457 except UnavailableVideoError, err:
2458 self._downloader.trouble(u'ERROR: unable to download file')
2459
2460 class FacebookIE(InfoExtractor):
2461 """Information Extractor for Facebook"""
2462
2463 _VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook.com/video/video.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)'
2464 _LOGIN_URL = 'https://login.facebook.com/login.php?m&next=http%3A%2F%2Fm.facebook.com%2Fhome.php&'
2465 _NETRC_MACHINE = 'facebook'
2466 _available_formats = ['highqual', 'lowqual']
2467 _video_extensions = {
2468 'highqual': 'mp4',
2469 'lowqual': 'mp4',
2470 }
2471
	def __init__(self, downloader=None):
		# Plain passthrough to the InfoExtractor base constructor.
		InfoExtractor.__init__(self, downloader)
2474
2475 @staticmethod
2476 def suitable(url):
2477 return (re.match(FacebookIE._VALID_URL, url) is not None)
2478
2479 def _reporter(self, message):
2480 """Add header and report message."""
2481 self._downloader.to_screen(u'[facebook] %s' % message)
2482
2483 def report_login(self):
2484 """Report attempt to log in."""
2485 self._reporter(u'Logging in')
2486
2487 def report_video_webpage_download(self, video_id):
2488 """Report attempt to download video webpage."""
2489 self._reporter(u'%s: Downloading video webpage' % video_id)
2490
2491 def report_information_extraction(self, video_id):
2492 """Report attempt to extract video information."""
2493 self._reporter(u'%s: Extracting video information' % video_id)
2494
2495 def _parse_page(self, video_webpage):
2496 """Extract video information from page"""
2497 # General data
2498 data = {'title': r'class="video_title datawrap">(.*?)</',
2499 'description': r'<div class="datawrap">(.*?)</div>',
2500 'owner': r'\("video_owner_name", "(.*?)"\)',
2501 'upload_date': r'data-date="(.*?)"',
2502 'thumbnail': r'\("thumb_url", "(?P<THUMB>.*?)"\)',
2503 }
2504 video_info = {}
2505 for piece in data.keys():
2506 mobj = re.search(data[piece], video_webpage)
2507 if mobj is not None:
2508 video_info[piece] = urllib.unquote_plus(mobj.group(1).decode("unicode_escape"))
2509
2510 # Video urls
2511 video_urls = {}
2512 for fmt in self._available_formats:
2513 mobj = re.search(r'\("%s_src\", "(.+?)"\)' % fmt, video_webpage)
2514 if mobj is not None:
2515 # URL is in a Javascript segment inside an escaped Unicode format within
2516 # the generally utf-8 page
2517 video_urls[fmt] = urllib.unquote_plus(mobj.group(1).decode("unicode_escape"))
2518 video_info['video_urls'] = video_urls
2519
2520 return video_info
2521
	def _real_initialize(self):
		# Optionally log in to Facebook before extraction. Credentials come
		# from --username/--password or from .netrc; with neither, the login
		# step is silently skipped and extraction proceeds anonymously.
		if self._downloader is None:
			return

		useremail = None
		password = None
		downloader_params = self._downloader.params

		# Attempt to use provided username and password or .netrc data
		if downloader_params.get('username', None) is not None:
			useremail = downloader_params['username']
			password = downloader_params['password']
		elif downloader_params.get('usenetrc', False):
			try:
				info = netrc.netrc().authenticators(self._NETRC_MACHINE)
				if info is not None:
					useremail = info[0]
					password = info[2]
				else:
					raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
			except (IOError, netrc.NetrcParseError), err:
				# A broken .netrc only warns; downloading can still work.
				self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
				return

		if useremail is None:
			# No credentials available: proceed without logging in.
			return

		# Log in
		login_form = {
			'email': useremail,
			'pass': password,
			'login': 'Log+In'
			}
		request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form))
		try:
			self.report_login()
			login_results = urllib2.urlopen(request).read()
			# Facebook serves the login form again when authentication failed.
			if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None:
				self._downloader.to_stderr(u'WARNING: unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
				return
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
			return
2565
2566 def _real_extract(self, url):
2567 mobj = re.match(self._VALID_URL, url)
2568 if mobj is None:
2569 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
2570 return
2571 video_id = mobj.group('ID')
2572
2573 # Get video webpage
2574 self.report_video_webpage_download(video_id)
2575 request = urllib2.Request('https://www.facebook.com/video/video.php?v=%s' % video_id)
2576 try:
2577 page = urllib2.urlopen(request)
2578 video_webpage = page.read()
2579 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2580 self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
2581 return
2582
2583 # Start extracting information
2584 self.report_information_extraction(video_id)
2585
2586 # Extract information
2587 video_info = self._parse_page(video_webpage)
2588
2589 # uploader
2590 if 'owner' not in video_info:
2591 self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
2592 return
2593 video_uploader = video_info['owner']
2594
2595 # title
2596 if 'title' not in video_info:
2597 self._downloader.trouble(u'ERROR: unable to extract video title')
2598 return
2599 video_title = video_info['title']
2600 video_title = video_title.decode('utf-8')
2601 video_title = sanitize_title(video_title)
2602
2603 # simplified title
2604 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
2605 simple_title = simple_title.strip(ur'_')
2606
2607 # thumbnail image
2608 if 'thumbnail' not in video_info:
2609 self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
2610 video_thumbnail = ''
2611 else:
2612 video_thumbnail = video_info['thumbnail']
2613
2614 # upload date
2615 upload_date = u'NA'
2616 if 'upload_date' in video_info:
2617 upload_time = video_info['upload_date']
2618 timetuple = email.utils.parsedate_tz(upload_time)
2619 if timetuple is not None:
2620 try:
2621 upload_date = time.strftime('%Y%m%d', timetuple[0:9])
2622 except:
2623 pass
2624
2625 # description
2626 video_description = 'No description available.'
2627 if (self._downloader.params.get('forcedescription', False) and
2628 'description' in video_info):
2629 video_description = video_info['description']
2630
2631 url_map = video_info['video_urls']
2632 if len(url_map.keys()) > 0:
2633 # Decide which formats to download
2634 req_format = self._downloader.params.get('format', None)
2635 format_limit = self._downloader.params.get('format_limit', None)
2636
2637 if format_limit is not None and format_limit in self._available_formats:
2638 format_list = self._available_formats[self._available_formats.index(format_limit):]
2639 else:
2640 format_list = self._available_formats
2641 existing_formats = [x for x in format_list if x in url_map]
2642 if len(existing_formats) == 0:
2643 self._downloader.trouble(u'ERROR: no known formats available for video')
2644 return
2645 if req_format is None:
2646 video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
2647 elif req_format == '-1':
2648 video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
2649 else:
2650 # Specific format
2651 if req_format not in url_map:
2652 self._downloader.trouble(u'ERROR: requested format not available')
2653 return
2654 video_url_list = [(req_format, url_map[req_format])] # Specific format
2655
2656 for format_param, video_real_url in video_url_list:
2657
2658 # At this point we have a new video
2659 self._downloader.increment_downloads()
2660
2661 # Extension
2662 video_extension = self._video_extensions.get(format_param, 'mp4')
2663
2664 # Find the video URL in fmt_url_map or conn paramters
2665 try:
2666 # Process video information
2667 self._downloader.process_info({
2668 'id': video_id.decode('utf-8'),
2669 'url': video_real_url.decode('utf-8'),
2670 'uploader': video_uploader.decode('utf-8'),
2671 'upload_date': upload_date,
2672 'title': video_title,
2673 'stitle': simple_title,
2674 'ext': video_extension.decode('utf-8'),
2675 'format': (format_param is None and u'NA' or format_param.decode('utf-8')),
2676 'thumbnail': video_thumbnail.decode('utf-8'),
2677 'description': video_description.decode('utf-8'),
2678 'player_url': None,
2679 })
2680 except UnavailableVideoError, err:
2681 self._downloader.trouble(u'\nERROR: unable to download video')
2682
class PostProcessor(object):
    """Base class for postprocessing steps.

    Instances are registered on a downloader via its add_post_processor()
    method. After every successful download the downloader walks its chain
    of PostProcessors, feeding each one's return value to the next. A
    return value of None terminates the chain; raising PostProcessingError
    reports a failure back to the downloader.

    Registration is mutual, mirroring how InfoExtractor objects attach to
    a downloader.
    """

    # The FileDownloader this postprocessor reports progress/errors to.
    _downloader = None

    def __init__(self, downloader=None):
        # A downloader may be supplied now or attached later through
        # set_downloader().
        self._downloader = downloader

    def set_downloader(self, downloader):
        """Attach the downloader this postprocessor belongs to."""
        self._downloader = downloader

    def run(self, information):
        """Process one finished download.

        "information" is an InfoExtractor-style dictionary augmented with
        a "filepath" key naming the downloaded file. Return None to stop
        the postprocessing chain, or an (optionally modified) information
        dictionary to pass along to the next postprocessor.
        """
        # The base class is an identity transform; subclasses override this.
        return information
2728
class FFmpegExtractAudioPP(PostProcessor):
    """Postprocessor that converts a downloaded video into an audio-only
    file, using the external ffprobe and ffmpeg programs.

    preferredcodec is 'best', 'aac' or 'mp3'; None means 'best'. When the
    source audio already matches the request (or 'best' is asked for and
    the stream is aac/mp3), the audio is copied losslessly; otherwise it
    is re-encoded.
    """

    def __init__(self, downloader=None, preferredcodec=None):
        PostProcessor.__init__(self, downloader)
        if preferredcodec is None:
            preferredcodec = 'best'
        self._preferredcodec = preferredcodec

    @staticmethod
    def get_audio_codec(path):
        """Return the audio codec name of the file at path, or None if it
        cannot be determined (ffprobe missing, failing, or no audio)."""
        try:
            cmd = ['ffprobe', '-show_streams', '--', path]
            # open() instead of the deprecated file() builtin, and close
            # the devnull handle explicitly so the descriptor is not leaked.
            devnull = open(os.path.devnull, 'w')
            try:
                handle = subprocess.Popen(cmd, stderr=devnull, stdout=subprocess.PIPE)
                output = handle.communicate()[0]
                if handle.wait() != 0:
                    return None
            finally:
                devnull.close()
        except (IOError, OSError):
            return None
        # ffprobe prints one key=value block per stream; remember the last
        # codec_name seen and report it once a codec_type=audio line confirms
        # that stream is an audio stream.
        audio_codec = None
        for line in output.split('\n'):
            if line.startswith('codec_name='):
                audio_codec = line.split('=')[1].strip()
            elif line.strip() == 'codec_type=audio' and audio_codec is not None:
                return audio_codec
        return None

    @staticmethod
    def run_ffmpeg(path, out_path, codec, more_opts):
        """Run ffmpeg to extract the audio from path into out_path using
        the given codec and extra options. Return True on success."""
        try:
            cmd = ['ffmpeg', '-y', '-i', path, '-vn', '-acodec', codec] + more_opts + ['--', out_path]
            # Same devnull handling as get_audio_codec(): open() + explicit
            # close instead of a leaked file() handle.
            devnull = open(os.path.devnull, 'w')
            try:
                ret = subprocess.call(cmd, stdout=devnull, stderr=subprocess.STDOUT)
            finally:
                devnull.close()
            return (ret == 0)
        except (IOError, OSError):
            return False

    def run(self, information):
        """Convert the downloaded file to the preferred audio format.

        On success returns the information dict with 'filepath' updated to
        the new audio file (the original video is deleted). Returns None
        (stopping the PP chain) on any failure.
        """
        path = information['filepath']

        filecodec = self.get_audio_codec(path)
        if filecodec is None:
            self._downloader.to_stderr(u'WARNING: unable to obtain file audio codec with ffprobe')
            return None

        more_opts = []
        if self._preferredcodec == 'best' or self._preferredcodec == filecodec:
            if filecodec == 'aac' or filecodec == 'mp3':
                # Stream is already in a target codec: copy without
                # re-encoding (lossless).
                acodec = 'copy'
                extension = filecodec
                if filecodec == 'aac':
                    # Raw AAC needs the ADTS container to be playable standalone.
                    more_opts = ['-f', 'adts']
            else:
                # Anything else gets transcoded to MP3.
                acodec = 'libmp3lame'
                extension = 'mp3'
                more_opts = ['-ab', '128k']
        else:
            # A specific codec was requested that differs from the source:
            # convert (lossy).
            acodec = {'mp3': 'libmp3lame', 'aac': 'aac'}[self._preferredcodec]
            extension = self._preferredcodec
            more_opts = ['-ab', '128k']
            if self._preferredcodec == 'aac':
                more_opts += ['-f', 'adts']

        (prefix, ext) = os.path.splitext(path)
        new_path = prefix + '.' + extension
        self._downloader.to_screen(u'[ffmpeg] Destination: %s' % new_path)
        status = self.run_ffmpeg(path, new_path, acodec, more_opts)

        if not status:
            self._downloader.to_stderr(u'WARNING: error running ffmpeg')
            return None

        # Delete the original video only after a successful conversion.
        try:
            os.remove(path)
        except (IOError, OSError):
            self._downloader.to_stderr(u'WARNING: Unable to remove downloaded video file')
            return None

        information['filepath'] = new_path
        return information
2810
### MAIN PROGRAM ###
if __name__ == '__main__':
    try:
        # Modules needed only when running the main program
        import getpass
        import optparse

        # Function to update the program file with the latest version from the repository.
        def update_self(downloader, filename):
            """Overwrite this script (filename) with the latest released version.

            The downloader argument is only used for its to_screen() output.
            Exits the process on any failure.
            """
            # Note: downloader only used for options
            if not os.access(filename, os.W_OK):
                sys.exit('ERROR: no write permissions on %s' % filename)

            downloader.to_screen('Updating to latest stable version...')
            try:
                # LATEST_VERSION names a tag; the script itself is fetched
                # from that tag's tree.
                latest_url = 'http://github.com/rg3/youtube-dl/raw/master/LATEST_VERSION'
                latest_version = urllib.urlopen(latest_url).read().strip()
                prog_url = 'http://github.com/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version
                newcontent = urllib.urlopen(prog_url).read()
            except (IOError, OSError), err:
                sys.exit('ERROR: unable to download latest version')
            try:
                stream = open(filename, 'w')
                stream.write(newcontent)
                stream.close()
            except (IOError, OSError), err:
                sys.exit('ERROR: unable to overwrite current version')
            downloader.to_screen('Updated to version %s' % latest_version)

        # Parse command line
        parser = optparse.OptionParser(
            usage='Usage: %prog [options] url...',
            version='2011.03.29',
            conflict_handler='resolve',
        )

        # General options (top-level, not in an OptionGroup)
        parser.add_option('-h', '--help',
                action='help', help='print this help text and exit')
        parser.add_option('-v', '--version',
                action='version', help='print program version and exit')
        parser.add_option('-U', '--update',
                action='store_true', dest='update_self', help='update this program to latest stable version')
        parser.add_option('-i', '--ignore-errors',
                action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
        parser.add_option('-r', '--rate-limit',
                dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)')
        parser.add_option('-R', '--retries',
                dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10)
        parser.add_option('--playlist-start',
                dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1)
        parser.add_option('--playlist-end',
                dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)
        parser.add_option('--dump-user-agent',
                action='store_true', dest='dump_user_agent',
                help='display the current browser identification', default=False)

        authentication = optparse.OptionGroup(parser, 'Authentication Options')
        authentication.add_option('-u', '--username',
                dest='username', metavar='USERNAME', help='account username')
        authentication.add_option('-p', '--password',
                dest='password', metavar='PASSWORD', help='account password')
        authentication.add_option('-n', '--netrc',
                action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
        parser.add_option_group(authentication)

        video_format = optparse.OptionGroup(parser, 'Video Format Options')
        video_format.add_option('-f', '--format',
                action='store', dest='format', metavar='FORMAT', help='video format code')
        video_format.add_option('--all-formats',
                action='store_const', dest='format', help='download all available video formats', const='-1')
        video_format.add_option('--max-quality',
                action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download')
        parser.add_option_group(video_format)

        # The --get-* options imply both quiet and simulate (see the
        # FileDownloader parameter dict below).
        verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
        verbosity.add_option('-q', '--quiet',
                action='store_true', dest='quiet', help='activates quiet mode', default=False)
        verbosity.add_option('-s', '--simulate',
                action='store_true', dest='simulate', help='do not download video', default=False)
        verbosity.add_option('-g', '--get-url',
                action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
        verbosity.add_option('-e', '--get-title',
                action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
        verbosity.add_option('--get-thumbnail',
                action='store_true', dest='getthumbnail',
                help='simulate, quiet but print thumbnail URL', default=False)
        verbosity.add_option('--get-description',
                action='store_true', dest='getdescription',
                help='simulate, quiet but print video description', default=False)
        verbosity.add_option('--get-filename',
                action='store_true', dest='getfilename',
                help='simulate, quiet but print output filename', default=False)
        verbosity.add_option('--no-progress',
                action='store_true', dest='noprogress', help='do not print progress bar', default=False)
        verbosity.add_option('--console-title',
                action='store_true', dest='consoletitle',
                help='display progress in console titlebar', default=False)
        parser.add_option_group(verbosity)

        filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
        filesystem.add_option('-t', '--title',
                action='store_true', dest='usetitle', help='use title in file name', default=False)
        filesystem.add_option('-l', '--literal',
                action='store_true', dest='useliteral', help='use literal title in file name', default=False)
        filesystem.add_option('-A', '--auto-number',
                action='store_true', dest='autonumber',
                help='number downloaded files starting from 00000', default=False)
        filesystem.add_option('-o', '--output',
                dest='outtmpl', metavar='TEMPLATE', help='output filename template')
        filesystem.add_option('-a', '--batch-file',
                dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
        filesystem.add_option('-w', '--no-overwrites',
                action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
        filesystem.add_option('-c', '--continue',
                action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False)
        filesystem.add_option('--cookies',
                dest='cookiefile', metavar='FILE', help='file to dump cookie jar to')
        filesystem.add_option('--no-part',
                action='store_true', dest='nopart', help='do not use .part files', default=False)
        filesystem.add_option('--no-mtime',
                action='store_false', dest='updatetime',
                help='do not use the Last-modified header to set the file modification time', default=True)
        parser.add_option_group(filesystem)

        postproc = optparse.OptionGroup(parser, 'Post-processing Options')
        postproc.add_option('--extract-audio', action='store_true', dest='extractaudio', default=False,
                help='convert video files to audio-only files (requires ffmpeg and ffprobe)')
        postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best',
                help='"best", "aac" or "mp3"; best by default')
        parser.add_option_group(postproc)

        (opts, args) = parser.parse_args()

        # Open appropriate CookieJar
        if opts.cookiefile is None:
            # No --cookies given: keep cookies in memory only.
            jar = cookielib.CookieJar()
        else:
            try:
                jar = cookielib.MozillaCookieJar(opts.cookiefile)
                # Only load the jar if the file already exists and is
                # readable; otherwise it is created on save() below.
                if os.path.isfile(opts.cookiefile) and os.access(opts.cookiefile, os.R_OK):
                    jar.load()
            except (IOError, OSError), err:
                sys.exit(u'ERROR: unable to open cookie file')

        # Dump user agent
        if opts.dump_user_agent:
            print std_headers['User-Agent']
            sys.exit(0)

        # General configuration: install a global urllib2 opener so every
        # extractor shares cookies, proxy handling and gzip support.
        cookie_processor = urllib2.HTTPCookieProcessor(jar)
        urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler(), cookie_processor, YoutubeDLHandler()))
        socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)

        # Batch file verification
        batchurls = []
        if opts.batchfile is not None:
            try:
                if opts.batchfile == '-':
                    batchfd = sys.stdin
                else:
                    batchfd = open(opts.batchfile, 'r')
                batchurls = batchfd.readlines()
                batchurls = [x.strip() for x in batchurls]
                # Skip blank lines and lines starting with '#', '/' or ';'
                # (treated as comments).
                batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)]
            except IOError:
                sys.exit(u'ERROR: batch file could not be read')
        all_urls = batchurls + args

        # Conflicting, missing and erroneous options
        if opts.usenetrc and (opts.username is not None or opts.password is not None):
            parser.error(u'using .netrc conflicts with giving username/password')
        if opts.password is not None and opts.username is None:
            parser.error(u'account username missing')
        if opts.outtmpl is not None and (opts.useliteral or opts.usetitle or opts.autonumber):
            parser.error(u'using output template conflicts with using title, literal title or auto number')
        if opts.usetitle and opts.useliteral:
            parser.error(u'using title conflicts with using literal title')
        if opts.username is not None and opts.password is None:
            # Username without password: prompt interactively (never echo).
            opts.password = getpass.getpass(u'Type account password and press return:')
        if opts.ratelimit is not None:
            # Convert '50k' / '44.6m' style limits to a byte count.
            numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
            if numeric_limit is None:
                parser.error(u'invalid rate limit specified')
            opts.ratelimit = numeric_limit
        if opts.retries is not None:
            try:
                opts.retries = long(opts.retries)
            except (TypeError, ValueError), err:
                parser.error(u'invalid retry count specified')
        try:
            opts.playliststart = long(opts.playliststart)
            if opts.playliststart <= 0:
                raise ValueError
        except (TypeError, ValueError), err:
            parser.error(u'invalid playlist start number specified')
        try:
            opts.playlistend = long(opts.playlistend)
            # -1 means "until the last video"; otherwise the end must be a
            # positive index not before the start.
            if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart):
                raise ValueError
        except (TypeError, ValueError), err:
            parser.error(u'invalid playlist end number specified')
        if opts.extractaudio:
            if opts.audioformat not in ['best', 'aac', 'mp3']:
                parser.error(u'invalid audio format specified')

        # Information extractors
        vimeo_ie = VimeoIE()
        youtube_ie = YoutubeIE()
        metacafe_ie = MetacafeIE(youtube_ie)
        dailymotion_ie = DailymotionIE()
        youtube_pl_ie = YoutubePlaylistIE(youtube_ie)
        youtube_user_ie = YoutubeUserIE(youtube_ie)
        youtube_search_ie = YoutubeSearchIE(youtube_ie)
        google_ie = GoogleIE()
        google_search_ie = GoogleSearchIE(google_ie)
        photobucket_ie = PhotobucketIE()
        yahoo_ie = YahooIE()
        yahoo_search_ie = YahooSearchIE(yahoo_ie)
        deposit_files_ie = DepositFilesIE()
        facebook_ie = FacebookIE()
        generic_ie = GenericIE()

        # File downloader
        fd = FileDownloader({
            'usenetrc': opts.usenetrc,
            'username': opts.username,
            'password': opts.password,
            # Any --get-* option implies both quiet and simulate modes.
            'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename),
            'forceurl': opts.geturl,
            'forcetitle': opts.gettitle,
            'forcethumbnail': opts.getthumbnail,
            'forcedescription': opts.getdescription,
            'forcefilename': opts.getfilename,
            'simulate': (opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename),
            'format': opts.format,
            'format_limit': opts.format_limit,
            # Output template: an explicit -o wins; otherwise the first
            # matching default below is used ('%(id)s.%(ext)s' as fallback).
            'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
                or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s')
                or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s')
                or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s')
                or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(stitle)s-%(id)s.%(ext)s')
                or (opts.useliteral and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s')
                or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
                or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
                or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
                or u'%(id)s.%(ext)s'),
            'ignoreerrors': opts.ignoreerrors,
            'ratelimit': opts.ratelimit,
            'nooverwrites': opts.nooverwrites,
            'retries': opts.retries,
            'continuedl': opts.continue_dl,
            'noprogress': opts.noprogress,
            'playliststart': opts.playliststart,
            'playlistend': opts.playlistend,
            # '-o -' writes the video to stdout, so logs go to stderr.
            'logtostderr': opts.outtmpl == '-',
            'consoletitle': opts.consoletitle,
            'nopart': opts.nopart,
            'updatetime': opts.updatetime,
        })
        # Registration order matters: the first IE whose pattern matches a
        # URL handles it.
        fd.add_info_extractor(vimeo_ie)
        fd.add_info_extractor(youtube_search_ie)
        fd.add_info_extractor(youtube_pl_ie)
        fd.add_info_extractor(youtube_user_ie)
        fd.add_info_extractor(metacafe_ie)
        fd.add_info_extractor(dailymotion_ie)
        fd.add_info_extractor(youtube_ie)
        fd.add_info_extractor(google_ie)
        fd.add_info_extractor(google_search_ie)
        fd.add_info_extractor(photobucket_ie)
        fd.add_info_extractor(yahoo_ie)
        fd.add_info_extractor(yahoo_search_ie)
        fd.add_info_extractor(deposit_files_ie)
        fd.add_info_extractor(facebook_ie)

        # This must come last since it's the
        # fallback if none of the others work
        fd.add_info_extractor(generic_ie)

        # PostProcessors
        if opts.extractaudio:
            fd.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat))

        # Update version
        if opts.update_self:
            update_self(fd, sys.argv[0])

        # Maybe do nothing
        if len(all_urls) < 1:
            # No URLs is only an error when we weren't asked to self-update.
            if not opts.update_self:
                parser.error(u'you must provide at least one URL')
            else:
                sys.exit()
        retcode = fd.download(all_urls)

        # Dump cookie jar if requested
        if opts.cookiefile is not None:
            try:
                jar.save()
            except (IOError, OSError), err:
                sys.exit(u'ERROR: unable to save cookie jar')

        sys.exit(retcode)

    except DownloadError:
        sys.exit(1)
    except SameFileError:
        sys.exit(u'ERROR: fixed output name but more than one file to download')
    except KeyboardInterrupt:
        sys.exit(u'\nERROR: Interrupted by user')