# jfr.im git - yt-dlp.git/blob - youtube-dl
# Fix bug in regular expression for youtu.be links
# [yt-dlp.git] / youtube-dl
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3 # Author: Ricardo Garcia Gonzalez
4 # Author: Danny Colligan
5 # Author: Benjamin Johnson
6 # Author: Vasyl' Vavrychuk
7 # License: Public domain code
8 import cookielib
9 import ctypes
10 import datetime
11 import htmlentitydefs
12 import httplib
13 import locale
14 import math
15 import netrc
16 import os
17 import os.path
18 import re
19 import socket
20 import string
21 import subprocess
22 import sys
23 import time
24 import urllib
25 import urllib2
26
27 # parse_qs was moved from the cgi module to the urlparse module recently.
28 try:
29 from urlparse import parse_qs
30 except ImportError:
31 from cgi import parse_qs
32
# HTTP headers sent with every request.  They impersonate a desktop Firefox
# browser so sites serve the same pages they would to a regular user.
std_headers = {
    'User-Agent': 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.12) Gecko/20101028 Firefox/3.6.12',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}

# Characters allowed in "simplified" video titles: ASCII letters and digits.
# .decode('ascii') is the Python 2 way of getting them as a unicode string.
simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii')
42
def preferredencoding():
    """Get preferred encoding.

    Returns the best encoding scheme for the system, based on
    locale.getpreferredencoding() and some further tweaks.
    """
    # The original wrapped this in a one-shot generator and called .next()
    # on it, which only obscured a value computed exactly once; a plain
    # try/except returns the same result.
    try:
        pref = locale.getpreferredencoding()
        # Probe the reported codec: encoding a test string raises if the
        # locale names an encoding this interpreter does not actually have.
        u'TEST'.encode(pref)
    except Exception:
        # Fall back to UTF-8 when the locale reports a bogus encoding.
        pref = 'UTF-8'
    return pref
58
59 def htmlentity_transform(matchobj):
60 """Transforms an HTML entity to a Unicode character.
61
62 This function receives a match object and is intended to be used with
63 the re.sub() function.
64 """
65 entity = matchobj.group(1)
66
67 # Known non-numeric HTML entity
68 if entity in htmlentitydefs.name2codepoint:
69 return unichr(htmlentitydefs.name2codepoint[entity])
70
71 # Unicode character
72 mobj = re.match(ur'(?u)#(x?\d+)', entity)
73 if mobj is not None:
74 numstr = mobj.group(1)
75 if numstr.startswith(u'x'):
76 base = 16
77 numstr = u'0%s' % numstr
78 else:
79 base = 10
80 return unichr(long(numstr, base))
81
82 # Unknown entity in name, return its literal representation
83 return (u'&%s;' % entity)
84
def sanitize_title(utitle):
    """Sanitizes a video title so it could be used as part of a filename."""
    # Replace HTML entities (&amp;, &#65;, ...) with the characters they name.
    utitle = re.sub(ur'(?u)&(.+?);', htmlentity_transform, utitle)
    # The path separator cannot appear inside a single filename component.
    return utitle.replace(unicode(os.sep), u'%')
89
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == u'-':
            # "-" means standard output.  On Windows stdout must be switched
            # to binary mode or the written video data would be mangled by
            # newline translation.
            if sys.platform == 'win32':
                import msvcrt
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            return (sys.stdout, filename)
        stream = open(filename, open_mode)
        return (stream, filename)
    except (IOError, OSError), err:
        # In case of error, try to remove win32 forbidden chars
        filename = re.sub(ur'[/<>:"\|\?\*]', u'#', filename)

        # An exception here should be caught in the caller
        stream = open(filename, open_mode)
        return (stream, filename)
115
class DownloadError(Exception):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """
    # Pure marker exception -- the message passed to the constructor is all
    # the state it carries.
    pass
124
class SameFileError(Exception):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk
    (i.e. several URLs with a fixed, non-templated output name).
    """
    pass
132
class PostProcessingError(Exception):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """
    pass
140
class UnavailableVideoError(Exception):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
    pass
148
class ContentTooShortError(Exception):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """
    # Both in bytes
    downloaded = None   # bytes actually received
    expected = None     # bytes the server announced (Content-Length)

    def __init__(self, downloaded, expected):
        self.downloaded = downloaded
        self.expected = expected
163
class FileDownloader(object):
    """File Downloader class.

    File downloader objects are the ones responsible of downloading the
    actual video file and writing it to disk if the user has requested
    it, among some other tasks. In most cases there should be one per
    program. As, given a video URL, the downloader doesn't know how to
    extract all the needed information, task that InfoExtractors do, it
    has to pass the URL to one of them.

    For this, file downloader objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the file downloader handles it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    asks the FileDownloader to process the video information, possibly
    downloading the video.

    File downloaders accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The FileDownloader also
    registers itself as the downloader in charge for the InfoExtractors
    that are added to it, so this is a "mutual registration".

    Available options:

    username:          Username for authentication purposes.
    password:          Password for authentication purposes.
    usenetrc:          Use netrc for authentication instead.
    quiet:             Do not print messages to stdout.
    forceurl:          Force printing final URL.
    forcetitle:        Force printing title.
    forcethumbnail:    Force printing thumbnail URL.
    forcedescription:  Force printing description.
    simulate:          Do not download the video files.
    format:            Video format code.
    format_limit:      Highest quality format to try.
    outtmpl:           Template for output names.
    ignoreerrors:      Do not stop on download errors.
    ratelimit:         Download speed limit, in bytes/sec.
    nooverwrites:      Prevent overwriting files.
    retries:           Number of times to retry for HTTP error 5xx
    continuedl:        Try to continue downloads if possible.
    noprogress:        Do not print the progress bar.
    playliststart:     Playlist item to start at.
    playlistend:       Playlist item to end at.
    logtostderr:       Log messages to stderr instead of stdout.
    consoletitle:      Display progress in console window's titlebar.
    nopart:            Do not use temporary .part files.
    """

    # Class-level defaults; __init__ replaces all of these with per-instance
    # values, so the mutable class attributes are never actually shared.
    params = None
    _ies = []
    _pps = []
    _download_retcode = None
    _num_downloads = None
    _screen_file = None

    def __init__(self, params):
        """Create a FileDownloader object with the given options."""
        self._ies = []
        self._pps = []
        self._download_retcode = 0
        self._num_downloads = 0
        # Index with the boolean: False -> stdout, True -> stderr.
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self.params = params

    @staticmethod
    def pmkdir(filename):
        """Create directory components in filename. Similar to Unix "mkdir -p"."""
        components = filename.split(os.sep)
        # Build every ancestor path of the final component (the last element
        # of `components` is the file itself and is deliberately excluded).
        aggregate = [os.sep.join(components[0:x]) for x in xrange(1, len(components))]
        aggregate = ['%s%s' % (x, os.sep) for x in aggregate] # Finish names with separator
        for dir in aggregate:
            if not os.path.exists(dir):
                os.mkdir(dir)

    @staticmethod
    def format_bytes(bytes):
        """Return a human-readable string such as "1.21M" for a byte count."""
        if bytes is None:
            return 'N/A'
        if type(bytes) is str:
            bytes = float(bytes)
        if bytes == 0.0:
            exponent = 0
        else:
            exponent = long(math.log(bytes, 1024.0))
        suffix = 'bkMGTPEZY'[exponent]
        converted = float(bytes) / float(1024**exponent)
        return '%.2f%s' % (converted, suffix)

    @staticmethod
    def calc_percent(byte_counter, data_len):
        """Format download completion as a fixed-width percentage string."""
        if data_len is None:
            return '---.-%'
        return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))

    @staticmethod
    def calc_eta(start, now, total, current):
        """Estimate remaining time as "MM:SS" from bytes done vs. total."""
        if total is None:
            return '--:--'
        dif = now - start
        if current == 0 or dif < 0.001: # One millisecond
            return '--:--'
        rate = float(current) / dif
        eta = long((float(total) - float(current)) / rate)
        (eta_mins, eta_secs) = divmod(eta, 60)
        if eta_mins > 99:
            return '--:--'
        return '%02d:%02d' % (eta_mins, eta_secs)

    @staticmethod
    def calc_speed(start, now, bytes):
        """Format average download speed since `start` as e.g. "  1.21M/s"."""
        dif = now - start
        if bytes == 0 or dif < 0.001: # One millisecond
            return '%10s' % '---b/s'
        return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))

    @staticmethod
    def best_block_size(elapsed_time, bytes):
        """Pick the next read size, adapting to the measured transfer rate."""
        new_min = max(bytes / 2.0, 1.0)
        new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
        if elapsed_time < 0.001:
            return long(new_max)
        rate = bytes / elapsed_time
        if rate > new_max:
            return long(new_max)
        if rate < new_min:
            return long(new_min)
        return long(rate)

    @staticmethod
    def parse_bytes(bytestr):
        """Parse a string indicating a byte quantity into a long integer."""
        matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
        if matchobj is None:
            return None
        number = float(matchobj.group(1))
        # An empty suffix group yields index 0 ('b'), i.e. a multiplier of 1.
        multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
        return long(round(number * multiplier))

    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        self._ies.append(ie)
        ie.set_downloader(self)

    def add_post_processor(self, pp):
        """Add a PostProcessor object to the end of the chain."""
        self._pps.append(pp)
        pp.set_downloader(self)

    def to_screen(self, message, skip_eol=False, ignore_encoding_errors=False):
        """Print message to stdout if not in quiet mode."""
        try:
            if not self.params.get('quiet', False):
                terminator = [u'\n', u''][skip_eol]
                # Trailing comma suppresses print's own newline; the chosen
                # terminator is already part of the printed string.
                print >>self._screen_file, (u'%s%s' % (message, terminator)).encode(preferredencoding()),
                self._screen_file.flush()
        except (UnicodeEncodeError), err:
            if not ignore_encoding_errors:
                raise

    def to_stderr(self, message):
        """Print message to stderr."""
        print >>sys.stderr, message.encode(preferredencoding())

    def to_cons_title(self, message):
        """Set console/terminal window title to message."""
        if not self.params.get('consoletitle', False):
            return
        if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
            # c_wchar_p() might not be necessary if `message` is
            # already of type unicode()
            ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        elif 'TERM' in os.environ:
            # xterm-compatible escape sequence: set icon name and window title.
            sys.stderr.write('\033]0;%s\007' % message.encode(preferredencoding()))

    def fixed_template(self):
        """Checks if the output template is fixed (contains no %(...)s fields)."""
        return (re.search(ur'(?u)%\(.+?\)s', self.params['outtmpl']) is None)

    def trouble(self, message=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.
        """
        if message is not None:
            self.to_stderr(message)
        if not self.params.get('ignoreerrors', False):
            raise DownloadError(message)
        self._download_retcode = 1

    def slow_down(self, start_time, byte_counter):
        """Sleep if the download speed is over the rate limit."""
        rate_limit = self.params.get('ratelimit', None)
        if rate_limit is None or byte_counter == 0:
            return
        now = time.time()
        elapsed = now - start_time
        if elapsed <= 0.0:
            return
        speed = float(byte_counter) / elapsed
        if speed > rate_limit:
            # Sleep just long enough for the average speed to drop back
            # to the configured limit.
            time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)

    def temp_name(self, filename):
        """Returns a temporary filename for the given filename."""
        # No .part file for stdout, for special files (devices, FIFOs), or
        # when the user explicitly disabled partial files.
        if self.params.get('nopart', False) or filename == u'-' or \
                (os.path.exists(filename) and not os.path.isfile(filename)):
            return filename
        return filename + u'.part'

    def try_rename(self, old_filename, new_filename):
        """Rename the .part file into place, reporting (not raising) failures."""
        try:
            if old_filename == new_filename:
                return
            os.rename(old_filename, new_filename)
        except (IOError, OSError), err:
            self.trouble(u'ERROR: unable to rename file')

    def report_destination(self, filename):
        """Report destination filename."""
        self.to_screen(u'[download] Destination: %s' % filename, ignore_encoding_errors=True)

    def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
        """Report download progress."""
        if self.params.get('noprogress', False):
            return
        # \r rewrites the current line in place instead of scrolling.
        self.to_screen(u'\r[download] %s of %s at %s ETA %s' %
                (percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
        self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
                (percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))

    def report_resuming_byte(self, resume_len):
        """Report attempt to resume at given byte."""
        self.to_screen(u'[download] Resuming download at byte %s' % resume_len)

    def report_retry(self, count, retries):
        """Report retry in case of HTTP error 5xx"""
        self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))

    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        try:
            self.to_screen(u'[download] %s has already been downloaded' % file_name)
        except (UnicodeEncodeError), err:
            # Fall back to a filename-free message when the console encoding
            # cannot represent the name.
            self.to_screen(u'[download] The file has already been downloaded')

    def report_unable_to_resume(self):
        """Report it was impossible to resume download."""
        self.to_screen(u'[download] Unable to resume')

    def report_finish(self):
        """Report download finished."""
        if self.params.get('noprogress', False):
            self.to_screen(u'[download] Download completed')
        else:
            # Just terminate the progress line that report_progress left open.
            self.to_screen(u'')

    def increment_downloads(self):
        """Increment the ordinal that assigns a number to each file."""
        self._num_downloads += 1

    def process_info(self, info_dict):
        """Process a single dictionary returned by an InfoExtractor."""
        # Do nothing else if in simulate mode
        if self.params.get('simulate', False):
            # Forced printings
            if self.params.get('forcetitle', False):
                print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')
            if self.params.get('forceurl', False):
                print info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace')
            if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
                print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace')
            if self.params.get('forcedescription', False) and 'description' in info_dict:
                print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace')

            return

        try:
            template_dict = dict(info_dict)
            # Extra template fields available in outtmpl besides the
            # extractor-provided ones.
            template_dict['epoch'] = unicode(long(time.time()))
            template_dict['autonumber'] = unicode('%05d' % self._num_downloads)
            filename = self.params['outtmpl'] % template_dict
        except (ValueError, KeyError), err:
            self.trouble(u'ERROR: invalid system charset or erroneous output template')
            return
        if self.params.get('nooverwrites', False) and os.path.exists(filename):
            self.to_stderr(u'WARNING: file exists and will be skipped')
            return

        try:
            self.pmkdir(filename)
        except (OSError, IOError), err:
            self.trouble(u'ERROR: unable to create directories: %s' % str(err))
            return

        try:
            success = self._do_download(filename, info_dict['url'].encode('utf-8'), info_dict.get('player_url', None))
        except (OSError, IOError), err:
            raise UnavailableVideoError
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self.trouble(u'ERROR: unable to download video data: %s' % str(err))
            return
        except (ContentTooShortError, ), err:
            self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
            return

        if success:
            try:
                self.post_process(filename, info_dict)
            except (PostProcessingError), err:
                self.trouble(u'ERROR: postprocessing: %s' % str(err))
                return

    def download(self, url_list):
        """Download a given list of URLs."""
        if len(url_list) > 1 and self.fixed_template():
            raise SameFileError(self.params['outtmpl'])

        for url in url_list:
            suitable_found = False
            for ie in self._ies:
                # Go to next InfoExtractor if not suitable
                if not ie.suitable(url):
                    continue

                # Suitable InfoExtractor found
                suitable_found = True

                # Extract information from URL and process it
                ie.extract(url)

                # Suitable InfoExtractor had been found; go to next URL
                break

            if not suitable_found:
                self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url)

        return self._download_retcode

    def post_process(self, filename, ie_info):
        """Run the postprocessing chain on the given file."""
        info = dict(ie_info)
        info['filepath'] = filename
        for pp in self._pps:
            info = pp.run(info)
            # A postprocessor returning None stops the chain.
            if info is None:
                break

    def _download_with_rtmpdump(self, filename, url, player_url):
        """Download an rtmp:// URL by shelling out to the rtmpdump binary."""
        self.report_destination(filename)
        tmpfilename = self.temp_name(filename)

        # Check for rtmpdump first
        try:
            subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
        except (OSError, IOError):
            self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
            return False

        # Download using rtmpdump. rtmpdump returns exit code 2 when
        # the connection was interrumpted and resuming appears to be
        # possible. This is part of rtmpdump's normal usage, AFAIK.
        basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename]
        retval = subprocess.call(basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)])
        while retval == 2 or retval == 1:
            prevsize = os.path.getsize(tmpfilename)
            self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
            time.sleep(5.0) # This seems to be needed
            retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
            cursize = os.path.getsize(tmpfilename)
            # No progress and exit code 1: give up instead of looping forever.
            if prevsize == cursize and retval == 1:
                break
        if retval == 0:
            self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(tmpfilename))
            self.try_rename(tmpfilename, filename)
            return True
        else:
            self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval)
            return False

    def _do_download(self, filename, url, player_url):
        """Download `url` to `filename` over HTTP, with resume and retries."""
        # Check file already present
        if self.params.get('continuedl', False) and os.path.isfile(filename) and not self.params.get('nopart', False):
            self.report_file_already_downloaded(filename)
            return True

        # Attempt to download using rtmpdump
        if url.startswith('rtmp'):
            return self._download_with_rtmpdump(filename, url, player_url)

        tmpfilename = self.temp_name(filename)
        stream = None
        open_mode = 'wb'
        # basic_request stays range-free so it can be used to probe the full
        # length when a resume attempt is rejected with 416.
        basic_request = urllib2.Request(url, None, std_headers)
        request = urllib2.Request(url, None, std_headers)

        # Establish possible resume length
        if os.path.isfile(tmpfilename):
            resume_len = os.path.getsize(tmpfilename)
        else:
            resume_len = 0

        # Request parameters in case of being able to resume
        if self.params.get('continuedl', False) and resume_len != 0:
            self.report_resuming_byte(resume_len)
            request.add_header('Range','bytes=%d-' % resume_len)
            open_mode = 'ab'

        count = 0
        retries = self.params.get('retries', 0)
        while count <= retries:
            # Establish connection
            try:
                data = urllib2.urlopen(request)
                break
            except (urllib2.HTTPError, ), err:
                if (err.code < 500 or err.code >= 600) and err.code != 416:
                    # Unexpected HTTP error
                    raise
                elif err.code == 416:
                    # Unable to resume (requested range not satisfiable)
                    try:
                        # Open the connection again without the range header
                        data = urllib2.urlopen(basic_request)
                        content_length = data.info()['Content-Length']
                    except (urllib2.HTTPError, ), err:
                        if err.code < 500 or err.code >= 600:
                            raise
                    else:
                        # Examine the reported length
                        if (content_length is not None and
                            (resume_len - 100 < long(content_length) < resume_len + 100)):
                            # The file had already been fully downloaded.
                            # Explanation to the above condition: in issue #175 it was revealed that
                            # YouTube sometimes adds or removes a few bytes from the end of the file,
                            # changing the file size slightly and causing problems for some users. So
                            # I decided to implement a suggested change and consider the file
                            # completely downloaded if the file size differs less than 100 bytes from
                            # the one in the hard drive.
                            self.report_file_already_downloaded(filename)
                            self.try_rename(tmpfilename, filename)
                            return True
                        else:
                            # The length does not match, we start the download over
                            self.report_unable_to_resume()
                            open_mode = 'wb'
                            break
            # Retry
            count += 1
            if count <= retries:
                self.report_retry(count, retries)

        if count > retries:
            self.trouble(u'ERROR: giving up after %s retries' % retries)
            return False

        data_len = data.info().get('Content-length', None)
        if data_len is not None:
            data_len = long(data_len) + resume_len
        data_len_str = self.format_bytes(data_len)
        byte_counter = 0 + resume_len
        block_size = 1024
        start = time.time()
        while True:
            # Download and write
            before = time.time()
            data_block = data.read(block_size)
            after = time.time()
            if len(data_block) == 0:
                break
            byte_counter += len(data_block)

            # Open file just in time
            if stream is None:
                try:
                    (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
                    self.report_destination(filename)
                except (OSError, IOError), err:
                    self.trouble(u'ERROR: unable to open for writing: %s' % str(err))
                    return False
            try:
                stream.write(data_block)
            except (IOError, OSError), err:
                self.trouble(u'\nERROR: unable to write data: %s' % str(err))
                return False
            block_size = self.best_block_size(after - before, len(data_block))

            # Progress message
            percent_str = self.calc_percent(byte_counter, data_len)
            # NOTE(review): if the server sent no Content-Length, data_len is
            # None and `data_len - resume_len` raises TypeError here -- verify
            # whether any extractor can hit this path.
            eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
            speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len)
            self.report_progress(percent_str, data_len_str, speed_str, eta_str)

            # Apply rate limit
            self.slow_down(start, byte_counter - resume_len)

        stream.close()
        self.report_finish()
        if data_len is not None and byte_counter != data_len:
            raise ContentTooShortError(byte_counter, long(data_len))
        self.try_rename(tmpfilename, filename)
        return True
671
class InfoExtractor(object):
    """Information Extractor class.

    An information extractor takes a URL and pulls out everything the
    FileDownloader needs to fetch the video (or videos) it points at: the
    real media URL, the title, a simplified title, the uploader and so on.
    The result is a dictionary handed to the FileDownloader, which then
    decides what to do with it (download the file, print fields, ...).
    Each dictionary must provide the following fields:

    id:         Video identifier.
    url:        Final video URL.
    uploader:   Nickname of the video uploader.
    title:      Literal title.
    stitle:     Simplified title.
    ext:        Video filename extension.
    format:     Video format.
    player_url: SWF Player URL (may be None).

    Two further fields are optional; they exist mainly so youtube-dl can
    back a video search feature (such as the one in youtube2mp3) and are
    only consulted by the corresponding forced-printing options:

    thumbnail:  Full URL to a video thumbnail image.
    description: One-line video description.

    Concrete extractors override _real_initialize(), _real_extract() and
    the static suitable() method, and are typically instantiated and
    registered with the main downloader.
    """

    # Set once _real_initialize() has run for this instance.
    _ready = False
    # FileDownloader in charge of this extractor (may be None).
    _downloader = None

    def __init__(self, downloader=None):
        """Constructor. Receives an optional downloader."""
        self._ready = False
        self.set_downloader(downloader)

    @staticmethod
    def suitable(url):
        """Receives a URL and returns True if suitable for this IE."""
        # The base class accepts nothing; subclasses redefine this.
        return False

    def initialize(self):
        """Initializes an instance (authentication, etc)."""
        if self._ready:
            return
        self._real_initialize()
        self._ready = True

    def extract(self, url):
        """Extracts URL information and returns it in list of dicts."""
        self.initialize()
        return self._real_extract(url)

    def set_downloader(self, downloader):
        """Sets the downloader for this IE."""
        self._downloader = downloader

    def _real_initialize(self):
        """Real initialization process. Redefine in subclasses."""
        pass

    def _real_extract(self, url):
        """Real extraction process. Redefine in subclasses."""
        pass
742
743 class YoutubeIE(InfoExtractor):
744 """Information extractor for youtube.com."""
745
746 _VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?:(?:v/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$'
747 _LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
748 _LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en'
749 _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
750 _NETRC_MACHINE = 'youtube'
751 # Listed in order of quality
752 _available_formats = ['38', '37', '22', '45', '35', '34', '43', '18', '6', '5', '17', '13']
753 _video_extensions = {
754 '13': '3gp',
755 '17': 'mp4',
756 '18': 'mp4',
757 '22': 'mp4',
758 '37': 'mp4',
759 '38': 'video', # You actually don't know if this will be MOV, AVI or whatever
760 '43': 'webm',
761 '45': 'webm',
762 }
763
764 @staticmethod
765 def suitable(url):
766 return (re.match(YoutubeIE._VALID_URL, url) is not None)
767
768 def report_lang(self):
769 """Report attempt to set language."""
770 self._downloader.to_screen(u'[youtube] Setting language')
771
772 def report_login(self):
773 """Report attempt to log in."""
774 self._downloader.to_screen(u'[youtube] Logging in')
775
776 def report_age_confirmation(self):
777 """Report attempt to confirm age."""
778 self._downloader.to_screen(u'[youtube] Confirming age')
779
780 def report_video_webpage_download(self, video_id):
781 """Report attempt to download video webpage."""
782 self._downloader.to_screen(u'[youtube] %s: Downloading video webpage' % video_id)
783
784 def report_video_info_webpage_download(self, video_id):
785 """Report attempt to download video info webpage."""
786 self._downloader.to_screen(u'[youtube] %s: Downloading video info webpage' % video_id)
787
788 def report_information_extraction(self, video_id):
789 """Report attempt to extract video information."""
790 self._downloader.to_screen(u'[youtube] %s: Extracting video information' % video_id)
791
792 def report_unavailable_format(self, video_id, format):
793 """Report extracted video URL."""
794 self._downloader.to_screen(u'[youtube] %s: Format %s not available' % (video_id, format))
795
796 def report_rtmp_download(self):
797 """Indicate the download will use the RTMP protocol."""
798 self._downloader.to_screen(u'[youtube] RTMP download detected')
799
800 def _real_initialize(self):
801 if self._downloader is None:
802 return
803
804 username = None
805 password = None
806 downloader_params = self._downloader.params
807
808 # Attempt to use provided username and password or .netrc data
809 if downloader_params.get('username', None) is not None:
810 username = downloader_params['username']
811 password = downloader_params['password']
812 elif downloader_params.get('usenetrc', False):
813 try:
814 info = netrc.netrc().authenticators(self._NETRC_MACHINE)
815 if info is not None:
816 username = info[0]
817 password = info[2]
818 else:
819 raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
820 except (IOError, netrc.NetrcParseError), err:
821 self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
822 return
823
824 # Set language
825 request = urllib2.Request(self._LANG_URL, None, std_headers)
826 try:
827 self.report_lang()
828 urllib2.urlopen(request).read()
829 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
830 self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err))
831 return
832
833 # No authentication to be performed
834 if username is None:
835 return
836
837 # Log in
838 login_form = {
839 'current_form': 'loginForm',
840 'next': '/',
841 'action_login': 'Log In',
842 'username': username,
843 'password': password,
844 }
845 request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form), std_headers)
846 try:
847 self.report_login()
848 login_results = urllib2.urlopen(request).read()
849 if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
850 self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
851 return
852 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
853 self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
854 return
855
856 # Confirm age
857 age_form = {
858 'next_url': '/',
859 'action_confirm': 'Confirm',
860 }
861 request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form), std_headers)
862 try:
863 self.report_age_confirmation()
864 age_results = urllib2.urlopen(request).read()
865 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
866 self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
867 return
868
869 def _real_extract(self, url):
870 # Extract video id from URL
871 mobj = re.match(self._VALID_URL, url)
872 if mobj is None:
873 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
874 return
875 video_id = mobj.group(2)
876
877 # Get video webpage
878 self.report_video_webpage_download(video_id)
879 request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en&amp;has_verified=1' % video_id, None, std_headers)
880 try:
881 video_webpage = urllib2.urlopen(request).read()
882 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
883 self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
884 return
885
886 # Attempt to extract SWF player URL
887 mobj = re.search(r'swfConfig.*?"(http:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
888 if mobj is not None:
889 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
890 else:
891 player_url = None
892
893 # Get video info
894 self.report_video_info_webpage_download(video_id)
895 for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
896 video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
897 % (video_id, el_type))
898 request = urllib2.Request(video_info_url, None, std_headers)
899 try:
900 video_info_webpage = urllib2.urlopen(request).read()
901 video_info = parse_qs(video_info_webpage)
902 if 'token' in video_info:
903 break
904 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
905 self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
906 return
907 if 'token' not in video_info:
908 if 'reason' in video_info:
909 self._downloader.trouble(u'ERROR: YouTube said: %s' % video_info['reason'][0].decode('utf-8'))
910 else:
911 self._downloader.trouble(u'ERROR: "token" parameter not in video info for unknown reason')
912 return
913
914 # Start extracting information
915 self.report_information_extraction(video_id)
916
917 # uploader
918 if 'author' not in video_info:
919 self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
920 return
921 video_uploader = urllib.unquote_plus(video_info['author'][0])
922
923 # title
924 if 'title' not in video_info:
925 self._downloader.trouble(u'ERROR: unable to extract video title')
926 return
927 video_title = urllib.unquote_plus(video_info['title'][0])
928 video_title = video_title.decode('utf-8')
929 video_title = sanitize_title(video_title)
930
931 # simplified title
932 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
933 simple_title = simple_title.strip(ur'_')
934
935 # thumbnail image
936 if 'thumbnail_url' not in video_info:
937 self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
938 video_thumbnail = ''
939 else: # don't panic if we can't find it
940 video_thumbnail = urllib.unquote_plus(video_info['thumbnail_url'][0])
941
942 # upload date
943 upload_date = u'NA'
944 mobj = re.search(r'id="eow-date".*?>(.*?)</span>', video_webpage, re.DOTALL)
945 if mobj is not None:
946 upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
947 format_expressions = ['%d %B %Y', '%B %d %Y']
948 for expression in format_expressions:
949 try:
950 upload_date = datetime.datetime.strptime(upload_date, expression).strftime('%Y%m%d')
951 except:
952 pass
953
954 # description
955 video_description = 'No description available.'
956 if self._downloader.params.get('forcedescription', False):
957 mobj = re.search(r'<meta name="description" content="(.*)"(?:\s*/)?>', video_webpage)
958 if mobj is not None:
959 video_description = mobj.group(1)
960
961 # token
962 video_token = urllib.unquote_plus(video_info['token'][0])
963
964 # Decide which formats to download
965 req_format = self._downloader.params.get('format', None)
966
967 if 'fmt_url_map' in video_info:
968 url_map = dict(tuple(pair.split('|')) for pair in video_info['fmt_url_map'][0].split(','))
969 format_limit = self._downloader.params.get('format_limit', None)
970 if format_limit is not None and format_limit in self._available_formats:
971 format_list = self._available_formats[self._available_formats.index(format_limit):]
972 else:
973 format_list = self._available_formats
974 existing_formats = [x for x in format_list if x in url_map]
975 if len(existing_formats) == 0:
976 self._downloader.trouble(u'ERROR: no known formats available for video')
977 return
978 if req_format is None:
979 video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
980 elif req_format == '-1':
981 video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
982 else:
983 # Specific format
984 if req_format not in url_map:
985 self._downloader.trouble(u'ERROR: requested format not available')
986 return
987 video_url_list = [(req_format, url_map[req_format])] # Specific format
988
989 elif 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
990 self.report_rtmp_download()
991 video_url_list = [(None, video_info['conn'][0])]
992
993 else:
994 self._downloader.trouble(u'ERROR: no fmt_url_map or conn information found in video info')
995 return
996
997 for format_param, video_real_url in video_url_list:
998 # At this point we have a new video
999 self._downloader.increment_downloads()
1000
1001 # Extension
1002 video_extension = self._video_extensions.get(format_param, 'flv')
1003
1004 # Find the video URL in fmt_url_map or conn paramters
1005 try:
1006 # Process video information
1007 self._downloader.process_info({
1008 'id': video_id.decode('utf-8'),
1009 'url': video_real_url.decode('utf-8'),
1010 'uploader': video_uploader.decode('utf-8'),
1011 'upload_date': upload_date,
1012 'title': video_title,
1013 'stitle': simple_title,
1014 'ext': video_extension.decode('utf-8'),
1015 'format': (format_param is None and u'NA' or format_param.decode('utf-8')),
1016 'thumbnail': video_thumbnail.decode('utf-8'),
1017 'description': video_description.decode('utf-8'),
1018 'player_url': player_url,
1019 })
1020 except UnavailableVideoError, err:
1021 self._downloader.trouble(u'\nERROR: unable to download video')
1022
1023
class MetacafeIE(InfoExtractor):
    """Information Extractor for metacafe.com."""

    # Groups: (1) video id, (2) URL-friendly simplified title.
    _VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
    _DISCLAIMER = 'http://www.metacafe.com/family_filter/'
    _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
    # Delegate for Metacafe pages that merely embed a YouTube video.
    _youtube_ie = None

    def __init__(self, youtube_ie, downloader=None):
        InfoExtractor.__init__(self, downloader)
        self._youtube_ie = youtube_ie

    @staticmethod
    def suitable(url):
        """Return True if this extractor can handle the given URL."""
        return (re.match(MetacafeIE._VALID_URL, url) is not None)

    def report_disclaimer(self):
        """Report disclaimer retrieval."""
        self._downloader.to_screen(u'[metacafe] Retrieving disclaimer')

    def report_age_confirmation(self):
        """Report attempt to confirm age."""
        self._downloader.to_screen(u'[metacafe] Confirming age')

    def report_download_webpage(self, video_id):
        """Report webpage download."""
        self._downloader.to_screen(u'[metacafe] %s: Downloading webpage' % video_id)

    def report_extraction(self, video_id):
        """Report information extraction."""
        self._downloader.to_screen(u'[metacafe] %s: Extracting information' % video_id)

    def _real_initialize(self):
        """Fetch the disclaimer page and submit the family-filter form.

        This establishes the session cookies needed to view age-restricted
        content; without it the watch pages served later are filtered.
        """
        # Retrieve disclaimer
        request = urllib2.Request(self._DISCLAIMER, None, std_headers)
        try:
            self.report_disclaimer()
            disclaimer = urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % str(err))
            return

        # Confirm age: POST the "over 18" form to disable the family filter.
        disclaimer_form = {
            'filters': '0',
            'submit': "Continue - I'm over 18",
            }
        request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form), std_headers)
        try:
            self.report_age_confirmation()
            disclaimer = urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
            return

    def _real_extract(self, url):
        """Extract video information from a Metacafe watch URL.

        YouTube-hosted videos (id prefixed with 'yt-') are delegated to the
        wrapped YouTube extractor. Otherwise the media URL is taken from the
        page's mediaURL/gdaKey parameters, falling back to the flashvars
        'mediaData' JSON-ish blob.
        """
        # Extract id and simplified title from URL
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
            return

        video_id = mobj.group(1)

        # Check if video comes from YouTube
        mobj2 = re.match(r'^yt-(.*)$', video_id)
        if mobj2 is not None:
            self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % mobj2.group(1))
            return

        # At this point we have a new video
        self._downloader.increment_downloads()

        simple_title = mobj.group(2).decode('utf-8')

        # Retrieve video webpage to extract further information
        request = urllib2.Request('http://www.metacafe.com/watch/%s/' % video_id)
        try:
            self.report_download_webpage(video_id)
            webpage = urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
            return

        # Extract URL, uploader and title from webpage
        self.report_extraction(video_id)
        mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
        if mobj is not None:
            mediaURL = urllib.unquote(mobj.group(1))
            video_extension = mediaURL[-3:]

            # Extract gdaKey if available; it is appended to the media URL
            # as an access token.
            mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage)
            if mobj is None:
                video_url = mediaURL
            else:
                gdaKey = mobj.group(1)
                video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)
        else:
            # Fallback: the player's flashvars carry a 'mediaData' blob with
            # a backslash-escaped "mediaURL" and its access "key".
            mobj = re.search(r' name="flashvars" value="(.*?)"', webpage)
            if mobj is None:
                self._downloader.trouble(u'ERROR: unable to extract media URL')
                return
            vardict = parse_qs(mobj.group(1))
            if 'mediaData' not in vardict:
                self._downloader.trouble(u'ERROR: unable to extract media URL')
                return
            mobj = re.search(r'"mediaURL":"(http.*?)","key":"(.*?)"', vardict['mediaData'][0])
            if mobj is None:
                self._downloader.trouble(u'ERROR: unable to extract media URL')
                return
            mediaURL = mobj.group(1).replace('\\/', '/')
            video_extension = mediaURL[-3:]
            video_url = '%s?__gda__=%s' % (mediaURL, mobj.group(2))

        mobj = re.search(r'(?im)<title>(.*) - Video</title>', webpage)
        if mobj is None:
            self._downloader.trouble(u'ERROR: unable to extract title')
            return
        video_title = mobj.group(1).decode('utf-8')
        video_title = sanitize_title(video_title)

        mobj = re.search(r'(?ms)By:\s*<a .*?>(.+?)<', webpage)
        if mobj is None:
            self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
            return
        video_uploader = mobj.group(1)

        try:
            # Process video information
            self._downloader.process_info({
                'id':       video_id.decode('utf-8'),
                'url':      video_url.decode('utf-8'),
                'uploader': video_uploader.decode('utf-8'),
                'upload_date':  u'NA',
                'title':    video_title,
                'stitle':   simple_title,
                'ext':      video_extension.decode('utf-8'),
                'format':   u'NA',
                'player_url':   None,
            })
        except UnavailableVideoError:
            self._downloader.trouble(u'\nERROR: unable to download video')
1167
1168
1169 class DailymotionIE(InfoExtractor):
1170 """Information Extractor for Dailymotion"""
1171
1172 _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^_/]+)_([^/]+)'
1173
1174 def __init__(self, downloader=None):
1175 InfoExtractor.__init__(self, downloader)
1176
1177 @staticmethod
1178 def suitable(url):
1179 return (re.match(DailymotionIE._VALID_URL, url) is not None)
1180
1181 def report_download_webpage(self, video_id):
1182 """Report webpage download."""
1183 self._downloader.to_screen(u'[dailymotion] %s: Downloading webpage' % video_id)
1184
1185 def report_extraction(self, video_id):
1186 """Report information extraction."""
1187 self._downloader.to_screen(u'[dailymotion] %s: Extracting information' % video_id)
1188
1189 def _real_initialize(self):
1190 return
1191
1192 def _real_extract(self, url):
1193 # Extract id and simplified title from URL
1194 mobj = re.match(self._VALID_URL, url)
1195 if mobj is None:
1196 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
1197 return
1198
1199 # At this point we have a new video
1200 self._downloader.increment_downloads()
1201 video_id = mobj.group(1)
1202
1203 simple_title = mobj.group(2).decode('utf-8')
1204 video_extension = 'flv'
1205
1206 # Retrieve video webpage to extract further information
1207 request = urllib2.Request(url)
1208 try:
1209 self.report_download_webpage(video_id)
1210 webpage = urllib2.urlopen(request).read()
1211 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1212 self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
1213 return
1214
1215 # Extract URL, uploader and title from webpage
1216 self.report_extraction(video_id)
1217 mobj = re.search(r'(?i)addVariable\(\"video\"\s*,\s*\"([^\"]*)\"\)', webpage)
1218 if mobj is None:
1219 self._downloader.trouble(u'ERROR: unable to extract media URL')
1220 return
1221 mediaURL = urllib.unquote(mobj.group(1))
1222
1223 # if needed add http://www.dailymotion.com/ if relative URL
1224
1225 video_url = mediaURL
1226
1227 # '<meta\s+name="title"\s+content="Dailymotion\s*[:\-]\s*(.*?)"\s*\/\s*>'
1228 mobj = re.search(r'(?im)<title>Dailymotion\s*[\-:]\s*(.+?)</title>', webpage)
1229 if mobj is None:
1230 self._downloader.trouble(u'ERROR: unable to extract title')
1231 return
1232 video_title = mobj.group(1).decode('utf-8')
1233 video_title = sanitize_title(video_title)
1234
1235 mobj = re.search(r'(?im)<div class="dmco_html owner">.*?<a class="name" href="/.+?">(.+?)</a>', webpage)
1236 if mobj is None:
1237 self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
1238 return
1239 video_uploader = mobj.group(1)
1240
1241 try:
1242 # Process video information
1243 self._downloader.process_info({
1244 'id': video_id.decode('utf-8'),
1245 'url': video_url.decode('utf-8'),
1246 'uploader': video_uploader.decode('utf-8'),
1247 'upload_date': u'NA',
1248 'title': video_title,
1249 'stitle': simple_title,
1250 'ext': video_extension.decode('utf-8'),
1251 'format': u'NA',
1252 'player_url': None,
1253 })
1254 except UnavailableVideoError:
1255 self._downloader.trouble(u'\nERROR: unable to download video')
1256
class GoogleIE(InfoExtractor):
    """Information extractor for video.google.com."""

    # Group (1) is the docid (may be a signed integer).
    _VALID_URL = r'(?:http://)?video\.google\.(?:com(?:\.au)?|co\.(?:uk|jp|kr|cr)|ca|de|es|fr|it|nl|pl)/videoplay\?docid=([^\&]+).*'

    def __init__(self, downloader=None):
        InfoExtractor.__init__(self, downloader)

    @staticmethod
    def suitable(url):
        """Return True if this extractor can handle the given URL."""
        return (re.match(GoogleIE._VALID_URL, url) is not None)

    def report_download_webpage(self, video_id):
        """Report webpage download."""
        self._downloader.to_screen(u'[video.google] %s: Downloading webpage' % video_id)

    def report_extraction(self, video_id):
        """Report information extraction."""
        self._downloader.to_screen(u'[video.google] %s: Extracting information' % video_id)

    def _real_initialize(self):
        return

    def _real_extract(self, url):
        """Extract video information from a Google Video playback URL.

        Prefers the direct mp4 download URL; falls back to the flash (flv)
        stream URL, which appears JS-escaped (\\x3d, \\x26) in the page.
        """
        # Extract id from URL
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
            return

        # At this point we have a new video
        self._downloader.increment_downloads()
        video_id = mobj.group(1)

        video_extension = 'mp4'

        # Retrieve video webpage to extract further information
        request = urllib2.Request('http://video.google.com/videoplay?docid=%s&hl=en&oe=utf-8' % video_id)
        try:
            self.report_download_webpage(video_id)
            webpage = urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
            return

        # Extract URL, uploader, and title from webpage
        self.report_extraction(video_id)
        mobj = re.search(r"download_url:'([^']+)'", webpage)
        if mobj is None:
            # No direct download link; fall back to the escaped flash URL.
            video_extension = 'flv'
            mobj = re.search(r"(?i)videoUrl\\x3d(.+?)\\x26", webpage)
            if mobj is None:
                self._downloader.trouble(u'ERROR: unable to extract media URL')
                return
            mediaURL = urllib.unquote(mobj.group(1))
            # Undo the JavaScript hex-escaping of '=' and '&'.
            mediaURL = mediaURL.replace('\\x3d', '\x3d')
            mediaURL = mediaURL.replace('\\x26', '\x26')

        video_url = mediaURL

        mobj = re.search(r'<title>(.*)</title>', webpage)
        if mobj is None:
            self._downloader.trouble(u'ERROR: unable to extract title')
            return
        video_title = mobj.group(1).decode('utf-8')
        video_title = sanitize_title(video_title)
        simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)

        # Extract video description
        mobj = re.search(r'<span id=short-desc-content>([^<]*)</span>', webpage)
        if mobj is None:
            self._downloader.trouble(u'ERROR: unable to extract video description')
            return
        video_description = mobj.group(1).decode('utf-8')
        if not video_description:
            video_description = 'No description available.'

        # Extract video thumbnail (requires an extra search-page request, so
        # only done when the user explicitly asked for the thumbnail).
        if self._downloader.params.get('forcethumbnail', False):
            request = urllib2.Request('http://video.google.com/videosearch?q=%s+site:video.google.com&hl=en' % abs(int(video_id)))
            try:
                webpage = urllib2.urlopen(request).read()
            except (urllib2.URLError, httplib.HTTPException, socket.error), err:
                self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
                return
            mobj = re.search(r'<img class=thumbnail-img (?:.* )?src=(http.*)>', webpage)
            if mobj is None:
                self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
                return
            video_thumbnail = mobj.group(1)
        else:   # we need something to pass to process_info
            video_thumbnail = ''

        try:
            # Process video information
            self._downloader.process_info({
                'id':       video_id.decode('utf-8'),
                'url':      video_url.decode('utf-8'),
                'uploader': u'NA',
                'upload_date':  u'NA',
                'title':    video_title,
                'stitle':   simple_title,
                'ext':      video_extension.decode('utf-8'),
                'format':   u'NA',
                'player_url':   None,
            })
        except UnavailableVideoError:
            self._downloader.trouble(u'\nERROR: unable to download video')
1366
1367
1368 class PhotobucketIE(InfoExtractor):
1369 """Information extractor for photobucket.com."""
1370
1371 _VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)'
1372
1373 def __init__(self, downloader=None):
1374 InfoExtractor.__init__(self, downloader)
1375
1376 @staticmethod
1377 def suitable(url):
1378 return (re.match(PhotobucketIE._VALID_URL, url) is not None)
1379
1380 def report_download_webpage(self, video_id):
1381 """Report webpage download."""
1382 self._downloader.to_screen(u'[photobucket] %s: Downloading webpage' % video_id)
1383
1384 def report_extraction(self, video_id):
1385 """Report information extraction."""
1386 self._downloader.to_screen(u'[photobucket] %s: Extracting information' % video_id)
1387
1388 def _real_initialize(self):
1389 return
1390
1391 def _real_extract(self, url):
1392 # Extract id from URL
1393 mobj = re.match(self._VALID_URL, url)
1394 if mobj is None:
1395 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1396 return
1397
1398 # At this point we have a new video
1399 self._downloader.increment_downloads()
1400 video_id = mobj.group(1)
1401
1402 video_extension = 'flv'
1403
1404 # Retrieve video webpage to extract further information
1405 request = urllib2.Request(url)
1406 try:
1407 self.report_download_webpage(video_id)
1408 webpage = urllib2.urlopen(request).read()
1409 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1410 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1411 return
1412
1413 # Extract URL, uploader, and title from webpage
1414 self.report_extraction(video_id)
1415 mobj = re.search(r'<link rel="video_src" href=".*\?file=([^"]+)" />', webpage)
1416 if mobj is None:
1417 self._downloader.trouble(u'ERROR: unable to extract media URL')
1418 return
1419 mediaURL = urllib.unquote(mobj.group(1))
1420
1421 video_url = mediaURL
1422
1423 mobj = re.search(r'<title>(.*) video by (.*) - Photobucket</title>', webpage)
1424 if mobj is None:
1425 self._downloader.trouble(u'ERROR: unable to extract title')
1426 return
1427 video_title = mobj.group(1).decode('utf-8')
1428 video_title = sanitize_title(video_title)
1429 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1430
1431 video_uploader = mobj.group(2).decode('utf-8')
1432
1433 try:
1434 # Process video information
1435 self._downloader.process_info({
1436 'id': video_id.decode('utf-8'),
1437 'url': video_url.decode('utf-8'),
1438 'uploader': video_uploader,
1439 'upload_date': u'NA',
1440 'title': video_title,
1441 'stitle': simple_title,
1442 'ext': video_extension.decode('utf-8'),
1443 'format': u'NA',
1444 'player_url': None,
1445 })
1446 except UnavailableVideoError:
1447 self._downloader.trouble(u'\nERROR: unable to download video')
1448
1449
1450 class YahooIE(InfoExtractor):
1451 """Information extractor for video.yahoo.com."""
1452
1453 # _VALID_URL matches all Yahoo! Video URLs
1454 # _VPAGE_URL matches only the extractable '/watch/' URLs
1455 _VALID_URL = r'(?:http://)?(?:[a-z]+\.)?video\.yahoo\.com/(?:watch|network)/([0-9]+)(?:/|\?v=)([0-9]+)(?:[#\?].*)?'
1456 _VPAGE_URL = r'(?:http://)?video\.yahoo\.com/watch/([0-9]+)/([0-9]+)(?:[#\?].*)?'
1457
1458 def __init__(self, downloader=None):
1459 InfoExtractor.__init__(self, downloader)
1460
1461 @staticmethod
1462 def suitable(url):
1463 return (re.match(YahooIE._VALID_URL, url) is not None)
1464
1465 def report_download_webpage(self, video_id):
1466 """Report webpage download."""
1467 self._downloader.to_screen(u'[video.yahoo] %s: Downloading webpage' % video_id)
1468
1469 def report_extraction(self, video_id):
1470 """Report information extraction."""
1471 self._downloader.to_screen(u'[video.yahoo] %s: Extracting information' % video_id)
1472
1473 def _real_initialize(self):
1474 return
1475
1476 def _real_extract(self, url, new_video=True):
1477 # Extract ID from URL
1478 mobj = re.match(self._VALID_URL, url)
1479 if mobj is None:
1480 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1481 return
1482
1483 # At this point we have a new video
1484 self._downloader.increment_downloads()
1485 video_id = mobj.group(2)
1486 video_extension = 'flv'
1487
1488 # Rewrite valid but non-extractable URLs as
1489 # extractable English language /watch/ URLs
1490 if re.match(self._VPAGE_URL, url) is None:
1491 request = urllib2.Request(url)
1492 try:
1493 webpage = urllib2.urlopen(request).read()
1494 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1495 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1496 return
1497
1498 mobj = re.search(r'\("id", "([0-9]+)"\);', webpage)
1499 if mobj is None:
1500 self._downloader.trouble(u'ERROR: Unable to extract id field')
1501 return
1502 yahoo_id = mobj.group(1)
1503
1504 mobj = re.search(r'\("vid", "([0-9]+)"\);', webpage)
1505 if mobj is None:
1506 self._downloader.trouble(u'ERROR: Unable to extract vid field')
1507 return
1508 yahoo_vid = mobj.group(1)
1509
1510 url = 'http://video.yahoo.com/watch/%s/%s' % (yahoo_vid, yahoo_id)
1511 return self._real_extract(url, new_video=False)
1512
1513 # Retrieve video webpage to extract further information
1514 request = urllib2.Request(url)
1515 try:
1516 self.report_download_webpage(video_id)
1517 webpage = urllib2.urlopen(request).read()
1518 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1519 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1520 return
1521
1522 # Extract uploader and title from webpage
1523 self.report_extraction(video_id)
1524 mobj = re.search(r'<meta name="title" content="(.*)" />', webpage)
1525 if mobj is None:
1526 self._downloader.trouble(u'ERROR: unable to extract video title')
1527 return
1528 video_title = mobj.group(1).decode('utf-8')
1529 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1530
1531 mobj = re.search(r'<h2 class="ti-5"><a href="http://video\.yahoo\.com/(people|profile)/[0-9]+" beacon=".*">(.*)</a></h2>', webpage)
1532 if mobj is None:
1533 self._downloader.trouble(u'ERROR: unable to extract video uploader')
1534 return
1535 video_uploader = mobj.group(1).decode('utf-8')
1536
1537 # Extract video thumbnail
1538 mobj = re.search(r'<link rel="image_src" href="(.*)" />', webpage)
1539 if mobj is None:
1540 self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
1541 return
1542 video_thumbnail = mobj.group(1).decode('utf-8')
1543
1544 # Extract video description
1545 mobj = re.search(r'<meta name="description" content="(.*)" />', webpage)
1546 if mobj is None:
1547 self._downloader.trouble(u'ERROR: unable to extract video description')
1548 return
1549 video_description = mobj.group(1).decode('utf-8')
1550 if not video_description: video_description = 'No description available.'
1551
1552 # Extract video height and width
1553 mobj = re.search(r'<meta name="video_height" content="([0-9]+)" />', webpage)
1554 if mobj is None:
1555 self._downloader.trouble(u'ERROR: unable to extract video height')
1556 return
1557 yv_video_height = mobj.group(1)
1558
1559 mobj = re.search(r'<meta name="video_width" content="([0-9]+)" />', webpage)
1560 if mobj is None:
1561 self._downloader.trouble(u'ERROR: unable to extract video width')
1562 return
1563 yv_video_width = mobj.group(1)
1564
1565 # Retrieve video playlist to extract media URL
1566 # I'm not completely sure what all these options are, but we
1567 # seem to need most of them, otherwise the server sends a 401.
1568 yv_lg = 'R0xx6idZnW2zlrKP8xxAIR' # not sure what this represents
1569 yv_bitrate = '700' # according to Wikipedia this is hard-coded
1570 request = urllib2.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id +
1571 '&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height +
1572 '&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797')
1573 try:
1574 self.report_download_webpage(video_id)
1575 webpage = urllib2.urlopen(request).read()
1576 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1577 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1578 return
1579
1580 # Extract media URL from playlist XML
1581 mobj = re.search(r'<STREAM APP="(http://.*)" FULLPATH="/?(/.*\.flv\?[^"]*)"', webpage)
1582 if mobj is None:
1583 self._downloader.trouble(u'ERROR: Unable to extract media URL')
1584 return
1585 video_url = urllib.unquote(mobj.group(1) + mobj.group(2)).decode('utf-8')
1586 video_url = re.sub(r'(?u)&(.+?);', htmlentity_transform, video_url)
1587
1588 try:
1589 # Process video information
1590 self._downloader.process_info({
1591 'id': video_id.decode('utf-8'),
1592 'url': video_url,
1593 'uploader': video_uploader,
1594 'upload_date': u'NA',
1595 'title': video_title,
1596 'stitle': simple_title,
1597 'ext': video_extension.decode('utf-8'),
1598 'thumbnail': video_thumbnail.decode('utf-8'),
1599 'description': video_description,
1600 'thumbnail': video_thumbnail,
1601 'description': video_description,
1602 'player_url': None,
1603 })
1604 except UnavailableVideoError:
1605 self._downloader.trouble(u'\nERROR: unable to download video')
1606
1607
1608 class GenericIE(InfoExtractor):
1609 """Generic last-resort information extractor."""
1610
1611 def __init__(self, downloader=None):
1612 InfoExtractor.__init__(self, downloader)
1613
1614 @staticmethod
1615 def suitable(url):
1616 return True
1617
1618 def report_download_webpage(self, video_id):
1619 """Report webpage download."""
1620 self._downloader.to_screen(u'WARNING: Falling back on generic information extractor.')
1621 self._downloader.to_screen(u'[generic] %s: Downloading webpage' % video_id)
1622
1623 def report_extraction(self, video_id):
1624 """Report information extraction."""
1625 self._downloader.to_screen(u'[generic] %s: Extracting information' % video_id)
1626
1627 def _real_initialize(self):
1628 return
1629
1630 def _real_extract(self, url):
1631 # At this point we have a new video
1632 self._downloader.increment_downloads()
1633
1634 video_id = url.split('/')[-1]
1635 request = urllib2.Request(url)
1636 try:
1637 self.report_download_webpage(video_id)
1638 webpage = urllib2.urlopen(request).read()
1639 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1640 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1641 return
1642 except ValueError, err:
1643 # since this is the last-resort InfoExtractor, if
1644 # this error is thrown, it'll be thrown here
1645 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1646 return
1647
1648 self.report_extraction(video_id)
1649 # Start with something easy: JW Player in SWFObject
1650 mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
1651 if mobj is None:
1652 # Broaden the search a little bit
1653 mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
1654 if mobj is None:
1655 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1656 return
1657
1658 # It's possible that one of the regexes
1659 # matched, but returned an empty group:
1660 if mobj.group(1) is None:
1661 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1662 return
1663
1664 video_url = urllib.unquote(mobj.group(1))
1665 video_id = os.path.basename(video_url)
1666
1667 # here's a fun little line of code for you:
1668 video_extension = os.path.splitext(video_id)[1][1:]
1669 video_id = os.path.splitext(video_id)[0]
1670
1671 # it's tempting to parse this further, but you would
1672 # have to take into account all the variations like
1673 # Video Title - Site Name
1674 # Site Name | Video Title
1675 # Video Title - Tagline | Site Name
1676 # and so on and so forth; it's just not practical
1677 mobj = re.search(r'<title>(.*)</title>', webpage)
1678 if mobj is None:
1679 self._downloader.trouble(u'ERROR: unable to extract title')
1680 return
1681 video_title = mobj.group(1).decode('utf-8')
1682 video_title = sanitize_title(video_title)
1683 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1684
1685 # video uploader is domain name
1686 mobj = re.match(r'(?:https?://)?([^/]*)/.*', url)
1687 if mobj is None:
1688 self._downloader.trouble(u'ERROR: unable to extract title')
1689 return
1690 video_uploader = mobj.group(1).decode('utf-8')
1691
1692 try:
1693 # Process video information
1694 self._downloader.process_info({
1695 'id': video_id.decode('utf-8'),
1696 'url': video_url.decode('utf-8'),
1697 'uploader': video_uploader,
1698 'upload_date': u'NA',
1699 'title': video_title,
1700 'stitle': simple_title,
1701 'ext': video_extension.decode('utf-8'),
1702 'format': u'NA',
1703 'player_url': None,
1704 })
1705 except UnavailableVideoError, err:
1706 self._downloader.trouble(u'\nERROR: unable to download video')
1707
1708
1709 class YoutubeSearchIE(InfoExtractor):
1710 """Information Extractor for YouTube search queries."""
1711 _VALID_QUERY = r'ytsearch(\d+|all)?:[\s\S]+'
1712 _TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en'
1713 _VIDEO_INDICATOR = r'href="/watch\?v=.+?"'
1714 _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
1715 _youtube_ie = None
1716 _max_youtube_results = 1000
1717
def __init__(self, youtube_ie, downloader=None):
    """Store the YouTube extractor used to fetch each search result."""
    self._youtube_ie = youtube_ie
    InfoExtractor.__init__(self, downloader)
1721
@staticmethod
def suitable(url):
    """Return True if the string is a 'ytsearch[N|all]:' query."""
    return re.match(YoutubeSearchIE._VALID_QUERY, url) is not None
1725
def report_download_page(self, query, pagenum):
    """Report attempt to download playlist page with given number."""
    decoded_query = query.decode(preferredencoding())
    message = u'[youtube] query "%s": Downloading page %s' % (decoded_query, pagenum)
    self._downloader.to_screen(message)
1730
def _real_initialize(self):
    """Delegate initialization to the wrapped YouTube extractor."""
    self._youtube_ie.initialize()
1733
def _real_extract(self, query):
    """Parse a 'ytsearch[N|all]:terms' query and download the results.

    An empty count downloads the first result, 'all' downloads up to
    _max_youtube_results, and a number downloads that many (clamped to the
    maximum). Invalid queries are reported via the downloader.
    """
    mobj = re.match(self._VALID_QUERY, query)
    if mobj is None:
        self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
        return

    # BUG FIX: split only on the FIRST ':' — _VALID_QUERY allows the search
    # terms themselves to contain colons, which previously made this unpack
    # raise ValueError.
    prefix, query = query.split(':', 1)
    prefix = prefix[8:]
    query = query.encode('utf-8')
    if prefix == '':
        self._download_n_results(query, 1)
        return
    elif prefix == 'all':
        self._download_n_results(query, self._max_youtube_results)
        return
    else:
        try:
            n = long(prefix)
            if n <= 0:
                self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
                return
            elif n > self._max_youtube_results:
                self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)' % (self._max_youtube_results, n))
                n = self._max_youtube_results
            self._download_n_results(query, n)
            return
        except ValueError:  # parsing prefix as integer fails
            self._download_n_results(query, 1)
            return
1763
1764 def _download_n_results(self, query, n):
1765 """Downloads a specified number of results for a query"""
1766
1767 video_ids = []
1768 already_seen = set()
1769 pagenum = 1
1770
1771 while True:
1772 self.report_download_page(query, pagenum)
1773 result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1774 request = urllib2.Request(result_url, None, std_headers)
1775 try:
1776 page = urllib2.urlopen(request).read()
1777 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1778 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1779 return
1780
1781 # Extract video identifiers
1782 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1783 video_id = page[mobj.span()[0]:mobj.span()[1]].split('=')[2][:-1]
1784 if video_id not in already_seen:
1785 video_ids.append(video_id)
1786 already_seen.add(video_id)
1787 if len(video_ids) == n:
1788 # Specified n videos reached
1789 for id in video_ids:
1790 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1791 return
1792
1793 if re.search(self._MORE_PAGES_INDICATOR, page) is None:
1794 for id in video_ids:
1795 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1796 return
1797
1798 pagenum = pagenum + 1
1799
1800 class GoogleSearchIE(InfoExtractor):
1801 """Information Extractor for Google Video search queries."""
1802 _VALID_QUERY = r'gvsearch(\d+|all)?:[\s\S]+'
1803 _TEMPLATE_URL = 'http://video.google.com/videosearch?q=%s+site:video.google.com&start=%s&hl=en'
1804 _VIDEO_INDICATOR = r'videoplay\?docid=([^\&>]+)\&'
1805 _MORE_PAGES_INDICATOR = r'<span>Next</span>'
1806 _google_ie = None
1807 _max_google_results = 1000
1808
1809 def __init__(self, google_ie, downloader=None):
1810 InfoExtractor.__init__(self, downloader)
1811 self._google_ie = google_ie
1812
1813 @staticmethod
1814 def suitable(url):
1815 return (re.match(GoogleSearchIE._VALID_QUERY, url) is not None)
1816
1817 def report_download_page(self, query, pagenum):
1818 """Report attempt to download playlist page with given number."""
1819 query = query.decode(preferredencoding())
1820 self._downloader.to_screen(u'[video.google] query "%s": Downloading page %s' % (query, pagenum))
1821
1822 def _real_initialize(self):
1823 self._google_ie.initialize()
1824
1825 def _real_extract(self, query):
1826 mobj = re.match(self._VALID_QUERY, query)
1827 if mobj is None:
1828 self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
1829 return
1830
1831 prefix, query = query.split(':')
1832 prefix = prefix[8:]
1833 query = query.encode('utf-8')
1834 if prefix == '':
1835 self._download_n_results(query, 1)
1836 return
1837 elif prefix == 'all':
1838 self._download_n_results(query, self._max_google_results)
1839 return
1840 else:
1841 try:
1842 n = long(prefix)
1843 if n <= 0:
1844 self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
1845 return
1846 elif n > self._max_google_results:
1847 self._downloader.to_stderr(u'WARNING: gvsearch returns max %i results (you requested %i)' % (self._max_google_results, n))
1848 n = self._max_google_results
1849 self._download_n_results(query, n)
1850 return
1851 except ValueError: # parsing prefix as integer fails
1852 self._download_n_results(query, 1)
1853 return
1854
1855 def _download_n_results(self, query, n):
1856 """Downloads a specified number of results for a query"""
1857
1858 video_ids = []
1859 already_seen = set()
1860 pagenum = 1
1861
1862 while True:
1863 self.report_download_page(query, pagenum)
1864 result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1865 request = urllib2.Request(result_url, None, std_headers)
1866 try:
1867 page = urllib2.urlopen(request).read()
1868 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1869 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1870 return
1871
1872 # Extract video identifiers
1873 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1874 video_id = mobj.group(1)
1875 if video_id not in already_seen:
1876 video_ids.append(video_id)
1877 already_seen.add(video_id)
1878 if len(video_ids) == n:
1879 # Specified n videos reached
1880 for id in video_ids:
1881 self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
1882 return
1883
1884 if re.search(self._MORE_PAGES_INDICATOR, page) is None:
1885 for id in video_ids:
1886 self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
1887 return
1888
1889 pagenum = pagenum + 1
1890
1891 class YahooSearchIE(InfoExtractor):
1892 """Information Extractor for Yahoo! Video search queries."""
1893 _VALID_QUERY = r'yvsearch(\d+|all)?:[\s\S]+'
1894 _TEMPLATE_URL = 'http://video.yahoo.com/search/?p=%s&o=%s'
1895 _VIDEO_INDICATOR = r'href="http://video\.yahoo\.com/watch/([0-9]+/[0-9]+)"'
1896 _MORE_PAGES_INDICATOR = r'\s*Next'
1897 _yahoo_ie = None
1898 _max_yahoo_results = 1000
1899
1900 def __init__(self, yahoo_ie, downloader=None):
1901 InfoExtractor.__init__(self, downloader)
1902 self._yahoo_ie = yahoo_ie
1903
1904 @staticmethod
1905 def suitable(url):
1906 return (re.match(YahooSearchIE._VALID_QUERY, url) is not None)
1907
1908 def report_download_page(self, query, pagenum):
1909 """Report attempt to download playlist page with given number."""
1910 query = query.decode(preferredencoding())
1911 self._downloader.to_screen(u'[video.yahoo] query "%s": Downloading page %s' % (query, pagenum))
1912
1913 def _real_initialize(self):
1914 self._yahoo_ie.initialize()
1915
1916 def _real_extract(self, query):
1917 mobj = re.match(self._VALID_QUERY, query)
1918 if mobj is None:
1919 self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
1920 return
1921
1922 prefix, query = query.split(':')
1923 prefix = prefix[8:]
1924 query = query.encode('utf-8')
1925 if prefix == '':
1926 self._download_n_results(query, 1)
1927 return
1928 elif prefix == 'all':
1929 self._download_n_results(query, self._max_yahoo_results)
1930 return
1931 else:
1932 try:
1933 n = long(prefix)
1934 if n <= 0:
1935 self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
1936 return
1937 elif n > self._max_yahoo_results:
1938 self._downloader.to_stderr(u'WARNING: yvsearch returns max %i results (you requested %i)' % (self._max_yahoo_results, n))
1939 n = self._max_yahoo_results
1940 self._download_n_results(query, n)
1941 return
1942 except ValueError: # parsing prefix as integer fails
1943 self._download_n_results(query, 1)
1944 return
1945
1946 def _download_n_results(self, query, n):
1947 """Downloads a specified number of results for a query"""
1948
1949 video_ids = []
1950 already_seen = set()
1951 pagenum = 1
1952
1953 while True:
1954 self.report_download_page(query, pagenum)
1955 result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1956 request = urllib2.Request(result_url, None, std_headers)
1957 try:
1958 page = urllib2.urlopen(request).read()
1959 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1960 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1961 return
1962
1963 # Extract video identifiers
1964 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1965 video_id = mobj.group(1)
1966 if video_id not in already_seen:
1967 video_ids.append(video_id)
1968 already_seen.add(video_id)
1969 if len(video_ids) == n:
1970 # Specified n videos reached
1971 for id in video_ids:
1972 self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
1973 return
1974
1975 if re.search(self._MORE_PAGES_INDICATOR, page) is None:
1976 for id in video_ids:
1977 self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
1978 return
1979
1980 pagenum = pagenum + 1
1981
1982 class YoutubePlaylistIE(InfoExtractor):
1983 """Information Extractor for YouTube playlists."""
1984
1985 _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists)\?.*?p=|user/.*?/user/)([^&]+).*'
1986 _TEMPLATE_URL = 'http://www.youtube.com/view_play_list?p=%s&page=%s&gl=US&hl=en'
1987 _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
1988 _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
1989 _youtube_ie = None
1990
1991 def __init__(self, youtube_ie, downloader=None):
1992 InfoExtractor.__init__(self, downloader)
1993 self._youtube_ie = youtube_ie
1994
1995 @staticmethod
1996 def suitable(url):
1997 return (re.match(YoutubePlaylistIE._VALID_URL, url) is not None)
1998
1999 def report_download_page(self, playlist_id, pagenum):
2000 """Report attempt to download playlist page with given number."""
2001 self._downloader.to_screen(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum))
2002
2003 def _real_initialize(self):
2004 self._youtube_ie.initialize()
2005
2006 def _real_extract(self, url):
2007 # Extract playlist id
2008 mobj = re.match(self._VALID_URL, url)
2009 if mobj is None:
2010 self._downloader.trouble(u'ERROR: invalid url: %s' % url)
2011 return
2012
2013 # Download playlist pages
2014 playlist_id = mobj.group(1)
2015 video_ids = []
2016 pagenum = 1
2017
2018 while True:
2019 self.report_download_page(playlist_id, pagenum)
2020 request = urllib2.Request(self._TEMPLATE_URL % (playlist_id, pagenum), None, std_headers)
2021 try:
2022 page = urllib2.urlopen(request).read()
2023 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2024 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2025 return
2026
2027 # Extract video identifiers
2028 ids_in_page = []
2029 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2030 if mobj.group(1) not in ids_in_page:
2031 ids_in_page.append(mobj.group(1))
2032 video_ids.extend(ids_in_page)
2033
2034 if re.search(self._MORE_PAGES_INDICATOR, page) is None:
2035 break
2036 pagenum = pagenum + 1
2037
2038 playliststart = self._downloader.params.get('playliststart', 1) - 1
2039 playlistend = self._downloader.params.get('playlistend', -1)
2040 video_ids = video_ids[playliststart:playlistend]
2041
2042 for id in video_ids:
2043 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
2044 return
2045
2046 class YoutubeUserIE(InfoExtractor):
2047 """Information Extractor for YouTube users."""
2048
2049 _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/user/(.*)'
2050 _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
2051 _VIDEO_INDICATOR = r'http://gdata.youtube.com/feeds/api/videos/(.*)' # XXX Fix this.
2052 _youtube_ie = None
2053
2054 def __init__(self, youtube_ie, downloader=None):
2055 InfoExtractor.__init__(self, downloader)
2056 self._youtube_ie = youtube_ie
2057
2058 @staticmethod
2059 def suitable(url):
2060 return (re.match(YoutubeUserIE._VALID_URL, url) is not None)
2061
2062 def report_download_page(self, username):
2063 """Report attempt to download user page."""
2064 self._downloader.to_screen(u'[youtube] user %s: Downloading page ' % (username))
2065
2066 def _real_initialize(self):
2067 self._youtube_ie.initialize()
2068
2069 def _real_extract(self, url):
2070 # Extract username
2071 mobj = re.match(self._VALID_URL, url)
2072 if mobj is None:
2073 self._downloader.trouble(u'ERROR: invalid url: %s' % url)
2074 return
2075
2076 # Download user page
2077 username = mobj.group(1)
2078 video_ids = []
2079 pagenum = 1
2080
2081 self.report_download_page(username)
2082 request = urllib2.Request(self._TEMPLATE_URL % (username), None, std_headers)
2083 try:
2084 page = urllib2.urlopen(request).read()
2085 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2086 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2087 return
2088
2089 # Extract video identifiers
2090 ids_in_page = []
2091
2092 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2093 if mobj.group(1) not in ids_in_page:
2094 ids_in_page.append(mobj.group(1))
2095 video_ids.extend(ids_in_page)
2096
2097 playliststart = self._downloader.params.get('playliststart', 1) - 1
2098 playlistend = self._downloader.params.get('playlistend', -1)
2099 video_ids = video_ids[playliststart:playlistend]
2100
2101 for id in video_ids:
2102 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
2103 return
2104
2105 class DepositFilesIE(InfoExtractor):
2106 """Information extractor for depositfiles.com"""
2107
2108 _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles.com/(?:../(?#locale))?files/(.+)'
2109
2110 def __init__(self, downloader=None):
2111 InfoExtractor.__init__(self, downloader)
2112
2113 @staticmethod
2114 def suitable(url):
2115 return (re.match(DepositFilesIE._VALID_URL, url) is not None)
2116
2117 def report_download_webpage(self, file_id):
2118 """Report webpage download."""
2119 self._downloader.to_screen(u'[DepositFiles] %s: Downloading webpage' % file_id)
2120
2121 def report_extraction(self, file_id):
2122 """Report information extraction."""
2123 self._downloader.to_screen(u'[DepositFiles] %s: Extracting information' % file_id)
2124
2125 def _real_initialize(self):
2126 return
2127
2128 def _real_extract(self, url):
2129 # At this point we have a new file
2130 self._downloader.increment_downloads()
2131
2132 file_id = url.split('/')[-1]
2133 # Rebuild url in english locale
2134 url = 'http://depositfiles.com/en/files/' + file_id
2135
2136 # Retrieve file webpage with 'Free download' button pressed
2137 free_download_indication = { 'gateway_result' : '1' }
2138 request = urllib2.Request(url, urllib.urlencode(free_download_indication), std_headers)
2139 try:
2140 self.report_download_webpage(file_id)
2141 webpage = urllib2.urlopen(request).read()
2142 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2143 self._downloader.trouble(u'ERROR: Unable to retrieve file webpage: %s' % str(err))
2144 return
2145
2146 # Search for the real file URL
2147 mobj = re.search(r'<form action="(http://fileshare.+?)"', webpage)
2148 if (mobj is None) or (mobj.group(1) is None):
2149 # Try to figure out reason of the error.
2150 mobj = re.search(r'<strong>(Attention.*?)</strong>', webpage, re.DOTALL)
2151 if (mobj is not None) and (mobj.group(1) is not None):
2152 restriction_message = re.sub('\s+', ' ', mobj.group(1)).strip()
2153 self._downloader.trouble(u'ERROR: %s' % restriction_message)
2154 else:
2155 self._downloader.trouble(u'ERROR: unable to extract download URL from: %s' % url)
2156 return
2157
2158 file_url = mobj.group(1)
2159 file_extension = os.path.splitext(file_url)[1][1:]
2160
2161 # Search for file title
2162 mobj = re.search(r'<b title="(.*?)">', webpage)
2163 if mobj is None:
2164 self._downloader.trouble(u'ERROR: unable to extract title')
2165 return
2166 file_title = mobj.group(1).decode('utf-8')
2167
2168 try:
2169 # Process file information
2170 self._downloader.process_info({
2171 'id': file_id.decode('utf-8'),
2172 'url': file_url.decode('utf-8'),
2173 'uploader': u'NA',
2174 'upload_date': u'NA',
2175 'title': file_title,
2176 'stitle': file_title,
2177 'ext': file_extension.decode('utf-8'),
2178 'format': u'NA',
2179 'player_url': None,
2180 })
2181 except UnavailableVideoError, err:
2182 self._downloader.trouble(u'ERROR: unable to download file')
2183
class PostProcessor(object):
	"""Base class for post-processing steps.

	Instances are registered on a downloader via its
	add_post_processor() method.  After every successful download the
	downloader walks its chain of PostProcessors, feeding each one's
	run() return value to the next.  A run() that returns None stops
	the chain; returning a (possibly modified) information dictionary
	continues it.

	Like InfoExtractor objects, PostProcessors and their downloader
	register with each other mutually.
	"""

	_downloader = None

	def __init__(self, downloader=None):
		self._downloader = downloader

	def set_downloader(self, downloader):
		"""Sets the downloader for this PP."""
		self._downloader = downloader

	def run(self, information):
		"""Run the PostProcessor.

		"information" is an InfoExtractor-style dictionary with one
		extra key, "filepath", naming the downloaded file.

		Return None to stop the postprocessing chain, or an
		information dictionary (this one, possibly with fields
		changed) to pass along to the next PostProcessor.  May raise
		PostProcessingError to signal failure to the downloader.
		"""
		# Default behavior: pass the information through unchanged.
		return information
2229
### MAIN PROGRAM ###
if __name__ == '__main__':
	try:
		# Modules needed only when running the main program
		import getpass
		import optparse

		# Function to update the program file with the latest version from bitbucket.org
		def update_self(downloader, filename):
			"""Overwrite this script in place with the latest release.

			Reads the release tag from LATEST_VERSION on GitHub, then
			downloads that tagged copy of youtube-dl over `filename`
			(normally sys.argv[0]).  Exits the process if `filename`
			is not writable.
			"""
			# Note: downloader only used for options
			if not os.access (filename, os.W_OK):
				sys.exit('ERROR: no write permissions on %s' % filename)

			downloader.to_screen('Updating to latest stable version...')
			latest_url = 'http://github.com/rg3/youtube-dl/raw/master/LATEST_VERSION'
			latest_version = urllib.urlopen(latest_url).read().strip()
			prog_url = 'http://github.com/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version
			newcontent = urllib.urlopen(prog_url).read()
			stream = open(filename, 'w')
			stream.write(newcontent)
			stream.close()
			downloader.to_screen('Updated to version %s' % latest_version)

		# Parse command line.  conflict_handler='resolve' lets -h/-v be
		# redefined below with custom help text.
		parser = optparse.OptionParser(
			usage='Usage: %prog [options] url...',
			version='2010.12.09',
			conflict_handler='resolve',
		)

		parser.add_option('-h', '--help',
				action='help', help='print this help text and exit')
		parser.add_option('-v', '--version',
				action='version', help='print program version and exit')
		parser.add_option('-U', '--update',
				action='store_true', dest='update_self', help='update this program to latest stable version')
		parser.add_option('-i', '--ignore-errors',
				action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
		parser.add_option('-r', '--rate-limit',
				dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)')
		parser.add_option('-R', '--retries',
				dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10)
		parser.add_option('--playlist-start',
				dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1)
		parser.add_option('--playlist-end',
				dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)
		parser.add_option('--dump-user-agent',
				action='store_true', dest='dump_user_agent', help='display the current browser identification', default=False)

		authentication = optparse.OptionGroup(parser, 'Authentication Options')
		authentication.add_option('-u', '--username',
				dest='username', metavar='USERNAME', help='account username')
		authentication.add_option('-p', '--password',
				dest='password', metavar='PASSWORD', help='account password')
		authentication.add_option('-n', '--netrc',
				action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
		parser.add_option_group(authentication)

		video_format = optparse.OptionGroup(parser, 'Video Format Options')
		video_format.add_option('-f', '--format',
				action='store', dest='format', metavar='FORMAT', help='video format code')
		# --all-formats is encoded as the sentinel format string '-1'
		video_format.add_option('--all-formats',
				action='store_const', dest='format', help='download all available video formats', const='-1')
		video_format.add_option('--max-quality',
				action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download')
		parser.add_option_group(video_format)

		verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
		verbosity.add_option('-q', '--quiet',
				action='store_true', dest='quiet', help='activates quiet mode', default=False)
		verbosity.add_option('-s', '--simulate',
				action='store_true', dest='simulate', help='do not download video', default=False)
		verbosity.add_option('-g', '--get-url',
				action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
		verbosity.add_option('-e', '--get-title',
				action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
		verbosity.add_option('--get-thumbnail',
				action='store_true', dest='getthumbnail', help='simulate, quiet but print thumbnail URL', default=False)
		verbosity.add_option('--get-description',
				action='store_true', dest='getdescription', help='simulate, quiet but print video description', default=False)
		verbosity.add_option('--no-progress',
				action='store_true', dest='noprogress', help='do not print progress bar', default=False)
		verbosity.add_option('--console-title',
				action='store_true', dest='consoletitle', help='display progress in console titlebar', default=False)
		parser.add_option_group(verbosity)

		filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
		filesystem.add_option('-t', '--title',
				action='store_true', dest='usetitle', help='use title in file name', default=False)
		filesystem.add_option('-l', '--literal',
				action='store_true', dest='useliteral', help='use literal title in file name', default=False)
		filesystem.add_option('-A', '--auto-number',
				action='store_true', dest='autonumber', help='number downloaded files starting from 00000', default=False)
		filesystem.add_option('-o', '--output',
				dest='outtmpl', metavar='TEMPLATE', help='output filename template')
		filesystem.add_option('-a', '--batch-file',
				dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
		filesystem.add_option('-w', '--no-overwrites',
				action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
		filesystem.add_option('-c', '--continue',
				action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False)
		filesystem.add_option('--cookies',
				dest='cookiefile', metavar='FILE', help='file to dump cookie jar to')
		filesystem.add_option('--no-part',
				action='store_true', dest='nopart', help='do not use .part files', default=False)
		parser.add_option_group(filesystem)

		(opts, args) = parser.parse_args()

		# Open appropriate CookieJar: an in-memory jar by default, or a
		# Mozilla-format file jar when --cookies was given (loaded only
		# if the file already exists and is readable).
		if opts.cookiefile is None:
			jar = cookielib.CookieJar()
		else:
			try:
				jar = cookielib.MozillaCookieJar(opts.cookiefile)
				if os.path.isfile(opts.cookiefile) and os.access(opts.cookiefile, os.R_OK):
					jar.load()
			except (IOError, OSError), err:
				sys.exit(u'ERROR: unable to open cookie file')

		# Dump user agent
		if opts.dump_user_agent:
			print std_headers['User-Agent']
			sys.exit(0)

		# General configuration
		cookie_processor = urllib2.HTTPCookieProcessor(jar)
		# NOTE(review): the second install_opener replaces the first, so
		# the explicit ProxyHandler opener is discarded; build_opener adds
		# a default ProxyHandler anyway -- confirm before simplifying.
		urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler()))
		urllib2.install_opener(urllib2.build_opener(cookie_processor))
		socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)

		# Batch file verification: strip whitespace and skip empty lines
		# and comment lines starting with '#', '/' or ';'
		batchurls = []
		if opts.batchfile is not None:
			try:
				if opts.batchfile == '-':
					batchfd = sys.stdin
				else:
					batchfd = open(opts.batchfile, 'r')
				batchurls = batchfd.readlines()
				batchurls = [x.strip() for x in batchurls]
				batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)]
			except IOError:
				sys.exit(u'ERROR: batch file could not be read')
		all_urls = batchurls + args

		# Conflicting, missing and erroneous options
		if opts.usenetrc and (opts.username is not None or opts.password is not None):
			parser.error(u'using .netrc conflicts with giving username/password')
		if opts.password is not None and opts.username is None:
			parser.error(u'account username missing')
		if opts.outtmpl is not None and (opts.useliteral or opts.usetitle or opts.autonumber):
			parser.error(u'using output template conflicts with using title, literal title or auto number')
		if opts.usetitle and opts.useliteral:
			parser.error(u'using title conflicts with using literal title')
		if opts.username is not None and opts.password is None:
			# Prompt interactively rather than requiring the password on
			# the command line
			opts.password = getpass.getpass(u'Type account password and press return:')
		if opts.ratelimit is not None:
			numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
			if numeric_limit is None:
				parser.error(u'invalid rate limit specified')
			opts.ratelimit = numeric_limit
		if opts.retries is not None:
			try:
				opts.retries = long(opts.retries)
			except (TypeError, ValueError), err:
				parser.error(u'invalid retry count specified')
		try:
			opts.playliststart = long(opts.playliststart)
			if opts.playliststart <= 0:
				raise ValueError
		except (TypeError, ValueError), err:
			parser.error(u'invalid playlist start number specified')
		try:
			# -1 is the sentinel for "until the end of the playlist"
			opts.playlistend = long(opts.playlistend)
			if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart):
				raise ValueError
		except (TypeError, ValueError), err:
			parser.error(u'invalid playlist end number specified')

		# Information extractors
		youtube_ie = YoutubeIE()
		metacafe_ie = MetacafeIE(youtube_ie)
		dailymotion_ie = DailymotionIE()
		youtube_pl_ie = YoutubePlaylistIE(youtube_ie)
		youtube_user_ie = YoutubeUserIE(youtube_ie)
		youtube_search_ie = YoutubeSearchIE(youtube_ie)
		google_ie = GoogleIE()
		google_search_ie = GoogleSearchIE(google_ie)
		photobucket_ie = PhotobucketIE()
		yahoo_ie = YahooIE()
		yahoo_search_ie = YahooSearchIE(yahoo_ie)
		deposit_files_ie = DepositFilesIE()
		generic_ie = GenericIE()

		# File downloader.  Any of the "get-*" options implies both quiet
		# and simulate mode.  The outtmpl expression picks the first
		# template that matches the combination of --all-formats,
		# --title/--literal and --auto-number, falling back to
		# '%(id)s.%(ext)s'.
		fd = FileDownloader({
			'usenetrc': opts.usenetrc,
			'username': opts.username,
			'password': opts.password,
			'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription),
			'forceurl': opts.geturl,
			'forcetitle': opts.gettitle,
			'forcethumbnail': opts.getthumbnail,
			'forcedescription': opts.getdescription,
			'simulate': (opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription),
			'format': opts.format,
			'format_limit': opts.format_limit,
			'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
				or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s')
				or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s')
				or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s')
				or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(stitle)s-%(id)s.%(ext)s')
				or (opts.useliteral and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s')
				or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
				or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
				or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
				or u'%(id)s.%(ext)s'),
			'ignoreerrors': opts.ignoreerrors,
			'ratelimit': opts.ratelimit,
			'nooverwrites': opts.nooverwrites,
			'retries': opts.retries,
			'continuedl': opts.continue_dl,
			'noprogress': opts.noprogress,
			'playliststart': opts.playliststart,
			'playlistend': opts.playlistend,
			# With '-o -' the video goes to stdout, so logging must go to
			# stderr instead
			'logtostderr': opts.outtmpl == '-',
			'consoletitle': opts.consoletitle,
			'nopart': opts.nopart,
			})
		# Registration order matters: more specific extractors (search,
		# playlist, user) must be tried before the plain YoutubeIE
		fd.add_info_extractor(youtube_search_ie)
		fd.add_info_extractor(youtube_pl_ie)
		fd.add_info_extractor(youtube_user_ie)
		fd.add_info_extractor(metacafe_ie)
		fd.add_info_extractor(dailymotion_ie)
		fd.add_info_extractor(youtube_ie)
		fd.add_info_extractor(google_ie)
		fd.add_info_extractor(google_search_ie)
		fd.add_info_extractor(photobucket_ie)
		fd.add_info_extractor(yahoo_ie)
		fd.add_info_extractor(yahoo_search_ie)
		fd.add_info_extractor(deposit_files_ie)

		# This must come last since it's the
		# fallback if none of the others work
		fd.add_info_extractor(generic_ie)

		# Update version (overwrites the running script file)
		if opts.update_self:
			update_self(fd, sys.argv[0])

		# Maybe do nothing
		if len(all_urls) < 1:
			if not opts.update_self:
				parser.error(u'you must provide at least one URL')
			else:
				sys.exit()
		retcode = fd.download(all_urls)

		# Dump cookie jar if requested
		if opts.cookiefile is not None:
			try:
				jar.save()
			except (IOError, OSError), err:
				sys.exit(u'ERROR: unable to save cookie jar')

		sys.exit(retcode)

	except DownloadError:
		sys.exit(1)
	except SameFileError:
		sys.exit(u'ERROR: fixed output name but more than one file to download')
	except KeyboardInterrupt:
		sys.exit(u'\nERROR: Interrupted by user')