]> jfr.im git - yt-dlp.git/blame - youtube-dl
Fix metacafe.com code not working due to gdaKey again (fixes issue #185)
[yt-dlp.git] / youtube-dl
CommitLineData
4fa74b52
RG
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# Author: Ricardo Garcia Gonzalez
64a6f26c 4# Author: Danny Colligan
49c0028a 5# Author: Benjamin Johnson
4fa74b52
RG
6# License: Public domain code
7import htmlentitydefs
8import httplib
2546e767 9import locale
4fa74b52
RG
10import math
11import netrc
12import os
13import os.path
14import re
15import socket
16import string
0487b407 17import subprocess
4fa74b52
RG
18import sys
19import time
20import urllib
21import urllib2
a04e80a4
RG
22
23# parse_qs was moved from the cgi module to the urlparse module recently.
24try:
25 from urlparse import parse_qs
26except ImportError:
27 from cgi import parse_qs
4fa74b52 28
# HTTP headers sent with every request; impersonates a contemporary
# Firefox so servers return the same pages a regular browser would see.
std_headers = {
    'User-Agent': 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.7) Gecko/20100720 Firefox/3.6.7',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'en-us,en;q=0.5',
}

# Characters considered safe for "simplified" titles, as a unicode string
# (str.decode here is Python 2: converts the ASCII str constants to unicode).
simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii')
def preferredencoding():
    """Get preferred encoding.

    Returns the best encoding scheme for the system, based on
    locale.getpreferredencoding() and some further tweaks.
    """
    # Probe the locale's encoding by actually encoding with it; fall back
    # to UTF-8 when the locale reports something bogus or unusable.
    try:
        pref = locale.getpreferredencoding()
        u'TEST'.encode(pref)
    except Exception:
        # The original used a bare "except:", which would also swallow
        # KeyboardInterrupt/SystemExit; Exception still covers the
        # LookupError/ValueError that encode() can raise for a bad codec.
        pref = 'UTF-8'
    # The original wrapped this in a one-shot generator
    # (yield_preferredencoding().next()) for no observable benefit;
    # returning the value directly is equivalent.
    return pref
490fd7ae
RG
54def htmlentity_transform(matchobj):
55 """Transforms an HTML entity to a Unicode character.
56
57 This function receives a match object and is intended to be used with
58 the re.sub() function.
59 """
60 entity = matchobj.group(1)
61
62 # Known non-numeric HTML entity
63 if entity in htmlentitydefs.name2codepoint:
64 return unichr(htmlentitydefs.name2codepoint[entity])
65
66 # Unicode character
67 mobj = re.match(ur'(?u)#(x?\d+)', entity)
68 if mobj is not None:
69 numstr = mobj.group(1)
70 if numstr.startswith(u'x'):
71 base = 16
72 numstr = u'0%s' % numstr
73 else:
74 base = 10
75 return unichr(long(numstr, base))
76
77 # Unknown entity in name, return its literal representation
78 return (u'&%s;' % entity)
79
def sanitize_title(utitle):
    """Sanitizes a video title so it could be used as part of a filename."""
    # Decode HTML entities (&amp;, &#160;, ...) into their real characters.
    utitle = re.sub(ur'(?u)&(.+?);', htmlentity_transform, utitle)
    # The OS path separator cannot appear in a file name; replace it.
    return utitle.replace(unicode(os.sep), u'%')
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        # Special filename '-' means "write to standard output".
        if filename == u'-':
            return (sys.stdout, filename)
        stream = open(filename, open_mode)
        return (stream, filename)
    except (IOError, OSError), err:
        # In case of error, try to remove win32 forbidden chars
        filename = re.sub(ur'[/<>:"\|\?\*]', u'#', filename)

        # An exception here should be caught in the caller
        stream = open(filename, open_mode)
        return (stream, filename)
class DownloadError(Exception):
    """Raised when fetching a video fails.

    FileDownloader objects throw this exception when they are not
    configured to continue on errors; it carries the relevant error
    message for the failed download.
    """
    pass
class SameFileError(Exception):
    """Raised when several downloads would collide on one file.

    FileDownloader objects throw this exception when they detect that
    more than one of the requested videos would end up written to the
    same path on disk.
    """
    pass
class PostProcessingError(Exception):
    """Raised when a postprocessing step fails.

    A PostProcessor's .run() method may raise this exception to signal
    that the postprocessing task went wrong.
    """
    pass
class UnavailableVideoError(Exception):
    """Raised when a requested format cannot be served.

    Thrown when a video is requested in a format that is not available
    for that particular video.
    """
    pass
class ContentTooShortError(Exception):
    """Signals a truncated download.

    Raised by FileDownloader objects when the amount of data received
    is smaller than what the server announced up front, which usually
    means the connection was interrupted mid-transfer.
    """
    # Byte counts: what actually arrived vs. what the server promised.
    downloaded = None
    expected = None

    def __init__(self, downloaded, expected):
        self.downloaded = downloaded
        self.expected = expected
class FileDownloader(object):
    """File Downloader class.

    File downloader objects are the ones responsible of downloading the
    actual video file and writing it to disk if the user has requested
    it, among some other tasks. In most cases there should be one per
    program. As, given a video URL, the downloader doesn't know how to
    extract all the needed information, task that InfoExtractors do, it
    has to pass the URL to one of them.

    For this, file downloader objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the file downloader handles it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    asks the FileDownloader to process the video information, possibly
    downloading the video.

    File downloaders accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The FileDownloader also
    registers itself as the downloader in charge for the InfoExtractors
    that are added to it, so this is a "mutual registration".

    Available options:

    username:      Username for authentication purposes.
    password:      Password for authentication purposes.
    usenetrc:      Use netrc for authentication instead.
    quiet:         Do not print messages to stdout.
    forceurl:      Force printing final URL.
    forcetitle:    Force printing title.
    simulate:      Do not download the video files.
    format:        Video format code.
    format_limit:  Highest quality format to try.
    outtmpl:       Template for output names.
    ignoreerrors:  Do not stop on download errors.
    ratelimit:     Download speed limit, in bytes/sec.
    nooverwrites:  Prevent overwriting files.
    retries:       Number of times to retry for HTTP error 503
    continuedl:    Try to continue downloads if possible.
    noprogress:    Do not print the progress bar.
    """

    params = None                 # options dictionary (see class docstring)
    _ies = []                     # registered InfoExtractors, in priority order
    _pps = []                     # registered PostProcessors, chained in order
    _download_retcode = None      # process exit code accumulated across downloads
    _num_downloads = None         # ordinal counter used for the %(ord)s template key

    def __init__(self, params):
        """Create a FileDownloader object with the given options."""
        # Instance attributes shadow the class-level defaults above.
        self._ies = []
        self._pps = []
        self._download_retcode = 0
        self._num_downloads = 0
        self.params = params
216 @staticmethod
217 def pmkdir(filename):
218 """Create directory components in filename. Similar to Unix "mkdir -p"."""
219 components = filename.split(os.sep)
220 aggregate = [os.sep.join(components[0:x]) for x in xrange(1, len(components))]
3af1e172 221 aggregate = ['%s%s' % (x, os.sep) for x in aggregate] # Finish names with separator
4fa74b52
RG
222 for dir in aggregate:
223 if not os.path.exists(dir):
224 os.mkdir(dir)
225
    @staticmethod
    def format_bytes(bytes):
        """Format a byte count as a human-readable string (e.g. '1.00M')."""
        if bytes is None:
            return 'N/A'
        if type(bytes) is str:
            # Content-Length header values arrive as strings.
            bytes = float(bytes)
        if bytes == 0.0:
            exponent = 0
        else:
            # Largest power of 1024 not exceeding the value.
            exponent = long(math.log(bytes, 1024.0))
        suffix = 'bkMGTPEZY'[exponent]
        converted = float(bytes) / float(1024**exponent)
        return '%.2f%s' % (converted, suffix)

    @staticmethod
    def calc_percent(byte_counter, data_len):
        """Return a fixed-width percent string, or '---.-%' if the total is unknown."""
        if data_len is None:
            return '---.-%'
        return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))

    @staticmethod
    def calc_eta(start, now, total, current):
        """Estimate remaining time as 'MM:SS'; '--:--' when it cannot be computed."""
        if total is None:
            return '--:--'
        dif = now - start
        if current == 0 or dif < 0.001: # One millisecond
            return '--:--'
        rate = float(current) / dif
        eta = long((float(total) - float(current)) / rate)
        (eta_mins, eta_secs) = divmod(eta, 60)
        if eta_mins > 99:
            # Over 99 minutes the field would not fit; give up on the estimate.
            return '--:--'
        return '%02d:%02d' % (eta_mins, eta_secs)

    @staticmethod
    def calc_speed(start, now, bytes):
        """Return the average download speed as a right-aligned string."""
        dif = now - start
        if bytes == 0 or dif < 0.001: # One millisecond
            return '%10s' % '---b/s'
        return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))

    @staticmethod
    def best_block_size(elapsed_time, bytes):
        """Adapt the next read size to the measured rate, within [1 B, 4 MB]."""
        new_min = max(bytes / 2.0, 1.0)
        new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
        if elapsed_time < 0.001:
            return long(new_max)
        rate = bytes / elapsed_time
        if rate > new_max:
            return long(new_max)
        if rate < new_min:
            return long(new_min)
        return long(rate)

    @staticmethod
    def parse_bytes(bytestr):
        """Parse a string indicating a byte quantity into a long integer."""
        matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
        if matchobj is None:
            return None
        number = float(matchobj.group(1))
        # An absent suffix gives group(2) == '', and str.index('') == 0,
        # so the multiplier correctly degrades to 1024**0 == 1.
        multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
        return long(round(number * multiplier))
    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        self._ies.append(ie)
        # Mutual registration: the IE gets a reference back to us.
        ie.set_downloader(self)

    def add_post_processor(self, pp):
        """Add a PostProcessor object to the end of the chain."""
        self._pps.append(pp)
        pp.set_downloader(self)

    def to_stdout(self, message, skip_eol=False, ignore_encoding_errors=False):
        """Print message to stdout if not in quiet mode."""
        try:
            if not self.params.get('quiet', False):
                # Trailing comma suppresses print's newline; skip_eol selects
                # between an explicit '\n' and nothing.
                print (u'%s%s' % (message, [u'\n', u''][skip_eol])).encode(preferredencoding()),
            sys.stdout.flush()
        except (UnicodeEncodeError), err:
            if not ignore_encoding_errors:
                raise

    def to_stderr(self, message):
        """Print message to stderr."""
        print >>sys.stderr, message.encode(preferredencoding())

    def fixed_template(self):
        """Checks if the output template is fixed.

        "Fixed" means it contains no %(...)s placeholders, so every
        download would be written to the same file.
        """
        return (re.search(ur'(?u)%\(.+?\)s', self.params['outtmpl']) is None)
    def trouble(self, message=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.
        """
        if message is not None:
            self.to_stderr(message)
        if not self.params.get('ignoreerrors', False):
            raise DownloadError(message)
        # ignoreerrors mode: record the failure in the exit code and carry on.
        self._download_retcode = 1

    def slow_down(self, start_time, byte_counter):
        """Sleep if the download speed is over the rate limit."""
        rate_limit = self.params.get('ratelimit', None)
        if rate_limit is None or byte_counter == 0:
            return
        now = time.time()
        elapsed = now - start_time
        if elapsed <= 0.0:
            return
        speed = float(byte_counter) / elapsed
        if speed > rate_limit:
            # Sleep just long enough that the average speed since start_time
            # drops back to the configured limit.
            time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
    def report_destination(self, filename):
        """Report destination filename."""
        self.to_stdout(u'[download] Destination: %s' % filename, ignore_encoding_errors=True)

    def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
        """Report download progress."""
        if self.params.get('noprogress', False):
            return
        # '\r' rewinds to the line start so successive updates overwrite
        # the same progress line.
        self.to_stdout(u'\r[download] %s of %s at %s ETA %s' %
                (percent_str, data_len_str, speed_str, eta_str), skip_eol=True)

    def report_resuming_byte(self, resume_len):
        """Report attempt to resume at given byte."""
        self.to_stdout(u'[download] Resuming download at byte %s' % resume_len)

    def report_retry(self, count, retries):
        """Report retry in case of HTTP error 503"""
        self.to_stdout(u'[download] Got HTTP error 503. Retrying (attempt %d of %d)...' % (count, retries))

    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        try:
            self.to_stdout(u'[download] %s has already been downloaded' % file_name)
        except (UnicodeEncodeError), err:
            # Fall back to a name-free message when the filename cannot be
            # encoded in the console's charset.
            self.to_stdout(u'[download] The file has already been downloaded')

    def report_unable_to_resume(self):
        """Report it was impossible to resume download."""
        self.to_stdout(u'[download] Unable to resume')

    def report_finish(self):
        """Report download finished."""
        if self.params.get('noprogress', False):
            self.to_stdout(u'[download] Download completed')
        else:
            # Progress-bar mode: just terminate the in-place progress line.
            self.to_stdout(u'')

    def increment_downloads(self):
        """Increment the ordinal that assigns a number to each file."""
        self._num_downloads += 1
    def process_info(self, info_dict):
        """Process a single dictionary returned by an InfoExtractor.

        In simulate mode only the forced printings are performed; otherwise
        the output filename is built from the template, directories are
        created, the video data is downloaded and postprocessors are run.
        """
        # Do nothing else if in simulate mode
        if self.params.get('simulate', False):
            # Forced printings
            if self.params.get('forcetitle', False):
                print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')
            if self.params.get('forceurl', False):
                print info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace')
            if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
                print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace')
            if self.params.get('forcedescription', False) and 'description' in info_dict:
                print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace')

            return

        try:
            # Expand the output template with the video info plus the
            # synthetic 'epoch' (current time) and 'ord' (download ordinal).
            template_dict = dict(info_dict)
            template_dict['epoch'] = unicode(long(time.time()))
            template_dict['ord'] = unicode('%05d' % self._num_downloads)
            filename = self.params['outtmpl'] % template_dict
        except (ValueError, KeyError), err:
            # NOTE(review): execution continues past this point with
            # 'filename' possibly unbound (only safe under ignoreerrors
            # when trouble() does not raise) — worth confirming upstream.
            self.trouble('ERROR: invalid output template or system charset: %s' % str(err))
        if self.params.get('nooverwrites', False) and os.path.exists(filename):
            self.to_stderr(u'WARNING: file exists: %s; skipping' % filename)
            return

        try:
            self.pmkdir(filename)
        except (OSError, IOError), err:
            self.trouble('ERROR: unable to create directories: %s' % str(err))
            return

        try:
            success = self._do_download(filename, info_dict['url'].encode('utf-8'), info_dict.get('player_url', None))
        except (OSError, IOError), err:
            # Local filesystem trouble is treated as "this format is unavailable".
            raise UnavailableVideoError
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self.trouble('ERROR: unable to download video data: %s' % str(err))
            return
        except (ContentTooShortError, ), err:
            self.trouble('ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
            return

        if success:
            try:
                self.post_process(filename, info_dict)
            except (PostProcessingError), err:
                self.trouble('ERROR: postprocessing: %s' % str(err))
                return
    def download(self, url_list):
        """Download a given list of URLs.

        Returns the accumulated return code (0 on full success, 1 if any
        download failed while ignoreerrors was set).
        """
        # A fixed template would make every URL write to the same file.
        if len(url_list) > 1 and self.fixed_template():
            raise SameFileError(self.params['outtmpl'])

        for url in url_list:
            suitable_found = False
            for ie in self._ies:
                # Go to next InfoExtractor if not suitable
                if not ie.suitable(url):
                    continue

                # Suitable InfoExtractor found
                suitable_found = True

                # Extract information from URL and process it
                ie.extract(url)

                # Suitable InfoExtractor had been found; go to next URL
                break

            if not suitable_found:
                self.trouble('ERROR: no suitable InfoExtractor: %s' % url)

        return self._download_retcode
462 def post_process(self, filename, ie_info):
463 """Run the postprocessing chain on the given file."""
464 info = dict(ie_info)
465 info['filepath'] = filename
466 for pp in self._pps:
467 info = pp.run(info)
468 if info is None:
469 break
4fa74b52 470
    def _download_with_rtmpdump(self, filename, url, player_url):
        """Download an rtmp:// URL by shelling out to the rtmpdump tool.

        Returns True on success, False on failure (after reporting trouble).
        """
        self.report_destination(filename)

        # Check for rtmpdump first
        try:
            subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
        except (OSError, IOError):
            self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
            return False

        # Download using rtmpdump. rtmpdump returns exit code 2 when
        # the connection was interrumpted and resuming appears to be
        # possible. This is part of rtmpdump's normal usage, AFAIK.
        # The [[], [...]][bool] construct selects the extra arguments only
        # when the condition holds (bool indexes the two-element list).
        basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', filename]
        retval = subprocess.call(basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)])
        while retval == 2 or retval == 1:
            prevsize = os.path.getsize(filename)
            self.to_stdout(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
            time.sleep(5.0) # This seems to be needed
            retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
            cursize = os.path.getsize(filename)
            if prevsize == cursize and retval == 1:
                # No progress between retries: stop looping.
                break
        if retval == 0:
            self.to_stdout(u'\r[rtmpdump] %s bytes' % os.path.getsize(filename))
            return True
        else:
            self.trouble('\nERROR: rtmpdump exited with code %d' % retval)
            return False
    def _do_download(self, filename, url, player_url):
        """Download url to filename over HTTP (or delegate rtmp:// to rtmpdump).

        Handles resume (Range requests), 503 retries and adaptive block
        sizing. Returns True on success, False on failure; may raise
        ContentTooShortError if fewer bytes arrive than announced.
        """
        # Attempt to download using rtmpdump
        if url.startswith('rtmp'):
            return self._download_with_rtmpdump(filename, url, player_url)

        stream = None
        open_mode = 'wb'
        # basic_request never carries the Range header; it is used to probe
        # the real length when a resume attempt is rejected with 416.
        basic_request = urllib2.Request(url, None, std_headers)
        request = urllib2.Request(url, None, std_headers)

        # Establish possible resume length
        if os.path.isfile(filename):
            resume_len = os.path.getsize(filename)
        else:
            resume_len = 0

        # Request parameters in case of being able to resume
        if self.params.get('continuedl', False) and resume_len != 0:
            self.report_resuming_byte(resume_len)
            request.add_header('Range','bytes=%d-' % resume_len)
            open_mode = 'ab'

        count = 0
        retries = self.params.get('retries', 0)
        while count <= retries:
            # Establish connection
            try:
                data = urllib2.urlopen(request)
                break
            except (urllib2.HTTPError, ), err:
                if err.code != 503 and err.code != 416:
                    # Unexpected HTTP error
                    raise
                elif err.code == 416:
                    # Unable to resume (requested range not satisfiable)
                    try:
                        # Open the connection again without the range header
                        data = urllib2.urlopen(basic_request)
                        content_length = data.info()['Content-Length']
                    except (urllib2.HTTPError, ), err:
                        if err.code != 503:
                            raise
                    else:
                        # Examine the reported length
                        if (content_length is not None and
                            (resume_len - 100 < long(content_length) < resume_len + 100)):
                            # The file had already been fully downloaded.
                            # Explanation to the above condition: in issue #175 it was revealed that
                            # YouTube sometimes adds or removes a few bytes from the end of the file,
                            # changing the file size slightly and causing problems for some users. So
                            # I decided to implement a suggested change and consider the file
                            # completely downloaded if the file size differs less than 100 bytes from
                            # the one in the hard drive.
                            self.report_file_already_downloaded(filename)
                            return True
                        else:
                            # The length does not match, we start the download over
                            self.report_unable_to_resume()
                            open_mode = 'wb'
                            break
            # Retry
            count += 1
            if count <= retries:
                self.report_retry(count, retries)

        if count > retries:
            self.trouble(u'ERROR: giving up after %s retries' % retries)
            return False

        data_len = data.info().get('Content-length', None)
        data_len_str = self.format_bytes(data_len)
        byte_counter = 0
        block_size = 1024
        start = time.time()
        while True:
            # Download and write
            before = time.time()
            data_block = data.read(block_size)
            after = time.time()
            data_block_len = len(data_block)
            if data_block_len == 0:
                break
            byte_counter += data_block_len

            # Open file just in time
            if stream is None:
                try:
                    (stream, filename) = sanitize_open(filename, open_mode)
                    self.report_destination(filename)
                except (OSError, IOError), err:
                    self.trouble('ERROR: unable to open for writing: %s' % str(err))
                    return False
            try:
                stream.write(data_block)
            except (IOError, OSError), err:
                self.trouble('\nERROR: unable to write data: %s' % str(err))
            # Adapt the next read size to the rate just measured.
            block_size = self.best_block_size(after - before, data_block_len)

            # Progress message
            percent_str = self.calc_percent(byte_counter, data_len)
            eta_str = self.calc_eta(start, time.time(), data_len, byte_counter)
            speed_str = self.calc_speed(start, time.time(), byte_counter)
            self.report_progress(percent_str, data_len_str, speed_str, eta_str)

            # Apply rate limit
            self.slow_down(start, byte_counter)

        self.report_finish()
        # data_len is still the string from the header; compare as strings.
        if data_len is not None and str(byte_counter) != data_len:
            raise ContentTooShortError(byte_counter, long(data_len))
        return True
class InfoExtractor(object):
    """Information Extractor class.

    Information extractors are the classes that, given a URL, extract
    information from the video (or videos) the URL refers to. This
    information includes the real video URL, the video title and simplified
    title, author and others. The information is stored in a dictionary
    which is then passed to the FileDownloader. The FileDownloader
    processes this information possibly downloading the video to the file
    system, among other possible outcomes. The dictionaries must include
    the following fields:

    id:         Video identifier.
    url:        Final video URL.
    uploader:   Nickname of the video uploader.
    title:      Literal title.
    stitle:     Simplified title.
    ext:        Video filename extension.
    format:     Video format.
    player_url: SWF Player URL (may be None).

    The following fields are optional. Their primary purpose is to allow
    youtube-dl to serve as the backend for a video search function, such
    as the one in youtube2mp3. They are only used when their respective
    forced printing functions are called:

    thumbnail:  Full URL to a video thumbnail image.
    description: One-line video description.

    Subclasses of this one should re-define the _real_initialize() and
    _real_extract() methods, as well as the suitable() static method.
    Probably, they should also be instantiated and added to the main
    downloader.
    """

    # Whether _real_initialize() has already run for this instance.
    _ready = False
    # The FileDownloader in charge of this extractor (may stay None).
    _downloader = None

    def __init__(self, downloader=None):
        """Constructor. Receives an optional downloader."""
        self._ready = False
        self.set_downloader(downloader)

    @staticmethod
    def suitable(url):
        """Receives a URL and returns True if suitable for this IE."""
        # The base class handles nothing; subclasses override this.
        return False

    def initialize(self):
        """Initializes an instance (authentication, etc)."""
        # Guard clause: only run the real initialization once.
        if self._ready:
            return
        self._real_initialize()
        self._ready = True

    def extract(self, url):
        """Extracts URL information and returns it in list of dicts."""
        self.initialize()
        return self._real_extract(url)

    def set_downloader(self, downloader):
        """Sets the downloader for this IE."""
        self._downloader = downloader

    def _real_initialize(self):
        """Real initialization process. Redefine in subclasses."""
        pass

    def _real_extract(self, url):
        """Real extraction process. Redefine in subclasses."""
        pass
class YoutubeIE(InfoExtractor):
    """Information extractor for youtube.com."""

    # Matches watch pages, youtu.be short links and embedded /v/ URLs;
    # group 2 captures the video id.
    _VALID_URL = r'^((?:http://)?(?:youtu\.be/|(?:\w+\.)?youtube\.com/(?:(?:v/)|(?:(?:watch(?:_popup)?(?:\.php)?)?[\?#](?:.+&)?v=))))?([0-9A-Za-z_-]+)(?(1).+)?$'
    # Forces the English/US site so later regexes match reliably.
    _LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
    _LOGIN_URL = 'http://www.youtube.com/signup?next=/&gl=US&hl=en'
    _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
    _NETRC_MACHINE = 'youtube'
    # Listed in order of quality
    _available_formats = ['38', '37', '22', '45', '35', '34', '43', '18', '6', '5', '17', '13']
    # Maps format ("itag") codes to container file extensions.
    _video_extensions = {
        '13': '3gp',
        '17': 'mp4',
        '18': 'mp4',
        '22': 'mp4',
        '37': 'mp4',
        '38': 'video', # You actually don't know if this will be MOV, AVI or whatever
        '43': 'webm',
        '45': 'webm',
    }
020f7150
RG
705 @staticmethod
706 def suitable(url):
707 return (re.match(YoutubeIE._VALID_URL, url) is not None)
708
    def report_lang(self):
        """Report attempt to set language."""
        self._downloader.to_stdout(u'[youtube] Setting language')

    def report_login(self):
        """Report attempt to log in."""
        self._downloader.to_stdout(u'[youtube] Logging in')

    def report_age_confirmation(self):
        """Report attempt to confirm age."""
        self._downloader.to_stdout(u'[youtube] Confirming age')

    def report_video_webpage_download(self, video_id):
        """Report attempt to download video webpage."""
        self._downloader.to_stdout(u'[youtube] %s: Downloading video webpage' % video_id)

    def report_video_info_webpage_download(self, video_id):
        """Report attempt to download video info webpage."""
        self._downloader.to_stdout(u'[youtube] %s: Downloading video info webpage' % video_id)

    def report_information_extraction(self, video_id):
        """Report attempt to extract video information."""
        self._downloader.to_stdout(u'[youtube] %s: Extracting video information' % video_id)

    def report_unavailable_format(self, video_id, format):
        """Report that the requested format is not available for this video."""
        self._downloader.to_stdout(u'[youtube] %s: Format %s not available' % (video_id, format))

    def report_rtmp_download(self):
        """Indicate the download will use the RTMP protocol."""
        self._downloader.to_stdout(u'[youtube] RTMP download detected')
    def _real_initialize(self):
        """Set the site language and optionally log in and confirm age.

        Credentials come from the downloader params or from ~/.netrc;
        without credentials only the language cookie is established.
        """
        if self._downloader is None:
            return

        username = None
        password = None
        downloader_params = self._downloader.params

        # Attempt to use provided username and password or .netrc data
        if downloader_params.get('username', None) is not None:
            username = downloader_params['username']
            password = downloader_params['password']
        elif downloader_params.get('usenetrc', False):
            try:
                info = netrc.netrc().authenticators(self._NETRC_MACHINE)
                if info is not None:
                    username = info[0]
                    password = info[2]
                else:
                    raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
            except (IOError, netrc.NetrcParseError), err:
                self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
                return

        # Set language
        request = urllib2.Request(self._LANG_URL, None, std_headers)
        try:
            self.report_lang()
            urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err))
            return

        # No authentication to be performed
        if username is None:
            return

        # Log in
        login_form = {
            'current_form': 'loginForm',
            'next': '/',
            'action_login': 'Log In',
            'username': username,
            'password': password,
        }
        request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form), std_headers)
        try:
            self.report_login()
            login_results = urllib2.urlopen(request).read()
            # If the login form is still present, the credentials were rejected.
            if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
                self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
                return
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
            return

        # Confirm age
        age_form = {
            'next_url': '/',
            'action_confirm': 'Confirm',
        }
        request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form), std_headers)
        try:
            self.report_age_confirmation()
            age_results = urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
            return
4fa74b52
RG
809
    def _real_extract(self, url):
        """Download and process a single YouTube video.

        Parses the video id out of *url*, fetches the watch page and the
        get_video_info data, decides which format(s) were requested, and
        hands one info dictionary per format to the FileDownloader.
        Errors are reported through self._downloader.trouble().
        """
        # Extract video id from URL
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
            return
        video_id = mobj.group(2)

        # Get video webpage
        self.report_video_webpage_download(video_id)
        request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en' % video_id, None, std_headers)
        try:
            video_webpage = urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
            return

        # Attempt to extract SWF player URL (passed along for rtmpdump use)
        mobj = re.search(r'swfConfig.*"(http://.*?watch.*?-.*?\.swf)"', video_webpage)
        if mobj is not None:
            player_url = mobj.group(1)
        else:
            player_url = None

        # Get video info: try several 'el' query variants until one of them
        # returns a 'token' parameter
        self.report_video_info_webpage_download(video_id)
        for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
            video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
                    % (video_id, el_type))
            request = urllib2.Request(video_info_url, None, std_headers)
            try:
                video_info_webpage = urllib2.urlopen(request).read()
                video_info = parse_qs(video_info_webpage)
                if 'token' in video_info:
                    break
            except (urllib2.URLError, httplib.HTTPException, socket.error), err:
                self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
                return
        if 'token' not in video_info:
            # No variant produced a token; surface YouTube's reason if it gave one
            if 'reason' in video_info:
                self._downloader.trouble(u'ERROR: YouTube said: %s' % video_info['reason'][0])
            else:
                self._downloader.trouble(u'ERROR: "token" parameter not in video info for unknown reason')
            return

        # Start extracting information
        self.report_information_extraction(video_id)

        # uploader
        if 'author' not in video_info:
            self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
            return
        video_uploader = urllib.unquote_plus(video_info['author'][0])

        # title
        if 'title' not in video_info:
            self._downloader.trouble(u'ERROR: unable to extract video title')
            return
        video_title = urllib.unquote_plus(video_info['title'][0])
        video_title = video_title.decode('utf-8')
        video_title = sanitize_title(video_title)

        # simplified title: collapse every run of non-"simple" characters to '_'
        simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
        simple_title = simple_title.strip(ur'_')

        # thumbnail image (missing thumbnail is only a warning, not fatal)
        if 'thumbnail_url' not in video_info:
            self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
            video_thumbnail = ''
        else:   # don't panic if we can't find it
            video_thumbnail = urllib.unquote_plus(video_info['thumbnail_url'][0])

        # description (only scraped from the page when explicitly requested)
        video_description = 'No description available.'
        if self._downloader.params.get('forcedescription', False):
            mobj = re.search(r'<meta name="description" content="(.*)"(?:\s*/)?>', video_webpage)
            if mobj is not None:
                video_description = mobj.group(1)

        # token
        video_token = urllib.unquote_plus(video_info['token'][0])

        # Decide which formats to download
        requested_format = self._downloader.params.get('format', None)
        get_video_template = 'http://www.youtube.com/get_video?video_id=%s&t=%s&eurl=&el=&ps=&asv=&fmt=%%s' % (video_id, video_token)

        if 'fmt_url_map' in video_info:
            # fmt_url_map is a comma-separated list of 'format|url' pairs
            url_map = dict(tuple(pair.split('|')) for pair in video_info['fmt_url_map'][0].split(','))
            format_limit = self._downloader.params.get('format_limit', None)
            if format_limit is not None and format_limit in self._available_formats:
                # _available_formats is ordered best-first; honor the cap
                format_list = self._available_formats[self._available_formats.index(format_limit):]
            else:
                format_list = self._available_formats
            existing_formats = [x for x in format_list if x in url_map]
            if len(existing_formats) == 0:
                self._downloader.trouble(u'ERROR: no known formats available for video')
                return
            if requested_format is None:
                video_url_list = [(existing_formats[0], get_video_template % existing_formats[0])] # Best quality
            elif requested_format == '-1':
                video_url_list = [(f, get_video_template % f) for f in existing_formats] # All formats
            else:
                video_url_list = [(requested_format, get_video_template % requested_format)] # Specific format

        elif 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
            # RTMP stream instead of a plain HTTP download
            self.report_rtmp_download()
            video_url_list = [(None, video_info['conn'][0])]

        else:
            self._downloader.trouble(u'ERROR: no fmt_url_map or conn information found in video info')
            return

        for format_param, video_real_url in video_url_list:
            # At this point we have a new video
            self._downloader.increment_downloads()

            # Extension
            video_extension = self._video_extensions.get(format_param, 'flv')

            # Find the video URL in fmt_url_map or conn paramters
            try:
                # Process video information
                self._downloader.process_info({
                    'id': video_id.decode('utf-8'),
                    'url': video_real_url.decode('utf-8'),
                    'uploader': video_uploader.decode('utf-8'),
                    'title': video_title,
                    'stitle': simple_title,
                    'ext': video_extension.decode('utf-8'),
                    'format': (format_param is None and u'NA' or format_param.decode('utf-8')),
                    'thumbnail': video_thumbnail.decode('utf-8'),
                    'description': video_description.decode('utf-8'),
                    'player_url': player_url,
                })
            except UnavailableVideoError, err:
                self._downloader.trouble(u'ERROR: unable to download video (format may not be available)')
42bcd27d 947
4fa74b52 948
020f7150
RG
class MetacafeIE(InfoExtractor):
    """Information Extractor for metacafe.com."""

    _VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
    # The family filter must be disabled via these two URLs before video
    # pages become accessible.
    _DISCLAIMER = 'http://www.metacafe.com/family_filter/'
    _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
    # Delegate used for metacafe ids of the form 'yt-...' (YouTube embeds)
    _youtube_ie = None

    def __init__(self, youtube_ie, downloader=None):
        InfoExtractor.__init__(self, downloader)
        self._youtube_ie = youtube_ie

    @staticmethod
    def suitable(url):
        """Return True if this IE can extract the given URL."""
        return (re.match(MetacafeIE._VALID_URL, url) is not None)

    def report_disclaimer(self):
        """Report disclaimer retrieval."""
        self._downloader.to_stdout(u'[metacafe] Retrieving disclaimer')

    def report_age_confirmation(self):
        """Report attempt to confirm age."""
        self._downloader.to_stdout(u'[metacafe] Confirming age')

    def report_download_webpage(self, video_id):
        """Report webpage download."""
        self._downloader.to_stdout(u'[metacafe] %s: Downloading webpage' % video_id)

    def report_extraction(self, video_id):
        """Report information extraction."""
        self._downloader.to_stdout(u'[metacafe] %s: Extracting information' % video_id)

    def _real_initialize(self):
        """Fetch the disclaimer page and POST the form disabling the family filter."""
        # Retrieve disclaimer
        request = urllib2.Request(self._DISCLAIMER, None, std_headers)
        try:
            self.report_disclaimer()
            disclaimer = urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % str(err))
            return

        # Confirm age
        disclaimer_form = {
            'filters': '0',
            'submit': "Continue - I'm over 18",
            }
        request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form), std_headers)
        try:
            self.report_age_confirmation()
            disclaimer = urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
            return

    def _real_extract(self, url):
        """Extract a metacafe video; 'yt-' ids are delegated to YoutubeIE."""
        # Extract id and simplified title from URL
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
            return

        video_id = mobj.group(1)

        # Check if video comes from YouTube
        mobj2 = re.match(r'^yt-(.*)$', video_id)
        if mobj2 is not None:
            self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % mobj2.group(1))
            return

        # At this point we have a new video
        self._downloader.increment_downloads()

        simple_title = mobj.group(2).decode('utf-8')
        video_extension = 'flv'

        # Retrieve video webpage to extract further information
        request = urllib2.Request('http://www.metacafe.com/watch/%s/' % video_id)
        try:
            self.report_download_webpage(video_id)
            webpage = urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
            return

        # Extract URL, uploader and title from webpage
        self.report_extraction(video_id)
        mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
        if mobj is None:
            self._downloader.trouble(u'ERROR: unable to extract media URL')
            return
        mediaURL = urllib.unquote(mobj.group(1))

        # Extract gdaKey if available; some videos require it as a __gda__
        # query parameter, others work without it (hence no hard failure)
        mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage)
        if mobj is None:
            video_url = mediaURL
            #self._downloader.trouble(u'ERROR: unable to extract gdaKey')
            #return
        else:
            gdaKey = mobj.group(1)
            video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)

        mobj = re.search(r'(?im)<title>(.*) - Video</title>', webpage)
        if mobj is None:
            self._downloader.trouble(u'ERROR: unable to extract title')
            return
        video_title = mobj.group(1).decode('utf-8')
        video_title = sanitize_title(video_title)

        mobj = re.search(r'(?ms)By:\s*<a .*?>(.+?)<', webpage)
        if mobj is None:
            self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
            return
        video_uploader = mobj.group(1)

        try:
            # Process video information
            self._downloader.process_info({
                'id': video_id.decode('utf-8'),
                'url': video_url.decode('utf-8'),
                'uploader': video_uploader.decode('utf-8'),
                'title': video_title,
                'stitle': simple_title,
                'ext': video_extension.decode('utf-8'),
                'format': u'NA',
                'player_url': None,
            })
        except UnavailableVideoError:
            self._downloader.trouble(u'ERROR: unable to download video')
020f7150 1079
25af2bce 1080
4135fa45
WB
1081class DailymotionIE(InfoExtractor):
1082 """Information Extractor for Dailymotion"""
1083
1084 _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^_/]+)_([^/]+)'
4135fa45
WB
1085
1086 def __init__(self, downloader=None):
1087 InfoExtractor.__init__(self, downloader)
1088
1089 @staticmethod
1090 def suitable(url):
1091 return (re.match(DailymotionIE._VALID_URL, url) is not None)
1092
4135fa45
WB
1093 def report_download_webpage(self, video_id):
1094 """Report webpage download."""
1095 self._downloader.to_stdout(u'[dailymotion] %s: Downloading webpage' % video_id)
1096
1097 def report_extraction(self, video_id):
1098 """Report information extraction."""
1099 self._downloader.to_stdout(u'[dailymotion] %s: Extracting information' % video_id)
1100
1101 def _real_initialize(self):
1102 return
1103
4135fa45
WB
1104 def _real_extract(self, url):
1105 # Extract id and simplified title from URL
1106 mobj = re.match(self._VALID_URL, url)
1107 if mobj is None:
1108 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
1109 return
1110
df372a65 1111 # At this point we have a new video
9bf7fa52 1112 self._downloader.increment_downloads()
4135fa45
WB
1113 video_id = mobj.group(1)
1114
1115 simple_title = mobj.group(2).decode('utf-8')
1116 video_extension = 'flv'
1117
1118 # Retrieve video webpage to extract further information
1119 request = urllib2.Request(url)
1120 try:
1121 self.report_download_webpage(video_id)
1122 webpage = urllib2.urlopen(request).read()
1123 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1124 self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
1125 return
1126
1127 # Extract URL, uploader and title from webpage
1128 self.report_extraction(video_id)
1129 mobj = re.search(r'(?i)addVariable\(\"video\"\s*,\s*\"([^\"]*)\"\)', webpage)
1130 if mobj is None:
1131 self._downloader.trouble(u'ERROR: unable to extract media URL')
1132 return
1133 mediaURL = urllib.unquote(mobj.group(1))
1134
1135 # if needed add http://www.dailymotion.com/ if relative URL
1136
1137 video_url = mediaURL
1138
1139 # '<meta\s+name="title"\s+content="Dailymotion\s*[:\-]\s*(.*?)"\s*\/\s*>'
1140 mobj = re.search(r'(?im)<title>Dailymotion\s*[\-:]\s*(.+?)</title>', webpage)
1141 if mobj is None:
1142 self._downloader.trouble(u'ERROR: unable to extract title')
1143 return
1144 video_title = mobj.group(1).decode('utf-8')
1145 video_title = sanitize_title(video_title)
1146
7e2dd306 1147 mobj = re.search(r'(?im)<div class="dmco_html owner">.*?<a class="name" href="/.+?">(.+?)</a></div>', webpage)
4135fa45
WB
1148 if mobj is None:
1149 self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
1150 return
1151 video_uploader = mobj.group(1)
1152
1153 try:
1154 # Process video information
1155 self._downloader.process_info({
1156 'id': video_id.decode('utf-8'),
1157 'url': video_url.decode('utf-8'),
1158 'uploader': video_uploader.decode('utf-8'),
1159 'title': video_title,
1160 'stitle': simple_title,
1161 'ext': video_extension.decode('utf-8'),
1162 'format': u'NA',
1163 'player_url': None,
1164 })
73f4e7af
RG
1165 except UnavailableVideoError:
1166 self._downloader.trouble(u'ERROR: unable to download video')
4135fa45 1167
class GoogleIE(InfoExtractor):
    """Information extractor for video.google.com."""

    _VALID_URL = r'(?:http://)?video\.google\.(?:com(?:\.au)?|co\.(?:uk|jp|kr|cr)|ca|de|es|fr|it|nl|pl)/videoplay\?docid=([^\&]+).*'

    def __init__(self, downloader=None):
        InfoExtractor.__init__(self, downloader)

    @staticmethod
    def suitable(url):
        """Return True if this IE can extract the given URL."""
        return (re.match(GoogleIE._VALID_URL, url) is not None)

    def report_download_webpage(self, video_id):
        """Report webpage download."""
        self._downloader.to_stdout(u'[video.google] %s: Downloading webpage' % video_id)

    def report_extraction(self, video_id):
        """Report information extraction."""
        self._downloader.to_stdout(u'[video.google] %s: Extracting information' % video_id)

    def _real_initialize(self):
        # No session setup required.
        return

    def _real_extract(self, url):
        """Extract a Google Video clip (mp4 preferred, flv fallback)."""
        # Extract id from URL
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
            return

        # At this point we have a new video
        self._downloader.increment_downloads()
        video_id = mobj.group(1)

        video_extension = 'mp4'

        # Retrieve video webpage to extract further information
        request = urllib2.Request('http://video.google.com/videoplay?docid=%s&hl=en&oe=utf-8' % video_id)
        try:
            self.report_download_webpage(video_id)
            webpage = urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
            return

        # Extract URL, uploader, and title from webpage
        self.report_extraction(video_id)
        # Prefer the mp4 download URL; fall back to the flv URL which is
        # embedded with \xNN escapes
        mobj = re.search(r"download_url:'([^']+)'", webpage)
        if mobj is None:
            video_extension = 'flv'
            mobj = re.search(r"(?i)videoUrl\\x3d(.+?)\\x26", webpage)
        if mobj is None:
            self._downloader.trouble(u'ERROR: unable to extract media URL')
            return
        mediaURL = urllib.unquote(mobj.group(1))
        # Undo the literal '\x3d'/'\x26' escapes ('=' and '&') used in the
        # flv variant; harmless no-ops for the mp4 URL
        mediaURL = mediaURL.replace('\\x3d', '\x3d')
        mediaURL = mediaURL.replace('\\x26', '\x26')

        video_url = mediaURL

        mobj = re.search(r'<title>(.*)</title>', webpage)
        if mobj is None:
            self._downloader.trouble(u'ERROR: unable to extract title')
            return
        video_title = mobj.group(1).decode('utf-8')
        video_title = sanitize_title(video_title)
        simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)

        # Extract video description
        mobj = re.search(r'<span id=short-desc-content>([^<]*)</span>', webpage)
        if mobj is None:
            self._downloader.trouble(u'ERROR: unable to extract video description')
            return
        video_description = mobj.group(1).decode('utf-8')
        if not video_description:
            video_description = 'No description available.'

        # Extract video thumbnail (costs an extra search-page request, so
        # only performed when explicitly requested)
        if self._downloader.params.get('forcethumbnail', False):
            request = urllib2.Request('http://video.google.com/videosearch?q=%s+site:video.google.com&hl=en' % abs(int(video_id)))
            try:
                webpage = urllib2.urlopen(request).read()
            except (urllib2.URLError, httplib.HTTPException, socket.error), err:
                self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
                return
            mobj = re.search(r'<img class=thumbnail-img (?:.* )?src=(http.*)>', webpage)
            if mobj is None:
                self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
                return
            video_thumbnail = mobj.group(1)
        else:   # we need something to pass to process_info
            video_thumbnail = ''


        try:
            # Process video information
            self._downloader.process_info({
                'id': video_id.decode('utf-8'),
                'url': video_url.decode('utf-8'),
                'uploader': u'NA',
                'title': video_title,
                'stitle': simple_title,
                'ext': video_extension.decode('utf-8'),
                'format': u'NA',
                'player_url': None,
            })
        except UnavailableVideoError:
            self._downloader.trouble(u'ERROR: unable to download video')
49c0028a 1276
1277
1278class PhotobucketIE(InfoExtractor):
1279 """Information extractor for photobucket.com."""
1280
1281 _VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)'
1282
1283 def __init__(self, downloader=None):
1284 InfoExtractor.__init__(self, downloader)
1285
1286 @staticmethod
1287 def suitable(url):
1288 return (re.match(PhotobucketIE._VALID_URL, url) is not None)
1289
1290 def report_download_webpage(self, video_id):
1291 """Report webpage download."""
1292 self._downloader.to_stdout(u'[photobucket] %s: Downloading webpage' % video_id)
1293
1294 def report_extraction(self, video_id):
1295 """Report information extraction."""
1296 self._downloader.to_stdout(u'[photobucket] %s: Extracting information' % video_id)
1297
1298 def _real_initialize(self):
1299 return
1300
1301 def _real_extract(self, url):
1302 # Extract id from URL
1303 mobj = re.match(self._VALID_URL, url)
1304 if mobj is None:
1305 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1306 return
1307
df372a65 1308 # At this point we have a new video
9bf7fa52 1309 self._downloader.increment_downloads()
49c0028a 1310 video_id = mobj.group(1)
1311
1312 video_extension = 'flv'
1313
1314 # Retrieve video webpage to extract further information
1315 request = urllib2.Request(url)
1316 try:
1317 self.report_download_webpage(video_id)
1318 webpage = urllib2.urlopen(request).read()
1319 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1320 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1321 return
1322
1323 # Extract URL, uploader, and title from webpage
1324 self.report_extraction(video_id)
1325 mobj = re.search(r'<link rel="video_src" href=".*\?file=([^"]+)" />', webpage)
1326 if mobj is None:
1327 self._downloader.trouble(u'ERROR: unable to extract media URL')
1328 return
1329 mediaURL = urllib.unquote(mobj.group(1))
1330
1331 video_url = mediaURL
1332
1333 mobj = re.search(r'<title>(.*) video by (.*) - Photobucket</title>', webpage)
1334 if mobj is None:
1335 self._downloader.trouble(u'ERROR: unable to extract title')
1336 return
1337 video_title = mobj.group(1).decode('utf-8')
490fd7ae 1338 video_title = sanitize_title(video_title)
31cbdaaf 1339 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
49c0028a 1340
1341 video_uploader = mobj.group(2).decode('utf-8')
1342
1343 try:
1344 # Process video information
1345 self._downloader.process_info({
1346 'id': video_id.decode('utf-8'),
1347 'url': video_url.decode('utf-8'),
490fd7ae
RG
1348 'uploader': video_uploader,
1349 'title': video_title,
31cbdaaf 1350 'stitle': simple_title,
490fd7ae 1351 'ext': video_extension.decode('utf-8'),
6ba562b0 1352 'format': u'NA',
e616ec0c 1353 'player_url': None,
490fd7ae 1354 })
73f4e7af
RG
1355 except UnavailableVideoError:
1356 self._downloader.trouble(u'ERROR: unable to download video')
490fd7ae
RG
1357
1358
61945318
RG
1359class YahooIE(InfoExtractor):
1360 """Information extractor for video.yahoo.com."""
1361
1362 # _VALID_URL matches all Yahoo! Video URLs
1363 # _VPAGE_URL matches only the extractable '/watch/' URLs
1364 _VALID_URL = r'(?:http://)?(?:[a-z]+\.)?video\.yahoo\.com/(?:watch|network)/([0-9]+)(?:/|\?v=)([0-9]+)(?:[#\?].*)?'
1365 _VPAGE_URL = r'(?:http://)?video\.yahoo\.com/watch/([0-9]+)/([0-9]+)(?:[#\?].*)?'
1366
1367 def __init__(self, downloader=None):
1368 InfoExtractor.__init__(self, downloader)
1369
1370 @staticmethod
1371 def suitable(url):
1372 return (re.match(YahooIE._VALID_URL, url) is not None)
1373
1374 def report_download_webpage(self, video_id):
1375 """Report webpage download."""
1376 self._downloader.to_stdout(u'[video.yahoo] %s: Downloading webpage' % video_id)
1377
1378 def report_extraction(self, video_id):
1379 """Report information extraction."""
1380 self._downloader.to_stdout(u'[video.yahoo] %s: Extracting information' % video_id)
1381
1382 def _real_initialize(self):
1383 return
1384
df372a65 1385 def _real_extract(self, url, new_video=True):
61945318
RG
1386 # Extract ID from URL
1387 mobj = re.match(self._VALID_URL, url)
1388 if mobj is None:
1389 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1390 return
1391
df372a65 1392 # At this point we have a new video
9bf7fa52 1393 self._downloader.increment_downloads()
61945318
RG
1394 video_id = mobj.group(2)
1395 video_extension = 'flv'
1396
1397 # Rewrite valid but non-extractable URLs as
1398 # extractable English language /watch/ URLs
1399 if re.match(self._VPAGE_URL, url) is None:
1400 request = urllib2.Request(url)
1401 try:
1402 webpage = urllib2.urlopen(request).read()
1403 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1404 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1405 return
1406
1407 mobj = re.search(r'\("id", "([0-9]+)"\);', webpage)
1408 if mobj is None:
1409 self._downloader.trouble(u'ERROR: Unable to extract id field')
1410 return
1411 yahoo_id = mobj.group(1)
1412
1413 mobj = re.search(r'\("vid", "([0-9]+)"\);', webpage)
1414 if mobj is None:
1415 self._downloader.trouble(u'ERROR: Unable to extract vid field')
1416 return
1417 yahoo_vid = mobj.group(1)
1418
1419 url = 'http://video.yahoo.com/watch/%s/%s' % (yahoo_vid, yahoo_id)
df372a65 1420 return self._real_extract(url, new_video=False)
61945318
RG
1421
1422 # Retrieve video webpage to extract further information
1423 request = urllib2.Request(url)
1424 try:
1425 self.report_download_webpage(video_id)
1426 webpage = urllib2.urlopen(request).read()
1427 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1428 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1429 return
1430
1431 # Extract uploader and title from webpage
1432 self.report_extraction(video_id)
1433 mobj = re.search(r'<meta name="title" content="(.*)" />', webpage)
1434 if mobj is None:
1435 self._downloader.trouble(u'ERROR: unable to extract video title')
1436 return
1437 video_title = mobj.group(1).decode('utf-8')
1438 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1439
1440 mobj = re.search(r'<h2 class="ti-5"><a href="http://video\.yahoo\.com/(people|profile)/[0-9]+" beacon=".*">(.*)</a></h2>', webpage)
1441 if mobj is None:
1442 self._downloader.trouble(u'ERROR: unable to extract video uploader')
1443 return
1444 video_uploader = mobj.group(1).decode('utf-8')
1445
7e58d568
RG
1446 # Extract video thumbnail
1447 mobj = re.search(r'<link rel="image_src" href="(.*)" />', webpage)
1448 if mobj is None:
1449 self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
1450 return
1451 video_thumbnail = mobj.group(1).decode('utf-8')
1452
1453 # Extract video description
1454 mobj = re.search(r'<meta name="description" content="(.*)" />', webpage)
1455 if mobj is None:
1456 self._downloader.trouble(u'ERROR: unable to extract video description')
1457 return
1458 video_description = mobj.group(1).decode('utf-8')
1459 if not video_description: video_description = 'No description available.'
1460
61945318
RG
1461 # Extract video height and width
1462 mobj = re.search(r'<meta name="video_height" content="([0-9]+)" />', webpage)
1463 if mobj is None:
1464 self._downloader.trouble(u'ERROR: unable to extract video height')
1465 return
1466 yv_video_height = mobj.group(1)
1467
1468 mobj = re.search(r'<meta name="video_width" content="([0-9]+)" />', webpage)
1469 if mobj is None:
1470 self._downloader.trouble(u'ERROR: unable to extract video width')
1471 return
1472 yv_video_width = mobj.group(1)
1473
1474 # Retrieve video playlist to extract media URL
1475 # I'm not completely sure what all these options are, but we
1476 # seem to need most of them, otherwise the server sends a 401.
1477 yv_lg = 'R0xx6idZnW2zlrKP8xxAIR' # not sure what this represents
1478 yv_bitrate = '700' # according to Wikipedia this is hard-coded
1479 request = urllib2.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id +
1480 '&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height +
1481 '&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797')
1482 try:
1483 self.report_download_webpage(video_id)
1484 webpage = urllib2.urlopen(request).read()
1485 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1486 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1487 return
1488
1489 # Extract media URL from playlist XML
1490 mobj = re.search(r'<STREAM APP="(http://.*)" FULLPATH="/?(/.*\.flv\?[^"]*)"', webpage)
1491 if mobj is None:
1492 self._downloader.trouble(u'ERROR: Unable to extract media URL')
1493 return
1494 video_url = urllib.unquote(mobj.group(1) + mobj.group(2)).decode('utf-8')
1495 video_url = re.sub(r'(?u)&(.+?);', htmlentity_transform, video_url)
1496
1497 try:
1498 # Process video information
1499 self._downloader.process_info({
1500 'id': video_id.decode('utf-8'),
1501 'url': video_url,
1502 'uploader': video_uploader,
1503 'title': video_title,
1504 'stitle': simple_title,
1505 'ext': video_extension.decode('utf-8'),
7e58d568
RG
1506 'thumbnail': video_thumbnail.decode('utf-8'),
1507 'description': video_description,
1508 'thumbnail': video_thumbnail,
1509 'description': video_description,
e616ec0c 1510 'player_url': None,
61945318 1511 })
73f4e7af
RG
1512 except UnavailableVideoError:
1513 self._downloader.trouble(u'ERROR: unable to download video')
61945318
RG
1514
1515
490fd7ae
RG
1516class GenericIE(InfoExtractor):
1517 """Generic last-resort information extractor."""
1518
1519 def __init__(self, downloader=None):
1520 InfoExtractor.__init__(self, downloader)
1521
1522 @staticmethod
1523 def suitable(url):
1524 return True
1525
1526 def report_download_webpage(self, video_id):
1527 """Report webpage download."""
1528 self._downloader.to_stdout(u'WARNING: Falling back on generic information extractor.')
1529 self._downloader.to_stdout(u'[generic] %s: Downloading webpage' % video_id)
1530
1531 def report_extraction(self, video_id):
1532 """Report information extraction."""
1533 self._downloader.to_stdout(u'[generic] %s: Extracting information' % video_id)
1534
1535 def _real_initialize(self):
1536 return
1537
1538 def _real_extract(self, url):
df372a65 1539 # At this point we have a new video
9bf7fa52 1540 self._downloader.increment_downloads()
df372a65 1541
490fd7ae
RG
1542 video_id = url.split('/')[-1]
1543 request = urllib2.Request(url)
1544 try:
1545 self.report_download_webpage(video_id)
1546 webpage = urllib2.urlopen(request).read()
1547 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1548 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1549 return
1550 except ValueError, err:
1551 # since this is the last-resort InfoExtractor, if
1552 # this error is thrown, it'll be thrown here
1553 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1554 return
1555
1556 # Start with something easy: JW Player in SWFObject
1557 mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
1558 if mobj is None:
1559 # Broaden the search a little bit
1560 mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
1561 if mobj is None:
1562 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1563 return
1564
1565 # It's possible that one of the regexes
1566 # matched, but returned an empty group:
1567 if mobj.group(1) is None:
1568 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1569 return
1570
1571 video_url = urllib.unquote(mobj.group(1))
1572 video_id = os.path.basename(video_url)
1573
1574 # here's a fun little line of code for you:
1575 video_extension = os.path.splitext(video_id)[1][1:]
1576 video_id = os.path.splitext(video_id)[0]
1577
1578 # it's tempting to parse this further, but you would
1579 # have to take into account all the variations like
1580 # Video Title - Site Name
1581 # Site Name | Video Title
1582 # Video Title - Tagline | Site Name
1583 # and so on and so forth; it's just not practical
1584 mobj = re.search(r'<title>(.*)</title>', webpage)
1585 if mobj is None:
1586 self._downloader.trouble(u'ERROR: unable to extract title')
1587 return
1588 video_title = mobj.group(1).decode('utf-8')
1589 video_title = sanitize_title(video_title)
31cbdaaf 1590 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
490fd7ae
RG
1591
1592 # video uploader is domain name
1593 mobj = re.match(r'(?:https?://)?([^/]*)/.*', url)
1594 if mobj is None:
1595 self._downloader.trouble(u'ERROR: unable to extract title')
1596 return
1597 video_uploader = mobj.group(1).decode('utf-8')
1598
1599 try:
1600 # Process video information
1601 self._downloader.process_info({
1602 'id': video_id.decode('utf-8'),
1603 'url': video_url.decode('utf-8'),
1604 'uploader': video_uploader,
1605 'title': video_title,
31cbdaaf 1606 'stitle': simple_title,
49c0028a 1607 'ext': video_extension.decode('utf-8'),
6ba562b0 1608 'format': u'NA',
e616ec0c 1609 'player_url': None,
49c0028a 1610 })
73f4e7af
RG
1611 except UnavailableVideoError, err:
1612 self._downloader.trouble(u'ERROR: unable to download video')
49c0028a 1613
1614
25af2bce
RG
1615class YoutubeSearchIE(InfoExtractor):
1616 """Information Extractor for YouTube search queries."""
1617 _VALID_QUERY = r'ytsearch(\d+|all)?:[\s\S]+'
1618 _TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en'
1619 _VIDEO_INDICATOR = r'href="/watch\?v=.+?"'
304a4d85 1620 _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
25af2bce 1621 _youtube_ie = None
fd9288c3 1622 _max_youtube_results = 1000
25af2bce 1623
f995f712 1624 def __init__(self, youtube_ie, downloader=None):
25af2bce
RG
1625 InfoExtractor.__init__(self, downloader)
1626 self._youtube_ie = youtube_ie
1627
1628 @staticmethod
1629 def suitable(url):
1630 return (re.match(YoutubeSearchIE._VALID_QUERY, url) is not None)
1631
1632 def report_download_page(self, query, pagenum):
1633 """Report attempt to download playlist page with given number."""
490fd7ae 1634 query = query.decode(preferredencoding())
3aaf887e 1635 self._downloader.to_stdout(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
25af2bce
RG
1636
1637 def _real_initialize(self):
1638 self._youtube_ie.initialize()
1639
1640 def _real_extract(self, query):
1641 mobj = re.match(self._VALID_QUERY, query)
1642 if mobj is None:
147753eb 1643 self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
6f21f686 1644 return
25af2bce
RG
1645
1646 prefix, query = query.split(':')
1647 prefix = prefix[8:]
490fd7ae 1648 query = query.encode('utf-8')
f995f712 1649 if prefix == '':
6f21f686
RG
1650 self._download_n_results(query, 1)
1651 return
f995f712 1652 elif prefix == 'all':
6f21f686
RG
1653 self._download_n_results(query, self._max_youtube_results)
1654 return
f995f712 1655 else:
25af2bce 1656 try:
e1f18b8a 1657 n = long(prefix)
25af2bce 1658 if n <= 0:
147753eb 1659 self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
6f21f686 1660 return
257453b9 1661 elif n > self._max_youtube_results:
6f21f686 1662 self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)' % (self._max_youtube_results, n))
257453b9 1663 n = self._max_youtube_results
6f21f686
RG
1664 self._download_n_results(query, n)
1665 return
e1f18b8a 1666 except ValueError: # parsing prefix as integer fails
6f21f686
RG
1667 self._download_n_results(query, 1)
1668 return
25af2bce
RG
1669
1670 def _download_n_results(self, query, n):
1671 """Downloads a specified number of results for a query"""
1672
1673 video_ids = []
1674 already_seen = set()
1675 pagenum = 1
1676
1677 while True:
1678 self.report_download_page(query, pagenum)
a9633f14 1679 result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
25af2bce
RG
1680 request = urllib2.Request(result_url, None, std_headers)
1681 try:
1682 page = urllib2.urlopen(request).read()
1683 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
147753eb 1684 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
6f21f686 1685 return
25af2bce
RG
1686
1687 # Extract video identifiers
1688 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1689 video_id = page[mobj.span()[0]:mobj.span()[1]].split('=')[2][:-1]
1690 if video_id not in already_seen:
1691 video_ids.append(video_id)
1692 already_seen.add(video_id)
1693 if len(video_ids) == n:
1694 # Specified n videos reached
25af2bce 1695 for id in video_ids:
6f21f686
RG
1696 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1697 return
25af2bce 1698
304a4d85 1699 if re.search(self._MORE_PAGES_INDICATOR, page) is None:
25af2bce 1700 for id in video_ids:
6f21f686
RG
1701 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1702 return
25af2bce
RG
1703
1704 pagenum = pagenum + 1
1705
7e58d568
RG
1706class GoogleSearchIE(InfoExtractor):
1707 """Information Extractor for Google Video search queries."""
1708 _VALID_QUERY = r'gvsearch(\d+|all)?:[\s\S]+'
1709 _TEMPLATE_URL = 'http://video.google.com/videosearch?q=%s+site:video.google.com&start=%s&hl=en'
1710 _VIDEO_INDICATOR = r'videoplay\?docid=([^\&>]+)\&'
1711 _MORE_PAGES_INDICATOR = r'<span>Next</span>'
1712 _google_ie = None
1713 _max_google_results = 1000
1714
1715 def __init__(self, google_ie, downloader=None):
1716 InfoExtractor.__init__(self, downloader)
1717 self._google_ie = google_ie
1718
1719 @staticmethod
1720 def suitable(url):
1721 return (re.match(GoogleSearchIE._VALID_QUERY, url) is not None)
1722
1723 def report_download_page(self, query, pagenum):
1724 """Report attempt to download playlist page with given number."""
1725 query = query.decode(preferredencoding())
1726 self._downloader.to_stdout(u'[video.google] query "%s": Downloading page %s' % (query, pagenum))
1727
1728 def _real_initialize(self):
1729 self._google_ie.initialize()
1730
1731 def _real_extract(self, query):
1732 mobj = re.match(self._VALID_QUERY, query)
1733 if mobj is None:
1734 self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
1735 return
1736
1737 prefix, query = query.split(':')
1738 prefix = prefix[8:]
1739 query = query.encode('utf-8')
1740 if prefix == '':
1741 self._download_n_results(query, 1)
1742 return
1743 elif prefix == 'all':
1744 self._download_n_results(query, self._max_google_results)
1745 return
1746 else:
1747 try:
1748 n = long(prefix)
1749 if n <= 0:
1750 self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
1751 return
1752 elif n > self._max_google_results:
1753 self._downloader.to_stderr(u'WARNING: gvsearch returns max %i results (you requested %i)' % (self._max_google_results, n))
1754 n = self._max_google_results
1755 self._download_n_results(query, n)
1756 return
1757 except ValueError: # parsing prefix as integer fails
1758 self._download_n_results(query, 1)
1759 return
1760
1761 def _download_n_results(self, query, n):
1762 """Downloads a specified number of results for a query"""
1763
1764 video_ids = []
1765 already_seen = set()
1766 pagenum = 1
1767
1768 while True:
1769 self.report_download_page(query, pagenum)
1770 result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1771 request = urllib2.Request(result_url, None, std_headers)
1772 try:
1773 page = urllib2.urlopen(request).read()
1774 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1775 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1776 return
1777
1778 # Extract video identifiers
1779 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1780 video_id = mobj.group(1)
1781 if video_id not in already_seen:
1782 video_ids.append(video_id)
1783 already_seen.add(video_id)
1784 if len(video_ids) == n:
1785 # Specified n videos reached
1786 for id in video_ids:
1787 self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
1788 return
1789
1790 if re.search(self._MORE_PAGES_INDICATOR, page) is None:
1791 for id in video_ids:
1792 self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
1793 return
1794
1795 pagenum = pagenum + 1
1796
1797class YahooSearchIE(InfoExtractor):
1798 """Information Extractor for Yahoo! Video search queries."""
1799 _VALID_QUERY = r'yvsearch(\d+|all)?:[\s\S]+'
1800 _TEMPLATE_URL = 'http://video.yahoo.com/search/?p=%s&o=%s'
1801 _VIDEO_INDICATOR = r'href="http://video\.yahoo\.com/watch/([0-9]+/[0-9]+)"'
1802 _MORE_PAGES_INDICATOR = r'\s*Next'
1803 _yahoo_ie = None
1804 _max_yahoo_results = 1000
1805
1806 def __init__(self, yahoo_ie, downloader=None):
1807 InfoExtractor.__init__(self, downloader)
1808 self._yahoo_ie = yahoo_ie
1809
1810 @staticmethod
1811 def suitable(url):
1812 return (re.match(YahooSearchIE._VALID_QUERY, url) is not None)
1813
1814 def report_download_page(self, query, pagenum):
1815 """Report attempt to download playlist page with given number."""
1816 query = query.decode(preferredencoding())
1817 self._downloader.to_stdout(u'[video.yahoo] query "%s": Downloading page %s' % (query, pagenum))
1818
1819 def _real_initialize(self):
1820 self._yahoo_ie.initialize()
1821
1822 def _real_extract(self, query):
1823 mobj = re.match(self._VALID_QUERY, query)
1824 if mobj is None:
1825 self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
1826 return
1827
1828 prefix, query = query.split(':')
1829 prefix = prefix[8:]
1830 query = query.encode('utf-8')
1831 if prefix == '':
1832 self._download_n_results(query, 1)
1833 return
1834 elif prefix == 'all':
1835 self._download_n_results(query, self._max_yahoo_results)
1836 return
1837 else:
1838 try:
1839 n = long(prefix)
1840 if n <= 0:
1841 self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
1842 return
1843 elif n > self._max_yahoo_results:
1844 self._downloader.to_stderr(u'WARNING: yvsearch returns max %i results (you requested %i)' % (self._max_yahoo_results, n))
1845 n = self._max_yahoo_results
1846 self._download_n_results(query, n)
1847 return
1848 except ValueError: # parsing prefix as integer fails
1849 self._download_n_results(query, 1)
1850 return
1851
1852 def _download_n_results(self, query, n):
1853 """Downloads a specified number of results for a query"""
1854
1855 video_ids = []
1856 already_seen = set()
1857 pagenum = 1
1858
1859 while True:
1860 self.report_download_page(query, pagenum)
1861 result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1862 request = urllib2.Request(result_url, None, std_headers)
1863 try:
1864 page = urllib2.urlopen(request).read()
1865 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1866 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1867 return
1868
1869 # Extract video identifiers
1870 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1871 video_id = mobj.group(1)
1872 if video_id not in already_seen:
1873 video_ids.append(video_id)
1874 already_seen.add(video_id)
1875 if len(video_ids) == n:
1876 # Specified n videos reached
1877 for id in video_ids:
1878 self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
1879 return
1880
1881 if re.search(self._MORE_PAGES_INDICATOR, page) is None:
1882 for id in video_ids:
1883 self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
1884 return
1885
1886 pagenum = pagenum + 1
1887
0c2dc87d
RG
1888class YoutubePlaylistIE(InfoExtractor):
1889 """Information Extractor for YouTube playlists."""
1890
9177ce4d 1891 _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists)\?.*?p=|user/.*?/user/)([^&]+).*'
72ac78b8 1892 _TEMPLATE_URL = 'http://www.youtube.com/view_play_list?p=%s&page=%s&gl=US&hl=en'
0c2dc87d 1893 _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
ce5cafea 1894 _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
0c2dc87d
RG
1895 _youtube_ie = None
1896
1897 def __init__(self, youtube_ie, downloader=None):
1898 InfoExtractor.__init__(self, downloader)
1899 self._youtube_ie = youtube_ie
1900
1901 @staticmethod
1902 def suitable(url):
1903 return (re.match(YoutubePlaylistIE._VALID_URL, url) is not None)
1904
1905 def report_download_page(self, playlist_id, pagenum):
1906 """Report attempt to download playlist page with given number."""
3aaf887e 1907 self._downloader.to_stdout(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum))
0c2dc87d
RG
1908
1909 def _real_initialize(self):
1910 self._youtube_ie.initialize()
1911
1912 def _real_extract(self, url):
1913 # Extract playlist id
1914 mobj = re.match(self._VALID_URL, url)
1915 if mobj is None:
147753eb 1916 self._downloader.trouble(u'ERROR: invalid url: %s' % url)
6f21f686 1917 return
0c2dc87d
RG
1918
1919 # Download playlist pages
1920 playlist_id = mobj.group(1)
1921 video_ids = []
1922 pagenum = 1
1923
1924 while True:
1925 self.report_download_page(playlist_id, pagenum)
1926 request = urllib2.Request(self._TEMPLATE_URL % (playlist_id, pagenum), None, std_headers)
1927 try:
1928 page = urllib2.urlopen(request).read()
1929 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
147753eb 1930 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
6f21f686 1931 return
0c2dc87d
RG
1932
1933 # Extract video identifiers
27d98b6e 1934 ids_in_page = []
0c2dc87d 1935 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
27d98b6e
RG
1936 if mobj.group(1) not in ids_in_page:
1937 ids_in_page.append(mobj.group(1))
1938 video_ids.extend(ids_in_page)
0c2dc87d 1939
ce5cafea 1940 if re.search(self._MORE_PAGES_INDICATOR, page) is None:
0c2dc87d
RG
1941 break
1942 pagenum = pagenum + 1
1943
204c9398
RG
1944 playliststart = self._downloader.params.get('playliststart', 1)
1945 playliststart -= 1 #our arrays are zero-based but the playlist is 1-based
1946 if playliststart > 0:
1947 video_ids = video_ids[playliststart:]
1948
0c2dc87d 1949 for id in video_ids:
6f21f686
RG
1950 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1951 return
0c2dc87d 1952
c39c05cd
A
class YoutubeUserIE(InfoExtractor):
	"""Information Extractor for YouTube users.

	Accepts youtube.com/user/<name> URLs, fetches the user's gdata
	feed (a single page; no pagination is performed) and delegates
	each found video to the YoutubeIE instance given at construction.
	"""

	_VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/user/(.*)'
	_TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
	# NOTE(review): greedy (.*) swallows everything to the end of the
	# matched text, so the captured "id" is likely far more than the
	# bare video id — the author flagged it XXX; confirm against an
	# actual gdata feed before relying on it.
	_VIDEO_INDICATOR = r'http://gdata.youtube.com/feeds/api/videos/(.*)' # XXX Fix this.
	_youtube_ie = None

	def __init__(self, youtube_ie, downloader=None):
		InfoExtractor.__init__(self, downloader)
		self._youtube_ie = youtube_ie

	@staticmethod
	def suitable(url):
		# Class is referenced by name (not self) so this works unbound.
		return (re.match(YoutubeUserIE._VALID_URL, url) is not None)

	def report_download_page(self, username):
		"""Report attempt to download user page."""
		self._downloader.to_stdout(u'[youtube] user %s: Downloading page ' % (username))

	def _real_initialize(self):
		# Only the delegate YouTube extractor needs initialization (login etc.).
		self._youtube_ie.initialize()

	def _real_extract(self, url):
		# Extract username
		mobj = re.match(self._VALID_URL, url)
		if mobj is None:
			self._downloader.trouble(u'ERROR: invalid url: %s' % url)
			return

		# Download user page
		username = mobj.group(1)
		video_ids = []
		pagenum = 1

		self.report_download_page(username)
		request = urllib2.Request(self._TEMPLATE_URL % (username), None, std_headers)
		try:
			page = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
			return

		# Extract video identifiers, skipping duplicates within the page.
		ids_in_page = []

		for mobj in re.finditer(self._VIDEO_INDICATOR, page):
			if mobj.group(1) not in ids_in_page:
				ids_in_page.append(mobj.group(1))
		video_ids.extend(ids_in_page)

		# Honor --playlist-start, same convention as YoutubePlaylistIE.
		playliststart = self._downloader.params.get('playliststart', 1)
		playliststart = playliststart-1 #our arrays are zero-based but the playlist is 1-based
		if playliststart > 0:
			video_ids = video_ids[playliststart:]

		# Delegate each video to the YouTube extractor.
		for id in video_ids:
			self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
		return
2012
65cd34c5
RG
class PostProcessor(object):
	"""Base class for post-processing steps.

	Instances are registered on a downloader via its
	add_post_processor() method.  After every successful download the
	downloader walks its chain of PostProcessors, calling run() on
	each: the first one receives the initial information dictionary,
	and every subsequent one receives whatever the previous run()
	returned.  A return value of None stops the chain.

	Like InfoExtractor objects, PostProcessors take part in a "mutual
	registration" scheme with their downloader.
	"""

	_downloader = None

	def __init__(self, downloader=None):
		self._downloader = downloader

	def set_downloader(self, downloader):
		"""Sets the downloader for this PP."""
		self._downloader = downloader

	def run(self, information):
		"""Process one downloaded file.

		"information" is a dictionary shaped like the ones produced by
		InfoExtractors, extended with a "filepath" key pointing at the
		downloaded file.

		Return None to stop the postprocessing chain, or an
		information dictionary (possibly with some fields changed) to
		pass along to the next PostProcessor.  Implementations may
		also raise PostProcessingError, which the calling downloader
		will handle.

		This base implementation is a no-op that forwards the
		dictionary unchanged.
		"""
		return information
2058
2059### MAIN PROGRAM ###
4fa74b52
RG
# Program entry point: option parsing, extractor/downloader wiring and
# the actual download loop.  Everything is wrapped in one try block so
# the top-level exception handlers below can turn known error types
# into clean exit codes/messages.
if __name__ == '__main__':
	try:
		# Modules needed only when running the main program
		import getpass
		import optparse

		# Function to update the program file with the latest version from bitbucket.org
		def update_self(downloader, filename):
			# Note: downloader only used for options
			if not os.access (filename, os.W_OK):
				sys.exit('ERROR: no write permissions on %s' % filename)

			downloader.to_stdout('Updating to latest stable version...')
			# NOTE(review): the update is fetched over plain http with no
			# integrity check — the downloaded script is written verbatim
			# over this file.
			latest_url = 'http://bitbucket.org/rg3/youtube-dl/raw/tip/LATEST_VERSION'
			latest_version = urllib.urlopen(latest_url).read().strip()
			prog_url = 'http://bitbucket.org/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version
			newcontent = urllib.urlopen(prog_url).read()
			stream = open(filename, 'w')
			stream.write(newcontent)
			stream.close()
			downloader.to_stdout('Updated to version %s' % latest_version)

		# General configuration
		# The second install_opener replaces the first; both proxy and
		# cookie handling end up going through the default handlers.
		urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler()))
		urllib2.install_opener(urllib2.build_opener(urllib2.HTTPCookieProcessor()))
		socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)

		# Parse command line
		# conflict_handler='resolve' lets -h/-v below override optparse's
		# built-in help/version options.
		parser = optparse.OptionParser(
			usage='Usage: %prog [options] url...',
			version='2010.07.24',
			conflict_handler='resolve',
		)

		parser.add_option('-h', '--help',
				action='help', help='print this help text and exit')
		parser.add_option('-v', '--version',
				action='version', help='print program version and exit')
		parser.add_option('-U', '--update',
				action='store_true', dest='update_self', help='update this program to latest stable version')
		parser.add_option('-i', '--ignore-errors',
				action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
		parser.add_option('-r', '--rate-limit',
				dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)')
		parser.add_option('-R', '--retries',
				dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10)
		parser.add_option('--playlist-start',
				dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1)

		authentication = optparse.OptionGroup(parser, 'Authentication Options')
		authentication.add_option('-u', '--username',
				dest='username', metavar='USERNAME', help='account username')
		authentication.add_option('-p', '--password',
				dest='password', metavar='PASSWORD', help='account password')
		authentication.add_option('-n', '--netrc',
				action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
		parser.add_option_group(authentication)

		video_format = optparse.OptionGroup(parser, 'Video Format Options')
		video_format.add_option('-f', '--format',
				action='store', dest='format', metavar='FORMAT', help='video format code')
		video_format.add_option('-m', '--mobile-version',
				action='store_const', dest='format', help='alias for -f 17', const='17')
		video_format.add_option('--all-formats',
				action='store_const', dest='format', help='download all available video formats', const='-1')
		video_format.add_option('--max-quality',
				action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download')
		video_format.add_option('-b', '--best-quality',
				action='store_true', dest='bestquality', help='download the best video quality (DEPRECATED)')
		parser.add_option_group(video_format)

		verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
		verbosity.add_option('-q', '--quiet',
				action='store_true', dest='quiet', help='activates quiet mode', default=False)
		verbosity.add_option('-s', '--simulate',
				action='store_true', dest='simulate', help='do not download video', default=False)
		verbosity.add_option('-g', '--get-url',
				action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
		verbosity.add_option('-e', '--get-title',
				action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
		verbosity.add_option('--get-thumbnail',
				action='store_true', dest='getthumbnail', help='simulate, quiet but print thumbnail URL', default=False)
		verbosity.add_option('--get-description',
				action='store_true', dest='getdescription', help='simulate, quiet but print video description', default=False)
		verbosity.add_option('--no-progress',
				action='store_true', dest='noprogress', help='do not print progress bar', default=False)
		parser.add_option_group(verbosity)

		filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
		filesystem.add_option('-t', '--title',
				action='store_true', dest='usetitle', help='use title in file name', default=False)
		filesystem.add_option('-l', '--literal',
				action='store_true', dest='useliteral', help='use literal title in file name', default=False)
		filesystem.add_option('-o', '--output',
				dest='outtmpl', metavar='TEMPLATE', help='output filename template')
		filesystem.add_option('-a', '--batch-file',
				dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
		filesystem.add_option('-w', '--no-overwrites',
				action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
		filesystem.add_option('-c', '--continue',
				action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False)
		parser.add_option_group(filesystem)

		(opts, args) = parser.parse_args()

		# Batch file verification
		# URLs from the batch file (or stdin when '-') are prepended to
		# the ones given on the command line; blank lines are dropped.
		batchurls = []
		if opts.batchfile is not None:
			try:
				if opts.batchfile == '-':
					batchfd = sys.stdin
				else:
					batchfd = open(opts.batchfile, 'r')
				batchurls = batchfd.readlines()
				batchurls = [x.strip() for x in batchurls]
				batchurls = [x for x in batchurls if len(x) > 0]
			except IOError:
				sys.exit(u'ERROR: batch file could not be read')
		all_urls = batchurls + args

		# Conflicting, missing and erroneous options
		if opts.bestquality:
			print >>sys.stderr, u'\nWARNING: -b/--best-quality IS DEPRECATED AS IT IS THE DEFAULT BEHAVIOR NOW\n'
		if opts.usenetrc and (opts.username is not None or opts.password is not None):
			parser.error(u'using .netrc conflicts with giving username/password')
		if opts.password is not None and opts.username is None:
			parser.error(u'account username missing')
		if opts.outtmpl is not None and (opts.useliteral or opts.usetitle):
			parser.error(u'using output template conflicts with using title or literal title')
		if opts.usetitle and opts.useliteral:
			parser.error(u'using title conflicts with using literal title')
		if opts.username is not None and opts.password is None:
			# Prompt interactively rather than requiring -p on the command line.
			opts.password = getpass.getpass(u'Type account password and press return:')
		if opts.ratelimit is not None:
			# parse_bytes turns e.g. "50k" into a byte count; None means unparsable.
			numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
			if numeric_limit is None:
				parser.error(u'invalid rate limit specified')
			opts.ratelimit = numeric_limit
		if opts.retries is not None:
			try:
				opts.retries = long(opts.retries)
			except (TypeError, ValueError), err:
				parser.error(u'invalid retry count specified')
		if opts.playliststart is not None:
			try:
				opts.playliststart = long(opts.playliststart)
			except (TypeError, ValueError), err:
				parser.error(u'invalid playlist page specified')

		# Information extractors
		youtube_ie = YoutubeIE()
		metacafe_ie = MetacafeIE(youtube_ie)
		dailymotion_ie = DailymotionIE()
		youtube_pl_ie = YoutubePlaylistIE(youtube_ie)
		youtube_user_ie = YoutubeUserIE(youtube_ie)
		youtube_search_ie = YoutubeSearchIE(youtube_ie)
		google_ie = GoogleIE()
		google_search_ie = GoogleSearchIE(google_ie)
		photobucket_ie = PhotobucketIE()
		yahoo_ie = YahooIE()
		yahoo_search_ie = YahooSearchIE(yahoo_ie)
		generic_ie = GenericIE()

		# File downloader
		# Any of the "print only" options implies both quiet and simulate.
		# The outtmpl expression picks the first applicable template via
		# short-circuiting "and"/"or" chains.
		fd = FileDownloader({
			'usenetrc': opts.usenetrc,
			'username': opts.username,
			'password': opts.password,
			'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription),
			'forceurl': opts.geturl,
			'forcetitle': opts.gettitle,
			'forcethumbnail': opts.getthumbnail,
			'forcedescription': opts.getdescription,
			'simulate': (opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription),
			'format': opts.format,
			'format_limit': opts.format_limit,
			'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
				or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s')
				or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s')
				or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s')
				or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
				or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
				or u'%(id)s.%(ext)s'),
			'ignoreerrors': opts.ignoreerrors,
			'ratelimit': opts.ratelimit,
			'nooverwrites': opts.nooverwrites,
			'retries': opts.retries,
			'continuedl': opts.continue_dl,
			'noprogress': opts.noprogress,
			'playliststart': opts.playliststart,
			})
		# Registration order matters: more specific extractors must be
		# tried before the plain YoutubeIE / site IEs they delegate to.
		fd.add_info_extractor(youtube_search_ie)
		fd.add_info_extractor(youtube_pl_ie)
		fd.add_info_extractor(youtube_user_ie)
		fd.add_info_extractor(metacafe_ie)
		fd.add_info_extractor(dailymotion_ie)
		fd.add_info_extractor(youtube_ie)
		fd.add_info_extractor(google_ie)
		fd.add_info_extractor(google_search_ie)
		fd.add_info_extractor(photobucket_ie)
		fd.add_info_extractor(yahoo_ie)
		fd.add_info_extractor(yahoo_search_ie)

		# This must come last since it's the
		# fallback if none of the others work
		fd.add_info_extractor(generic_ie)

		# Update version
		if opts.update_self:
			update_self(fd, sys.argv[0])

		# Maybe do nothing
		if len(all_urls) < 1:
			if not opts.update_self:
				parser.error(u'you must provide at least one URL')
			else:
				# -U with no URLs: updating was the whole job.
				sys.exit()
		retcode = fd.download(all_urls)
		sys.exit(retcode)

	except DownloadError:
		sys.exit(1)
	except SameFileError:
		sys.exit(u'ERROR: fixed output name but more than one file to download')
	except KeyboardInterrupt:
		sys.exit(u'\nERROR: Interrupted by user')