jfr.im git - yt-dlp.git/blame_incremental

... / ...

Commit	Line	Data
#!/usr/bin/env python
# -*- coding: utf-8 -*-

# People credited as authors of this script (the names contain non-ASCII
# characters, hence the source encoding declaration above).
__author__  = (
    'Ricardo Garcia Gonzalez',
    'Danny Colligan',
    'Benjamin Johnson',
    'Vasyl\' Vavrychuk',
    'Witold Baryluk',
    'Paweł Paprota',
    'Gergely Imreh',
    'Rogério Brito',
    'Philipp Hagemeister',
    'Sören Schulze',
    )

__license__ = 'Public Domain'
# Release identifier, written as a date (YYYY.MM.DD).
__version__ = '2011.09.13'

# NOTE(review): presumably the location the self-update feature downloads
# the latest script from -- the code using it is outside this section.
UPDATE_URL = 'https://raw.github.com/phihag/youtube-dl/master/youtube-dl'
	21
	22	import cookielib
	23	import datetime
	24	import gzip
	25	import htmlentitydefs
	26	import httplib
	27	import locale
	28	import math
	29	import netrc
	30	import os
	31	import os.path
	32	import re
	33	import socket
	34	import string
	35	import subprocess
	36	import sys
	37	import time
	38	import urllib
	39	import urllib2
	40	import warnings
	41	import zlib
	42
	43	if os.name == 'nt':
	44	import ctypes
	45
	46	try:
	47	import email.utils
	48	except ImportError: # Python 2.4
	49	import email.Utils
	50	try:
	51	import cStringIO as StringIO
	52	except ImportError:
	53	import StringIO
	54
	55	# parse_qs was moved from the cgi module to the urlparse module recently.
	56	try:
	57	from urlparse import parse_qs
	58	except ImportError:
	59	from cgi import parse_qs
	60
	61	try:
	62	import lxml.etree
	63	except ImportError:
	64	pass # Handled below
	65
	66	try:
	67	import xml.etree.ElementTree
	68	except ImportError: # Python<2.5
	69	pass # Not officially supported, but let it slip
	70
	71	std_headers = {
	72	'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:5.0.1) Gecko/20100101 Firefox/5.0.1',
	73	'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
	74	'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,/;q=0.8',
	75	'Accept-Encoding': 'gzip, deflate',
	76	'Accept-Language': 'en-us,en;q=0.5',
	77	}
	78
	79	simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii')
	80
	81	try:
	82	import json
	83	except ImportError: # Python <2.6, use trivialjson (https://github.com/phihag/trivialjson):
	84	import re
	85	class json(object):
	86	@staticmethod
	87	def loads(s):
	88	s = s.decode('UTF-8')
	89	def raiseError(msg, i):
	90	raise ValueError(msg + ' at position ' + str(i) + ' of ' + repr(s) + ': ' + repr(s[i:]))
	91	def skipSpace(i, expectMore=True):
	92	while i < len(s) and s[i] in ' \t\r\n':
	93	i += 1
	94	if expectMore:
	95	if i >= len(s):
	96	raiseError('Premature end', i)
	97	return i
	98	def decodeEscape(match):
	99	esc = match.group(1)
	100	_STATIC = {
	101	'"': '"',
	102	'\\': '\\',
	103	'/': '/',
	104	'b': unichr(0x8),
	105	'f': unichr(0xc),
	106	'n': '\n',
	107	'r': '\r',
	108	't': '\t',
	109	}
	110	if esc in _STATIC:
	111	return _STATIC[esc]
	112	if esc[0] == 'u':
	113	if len(esc) == 1+4:
	114	return unichr(int(esc[1:5], 16))
	115	if len(esc) == 5+6 and esc[5:7] == '\\u':
	116	hi = int(esc[1:5], 16)
	117	low = int(esc[7:11], 16)
	118	return unichr((hi - 0xd800) * 0x400 + low - 0xdc00 + 0x10000)
	119	raise ValueError('Unknown escape ' + str(esc))
	120	def parseString(i):
	121	i += 1
	122	e = i
	123	while True:
	124	e = s.index('"', e)
	125	bslashes = 0
	126	while s[e-bslashes-1] == '\\':
	127	bslashes += 1
	128	if bslashes % 2 == 1:
	129	e += 1
	130	continue
	131	break
	132	rexp = re.compile(r'\\(u[dD][89aAbB][0-9a-fA-F]{2}\\u[0-9a-fA-F]{4}\|u[0-9a-fA-F]{4}\|.\|$)')
	133	stri = rexp.sub(decodeEscape, s[i:e])
	134	return (e+1,stri)
	135	def parseObj(i):
	136	i += 1
	137	res = {}
	138	i = skipSpace(i)
	139	if s[i] == '}': # Empty dictionary
	140	return (i+1,res)
	141	while True:
	142	if s[i] != '"':
	143	raiseError('Expected a string object key', i)
	144	i,key = parseString(i)
	145	i = skipSpace(i)
	146	if i >= len(s) or s[i] != ':':
	147	raiseError('Expected a colon', i)
	148	i,val = parse(i+1)
	149	res[key] = val
	150	i = skipSpace(i)
	151	if s[i] == '}':
	152	return (i+1, res)
	153	if s[i] != ',':
	154	raiseError('Expected comma or closing curly brace', i)
	155	i = skipSpace(i+1)
	156	def parseArray(i):
	157	res = []
	158	i = skipSpace(i+1)
	159	if s[i] == ']': # Empty array
	160	return (i+1,res)
	161	while True:
	162	i,val = parse(i)
	163	res.append(val)
	164	i = skipSpace(i) # Raise exception if premature end
	165	if s[i] == ']':
	166	return (i+1, res)
	167	if s[i] != ',':
	168	raiseError('Expected a comma or closing bracket', i)
	169	i = skipSpace(i+1)
	170	def parseDiscrete(i):
	171	for k,v in {'true': True, 'false': False, 'null': None}.items():
	172	if s.startswith(k, i):
	173	return (i+len(k), v)
	174	raiseError('Not a boolean (or null)', i)
	175	def parseNumber(i):
	176	mobj = re.match('^(-?(0\|[1-9][0-9])(\.[0-9])?([eE][+-]?[0-9]+)?)', s[i:])
	177	if mobj is None:
	178	raiseError('Not a number', i)
	179	nums = mobj.group(1)
	180	if '.' in nums or 'e' in nums or 'E' in nums:
	181	return (i+len(nums), float(nums))
	182	return (i+len(nums), int(nums))
	183	CHARMAP = {'{': parseObj, '[': parseArray, '"': parseString, 't': parseDiscrete, 'f': parseDiscrete, 'n': parseDiscrete}
	184	def parse(i):
	185	i = skipSpace(i)
	186	i,res = CHARMAP.get(s[i], parseNumber)(i)
	187	i = skipSpace(i, False)
	188	return (i,res)
	189	i,res = parse(0)
	190	if i < len(s):
	191	raise ValueError('Extra data at end of input (index ' + str(i) + ' of ' + repr(s) + ': ' + repr(s[i:]) + ')')
	192	return res
	193
	194	def preferredencoding():
	195	"""Get preferred encoding.
	196
	197	Returns the best encoding scheme for the system, based on
	198	locale.getpreferredencoding() and some further tweaks.
	199	"""
	200	def yield_preferredencoding():
	201	try:
	202	pref = locale.getpreferredencoding()
	203	u'TEST'.encode(pref)
	204	except:
	205	pref = 'UTF-8'
	206	while True:
	207	yield pref
	208	return yield_preferredencoding().next()
	209
	210
	211	def htmlentity_transform(matchobj):
	212	"""Transforms an HTML entity to a Unicode character.
	213
	214	This function receives a match object and is intended to be used with
	215	the re.sub() function.
	216	"""
	217	entity = matchobj.group(1)
	218
	219	# Known non-numeric HTML entity
	220	if entity in htmlentitydefs.name2codepoint:
	221	return unichr(htmlentitydefs.name2codepoint[entity])
	222
	223	# Unicode character
	224	mobj = re.match(ur'(?u)#(x?\d+)', entity)
	225	if mobj is not None:
	226	numstr = mobj.group(1)
	227	if numstr.startswith(u'x'):
	228	base = 16
	229	numstr = u'0%s' % numstr
	230	else:
	231	base = 10
	232	return unichr(long(numstr, base))
	233
	234	# Unknown entity in name, return its literal representation
	235	return (u'&%s;' % entity)
	236
	237
	238	def sanitize_title(utitle):
	239	"""Sanitizes a video title so it could be used as part of a filename."""
	240	utitle = re.sub(ur'(?u)&(.+?);', htmlentity_transform, utitle)
	241	return utitle.replace(unicode(os.sep), u'%')
	242
	243
	244	def sanitize_open(filename, open_mode):
	245	"""Try to open the given filename, and slightly tweak it if this fails.
	246
	247	Attempts to open the given filename. If this fails, it tries to change
	248	the filename slightly, step by step, until it's either able to open it
	249	or it fails and raises a final exception, like the standard open()
	250	function.
	251
	252	It returns the tuple (stream, definitive_file_name).
	253	"""
	254	try:
	255	if filename == u'-':
	256	if sys.platform == 'win32':
	257	import msvcrt
	258	msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
	259	return (sys.stdout, filename)
	260	stream = open(filename, open_mode)
	261	return (stream, filename)
	262	except (IOError, OSError), err:
	263	# In case of error, try to remove win32 forbidden chars
	264	filename = re.sub(ur'[/<>:"\\|\?\*]', u'#', filename)
	265
	266	# An exception here should be caught in the caller
	267	stream = open(filename, open_mode)
	268	return (stream, filename)
	269
	270
	271	def timeconvert(timestr):
	272	"""Convert RFC 2822 defined time string into system timestamp"""
	273	timestamp = None
	274	timetuple = email.utils.parsedate_tz(timestr)
	275	if timetuple is not None:
	276	timestamp = email.utils.mktime_tz(timetuple)
	277	return timestamp
	278
	279
	280	class DownloadError(Exception):
	281	"""Download Error exception.
	282
	283	This exception may be thrown by FileDownloader objects if they are not
	284	configured to continue on errors. They will contain the appropriate
	285	error message.
	286	"""
	287	pass
	288
	289
	290	class SameFileError(Exception):
	291	"""Same File exception.
	292
	293	This exception will be thrown by FileDownloader objects if they detect
	294	multiple files would have to be downloaded to the same file on disk.
	295	"""
	296	pass
	297
	298
	299	class PostProcessingError(Exception):
	300	"""Post Processing exception.
	301
	302	This exception may be raised by PostProcessor's .run() method to
	303	indicate an error in the postprocessing task.
	304	"""
	305	pass
	306
	307
	308	class UnavailableVideoError(Exception):
	309	"""Unavailable Format exception.
	310
	311	This exception will be thrown when a video is requested
	312	in a format that is not available for that video.
	313	"""
	314	pass
	315
	316
	317	class ContentTooShortError(Exception):
	318	"""Content Too Short exception.
	319
	320	This exception may be raised by FileDownloader objects when a file they
	321	download is too small for what the server announced first, indicating
	322	the connection was probably interrupted.
	323	"""
	324	# Both in bytes
	325	downloaded = None
	326	expected = None
	327
	328	def __init__(self, downloaded, expected):
	329	self.downloaded = downloaded
	330	self.expected = expected
	331
	332
	333	class YoutubeDLHandler(urllib2.HTTPHandler):
	334	"""Handler for HTTP requests and responses.
	335
	336	This class, when installed with an OpenerDirector, automatically adds
	337	the standard headers to every HTTP request and handles gzipped and
	338	deflated responses from web servers. If compression is to be avoided in
	339	a particular request, the original request in the program code only has
	340	to include the HTTP header "Youtubedl-No-Compression", which will be
	341	removed before making the real request.
	342
	343	Part of this code was copied from:
	344
	345	http://techknack.net/python-urllib2-handlers/
	346
	347	Andrew Rowls, the author of that code, agreed to release it to the
	348	public domain.
	349	"""
	350
	351	@staticmethod
	352	def deflate(data):
	353	try:
	354	return zlib.decompress(data, -zlib.MAX_WBITS)
	355	except zlib.error:
	356	return zlib.decompress(data)
	357
	358	@staticmethod
	359	def addinfourl_wrapper(stream, headers, url, code):
	360	if hasattr(urllib2.addinfourl, 'getcode'):
	361	return urllib2.addinfourl(stream, headers, url, code)
	362	ret = urllib2.addinfourl(stream, headers, url)
	363	ret.code = code
	364	return ret
	365
	366	def http_request(self, req):
	367	for h in std_headers:
	368	if h in req.headers:
	369	del req.headers[h]
	370	req.add_header(h, std_headers[h])
	371	if 'Youtubedl-no-compression' in req.headers:
	372	if 'Accept-encoding' in req.headers:
	373	del req.headers['Accept-encoding']
	374	del req.headers['Youtubedl-no-compression']
	375	return req
	376
	377	def http_response(self, req, resp):
	378	old_resp = resp
	379	# gzip
	380	if resp.headers.get('Content-encoding', '') == 'gzip':
	381	gz = gzip.GzipFile(fileobj=StringIO.StringIO(resp.read()), mode='r')
	382	resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
	383	resp.msg = old_resp.msg
	384	# deflate
	385	if resp.headers.get('Content-encoding', '') == 'deflate':
	386	gz = StringIO.StringIO(self.deflate(resp.read()))
	387	resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
	388	resp.msg = old_resp.msg
	389	return resp
	390
	391
	392	class FileDownloader(object):
	393	"""File Downloader class.
	394
	395	File downloader objects are the ones responsible of downloading the
	396	actual video file and writing it to disk if the user has requested
	397	it, among some other tasks. In most cases there should be one per
	398	program. As, given a video URL, the downloader doesn't know how to
	399	extract all the needed information, task that InfoExtractors do, it
	400	has to pass the URL to one of them.
	401
	402	For this, file downloader objects have a method that allows
	403	InfoExtractors to be registered in a given order. When it is passed
	404	a URL, the file downloader handles it to the first InfoExtractor it
	405	finds that reports being able to handle it. The InfoExtractor extracts
	406	all the information about the video or videos the URL refers to, and
	407	asks the FileDownloader to process the video information, possibly
	408	downloading the video.
	409
	410	File downloaders accept a lot of parameters. In order not to saturate
	411	the object constructor with arguments, it receives a dictionary of
	412	options instead. These options are available through the params
	413	attribute for the InfoExtractors to use. The FileDownloader also
	414	registers itself as the downloader in charge for the InfoExtractors
	415	that are added to it, so this is a "mutual registration".
	416
	417	Available options:
	418
	419	username: Username for authentication purposes.
	420	password: Password for authentication purposes.
	421	usenetrc: Use netrc for authentication instead.
	422	quiet: Do not print messages to stdout.
	423	forceurl: Force printing final URL.
	424	forcetitle: Force printing title.
	425	forcethumbnail: Force printing thumbnail URL.
	426	forcedescription: Force printing description.
	427	forcefilename: Force printing final filename.
	428	simulate: Do not download the video files.
	429	format: Video format code.
	430	format_limit: Highest quality format to try.
	431	outtmpl: Template for output names.
	432	ignoreerrors: Do not stop on download errors.
	433	ratelimit: Download speed limit, in bytes/sec.
	434	nooverwrites: Prevent overwriting files.
	435	retries: Number of times to retry for HTTP error 5xx
	436	continuedl: Try to continue downloads if possible.
	437	noprogress: Do not print the progress bar.
	438	playliststart: Playlist item to start at.
	439	playlistend: Playlist item to end at.
	440	logtostderr: Log messages to stderr instead of stdout.
	441	consoletitle: Display progress in console window's titlebar.
	442	nopart: Do not use temporary .part files.
	443	updatetime: Use the Last-modified header to set output file timestamps.
	444	writedescription: Write the video description to a .description file
	445	writeinfojson: Write the video description to a .info.json file
	446	"""
	447
	448	params = None
	449	_ies = []
	450	_pps = []
	451	_download_retcode = None
	452	_num_downloads = None
	453	_screen_file = None
	454
	455	def __init__(self, params):
	456	"""Create a FileDownloader object with the given options."""
	457	self._ies = []
	458	self._pps = []
	459	self._download_retcode = 0
	460	self._num_downloads = 0
	461	self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
	462	self.params = params
	463
	464	@staticmethod
	465	def format_bytes(bytes):
	466	if bytes is None:
	467	return 'N/A'
	468	if type(bytes) is str:
	469	bytes = float(bytes)
	470	if bytes == 0.0:
	471	exponent = 0
	472	else:
	473	exponent = long(math.log(bytes, 1024.0))
	474	suffix = 'bkMGTPEZY'[exponent]
	475	converted = float(bytes) / float(1024 ** exponent)
	476	return '%.2f%s' % (converted, suffix)
	477
	478	@staticmethod
	479	def calc_percent(byte_counter, data_len):
	480	if data_len is None:
	481	return '---.-%'
	482	return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))
	483
	484	@staticmethod
	485	def calc_eta(start, now, total, current):
	486	if total is None:
	487	return '--:--'
	488	dif = now - start
	489	if current == 0 or dif < 0.001: # One millisecond
	490	return '--:--'
	491	rate = float(current) / dif
	492	eta = long((float(total) - float(current)) / rate)
	493	(eta_mins, eta_secs) = divmod(eta, 60)
	494	if eta_mins > 99:
	495	return '--:--'
	496	return '%02d:%02d' % (eta_mins, eta_secs)
	497
	498	@staticmethod
	499	def calc_speed(start, now, bytes):
	500	dif = now - start
	501	if bytes == 0 or dif < 0.001: # One millisecond
	502	return '%10s' % '---b/s'
	503	return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))
	504
	505	@staticmethod
	506	def best_block_size(elapsed_time, bytes):
	507	new_min = max(bytes / 2.0, 1.0)
	508	new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
	509	if elapsed_time < 0.001:
	510	return long(new_max)
	511	rate = bytes / elapsed_time
	512	if rate > new_max:
	513	return long(new_max)
	514	if rate < new_min:
	515	return long(new_min)
	516	return long(rate)
	517
	518	@staticmethod
	519	def parse_bytes(bytestr):
	520	"""Parse a string indicating a byte quantity into a long integer."""
	521	matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
	522	if matchobj is None:
	523	return None
	524	number = float(matchobj.group(1))
	525	multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
	526	return long(round(number * multiplier))
	527
    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list.

        The extractor is also told that this object is its downloader, via
        its set_downloader() method.
        """
        self._ies.append(ie)
        ie.set_downloader(self)
	532
    def add_post_processor(self, pp):
        """Add a PostProcessor object to the end of the chain.

        The postprocessor is also told that this object is its downloader,
        via its set_downloader() method.
        """
        self._pps.append(pp)
        pp.set_downloader(self)
	537
	538	def to_screen(self, message, skip_eol=False, ignore_encoding_errors=False):
	539	"""Print message to stdout if not in quiet mode."""
	540	try:
	541	if not self.params.get('quiet', False):
	542	terminator = [u'\n', u''][skip_eol]
	543	print >>self._screen_file, (u'%s%s' % (message, terminator)).encode(preferredencoding()),
	544	self._screen_file.flush()
	545	except (UnicodeEncodeError), err:
	546	if not ignore_encoding_errors:
	547	raise
	548
    def to_stderr(self, message):
        """Print message to stderr.

        The message is encoded with preferredencoding() and is printed
        regardless of the 'quiet' option.
        """
        print >>sys.stderr, message.encode(preferredencoding())
	552
    def to_cons_title(self, message):
        """Set console/terminal window title to message.

        Does nothing unless the 'consoletitle' option is set.
        """
        if not self.params.get('consoletitle', False):
            return
        if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
            # c_wchar_p() might not be necessary if `message` is
            # already of type unicode()
            ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        elif 'TERM' in os.environ:
            # ESC ] 0 ; <text> BEL is written to stderr; presumably the
            # xterm-style "set window title" sequence -- confirm against
            # the terminals that must be supported
            sys.stderr.write('\033]0;%s\007' % message.encode(preferredencoding()))
	563
	564	def fixed_template(self):
	565	"""Checks if the output template is fixed."""
	566	return (re.search(ur'(?u)%$.+?$s', self.params['outtmpl']) is None)
	567
	568	def trouble(self, message=None):
	569	"""Determine action to take when a download problem appears.
	570
	571	Depending on if the downloader has been configured to ignore
	572	download errors or not, this method may throw an exception or
	573	not when errors are found, after printing the message.
	574	"""
	575	if message is not None:
	576	self.to_stderr(message)
	577	if not self.params.get('ignoreerrors', False):
	578	raise DownloadError(message)
	579	self._download_retcode = 1
	580
	581	def slow_down(self, start_time, byte_counter):
	582	"""Sleep if the download speed is over the rate limit."""
	583	rate_limit = self.params.get('ratelimit', None)
	584	if rate_limit is None or byte_counter == 0:
	585	return
	586	now = time.time()
	587	elapsed = now - start_time
	588	if elapsed <= 0.0:
	589	return
	590	speed = float(byte_counter) / elapsed
	591	if speed > rate_limit:
	592	time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
	593
	594	def temp_name(self, filename):
	595	"""Returns a temporary filename for the given filename."""
	596	if self.params.get('nopart', False) or filename == u'-' or \
	597	(os.path.exists(filename) and not os.path.isfile(filename)):
	598	return filename
	599	return filename + u'.part'
	600
	601	def undo_temp_name(self, filename):
	602	if filename.endswith(u'.part'):
	603	return filename[:-len(u'.part')]
	604	return filename
	605
	606	def try_rename(self, old_filename, new_filename):
	607	try:
	608	if old_filename == new_filename:
	609	return
	610	os.rename(old_filename, new_filename)
	611	except (IOError, OSError), err:
	612	self.trouble(u'ERROR: unable to rename file')
	613
	614	def try_utime(self, filename, last_modified_hdr):
	615	"""Try to set the last-modified time of the given file."""
	616	if last_modified_hdr is None:
	617	return
	618	if not os.path.isfile(filename):
	619	return
	620	timestr = last_modified_hdr
	621	if timestr is None:
	622	return
	623	filetime = timeconvert(timestr)
	624	if filetime is None:
	625	return
	626	try:
	627	os.utime(filename, (time.time(), filetime))
	628	except:
	629	pass
	630
    def report_writedescription(self, descfn):
        """Report that the description file descfn is being written.

        Encoding errors while printing the name are ignored.
        """
        self.to_screen(u'[info] Writing video description to: %s' % descfn, ignore_encoding_errors=True)
	634
    def report_writeinfojson(self, infofn):
        """Report the JSON metadata file name infofn.

        process_info() calls this before it attempts to write the file.
        Encoding errors while printing the name are ignored.
        """
        self.to_screen(u'[info] Video description metadata as JSON to: %s' % infofn, ignore_encoding_errors=True)
	638
    def report_destination(self, filename):
        """Report destination filename.

        Encoding errors while printing the name are ignored.
        """
        self.to_screen(u'[download] Destination: %s' % filename, ignore_encoding_errors=True)
	642
	643	def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
	644	"""Report download progress."""
	645	if self.params.get('noprogress', False):
	646	return
	647	self.to_screen(u'\r[download] %s of %s at %s ETA %s' %
	648	(percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
	649	self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
	650	(percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
	651
    def report_resuming_byte(self, resume_len):
        """Report attempt to resume at given byte.

        resume_len is the byte offset at which the download resumes.
        """
        self.to_screen(u'[download] Resuming download at byte %s' % resume_len)
	655
    def report_retry(self, count, retries):
        """Report retry in case of HTTP error 5xx.

        count is the number of the attempt about to be made and retries
        the maximum number of retries; both are formatted as integers.
        """
        self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))
	659
    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded.

        If the file name cannot be encoded for output, a generic message
        without the name is printed instead.
        """
        try:
            self.to_screen(u'[download] %s has already been downloaded' % file_name)
        except (UnicodeEncodeError), err:
            self.to_screen(u'[download] The file has already been downloaded')
	666
    def report_unable_to_resume(self):
        """Report it was impossible to resume download."""
        # Called by _do_download() when it starts the download over
        self.to_screen(u'[download] Unable to resume')
	670
	671	def report_finish(self):
	672	"""Report download finished."""
	673	if self.params.get('noprogress', False):
	674	self.to_screen(u'[download] Download completed')
	675	else:
	676	self.to_screen(u'')
	677
    def increment_downloads(self):
        """Increment the ordinal that assigns a number to each file."""
        # prepare_filename() exposes this counter as the 'autonumber'
        # template field
        self._num_downloads += 1
	681
	682	def prepare_filename(self, info_dict):
	683	"""Generate the output filename."""
	684	try:
	685	template_dict = dict(info_dict)
	686	template_dict['epoch'] = unicode(long(time.time()))
	687	template_dict['autonumber'] = unicode('%05d' % self._num_downloads)
	688	filename = self.params['outtmpl'] % template_dict
	689	return filename
	690	except (ValueError, KeyError), err:
	691	self.trouble(u'ERROR: invalid system charset or erroneous output template')
	692	return None
	693
    def process_info(self, info_dict):
        """Process a single dictionary returned by an InfoExtractor.

        In simulate mode only the requested "force" fields are printed.
        Otherwise the target directory is created if needed, the optional
        description and JSON metadata files are written, the video is
        downloaded and, on success, the postprocessing chain is run.

        The keys read from info_dict here are 'title', 'url', 'thumbnail',
        'description' and 'player_url'.

        Most failures are reported through trouble(). An OSError or
        IOError during the download raises UnavailableVideoError.
        """
        filename = self.prepare_filename(info_dict)
        # Do nothing else if in simulate mode
        if self.params.get('simulate', False):
            # Forced printings
            if self.params.get('forcetitle', False):
                print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')
            if self.params.get('forceurl', False):
                print info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace')
            if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
                print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace')
            if self.params.get('forcedescription', False) and 'description' in info_dict:
                print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace')
            if self.params.get('forcefilename', False) and filename is not None:
                print filename.encode(preferredencoding(), 'xmlcharrefreplace')

            return

        # prepare_filename() has already reported the problem
        if filename is None:
            return
        if self.params.get('nooverwrites', False) and os.path.exists(filename):
            self.to_stderr(u'WARNING: file exists and will be skipped')
            return

        # Create the target directory if the template points into one
        try:
            dn = os.path.dirname(filename)
            if dn != '' and not os.path.exists(dn):
                os.makedirs(dn)
        except (OSError, IOError), err:
            self.trouble(u'ERROR: unable to create directory ' + unicode(err))
            return

        if self.params.get('writedescription', False):
            try:
                descfn = filename + '.description'
                self.report_writedescription(descfn)
                descfile = open(descfn, 'wb')
                try:
                    descfile.write(info_dict['description'].encode('utf-8'))
                finally:
                    descfile.close()
            except (OSError, IOError):
                self.trouble(u'ERROR: Cannot write description file ' + descfn)
                return

        if self.params.get('writeinfojson', False):
            infofn = filename + '.info.json'
            self.report_writeinfojson(infofn)
            # Probe for an encoder: the fallback json class defined in this
            # file only provides loads()
            try:
                json.dump
            except (NameError,AttributeError):
                self.trouble(u'ERROR: No JSON encoder found. Update to Python 2.6+, setup a json module, or leave out --write-info-json.')
                return
            try:
                infof = open(infofn, 'wb')
                try:
                    json.dump(info_dict, infof)
                finally:
                    infof.close()
            except (OSError, IOError):
                self.trouble(u'ERROR: Cannot write metadata to JSON file ' + infofn)
                return

        try:
            success = self._do_download(filename, info_dict['url'].encode('utf-8'), info_dict.get('player_url', None))
        # NOTE(review): urllib2.URLError derives from IOError in Python 2,
        # so this clause looks like it also intercepts the URLErrors named
        # in the next one -- confirm the intended order
        except (OSError, IOError), err:
            raise UnavailableVideoError
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self.trouble(u'ERROR: unable to download video data: %s' % str(err))
            return
        except (ContentTooShortError, ), err:
            self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
            return

        if success:
            try:
                self.post_process(filename, info_dict)
            except (PostProcessingError), err:
                self.trouble(u'ERROR: postprocessing: %s' % str(err))
                return
	775
	776	def download(self, url_list):
	777	"""Download a given list of URLs."""
	778	if len(url_list) > 1 and self.fixed_template():
	779	raise SameFileError(self.params['outtmpl'])
	780
	781	for url in url_list:
	782	suitable_found = False
	783	for ie in self._ies:
	784	# Go to next InfoExtractor if not suitable
	785	if not ie.suitable(url):
	786	continue
	787
	788	# Suitable InfoExtractor found
	789	suitable_found = True
	790
	791	# Extract information from URL and process it
	792	ie.extract(url)
	793
	794	# Suitable InfoExtractor had been found; go to next URL
	795	break
	796
	797	if not suitable_found:
	798	self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url)
	799
	800	return self._download_retcode
	801
	802	def post_process(self, filename, ie_info):
	803	"""Run the postprocessing chain on the given file."""
	804	info = dict(ie_info)
	805	info['filepath'] = filename
	806	for pp in self._pps:
	807	info = pp.run(info)
	808	if info is None:
	809	break
	810
    def _download_with_rtmpdump(self, filename, url, player_url):
        """Download url to filename by running the external rtmpdump tool.

        player_url, when not None, is passed to rtmpdump with -W.

        Returns True when rtmpdump finally exits with code 0 (the temporary
        file is then renamed to filename), False otherwise.
        """
        self.report_destination(filename)
        tmpfilename = self.temp_name(filename)

        # Check for rtmpdump first
        try:
            subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
        except (OSError, IOError):
            self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
            return False

        # Download using rtmpdump. rtmpdump returns exit code 2 when
        # the connection was interrupted and resuming appears to be
        # possible. This is part of rtmpdump's normal usage, AFAIK.
        basic_args = ['rtmpdump'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename]
        retval = subprocess.call(basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)])
        # Keep resuming (-e) while rtmpdump exits with code 1 or 2
        while retval == 2 or retval == 1:
            prevsize = os.path.getsize(tmpfilename)
            self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
            time.sleep(5.0) # This seems to be needed
            retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
            cursize = os.path.getsize(tmpfilename)
            # Give up when the file stopped growing and rtmpdump still
            # exits with code 1
            if prevsize == cursize and retval == 1:
                break
        if retval == 0:
            self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(tmpfilename))
            self.try_rename(tmpfilename, filename)
            return True
        else:
            self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval)
            return False
	842
	843	def _do_download(self, filename, url, player_url):
	844	# Check file already present
	845	if self.params.get('continuedl', False) and os.path.isfile(filename) and not self.params.get('nopart', False):
	846	self.report_file_already_downloaded(filename)
	847	return True
	848
	849	# Attempt to download using rtmpdump
	850	if url.startswith('rtmp'):
	851	return self._download_with_rtmpdump(filename, url, player_url)
	852
	853	tmpfilename = self.temp_name(filename)
	854	stream = None
	855	open_mode = 'wb'
	856
	857	# Do not include the Accept-Encoding header
	858	headers = {'Youtubedl-no-compression': 'True'}
	859	basic_request = urllib2.Request(url, None, headers)
	860	request = urllib2.Request(url, None, headers)
	861
	862	# Establish possible resume length
	863	if os.path.isfile(tmpfilename):
	864	resume_len = os.path.getsize(tmpfilename)
	865	else:
	866	resume_len = 0
	867
	868	# Request parameters in case of being able to resume
	869	if self.params.get('continuedl', False) and resume_len != 0:
	870	self.report_resuming_byte(resume_len)
	871	request.add_header('Range', 'bytes=%d-' % resume_len)
	872	open_mode = 'ab'
	873
	874	count = 0
	875	retries = self.params.get('retries', 0)
	876	while count <= retries:
	877	# Establish connection
	878	try:
	879	data = urllib2.urlopen(request)
	880	break
	881	except (urllib2.HTTPError, ), err:
	882	if (err.code < 500 or err.code >= 600) and err.code != 416:
	883	# Unexpected HTTP error
	884	raise
	885	elif err.code == 416:
	886	# Unable to resume (requested range not satisfiable)
	887	try:
	888	# Open the connection again without the range header
	889	data = urllib2.urlopen(basic_request)
	890	content_length = data.info()['Content-Length']
	891	except (urllib2.HTTPError, ), err:
	892	if err.code < 500 or err.code >= 600:
	893	raise
	894	else:
	895	# Examine the reported length
	896	if (content_length is not None and
	897	(resume_len - 100 < long(content_length) < resume_len + 100)):
	898	# The file had already been fully downloaded.
	899	# Explanation to the above condition: in issue #175 it was revealed that
	900	# YouTube sometimes adds or removes a few bytes from the end of the file,
	901	# changing the file size slightly and causing problems for some users. So
	902	# I decided to implement a suggested change and consider the file
	903	# completely downloaded if the file size differs less than 100 bytes from
	904	# the one in the hard drive.
	905	self.report_file_already_downloaded(filename)
	906	self.try_rename(tmpfilename, filename)
	907	return True
	908	else:
	909	# The length does not match, we start the download over
	910	self.report_unable_to_resume()
	911	open_mode = 'wb'
	912	break
	913	# Retry
	914	count += 1
	915	if count <= retries:
	916	self.report_retry(count, retries)
	917
	918	if count > retries:
	919	self.trouble(u'ERROR: giving up after %s retries' % retries)
	920	return False
	921
	922	data_len = data.info().get('Content-length', None)
	923	if data_len is not None:
	924	data_len = long(data_len) + resume_len
	925	data_len_str = self.format_bytes(data_len)
	926	byte_counter = 0 + resume_len
	927	block_size = 1024
	928	start = time.time()
	929	while True:
	930	# Download and write
	931	before = time.time()
	932	data_block = data.read(block_size)
	933	after = time.time()
	934	if len(data_block) == 0:
	935	break
	936	byte_counter += len(data_block)
	937
	938	# Open file just in time
	939	if stream is None:
	940	try:
	941	(stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
	942	assert stream is not None
	943	filename = self.undo_temp_name(tmpfilename)
	944	self.report_destination(filename)
	945	except (OSError, IOError), err:
	946	self.trouble(u'ERROR: unable to open for writing: %s' % str(err))
	947	return False
	948	try:
	949	stream.write(data_block)
	950	except (IOError, OSError), err:
	951	self.trouble(u'\nERROR: unable to write data: %s' % str(err))
	952	return False
	953	block_size = self.best_block_size(after - before, len(data_block))
	954
	955	# Progress message
	956	percent_str = self.calc_percent(byte_counter, data_len)
	957	eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
	958	speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len)
	959	self.report_progress(percent_str, data_len_str, speed_str, eta_str)
	960
	961	# Apply rate limit
	962	self.slow_down(start, byte_counter - resume_len)
	963
	964	if stream is None:
	965	self.trouble(u'\nERROR: Did not get any data blocks')
	966	return False
	967	stream.close()
	968	self.report_finish()
	969	if data_len is not None and byte_counter != data_len:
	970	raise ContentTooShortError(byte_counter, long(data_len))
	971	self.try_rename(tmpfilename, filename)
	972
	973	# Update file modification time
	974	if self.params.get('updatetime', True):
	975	self.try_utime(filename, data.info().get('last-modified', None))
	976
	977	return True
	978
	979
	980	class InfoExtractor(object):
	981	"""Information Extractor class.
	982
	983	Information extractors are the classes that, given a URL, extract
	984	information from the video (or videos) the URL refers to. This
	985	information includes the real video URL, the video title and simplified
	986	title, author and others. The information is stored in a dictionary
	987	which is then passed to the FileDownloader. The FileDownloader
	988	processes this information possibly downloading the video to the file
	989	system, among other possible outcomes. The dictionaries must include
	990	the following fields:
	991
	992	id: Video identifier.
	993	url: Final video URL.
	994	uploader: Nickname of the video uploader.
	995	title: Literal title.
	996	stitle: Simplified title.
	997	ext: Video filename extension.
	998	format: Video format.
	999	player_url: SWF Player URL (may be None).
	1000
	1001	The following fields are optional. Their primary purpose is to allow
	1002	youtube-dl to serve as the backend for a video search function, such
	1003	as the one in youtube2mp3. They are only used when their respective
	1004	forced printing functions are called:
	1005
	1006	thumbnail: Full URL to a video thumbnail image.
	1007	description: One-line video description.
	1008
	1009	Subclasses of this one should re-define the _real_initialize() and
	1010	_real_extract() methods, as well as the suitable() static method.
	1011	Probably, they should also be instantiated and added to the main
	1012	downloader.
	1013	"""
	1014
	1015	_ready = False
	1016	_downloader = None
	1017
	1018	def __init__(self, downloader=None):
	1019	"""Constructor. Receives an optional downloader."""
	1020	self._ready = False
	1021	self.set_downloader(downloader)
	1022
	1023	@staticmethod
	1024	def suitable(url):
	1025	"""Receives a URL and returns True if suitable for this IE."""
	1026	return False
	1027
	1028	def initialize(self):
	1029	"""Initializes an instance (authentication, etc)."""
	1030	if not self._ready:
	1031	self._real_initialize()
	1032	self._ready = True
	1033
	1034	def extract(self, url):
	1035	"""Extracts URL information and returns it in list of dicts."""
	1036	self.initialize()
	1037	return self._real_extract(url)
	1038
	1039	def set_downloader(self, downloader):
	1040	"""Sets the downloader for this IE."""
	1041	self._downloader = downloader
	1042
	1043	def _real_initialize(self):
	1044	"""Real initialization process. Redefine in subclasses."""
	1045	pass
	1046
	1047	def _real_extract(self, url):
	1048	"""Real extraction process. Redefine in subclasses."""
	1049	pass
	1050
	1051
	1052	class YoutubeIE(InfoExtractor):
	1053	"""Information extractor for youtube.com."""
	1054
	1055	_VALID_URL = r'^((?:https?://)?(?:youtu\.be/\|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?:(?:(?:v\|embed\|e)/)\|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?\|#!?)(?:.+&)?v=))?)?([0-9A-Za-z_-]+)(?(1).+)?$'
	1056	_LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
	1057	_LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en'
	1058	_AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
	1059	_NETRC_MACHINE = 'youtube'
	1060	# Listed in order of quality
	1061	_available_formats = ['38', '37', '45', '22', '43', '35', '34', '18', '6', '5', '17', '13']
	1062	_video_extensions = {
	1063	'13': '3gp',
	1064	'17': 'mp4',
	1065	'18': 'mp4',
	1066	'22': 'mp4',
	1067	'37': 'mp4',
	1068	'38': 'video', # You actually don't know if this will be MOV, AVI or whatever
	1069	'43': 'webm',
	1070	'45': 'webm',
	1071	}
	1072
	1073	@staticmethod
	1074	def suitable(url):
	1075	return (re.match(YoutubeIE._VALID_URL, url) is not None)
	1076
	1077	def report_lang(self):
	1078	"""Report attempt to set language."""
	1079	self._downloader.to_screen(u'[youtube] Setting language')
	1080
	1081	def report_login(self):
	1082	"""Report attempt to log in."""
	1083	self._downloader.to_screen(u'[youtube] Logging in')
	1084
	1085	def report_age_confirmation(self):
	1086	"""Report attempt to confirm age."""
	1087	self._downloader.to_screen(u'[youtube] Confirming age')
	1088
	1089	def report_video_webpage_download(self, video_id):
	1090	"""Report attempt to download video webpage."""
	1091	self._downloader.to_screen(u'[youtube] %s: Downloading video webpage' % video_id)
	1092
	1093	def report_video_info_webpage_download(self, video_id):
	1094	"""Report attempt to download video info webpage."""
	1095	self._downloader.to_screen(u'[youtube] %s: Downloading video info webpage' % video_id)
	1096
	1097	def report_information_extraction(self, video_id):
	1098	"""Report attempt to extract video information."""
	1099	self._downloader.to_screen(u'[youtube] %s: Extracting video information' % video_id)
	1100
	1101	def report_unavailable_format(self, video_id, format):
	1102	"""Report extracted video URL."""
	1103	self._downloader.to_screen(u'[youtube] %s: Format %s not available' % (video_id, format))
	1104
	1105	def report_rtmp_download(self):
	1106	"""Indicate the download will use the RTMP protocol."""
	1107	self._downloader.to_screen(u'[youtube] RTMP download detected')
	1108
	1109	def _real_initialize(self):
	1110	if self._downloader is None:
	1111	return
	1112
	1113	username = None
	1114	password = None
	1115	downloader_params = self._downloader.params
	1116
	1117	# Attempt to use provided username and password or .netrc data
	1118	if downloader_params.get('username', None) is not None:
	1119	username = downloader_params['username']
	1120	password = downloader_params['password']
	1121	elif downloader_params.get('usenetrc', False):
	1122	try:
	1123	info = netrc.netrc().authenticators(self._NETRC_MACHINE)
	1124	if info is not None:
	1125	username = info[0]
	1126	password = info[2]
	1127	else:
	1128	raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
	1129	except (IOError, netrc.NetrcParseError), err:
	1130	self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
	1131	return
	1132
	1133	# Set language
	1134	request = urllib2.Request(self._LANG_URL)
	1135	try:
	1136	self.report_lang()
	1137	urllib2.urlopen(request).read()
	1138	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	1139	self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err))
	1140	return
	1141
	1142	# No authentication to be performed
	1143	if username is None:
	1144	return
	1145
	1146	# Log in
	1147	login_form = {
	1148	'current_form': 'loginForm',
	1149	'next': '/',
	1150	'action_login': 'Log In',
	1151	'username': username,
	1152	'password': password,
	1153	}
	1154	request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form))
	1155	try:
	1156	self.report_login()
	1157	login_results = urllib2.urlopen(request).read()
	1158	if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
	1159	self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
	1160	return
	1161	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	1162	self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
	1163	return
	1164
	1165	# Confirm age
	1166	age_form = {
	1167	'next_url': '/',
	1168	'action_confirm': 'Confirm',
	1169	}
	1170	request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form))
	1171	try:
	1172	self.report_age_confirmation()
	1173	age_results = urllib2.urlopen(request).read()
	1174	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	1175	self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
	1176	return
	1177
	1178	def _real_extract(self, url):
	1179	# Extract video id from URL
	1180	mobj = re.match(self._VALID_URL, url)
	1181	if mobj is None:
	1182	self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
	1183	return
	1184	video_id = mobj.group(2)
	1185
	1186	# Get video webpage
	1187	self.report_video_webpage_download(video_id)
	1188	request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id)
	1189	try:
	1190	video_webpage = urllib2.urlopen(request).read()
	1191	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	1192	self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
	1193	return
	1194
	1195	# Attempt to extract SWF player URL
	1196	mobj = re.search(r'swfConfig.?"(http:\\/\\/.?watch.?-.?\.swf)"', video_webpage)
	1197	if mobj is not None:
	1198	player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
	1199	else:
	1200	player_url = None
	1201
	1202	# Get video info
	1203	self.report_video_info_webpage_download(video_id)
	1204	for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
	1205	video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
	1206	% (video_id, el_type))
	1207	request = urllib2.Request(video_info_url)
	1208	try:
	1209	video_info_webpage = urllib2.urlopen(request).read()
	1210	video_info = parse_qs(video_info_webpage)
	1211	if 'token' in video_info:
	1212	break
	1213	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	1214	self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
	1215	return
	1216	if 'token' not in video_info:
	1217	if 'reason' in video_info:
	1218	self._downloader.trouble(u'ERROR: YouTube said: %s' % video_info['reason'][0].decode('utf-8'))
	1219	else:
	1220	self._downloader.trouble(u'ERROR: "token" parameter not in video info for unknown reason')
	1221	return
	1222
	1223	# Start extracting information
	1224	self.report_information_extraction(video_id)
	1225
	1226	# uploader
	1227	if 'author' not in video_info:
	1228	self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
	1229	return
	1230	video_uploader = urllib.unquote_plus(video_info['author'][0])
	1231
	1232	# title
	1233	if 'title' not in video_info:
	1234	self._downloader.trouble(u'ERROR: unable to extract video title')
	1235	return
	1236	video_title = urllib.unquote_plus(video_info['title'][0])
	1237	video_title = video_title.decode('utf-8')
	1238	video_title = sanitize_title(video_title)
	1239
	1240	# simplified title
	1241	simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
	1242	simple_title = simple_title.strip(ur'_')
	1243
	1244	# thumbnail image
	1245	if 'thumbnail_url' not in video_info:
	1246	self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
	1247	video_thumbnail = ''
	1248	else: # don't panic if we can't find it
	1249	video_thumbnail = urllib.unquote_plus(video_info['thumbnail_url'][0])
	1250
	1251	# upload date
	1252	upload_date = u'NA'
	1253	mobj = re.search(r'id="eow-date.?>(.?)</span>', video_webpage, re.DOTALL)
	1254	if mobj is not None:
	1255	upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
	1256	format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y']
	1257	for expression in format_expressions:
	1258	try:
	1259	upload_date = datetime.datetime.strptime(upload_date, expression).strftime('%Y%m%d')
	1260	except:
	1261	pass
	1262
	1263	# description
	1264	try:
	1265	lxml.etree
	1266	except NameError:
	1267	video_description = u'No description available.'
	1268	if self._downloader.params.get('forcedescription', False) or self._downloader.params.get('writedescription', False):
	1269	mobj = re.search(r'<meta name="description" content="(.)"(?:\s/)?>', video_webpage)
	1270	if mobj is not None:
	1271	video_description = mobj.group(1).decode('utf-8')
	1272	else:
	1273	html_parser = lxml.etree.HTMLParser(encoding='utf-8')
	1274	vwebpage_doc = lxml.etree.parse(StringIO.StringIO(video_webpage), html_parser)
	1275	video_description = u''.join(vwebpage_doc.xpath('id("eow-description")//text()'))
	1276	# TODO use another parser
	1277
	1278	# token
	1279	video_token = urllib.unquote_plus(video_info['token'][0])
	1280
	1281	# Decide which formats to download
	1282	req_format = self._downloader.params.get('format', None)
	1283
	1284	if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
	1285	self.report_rtmp_download()
	1286	video_url_list = [(None, video_info['conn'][0])]
	1287	elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1:
	1288	url_data_strs = video_info['url_encoded_fmt_stream_map'][0].split(',')
	1289	url_data = [parse_qs(uds) for uds in url_data_strs]
	1290	url_data = filter(lambda ud: 'itag' in ud and 'url' in ud, url_data)
	1291	url_map = dict((ud['itag'][0], ud['url'][0]) for ud in url_data)
	1292
	1293	format_limit = self._downloader.params.get('format_limit', None)
	1294	if format_limit is not None and format_limit in self._available_formats:
	1295	format_list = self._available_formats[self._available_formats.index(format_limit):]
	1296	else:
	1297	format_list = self._available_formats
	1298	existing_formats = [x for x in format_list if x in url_map]
	1299	if len(existing_formats) == 0:
	1300	self._downloader.trouble(u'ERROR: no known formats available for video')
	1301	return
	1302	if req_format is None:
	1303	video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
	1304	elif req_format == '-1':
	1305	video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
	1306	else:
	1307	# Specific format
	1308	if req_format not in url_map:
	1309	self._downloader.trouble(u'ERROR: requested format not available')
	1310	return
	1311	video_url_list = [(req_format, url_map[req_format])] # Specific format
	1312	else:
	1313	self._downloader.trouble(u'ERROR: no conn or url_encoded_fmt_stream_map information found in video info')
	1314	return
	1315
	1316	for format_param, video_real_url in video_url_list:
	1317	# At this point we have a new video
	1318	self._downloader.increment_downloads()
	1319
	1320	# Extension
	1321	video_extension = self._video_extensions.get(format_param, 'flv')
	1322
	1323	try:
	1324	# Process video information
	1325	self._downloader.process_info({
	1326	'id': video_id.decode('utf-8'),
	1327	'url': video_real_url.decode('utf-8'),
	1328	'uploader': video_uploader.decode('utf-8'),
	1329	'upload_date': upload_date,
	1330	'title': video_title,
	1331	'stitle': simple_title,
	1332	'ext': video_extension.decode('utf-8'),
	1333	'format': (format_param is None and u'NA' or format_param.decode('utf-8')),
	1334	'thumbnail': video_thumbnail.decode('utf-8'),
	1335	'description': video_description,
	1336	'player_url': player_url,
	1337	})
	1338	except UnavailableVideoError, err:
	1339	self._downloader.trouble(u'\nERROR: unable to download video')
	1340
	1341
	1342	class MetacafeIE(InfoExtractor):
	1343	"""Information Extractor for metacafe.com."""
	1344
	1345	_VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
	1346	_DISCLAIMER = 'http://www.metacafe.com/family_filter/'
	1347	_FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
	1348	_youtube_ie = None
	1349
	1350	def __init__(self, youtube_ie, downloader=None):
	1351	InfoExtractor.__init__(self, downloader)
	1352	self._youtube_ie = youtube_ie
	1353
	1354	@staticmethod
	1355	def suitable(url):
	1356	return (re.match(MetacafeIE._VALID_URL, url) is not None)
	1357
	1358	def report_disclaimer(self):
	1359	"""Report disclaimer retrieval."""
	1360	self._downloader.to_screen(u'[metacafe] Retrieving disclaimer')
	1361
	1362	def report_age_confirmation(self):
	1363	"""Report attempt to confirm age."""
	1364	self._downloader.to_screen(u'[metacafe] Confirming age')
	1365
	1366	def report_download_webpage(self, video_id):
	1367	"""Report webpage download."""
	1368	self._downloader.to_screen(u'[metacafe] %s: Downloading webpage' % video_id)
	1369
	1370	def report_extraction(self, video_id):
	1371	"""Report information extraction."""
	1372	self._downloader.to_screen(u'[metacafe] %s: Extracting information' % video_id)
	1373
	1374	def _real_initialize(self):
	1375	# Retrieve disclaimer
	1376	request = urllib2.Request(self._DISCLAIMER)
	1377	try:
	1378	self.report_disclaimer()
	1379	disclaimer = urllib2.urlopen(request).read()
	1380	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	1381	self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % str(err))
	1382	return
	1383
	1384	# Confirm age
	1385	disclaimer_form = {
	1386	'filters': '0',
	1387	'submit': "Continue - I'm over 18",
	1388	}
	1389	request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form))
	1390	try:
	1391	self.report_age_confirmation()
	1392	disclaimer = urllib2.urlopen(request).read()
	1393	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	1394	self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
	1395	return
	1396
	1397	def _real_extract(self, url):
	1398	# Extract id and simplified title from URL
	1399	mobj = re.match(self._VALID_URL, url)
	1400	if mobj is None:
	1401	self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
	1402	return
	1403
	1404	video_id = mobj.group(1)
	1405
	1406	# Check if video comes from YouTube
	1407	mobj2 = re.match(r'^yt-(.*)$', video_id)
	1408	if mobj2 is not None:
	1409	self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % mobj2.group(1))
	1410	return
	1411
	1412	# At this point we have a new video
	1413	self._downloader.increment_downloads()
	1414
	1415	simple_title = mobj.group(2).decode('utf-8')
	1416
	1417	# Retrieve video webpage to extract further information
	1418	request = urllib2.Request('http://www.metacafe.com/watch/%s/' % video_id)
	1419	try:
	1420	self.report_download_webpage(video_id)
	1421	webpage = urllib2.urlopen(request).read()
	1422	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	1423	self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
	1424	return
	1425
	1426	# Extract URL, uploader and title from webpage
	1427	self.report_extraction(video_id)
	1428	mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
	1429	if mobj is not None:
	1430	mediaURL = urllib.unquote(mobj.group(1))
	1431	video_extension = mediaURL[-3:]
	1432
	1433	# Extract gdaKey if available
	1434	mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage)
	1435	if mobj is None:
	1436	video_url = mediaURL
	1437	else:
	1438	gdaKey = mobj.group(1)
	1439	video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)
	1440	else:
	1441	mobj = re.search(r' name="flashvars" value="(.*?)"', webpage)
	1442	if mobj is None:
	1443	self._downloader.trouble(u'ERROR: unable to extract media URL')
	1444	return
	1445	vardict = parse_qs(mobj.group(1))
	1446	if 'mediaData' not in vardict:
	1447	self._downloader.trouble(u'ERROR: unable to extract media URL')
	1448	return
	1449	mobj = re.search(r'"mediaURL":"(http.?)","key":"(.?)"', vardict['mediaData'][0])
	1450	if mobj is None:
	1451	self._downloader.trouble(u'ERROR: unable to extract media URL')
	1452	return
	1453	mediaURL = mobj.group(1).replace('\\/', '/')
	1454	video_extension = mediaURL[-3:]
	1455	video_url = '%s?__gda__=%s' % (mediaURL, mobj.group(2))
	1456
	1457	mobj = re.search(r'(?im)<title>(.*) - Video</title>', webpage)
	1458	if mobj is None:
	1459	self._downloader.trouble(u'ERROR: unable to extract title')
	1460	return
	1461	video_title = mobj.group(1).decode('utf-8')
	1462	video_title = sanitize_title(video_title)
	1463
	1464	mobj = re.search(r'(?ms)By:\s<a .?>(.+?)<', webpage)
	1465	if mobj is None:
	1466	self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
	1467	return
	1468	video_uploader = mobj.group(1)
	1469
	1470	try:
	1471	# Process video information
	1472	self._downloader.process_info({
	1473	'id': video_id.decode('utf-8'),
	1474	'url': video_url.decode('utf-8'),
	1475	'uploader': video_uploader.decode('utf-8'),
	1476	'upload_date': u'NA',
	1477	'title': video_title,
	1478	'stitle': simple_title,
	1479	'ext': video_extension.decode('utf-8'),
	1480	'format': u'NA',
	1481	'player_url': None,
	1482	})
	1483	except UnavailableVideoError:
	1484	self._downloader.trouble(u'\nERROR: unable to download video')
	1485
	1486
	1487	class DailymotionIE(InfoExtractor):
	1488	"""Information Extractor for Dailymotion"""
	1489
	1490	_VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^_/]+)_([^/]+)'
	1491
	1492	def __init__(self, downloader=None):
	1493	InfoExtractor.__init__(self, downloader)
	1494
	1495	@staticmethod
	1496	def suitable(url):
	1497	return (re.match(DailymotionIE._VALID_URL, url) is not None)
	1498
	1499	def report_download_webpage(self, video_id):
	1500	"""Report webpage download."""
	1501	self._downloader.to_screen(u'[dailymotion] %s: Downloading webpage' % video_id)
	1502
	1503	def report_extraction(self, video_id):
	1504	"""Report information extraction."""
	1505	self._downloader.to_screen(u'[dailymotion] %s: Extracting information' % video_id)
	1506
	1507	def _real_initialize(self):
	1508	return
	1509
	1510	def _real_extract(self, url):
	1511	# Extract id and simplified title from URL
	1512	mobj = re.match(self._VALID_URL, url)
	1513	if mobj is None:
	1514	self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
	1515	return
	1516
	1517	# At this point we have a new video
	1518	self._downloader.increment_downloads()
	1519	video_id = mobj.group(1)
	1520
	1521	simple_title = mobj.group(2).decode('utf-8')
	1522	video_extension = 'flv'
	1523
	1524	# Retrieve video webpage to extract further information
	1525	request = urllib2.Request(url)
	1526	request.add_header('Cookie', 'family_filter=off')
	1527	try:
	1528	self.report_download_webpage(video_id)
	1529	webpage = urllib2.urlopen(request).read()
	1530	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	1531	self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
	1532	return
	1533
	1534	# Extract URL, uploader and title from webpage
	1535	self.report_extraction(video_id)
	1536	mobj = re.search(r'(?i)addVariable$\"sequence\"\s,\s\"([^\"]+?)\"$', webpage)
	1537	if mobj is None:
	1538	self._downloader.trouble(u'ERROR: unable to extract media URL')
	1539	return
	1540	sequence = urllib.unquote(mobj.group(1))
	1541	mobj = re.search(r',\"sdURL\"\:\"([^\"]+?)\",', sequence)
	1542	if mobj is None:
	1543	self._downloader.trouble(u'ERROR: unable to extract media URL')
	1544	return
	1545	mediaURL = urllib.unquote(mobj.group(1)).replace('\\', '')
	1546
	1547	# if needed add http://www.dailymotion.com/ if relative URL
	1548
	1549	video_url = mediaURL
	1550
	1551	mobj = re.search(r'(?im)<title>Dailymotion\s-\s(.+)\s-\s[^<]+?</title>', webpage)
	1552	if mobj is None:
	1553	self._downloader.trouble(u'ERROR: unable to extract title')
	1554	return
	1555	video_title = mobj.group(1).decode('utf-8')
	1556	video_title = sanitize_title(video_title)
	1557
	1558	mobj = re.search(r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a></span>', webpage)
	1559	if mobj is None:
	1560	self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
	1561	return
	1562	video_uploader = mobj.group(1)
	1563
	1564	try:
	1565	# Process video information
	1566	self._downloader.process_info({
	1567	'id': video_id.decode('utf-8'),
	1568	'url': video_url.decode('utf-8'),
	1569	'uploader': video_uploader.decode('utf-8'),
	1570	'upload_date': u'NA',
	1571	'title': video_title,
	1572	'stitle': simple_title,
	1573	'ext': video_extension.decode('utf-8'),
	1574	'format': u'NA',
	1575	'player_url': None,
	1576	})
	1577	except UnavailableVideoError:
	1578	self._downloader.trouble(u'\nERROR: unable to download video')
	1579
	1580
	1581	class GoogleIE(InfoExtractor):
	1582	"""Information extractor for video.google.com."""
	1583
	1584	_VALID_URL = r'(?:http://)?video\.google\.(?:com(?:\.au)?\|co\.(?:uk\|jp\|kr\|cr)\|ca\|de\|es\|fr\|it\|nl\|pl)/videoplay\?docid=([^\&]+).*'
	1585
	1586	def __init__(self, downloader=None):
	1587	InfoExtractor.__init__(self, downloader)
	1588
	1589	@staticmethod
	1590	def suitable(url):
	1591	return (re.match(GoogleIE._VALID_URL, url) is not None)
	1592
	1593	def report_download_webpage(self, video_id):
	1594	"""Report webpage download."""
	1595	self._downloader.to_screen(u'[video.google] %s: Downloading webpage' % video_id)
	1596
	1597	def report_extraction(self, video_id):
	1598	"""Report information extraction."""
	1599	self._downloader.to_screen(u'[video.google] %s: Extracting information' % video_id)
	1600
	1601	def _real_initialize(self):
	1602	return
	1603
	1604	def _real_extract(self, url):
	1605	# Extract id from URL
	1606	mobj = re.match(self._VALID_URL, url)
	1607	if mobj is None:
	1608	self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
	1609	return
	1610
	1611	# At this point we have a new video
	1612	self._downloader.increment_downloads()
	1613	video_id = mobj.group(1)
	1614
	1615	video_extension = 'mp4'
	1616
	1617	# Retrieve video webpage to extract further information
	1618	request = urllib2.Request('http://video.google.com/videoplay?docid=%s&hl=en&oe=utf-8' % video_id)
	1619	try:
	1620	self.report_download_webpage(video_id)
	1621	webpage = urllib2.urlopen(request).read()
	1622	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	1623	self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
	1624	return
	1625
	1626	# Extract URL, uploader, and title from webpage
	1627	self.report_extraction(video_id)
	1628	mobj = re.search(r"download_url:'([^']+)'", webpage)
	1629	if mobj is None:
	1630	video_extension = 'flv'
	1631	mobj = re.search(r"(?i)videoUrl\\x3d(.+?)\\x26", webpage)
	1632	if mobj is None:
	1633	self._downloader.trouble(u'ERROR: unable to extract media URL')
	1634	return
	1635	mediaURL = urllib.unquote(mobj.group(1))
	1636	mediaURL = mediaURL.replace('\\x3d', '\x3d')
	1637	mediaURL = mediaURL.replace('\\x26', '\x26')
	1638
	1639	video_url = mediaURL
	1640
	1641	mobj = re.search(r'<title>(.*)</title>', webpage)
	1642	if mobj is None:
	1643	self._downloader.trouble(u'ERROR: unable to extract title')
	1644	return
	1645	video_title = mobj.group(1).decode('utf-8')
	1646	video_title = sanitize_title(video_title)
	1647	simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
	1648
	1649	# Extract video description
	1650	mobj = re.search(r'<span id=short-desc-content>([^<]*)</span>', webpage)
	1651	if mobj is None:
	1652	self._downloader.trouble(u'ERROR: unable to extract video description')
	1653	return
	1654	video_description = mobj.group(1).decode('utf-8')
	1655	if not video_description:
	1656	video_description = 'No description available.'
	1657
	1658	# Extract video thumbnail
	1659	if self._downloader.params.get('forcethumbnail', False):
	1660	request = urllib2.Request('http://video.google.com/videosearch?q=%s+site:video.google.com&hl=en' % abs(int(video_id)))
	1661	try:
	1662	webpage = urllib2.urlopen(request).read()
	1663	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	1664	self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
	1665	return
	1666	mobj = re.search(r'<img class=thumbnail-img (?:.* )?src=(http.*)>', webpage)
	1667	if mobj is None:
	1668	self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
	1669	return
	1670	video_thumbnail = mobj.group(1)
	1671	else: # we need something to pass to process_info
	1672	video_thumbnail = ''
	1673
	1674	try:
	1675	# Process video information
	1676	self._downloader.process_info({
	1677	'id': video_id.decode('utf-8'),
	1678	'url': video_url.decode('utf-8'),
	1679	'uploader': u'NA',
	1680	'upload_date': u'NA',
	1681	'title': video_title,
	1682	'stitle': simple_title,
	1683	'ext': video_extension.decode('utf-8'),
	1684	'format': u'NA',
	1685	'player_url': None,
	1686	})
	1687	except UnavailableVideoError:
	1688	self._downloader.trouble(u'\nERROR: unable to download video')
	1689
	1690
	1691	class PhotobucketIE(InfoExtractor):
	1692	"""Information extractor for photobucket.com."""
	1693
	1694	_VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.[\?\&]current=(.\.flv)'
	1695
	1696	def __init__(self, downloader=None):
	1697	InfoExtractor.__init__(self, downloader)
	1698
	1699	@staticmethod
	1700	def suitable(url):
	1701	return (re.match(PhotobucketIE._VALID_URL, url) is not None)
	1702
	1703	def report_download_webpage(self, video_id):
	1704	"""Report webpage download."""
	1705	self._downloader.to_screen(u'[photobucket] %s: Downloading webpage' % video_id)
	1706
	1707	def report_extraction(self, video_id):
	1708	"""Report information extraction."""
	1709	self._downloader.to_screen(u'[photobucket] %s: Extracting information' % video_id)
	1710
	1711	def _real_initialize(self):
	1712	return
	1713
	1714	def _real_extract(self, url):
	1715	# Extract id from URL
	1716	mobj = re.match(self._VALID_URL, url)
	1717	if mobj is None:
	1718	self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
	1719	return
	1720
	1721	# At this point we have a new video
	1722	self._downloader.increment_downloads()
	1723	video_id = mobj.group(1)
	1724
	1725	video_extension = 'flv'
	1726
	1727	# Retrieve video webpage to extract further information
	1728	request = urllib2.Request(url)
	1729	try:
	1730	self.report_download_webpage(video_id)
	1731	webpage = urllib2.urlopen(request).read()
	1732	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	1733	self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
	1734	return
	1735
	1736	# Extract URL, uploader, and title from webpage
	1737	self.report_extraction(video_id)
	1738	mobj = re.search(r'<link rel="video_src" href=".*\?file=([^"]+)" />', webpage)
	1739	if mobj is None:
	1740	self._downloader.trouble(u'ERROR: unable to extract media URL')
	1741	return
	1742	mediaURL = urllib.unquote(mobj.group(1))
	1743
	1744	video_url = mediaURL
	1745
	1746	mobj = re.search(r'<title>(.) video by (.) - Photobucket</title>', webpage)
	1747	if mobj is None:
	1748	self._downloader.trouble(u'ERROR: unable to extract title')
	1749	return
	1750	video_title = mobj.group(1).decode('utf-8')
	1751	video_title = sanitize_title(video_title)
	1752	simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
	1753
	1754	video_uploader = mobj.group(2).decode('utf-8')
	1755
	1756	try:
	1757	# Process video information
	1758	self._downloader.process_info({
	1759	'id': video_id.decode('utf-8'),
	1760	'url': video_url.decode('utf-8'),
	1761	'uploader': video_uploader,
	1762	'upload_date': u'NA',
	1763	'title': video_title,
	1764	'stitle': simple_title,
	1765	'ext': video_extension.decode('utf-8'),
	1766	'format': u'NA',
	1767	'player_url': None,
	1768	})
	1769	except UnavailableVideoError:
	1770	self._downloader.trouble(u'\nERROR: unable to download video')
	1771
	1772
	1773	class YahooIE(InfoExtractor):
	1774	"""Information extractor for video.yahoo.com."""
	1775
	1776	# _VALID_URL matches all Yahoo! Video URLs
	1777	# _VPAGE_URL matches only the extractable '/watch/' URLs
	1778	_VALID_URL = r'(?:http://)?(?:[a-z]+\.)?video\.yahoo\.com/(?:watch\|network)/([0-9]+)(?:/\|\?v=)([0-9]+)(?:[#\?].*)?'
	1779	_VPAGE_URL = r'(?:http://)?video\.yahoo\.com/watch/([0-9]+)/([0-9]+)(?:[#\?].*)?'
	1780
	1781	def __init__(self, downloader=None):
	1782	InfoExtractor.__init__(self, downloader)
	1783
	1784	@staticmethod
	1785	def suitable(url):
	1786	return (re.match(YahooIE._VALID_URL, url) is not None)
	1787
	1788	def report_download_webpage(self, video_id):
	1789	"""Report webpage download."""
	1790	self._downloader.to_screen(u'[video.yahoo] %s: Downloading webpage' % video_id)
	1791
	1792	def report_extraction(self, video_id):
	1793	"""Report information extraction."""
	1794	self._downloader.to_screen(u'[video.yahoo] %s: Extracting information' % video_id)
	1795
	1796	def _real_initialize(self):
	1797	return
	1798
	1799	def _real_extract(self, url, new_video=True):
	1800	# Extract ID from URL
	1801	mobj = re.match(self._VALID_URL, url)
	1802	if mobj is None:
	1803	self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
	1804	return
	1805
	1806	# At this point we have a new video
	1807	self._downloader.increment_downloads()
	1808	video_id = mobj.group(2)
	1809	video_extension = 'flv'
	1810
	1811	# Rewrite valid but non-extractable URLs as
	1812	# extractable English language /watch/ URLs
	1813	if re.match(self._VPAGE_URL, url) is None:
	1814	request = urllib2.Request(url)
	1815	try:
	1816	webpage = urllib2.urlopen(request).read()
	1817	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	1818	self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
	1819	return
	1820
	1821	mobj = re.search(r'$"id", "([0-9]+)"$;', webpage)
	1822	if mobj is None:
	1823	self._downloader.trouble(u'ERROR: Unable to extract id field')
	1824	return
	1825	yahoo_id = mobj.group(1)
	1826
	1827	mobj = re.search(r'$"vid", "([0-9]+)"$;', webpage)
	1828	if mobj is None:
	1829	self._downloader.trouble(u'ERROR: Unable to extract vid field')
	1830	return
	1831	yahoo_vid = mobj.group(1)
	1832
	1833	url = 'http://video.yahoo.com/watch/%s/%s' % (yahoo_vid, yahoo_id)
	1834	return self._real_extract(url, new_video=False)
	1835
	1836	# Retrieve video webpage to extract further information
	1837	request = urllib2.Request(url)
	1838	try:
	1839	self.report_download_webpage(video_id)
	1840	webpage = urllib2.urlopen(request).read()
	1841	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	1842	self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
	1843	return
	1844
	1845	# Extract uploader and title from webpage
	1846	self.report_extraction(video_id)
	1847	mobj = re.search(r'<meta name="title" content="(.*)" />', webpage)
	1848	if mobj is None:
	1849	self._downloader.trouble(u'ERROR: unable to extract video title')
	1850	return
	1851	video_title = mobj.group(1).decode('utf-8')
	1852	simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
	1853
	1854	mobj = re.search(r'<h2 class="ti-5"><a href="http://video\.yahoo\.com/(people\|profile)/[0-9]+" beacon=".">(.)</a></h2>', webpage)
	1855	if mobj is None:
	1856	self._downloader.trouble(u'ERROR: unable to extract video uploader')
	1857	return
	1858	video_uploader = mobj.group(1).decode('utf-8')
	1859
	1860	# Extract video thumbnail
	1861	mobj = re.search(r'<link rel="image_src" href="(.*)" />', webpage)
	1862	if mobj is None:
	1863	self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
	1864	return
	1865	video_thumbnail = mobj.group(1).decode('utf-8')
	1866
	1867	# Extract video description
	1868	mobj = re.search(r'<meta name="description" content="(.*)" />', webpage)
	1869	if mobj is None:
	1870	self._downloader.trouble(u'ERROR: unable to extract video description')
	1871	return
	1872	video_description = mobj.group(1).decode('utf-8')
	1873	if not video_description:
	1874	video_description = 'No description available.'
	1875
	1876	# Extract video height and width
	1877	mobj = re.search(r'<meta name="video_height" content="([0-9]+)" />', webpage)
	1878	if mobj is None:
	1879	self._downloader.trouble(u'ERROR: unable to extract video height')
	1880	return
	1881	yv_video_height = mobj.group(1)
	1882
	1883	mobj = re.search(r'<meta name="video_width" content="([0-9]+)" />', webpage)
	1884	if mobj is None:
	1885	self._downloader.trouble(u'ERROR: unable to extract video width')
	1886	return
	1887	yv_video_width = mobj.group(1)
	1888
	1889	# Retrieve video playlist to extract media URL
	1890	# I'm not completely sure what all these options are, but we
	1891	# seem to need most of them, otherwise the server sends a 401.
	1892	yv_lg = 'R0xx6idZnW2zlrKP8xxAIR' # not sure what this represents
	1893	yv_bitrate = '700' # according to Wikipedia this is hard-coded
	1894	request = urllib2.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id +
	1895	'&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height +
	1896	'&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797')
	1897	try:
	1898	self.report_download_webpage(video_id)
	1899	webpage = urllib2.urlopen(request).read()
	1900	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	1901	self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
	1902	return
	1903
	1904	# Extract media URL from playlist XML
	1905	mobj = re.search(r'<STREAM APP="(http://.)" FULLPATH="/?(/.\.flv\?[^"]*)"', webpage)
	1906	if mobj is None:
	1907	self._downloader.trouble(u'ERROR: Unable to extract media URL')
	1908	return
	1909	video_url = urllib.unquote(mobj.group(1) + mobj.group(2)).decode('utf-8')
	1910	video_url = re.sub(r'(?u)&(.+?);', htmlentity_transform, video_url)
	1911
	1912	try:
	1913	# Process video information
	1914	self._downloader.process_info({
	1915	'id': video_id.decode('utf-8'),
	1916	'url': video_url,
	1917	'uploader': video_uploader,
	1918	'upload_date': u'NA',
	1919	'title': video_title,
	1920	'stitle': simple_title,
	1921	'ext': video_extension.decode('utf-8'),
	1922	'thumbnail': video_thumbnail.decode('utf-8'),
	1923	'description': video_description,
	1924	'thumbnail': video_thumbnail,
	1925	'player_url': None,
	1926	})
	1927	except UnavailableVideoError:
	1928	self._downloader.trouble(u'\nERROR: unable to download video')
	1929
	1930
	1931	class VimeoIE(InfoExtractor):
	1932	"""Information extractor for vimeo.com."""
	1933
	1934	# _VALID_URL matches Vimeo URLs
	1935	_VALID_URL = r'(?:https?://)?(?:(?:www\|player).)?vimeo\.com/(?:groups/[^/]+/)?(?:videos?/)?([0-9]+)'
	1936
	1937	def __init__(self, downloader=None):
	1938	InfoExtractor.__init__(self, downloader)
	1939
	1940	@staticmethod
	1941	def suitable(url):
	1942	return (re.match(VimeoIE._VALID_URL, url) is not None)
	1943
	1944	def report_download_webpage(self, video_id):
	1945	"""Report webpage download."""
	1946	self._downloader.to_screen(u'[vimeo] %s: Downloading webpage' % video_id)
	1947
	1948	def report_extraction(self, video_id):
	1949	"""Report information extraction."""
	1950	self._downloader.to_screen(u'[vimeo] %s: Extracting information' % video_id)
	1951
	1952	def _real_initialize(self):
	1953	return
	1954
	1955	def _real_extract(self, url, new_video=True):
	1956	# Extract ID from URL
	1957	mobj = re.match(self._VALID_URL, url)
	1958	if mobj is None:
	1959	self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
	1960	return
	1961
	1962	# At this point we have a new video
	1963	self._downloader.increment_downloads()
	1964	video_id = mobj.group(1)
	1965
	1966	# Retrieve video webpage to extract further information
	1967	request = urllib2.Request("http://vimeo.com/moogaloop/load/clip:%s" % video_id, None, std_headers)
	1968	try:
	1969	self.report_download_webpage(video_id)
	1970	webpage = urllib2.urlopen(request).read()
	1971	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	1972	self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
	1973	return
	1974
	1975	# Now we begin extracting as much information as we can from what we
	1976	# retrieved. First we extract the information common to all extractors,
	1977	# and latter we extract those that are Vimeo specific.
	1978	self.report_extraction(video_id)
	1979
	1980	# Extract title
	1981	mobj = re.search(r'<caption>(.*?)</caption>', webpage)
	1982	if mobj is None:
	1983	self._downloader.trouble(u'ERROR: unable to extract video title')
	1984	return
	1985	video_title = mobj.group(1).decode('utf-8')
	1986	simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
	1987
	1988	# Extract uploader
	1989	mobj = re.search(r'<uploader_url>http://vimeo.com/(.*?)</uploader_url>', webpage)
	1990	if mobj is None:
	1991	self._downloader.trouble(u'ERROR: unable to extract video uploader')
	1992	return
	1993	video_uploader = mobj.group(1).decode('utf-8')
	1994
	1995	# Extract video thumbnail
	1996	mobj = re.search(r'<thumbnail>(.*?)</thumbnail>', webpage)
	1997	if mobj is None:
	1998	self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
	1999	return
	2000	video_thumbnail = mobj.group(1).decode('utf-8')
	2001
	2002	# # Extract video description
	2003	# mobj = re.search(r'<meta property="og:description" content="(.*)" />', webpage)
	2004	# if mobj is None:
	2005	# self._downloader.trouble(u'ERROR: unable to extract video description')
	2006	# return
	2007	# video_description = mobj.group(1).decode('utf-8')
	2008	# if not video_description: video_description = 'No description available.'
	2009	video_description = 'Foo.'
	2010
	2011	# Vimeo specific: extract request signature
	2012	mobj = re.search(r'<request_signature>(.*?)</request_signature>', webpage)
	2013	if mobj is None:
	2014	self._downloader.trouble(u'ERROR: unable to extract request signature')
	2015	return
	2016	sig = mobj.group(1).decode('utf-8')
	2017
	2018	# Vimeo specific: Extract request signature expiration
	2019	mobj = re.search(r'<request_signature_expires>(.*?)</request_signature_expires>', webpage)
	2020	if mobj is None:
	2021	self._downloader.trouble(u'ERROR: unable to extract request signature expiration')
	2022	return
	2023	sig_exp = mobj.group(1).decode('utf-8')
	2024
	2025	video_url = "http://vimeo.com/moogaloop/play/clip:%s/%s/%s" % (video_id, sig, sig_exp)
	2026
	2027	try:
	2028	# Process video information
	2029	self._downloader.process_info({
	2030	'id': video_id.decode('utf-8'),
	2031	'url': video_url,
	2032	'uploader': video_uploader,
	2033	'upload_date': u'NA',
	2034	'title': video_title,
	2035	'stitle': simple_title,
	2036	'ext': u'mp4',
	2037	'thumbnail': video_thumbnail.decode('utf-8'),
	2038	'description': video_description,
	2039	'thumbnail': video_thumbnail,
	2040	'description': video_description,
	2041	'player_url': None,
	2042	})
	2043	except UnavailableVideoError:
	2044	self._downloader.trouble(u'ERROR: unable to download video')
	2045
	2046
	2047	class GenericIE(InfoExtractor):
	2048	"""Generic last-resort information extractor."""
	2049
	2050	def __init__(self, downloader=None):
	2051	InfoExtractor.__init__(self, downloader)
	2052
	2053	@staticmethod
	2054	def suitable(url):
	2055	return True
	2056
	2057	def report_download_webpage(self, video_id):
	2058	"""Report webpage download."""
	2059	self._downloader.to_screen(u'WARNING: Falling back on generic information extractor.')
	2060	self._downloader.to_screen(u'[generic] %s: Downloading webpage' % video_id)
	2061
	2062	def report_extraction(self, video_id):
	2063	"""Report information extraction."""
	2064	self._downloader.to_screen(u'[generic] %s: Extracting information' % video_id)
	2065
	2066	def _real_initialize(self):
	2067	return
	2068
	2069	def _real_extract(self, url):
	2070	# At this point we have a new video
	2071	self._downloader.increment_downloads()
	2072
	2073	video_id = url.split('/')[-1]
	2074	request = urllib2.Request(url)
	2075	try:
	2076	self.report_download_webpage(video_id)
	2077	webpage = urllib2.urlopen(request).read()
	2078	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	2079	self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
	2080	return
	2081	except ValueError, err:
	2082	# since this is the last-resort InfoExtractor, if
	2083	# this error is thrown, it'll be thrown here
	2084	self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
	2085	return
	2086
	2087	self.report_extraction(video_id)
	2088	# Start with something easy: JW Player in SWFObject
	2089	mobj = re.search(r'flashvars: [\'"](?:.&)?file=(http[^\'"&])', webpage)
	2090	if mobj is None:
	2091	# Broaden the search a little bit
	2092	mobj = re.search(r'[^A-Za-z0-9]?(?:file\|source)=(http[^\'"&]*)', webpage)
	2093	if mobj is None:
	2094	self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
	2095	return
	2096
	2097	# It's possible that one of the regexes
	2098	# matched, but returned an empty group:
	2099	if mobj.group(1) is None:
	2100	self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
	2101	return
	2102
	2103	video_url = urllib.unquote(mobj.group(1))
	2104	video_id = os.path.basename(video_url)
	2105
	2106	# here's a fun little line of code for you:
	2107	video_extension = os.path.splitext(video_id)[1][1:]
	2108	video_id = os.path.splitext(video_id)[0]
	2109
	2110	# it's tempting to parse this further, but you would
	2111	# have to take into account all the variations like
	2112	# Video Title - Site Name
	2113	# Site Name \| Video Title
	2114	# Video Title - Tagline \| Site Name
	2115	# and so on and so forth; it's just not practical
	2116	mobj = re.search(r'<title>(.*)</title>', webpage)
	2117	if mobj is None:
	2118	self._downloader.trouble(u'ERROR: unable to extract title')
	2119	return
	2120	video_title = mobj.group(1).decode('utf-8')
	2121	video_title = sanitize_title(video_title)
	2122	simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
	2123
	2124	# video uploader is domain name
	2125	mobj = re.match(r'(?:https?://)?([^/])/.', url)
	2126	if mobj is None:
	2127	self._downloader.trouble(u'ERROR: unable to extract title')
	2128	return
	2129	video_uploader = mobj.group(1).decode('utf-8')
	2130
	2131	try:
	2132	# Process video information
	2133	self._downloader.process_info({
	2134	'id': video_id.decode('utf-8'),
	2135	'url': video_url.decode('utf-8'),
	2136	'uploader': video_uploader,
	2137	'upload_date': u'NA',
	2138	'title': video_title,
	2139	'stitle': simple_title,
	2140	'ext': video_extension.decode('utf-8'),
	2141	'format': u'NA',
	2142	'player_url': None,
	2143	})
	2144	except UnavailableVideoError, err:
	2145	self._downloader.trouble(u'\nERROR: unable to download video')
	2146
	2147
	2148	class YoutubeSearchIE(InfoExtractor):
	2149	"""Information Extractor for YouTube search queries."""
	2150	_VALID_QUERY = r'ytsearch(\d+\|all)?:[\s\S]+'
	2151	_TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en'
	2152	_VIDEO_INDICATOR = r'href="/watch\?v=.+?"'
	2153	_MORE_PAGES_INDICATOR = r'(?m)>\sNext\s</a>'
	2154	_youtube_ie = None
	2155	_max_youtube_results = 1000
	2156
	2157	def __init__(self, youtube_ie, downloader=None):
	2158	InfoExtractor.__init__(self, downloader)
	2159	self._youtube_ie = youtube_ie
	2160
	2161	@staticmethod
	2162	def suitable(url):
	2163	return (re.match(YoutubeSearchIE._VALID_QUERY, url) is not None)
	2164
	2165	def report_download_page(self, query, pagenum):
	2166	"""Report attempt to download playlist page with given number."""
	2167	query = query.decode(preferredencoding())
	2168	self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
	2169
	2170	def _real_initialize(self):
	2171	self._youtube_ie.initialize()
	2172
	2173	def _real_extract(self, query):
	2174	mobj = re.match(self._VALID_QUERY, query)
	2175	if mobj is None:
	2176	self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
	2177	return
	2178
	2179	prefix, query = query.split(':')
	2180	prefix = prefix[8:]
	2181	query = query.encode('utf-8')
	2182	if prefix == '':
	2183	self._download_n_results(query, 1)
	2184	return
	2185	elif prefix == 'all':
	2186	self._download_n_results(query, self._max_youtube_results)
	2187	return
	2188	else:
	2189	try:
	2190	n = long(prefix)
	2191	if n <= 0:
	2192	self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
	2193	return
	2194	elif n > self._max_youtube_results:
	2195	self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)' % (self._max_youtube_results, n))
	2196	n = self._max_youtube_results
	2197	self._download_n_results(query, n)
	2198	return
	2199	except ValueError: # parsing prefix as integer fails
	2200	self._download_n_results(query, 1)
	2201	return
	2202
	2203	def _download_n_results(self, query, n):
	2204	"""Downloads a specified number of results for a query"""
	2205
	2206	video_ids = []
	2207	already_seen = set()
	2208	pagenum = 1
	2209
	2210	while True:
	2211	self.report_download_page(query, pagenum)
	2212	result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
	2213	request = urllib2.Request(result_url)
	2214	try:
	2215	page = urllib2.urlopen(request).read()
	2216	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	2217	self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
	2218	return
	2219
	2220	# Extract video identifiers
	2221	for mobj in re.finditer(self._VIDEO_INDICATOR, page):
	2222	video_id = page[mobj.span()[0]:mobj.span()[1]].split('=')[2][:-1]
	2223	if video_id not in already_seen:
	2224	video_ids.append(video_id)
	2225	already_seen.add(video_id)
	2226	if len(video_ids) == n:
	2227	# Specified n videos reached
	2228	for id in video_ids:
	2229	self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
	2230	return
	2231
	2232	if re.search(self._MORE_PAGES_INDICATOR, page) is None:
	2233	for id in video_ids:
	2234	self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
	2235	return
	2236
	2237	pagenum = pagenum + 1
	2238
	2239
	2240	class GoogleSearchIE(InfoExtractor):
	2241	"""Information Extractor for Google Video search queries."""
	2242	_VALID_QUERY = r'gvsearch(\d+\|all)?:[\s\S]+'
	2243	_TEMPLATE_URL = 'http://video.google.com/videosearch?q=%s+site:video.google.com&start=%s&hl=en'
	2244	_VIDEO_INDICATOR = r'videoplay\?docid=([^\&>]+)\&'
	2245	_MORE_PAGES_INDICATOR = r'<span>Next</span>'
	2246	_google_ie = None
	2247	_max_google_results = 1000
	2248
	2249	def __init__(self, google_ie, downloader=None):
	2250	InfoExtractor.__init__(self, downloader)
	2251	self._google_ie = google_ie
	2252
	2253	@staticmethod
	2254	def suitable(url):
	2255	return (re.match(GoogleSearchIE._VALID_QUERY, url) is not None)
	2256
	2257	def report_download_page(self, query, pagenum):
	2258	"""Report attempt to download playlist page with given number."""
	2259	query = query.decode(preferredencoding())
	2260	self._downloader.to_screen(u'[video.google] query "%s": Downloading page %s' % (query, pagenum))
	2261
	2262	def _real_initialize(self):
	2263	self._google_ie.initialize()
	2264
	2265	def _real_extract(self, query):
	2266	mobj = re.match(self._VALID_QUERY, query)
	2267	if mobj is None:
	2268	self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
	2269	return
	2270
	2271	prefix, query = query.split(':')
	2272	prefix = prefix[8:]
	2273	query = query.encode('utf-8')
	2274	if prefix == '':
	2275	self._download_n_results(query, 1)
	2276	return
	2277	elif prefix == 'all':
	2278	self._download_n_results(query, self._max_google_results)
	2279	return
	2280	else:
	2281	try:
	2282	n = long(prefix)
	2283	if n <= 0:
	2284	self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
	2285	return
	2286	elif n > self._max_google_results:
	2287	self._downloader.to_stderr(u'WARNING: gvsearch returns max %i results (you requested %i)' % (self._max_google_results, n))
	2288	n = self._max_google_results
	2289	self._download_n_results(query, n)
	2290	return
	2291	except ValueError: # parsing prefix as integer fails
	2292	self._download_n_results(query, 1)
	2293	return
	2294
	2295	def _download_n_results(self, query, n):
	2296	"""Downloads a specified number of results for a query"""
	2297
	2298	video_ids = []
	2299	already_seen = set()
	2300	pagenum = 1
	2301
	2302	while True:
	2303	self.report_download_page(query, pagenum)
	2304	result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
	2305	request = urllib2.Request(result_url)
	2306	try:
	2307	page = urllib2.urlopen(request).read()
	2308	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	2309	self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
	2310	return
	2311
	2312	# Extract video identifiers
	2313	for mobj in re.finditer(self._VIDEO_INDICATOR, page):
	2314	video_id = mobj.group(1)
	2315	if video_id not in already_seen:
	2316	video_ids.append(video_id)
	2317	already_seen.add(video_id)
	2318	if len(video_ids) == n:
	2319	# Specified n videos reached
	2320	for id in video_ids:
	2321	self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
	2322	return
	2323
	2324	if re.search(self._MORE_PAGES_INDICATOR, page) is None:
	2325	for id in video_ids:
	2326	self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
	2327	return
	2328
	2329	pagenum = pagenum + 1
	2330
	2331
	2332	class YahooSearchIE(InfoExtractor):
	2333	"""Information Extractor for Yahoo! Video search queries."""
	2334	_VALID_QUERY = r'yvsearch(\d+\|all)?:[\s\S]+'
	2335	_TEMPLATE_URL = 'http://video.yahoo.com/search/?p=%s&o=%s'
	2336	_VIDEO_INDICATOR = r'href="http://video\.yahoo\.com/watch/([0-9]+/[0-9]+)"'
	2337	_MORE_PAGES_INDICATOR = r'\s*Next'
	2338	_yahoo_ie = None
	2339	_max_yahoo_results = 1000
	2340
	2341	def __init__(self, yahoo_ie, downloader=None):
	2342	InfoExtractor.__init__(self, downloader)
	2343	self._yahoo_ie = yahoo_ie
	2344
	2345	@staticmethod
	2346	def suitable(url):
	2347	return (re.match(YahooSearchIE._VALID_QUERY, url) is not None)
	2348
	2349	def report_download_page(self, query, pagenum):
	2350	"""Report attempt to download playlist page with given number."""
	2351	query = query.decode(preferredencoding())
	2352	self._downloader.to_screen(u'[video.yahoo] query "%s": Downloading page %s' % (query, pagenum))
	2353
	2354	def _real_initialize(self):
	2355	self._yahoo_ie.initialize()
	2356
	2357	def _real_extract(self, query):
	2358	mobj = re.match(self._VALID_QUERY, query)
	2359	if mobj is None:
	2360	self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
	2361	return
	2362
	2363	prefix, query = query.split(':')
	2364	prefix = prefix[8:]
	2365	query = query.encode('utf-8')
	2366	if prefix == '':
	2367	self._download_n_results(query, 1)
	2368	return
	2369	elif prefix == 'all':
	2370	self._download_n_results(query, self._max_yahoo_results)
	2371	return
	2372	else:
	2373	try:
	2374	n = long(prefix)
	2375	if n <= 0:
	2376	self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
	2377	return
	2378	elif n > self._max_yahoo_results:
	2379	self._downloader.to_stderr(u'WARNING: yvsearch returns max %i results (you requested %i)' % (self._max_yahoo_results, n))
	2380	n = self._max_yahoo_results
	2381	self._download_n_results(query, n)
	2382	return
	2383	except ValueError: # parsing prefix as integer fails
	2384	self._download_n_results(query, 1)
	2385	return
	2386
	2387	def _download_n_results(self, query, n):
	2388	"""Downloads a specified number of results for a query"""
	2389
	2390	video_ids = []
	2391	already_seen = set()
	2392	pagenum = 1
	2393
	2394	while True:
	2395	self.report_download_page(query, pagenum)
	2396	result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
	2397	request = urllib2.Request(result_url)
	2398	try:
	2399	page = urllib2.urlopen(request).read()
	2400	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	2401	self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
	2402	return
	2403
	2404	# Extract video identifiers
	2405	for mobj in re.finditer(self._VIDEO_INDICATOR, page):
	2406	video_id = mobj.group(1)
	2407	if video_id not in already_seen:
	2408	video_ids.append(video_id)
	2409	already_seen.add(video_id)
	2410	if len(video_ids) == n:
	2411	# Specified n videos reached
	2412	for id in video_ids:
	2413	self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
	2414	return
	2415
	2416	if re.search(self._MORE_PAGES_INDICATOR, page) is None:
	2417	for id in video_ids:
	2418	self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
	2419	return
	2420
	2421	pagenum = pagenum + 1
	2422
	2423
	2424	class YoutubePlaylistIE(InfoExtractor):
	2425	"""Information Extractor for YouTube playlists."""
	2426
	2427	_VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list\|my_playlists\|artist\|playlist)\?.?(p\|a\|list)=\|user/.?/user/\|p/\|user/.?#[pg]/c/)([0-9A-Za-z]+)(?:/.?/([0-9A-Za-z_-]+))?.*'
	2428	_TEMPLATE_URL = 'http://www.youtube.com/%s?%s=%s&page=%s&gl=US&hl=en'
	2429	_VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
	2430	_MORE_PAGES_INDICATOR = r'(?m)>\sNext\s</a>'
	2431	_youtube_ie = None
	2432
	2433	def __init__(self, youtube_ie, downloader=None):
	2434	InfoExtractor.__init__(self, downloader)
	2435	self._youtube_ie = youtube_ie
	2436
	2437	@staticmethod
	2438	def suitable(url):
	2439	return (re.match(YoutubePlaylistIE._VALID_URL, url) is not None)
	2440
	2441	def report_download_page(self, playlist_id, pagenum):
	2442	"""Report attempt to download playlist page with given number."""
	2443	self._downloader.to_screen(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum))
	2444
	2445	def _real_initialize(self):
	2446	self._youtube_ie.initialize()
	2447
	2448	def _real_extract(self, url):
	2449	# Extract playlist id
	2450	mobj = re.match(self._VALID_URL, url)
	2451	if mobj is None:
	2452	self._downloader.trouble(u'ERROR: invalid url: %s' % url)
	2453	return
	2454
	2455	# Single video case
	2456	if mobj.group(3) is not None:
	2457	self._youtube_ie.extract(mobj.group(3))
	2458	return
	2459
	2460	# Download playlist pages
	2461	# prefix is 'p' as default for playlists but there are other types that need extra care
	2462	playlist_prefix = mobj.group(1)
	2463	if playlist_prefix == 'a':
	2464	playlist_access = 'artist'
	2465	else:
	2466	playlist_prefix = 'p'
	2467	playlist_access = 'view_play_list'
	2468	playlist_id = mobj.group(2)
	2469	video_ids = []
	2470	pagenum = 1
	2471
	2472	while True:
	2473	self.report_download_page(playlist_id, pagenum)
	2474	request = urllib2.Request(self._TEMPLATE_URL % (playlist_access, playlist_prefix, playlist_id, pagenum))
	2475	try:
	2476	page = urllib2.urlopen(request).read()
	2477	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	2478	self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
	2479	return
	2480
	2481	# Extract video identifiers
	2482	ids_in_page = []
	2483	for mobj in re.finditer(self._VIDEO_INDICATOR, page):
	2484	if mobj.group(1) not in ids_in_page:
	2485	ids_in_page.append(mobj.group(1))
	2486	video_ids.extend(ids_in_page)
	2487
	2488	if re.search(self._MORE_PAGES_INDICATOR, page) is None:
	2489	break
	2490	pagenum = pagenum + 1
	2491
	2492	playliststart = self._downloader.params.get('playliststart', 1) - 1
	2493	playlistend = self._downloader.params.get('playlistend', -1)
	2494	video_ids = video_ids[playliststart:playlistend]
	2495
	2496	for id in video_ids:
	2497	self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
	2498	return
	2499
	2500
	2501	class YoutubeUserIE(InfoExtractor):
	2502	"""Information Extractor for YouTube users."""
	2503
	2504	_VALID_URL = r'(?:(?:(?:http://)?(?:\w+\.)?youtube.com/user/)\|ytuser:)([A-Za-z0-9_-]+)'
	2505	_TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
	2506	_GDATA_PAGE_SIZE = 50
	2507	_GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d'
	2508	_VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
	2509	_youtube_ie = None
	2510
	2511	def __init__(self, youtube_ie, downloader=None):
	2512	InfoExtractor.__init__(self, downloader)
	2513	self._youtube_ie = youtube_ie
	2514
	2515	@staticmethod
	2516	def suitable(url):
	2517	return (re.match(YoutubeUserIE._VALID_URL, url) is not None)
	2518
	2519	def report_download_page(self, username, start_index):
	2520	"""Report attempt to download user page."""
	2521	self._downloader.to_screen(u'[youtube] user %s: Downloading video ids from %d to %d' %
	2522	(username, start_index, start_index + self._GDATA_PAGE_SIZE))
	2523
	2524	def _real_initialize(self):
	2525	self._youtube_ie.initialize()
	2526
	2527	def _real_extract(self, url):
	2528	# Extract username
	2529	mobj = re.match(self._VALID_URL, url)
	2530	if mobj is None:
	2531	self._downloader.trouble(u'ERROR: invalid url: %s' % url)
	2532	return
	2533
	2534	username = mobj.group(1)
	2535
	2536	# Download video ids using YouTube Data API. Result size per
	2537	# query is limited (currently to 50 videos) so we need to query
	2538	# page by page until there are no video ids - it means we got
	2539	# all of them.
	2540
	2541	video_ids = []
	2542	pagenum = 0
	2543
	2544	while True:
	2545	start_index = pagenum * self._GDATA_PAGE_SIZE + 1
	2546	self.report_download_page(username, start_index)
	2547
	2548	request = urllib2.Request(self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index))
	2549
	2550	try:
	2551	page = urllib2.urlopen(request).read()
	2552	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	2553	self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
	2554	return
	2555
	2556	# Extract video identifiers
	2557	ids_in_page = []
	2558
	2559	for mobj in re.finditer(self._VIDEO_INDICATOR, page):
	2560	if mobj.group(1) not in ids_in_page:
	2561	ids_in_page.append(mobj.group(1))
	2562
	2563	video_ids.extend(ids_in_page)
	2564
	2565	# A little optimization - if current page is not
	2566	# "full", ie. does not contain PAGE_SIZE video ids then
	2567	# we can assume that this page is the last one - there
	2568	# are no more ids on further pages - no need to query
	2569	# again.
	2570
	2571	if len(ids_in_page) < self._GDATA_PAGE_SIZE:
	2572	break
	2573
	2574	pagenum += 1
	2575
	2576	all_ids_count = len(video_ids)
	2577	playliststart = self._downloader.params.get('playliststart', 1) - 1
	2578	playlistend = self._downloader.params.get('playlistend', -1)
	2579
	2580	if playlistend == -1:
	2581	video_ids = video_ids[playliststart:]
	2582	else:
	2583	video_ids = video_ids[playliststart:playlistend]
	2584
	2585	self._downloader.to_screen("[youtube] user %s: Collected %d video ids (downloading %d of them)" %
	2586	(username, all_ids_count, len(video_ids)))
	2587
	2588	for video_id in video_ids:
	2589	self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % video_id)
	2590
	2591
	2592	class DepositFilesIE(InfoExtractor):
	2593	"""Information extractor for depositfiles.com"""
	2594
	2595	_VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles.com/(?:../(?#locale))?files/(.+)'
	2596
	2597	def __init__(self, downloader=None):
	2598	InfoExtractor.__init__(self, downloader)
	2599
	2600	@staticmethod
	2601	def suitable(url):
	2602	return (re.match(DepositFilesIE._VALID_URL, url) is not None)
	2603
	2604	def report_download_webpage(self, file_id):
	2605	"""Report webpage download."""
	2606	self._downloader.to_screen(u'[DepositFiles] %s: Downloading webpage' % file_id)
	2607
	2608	def report_extraction(self, file_id):
	2609	"""Report information extraction."""
	2610	self._downloader.to_screen(u'[DepositFiles] %s: Extracting information' % file_id)
	2611
	2612	def _real_initialize(self):
	2613	return
	2614
	2615	def _real_extract(self, url):
	2616	# At this point we have a new file
	2617	self._downloader.increment_downloads()
	2618
	2619	file_id = url.split('/')[-1]
	2620	# Rebuild url in english locale
	2621	url = 'http://depositfiles.com/en/files/' + file_id
	2622
	2623	# Retrieve file webpage with 'Free download' button pressed
	2624	free_download_indication = { 'gateway_result' : '1' }
	2625	request = urllib2.Request(url, urllib.urlencode(free_download_indication))
	2626	try:
	2627	self.report_download_webpage(file_id)
	2628	webpage = urllib2.urlopen(request).read()
	2629	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	2630	self._downloader.trouble(u'ERROR: Unable to retrieve file webpage: %s' % str(err))
	2631	return
	2632
	2633	# Search for the real file URL
	2634	mobj = re.search(r'<form action="(http://fileshare.+?)"', webpage)
	2635	if (mobj is None) or (mobj.group(1) is None):
	2636	# Try to figure out reason of the error.
	2637	mobj = re.search(r'<strong>(Attention.*?)</strong>', webpage, re.DOTALL)
	2638	if (mobj is not None) and (mobj.group(1) is not None):
	2639	restriction_message = re.sub('\s+', ' ', mobj.group(1)).strip()
	2640	self._downloader.trouble(u'ERROR: %s' % restriction_message)
	2641	else:
	2642	self._downloader.trouble(u'ERROR: unable to extract download URL from: %s' % url)
	2643	return
	2644
	2645	file_url = mobj.group(1)
	2646	file_extension = os.path.splitext(file_url)[1][1:]
	2647
	2648	# Search for file title
	2649	mobj = re.search(r'<b title="(.*?)">', webpage)
	2650	if mobj is None:
	2651	self._downloader.trouble(u'ERROR: unable to extract title')
	2652	return
	2653	file_title = mobj.group(1).decode('utf-8')
	2654
	2655	try:
	2656	# Process file information
	2657	self._downloader.process_info({
	2658	'id': file_id.decode('utf-8'),
	2659	'url': file_url.decode('utf-8'),
	2660	'uploader': u'NA',
	2661	'upload_date': u'NA',
	2662	'title': file_title,
	2663	'stitle': file_title,
	2664	'ext': file_extension.decode('utf-8'),
	2665	'format': u'NA',
	2666	'player_url': None,
	2667	})
	2668	except UnavailableVideoError, err:
	2669	self._downloader.trouble(u'ERROR: unable to download file')
	2670
	2671
	2672	class FacebookIE(InfoExtractor):
	2673	"""Information Extractor for Facebook"""
	2674
	2675	_VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook.com/video/video.php\?(?:.?)v=(?P<ID>\d+)(?:.)'
	2676	_LOGIN_URL = 'https://login.facebook.com/login.php?m&next=http%3A%2F%2Fm.facebook.com%2Fhome.php&'
	2677	_NETRC_MACHINE = 'facebook'
	2678	_available_formats = ['highqual', 'lowqual']
	2679	_video_extensions = {
	2680	'highqual': 'mp4',
	2681	'lowqual': 'mp4',
	2682	}
	2683
	2684	def __init__(self, downloader=None):
	2685	InfoExtractor.__init__(self, downloader)
	2686
	2687	@staticmethod
	2688	def suitable(url):
	2689	return (re.match(FacebookIE._VALID_URL, url) is not None)
	2690
	2691	def _reporter(self, message):
	2692	"""Add header and report message."""
	2693	self._downloader.to_screen(u'[facebook] %s' % message)
	2694
	2695	def report_login(self):
	2696	"""Report attempt to log in."""
	2697	self._reporter(u'Logging in')
	2698
	2699	def report_video_webpage_download(self, video_id):
	2700	"""Report attempt to download video webpage."""
	2701	self._reporter(u'%s: Downloading video webpage' % video_id)
	2702
	2703	def report_information_extraction(self, video_id):
	2704	"""Report attempt to extract video information."""
	2705	self._reporter(u'%s: Extracting video information' % video_id)
	2706
	2707	def _parse_page(self, video_webpage):
	2708	"""Extract video information from page"""
	2709	# General data
	2710	data = {'title': r'class="video_title datawrap">(.*?)</',
	2711	'description': r'<div class="datawrap">(.*?)</div>',
	2712	'owner': r'$"video_owner_name", "(.*?)"$',
	2713	'upload_date': r'data-date="(.*?)"',
	2714	'thumbnail': r'$"thumb_url", "(?P<THUMB>.*?)"$',
	2715	}
	2716	video_info = {}
	2717	for piece in data.keys():
	2718	mobj = re.search(data[piece], video_webpage)
	2719	if mobj is not None:
	2720	video_info[piece] = urllib.unquote_plus(mobj.group(1).decode("unicode_escape"))
	2721
	2722	# Video urls
	2723	video_urls = {}
	2724	for fmt in self._available_formats:
	2725	mobj = re.search(r'$"%s_src\", "(.+?)"$' % fmt, video_webpage)
	2726	if mobj is not None:
	2727	# URL is in a Javascript segment inside an escaped Unicode format within
	2728	# the generally utf-8 page
	2729	video_urls[fmt] = urllib.unquote_plus(mobj.group(1).decode("unicode_escape"))
	2730	video_info['video_urls'] = video_urls
	2731
	2732	return video_info
	2733
	2734	def _real_initialize(self):
	2735	if self._downloader is None:
	2736	return
	2737
	2738	useremail = None
	2739	password = None
	2740	downloader_params = self._downloader.params
	2741
	2742	# Attempt to use provided username and password or .netrc data
	2743	if downloader_params.get('username', None) is not None:
	2744	useremail = downloader_params['username']
	2745	password = downloader_params['password']
	2746	elif downloader_params.get('usenetrc', False):
	2747	try:
	2748	info = netrc.netrc().authenticators(self._NETRC_MACHINE)
	2749	if info is not None:
	2750	useremail = info[0]
	2751	password = info[2]
	2752	else:
	2753	raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
	2754	except (IOError, netrc.NetrcParseError), err:
	2755	self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
	2756	return
	2757
	2758	if useremail is None:
	2759	return
	2760
	2761	# Log in
	2762	login_form = {
	2763	'email': useremail,
	2764	'pass': password,
	2765	'login': 'Log+In'
	2766	}
	2767	request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form))
	2768	try:
	2769	self.report_login()
	2770	login_results = urllib2.urlopen(request).read()
	2771	if re.search(r'<form(.)name="login"(.)</form>', login_results) is not None:
	2772	self._downloader.to_stderr(u'WARNING: unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
	2773	return
	2774	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	2775	self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
	2776	return
	2777
	2778	def _real_extract(self, url):
	2779	mobj = re.match(self._VALID_URL, url)
	2780	if mobj is None:
	2781	self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
	2782	return
	2783	video_id = mobj.group('ID')
	2784
	2785	# Get video webpage
	2786	self.report_video_webpage_download(video_id)
	2787	request = urllib2.Request('https://www.facebook.com/video/video.php?v=%s' % video_id)
	2788	try:
	2789	page = urllib2.urlopen(request)
	2790	video_webpage = page.read()
	2791	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	2792	self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
	2793	return
	2794
	2795	# Start extracting information
	2796	self.report_information_extraction(video_id)
	2797
	2798	# Extract information
	2799	video_info = self._parse_page(video_webpage)
	2800
	2801	# uploader
	2802	if 'owner' not in video_info:
	2803	self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
	2804	return
	2805	video_uploader = video_info['owner']
	2806
	2807	# title
	2808	if 'title' not in video_info:
	2809	self._downloader.trouble(u'ERROR: unable to extract video title')
	2810	return
	2811	video_title = video_info['title']
	2812	video_title = video_title.decode('utf-8')
	2813	video_title = sanitize_title(video_title)
	2814
	2815	# simplified title
	2816	simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
	2817	simple_title = simple_title.strip(ur'_')
	2818
	2819	# thumbnail image
	2820	if 'thumbnail' not in video_info:
	2821	self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
	2822	video_thumbnail = ''
	2823	else:
	2824	video_thumbnail = video_info['thumbnail']
	2825
	2826	# upload date
	2827	upload_date = u'NA'
	2828	if 'upload_date' in video_info:
	2829	upload_time = video_info['upload_date']
	2830	timetuple = email.utils.parsedate_tz(upload_time)
	2831	if timetuple is not None:
	2832	try:
	2833	upload_date = time.strftime('%Y%m%d', timetuple[0:9])
	2834	except:
	2835	pass
	2836
	2837	# description
	2838	video_description = video_info.get('description', 'No description available.')
	2839
	2840	url_map = video_info['video_urls']
	2841	if len(url_map.keys()) > 0:
	2842	# Decide which formats to download
	2843	req_format = self._downloader.params.get('format', None)
	2844	format_limit = self._downloader.params.get('format_limit', None)
	2845
	2846	if format_limit is not None and format_limit in self._available_formats:
	2847	format_list = self._available_formats[self._available_formats.index(format_limit):]
	2848	else:
	2849	format_list = self._available_formats
	2850	existing_formats = [x for x in format_list if x in url_map]
	2851	if len(existing_formats) == 0:
	2852	self._downloader.trouble(u'ERROR: no known formats available for video')
	2853	return
	2854	if req_format is None:
	2855	video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
	2856	elif req_format == '-1':
	2857	video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
	2858	else:
	2859	# Specific format
	2860	if req_format not in url_map:
	2861	self._downloader.trouble(u'ERROR: requested format not available')
	2862	return
	2863	video_url_list = [(req_format, url_map[req_format])] # Specific format
	2864
	2865	for format_param, video_real_url in video_url_list:
	2866
	2867	# At this point we have a new video
	2868	self._downloader.increment_downloads()
	2869
	2870	# Extension
	2871	video_extension = self._video_extensions.get(format_param, 'mp4')
	2872
	2873	try:
	2874	# Process video information
	2875	self._downloader.process_info({
	2876	'id': video_id.decode('utf-8'),
	2877	'url': video_real_url.decode('utf-8'),
	2878	'uploader': video_uploader.decode('utf-8'),
	2879	'upload_date': upload_date,
	2880	'title': video_title,
	2881	'stitle': simple_title,
	2882	'ext': video_extension.decode('utf-8'),
	2883	'format': (format_param is None and u'NA' or format_param.decode('utf-8')),
	2884	'thumbnail': video_thumbnail.decode('utf-8'),
	2885	'description': video_description.decode('utf-8'),
	2886	'player_url': None,
	2887	})
	2888	except UnavailableVideoError, err:
	2889	self._downloader.trouble(u'\nERROR: unable to download video')
	2890
	2891	class BlipTVIE(InfoExtractor):
	2892	"""Information extractor for blip.tv"""
	2893
	2894	_VALID_URL = r'^(?:https?://)?(?:\w+\.)?blip\.tv(/.+)$'
	2895	_URL_EXT = r'^.*\.([a-z0-9]+)$'
	2896
	2897	@staticmethod
	2898	def suitable(url):
	2899	return (re.match(BlipTVIE._VALID_URL, url) is not None)
	2900
	2901	def report_extraction(self, file_id):
	2902	"""Report information extraction."""
	2903	self._downloader.to_screen(u'[blip.tv] %s: Extracting information' % file_id)
	2904
	2905	def _simplify_title(self, title):
	2906	res = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', title)
	2907	res = res.strip(ur'_')
	2908	return res
	2909
	2910	def _real_extract(self, url):
	2911	mobj = re.match(self._VALID_URL, url)
	2912	if mobj is None:
	2913	self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
	2914	return
	2915
	2916	if '?' in url:
	2917	cchar = '&'
	2918	else:
	2919	cchar = '?'
	2920	json_url = url + cchar + 'skin=json&version=2&no_wrap=1'
	2921	request = urllib2.Request(json_url)
	2922	self.report_extraction(mobj.group(1))
	2923	try:
	2924	json_code = urllib2.urlopen(request).read()
	2925	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	2926	self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
	2927	return
	2928	try:
	2929	json_data = json.loads(json_code)
	2930	if 'Post' in json_data:
	2931	data = json_data['Post']
	2932	else:
	2933	data = json_data
	2934
	2935	upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d')
	2936	video_url = data['media']['url']
	2937	umobj = re.match(self._URL_EXT, video_url)
	2938	if umobj is None:
	2939	raise ValueError('Can not determine filename extension')
	2940	ext = umobj.group(1)
	2941
	2942	self._downloader.increment_downloads()
	2943
	2944	info = {
	2945	'id': data['item_id'],
	2946	'url': video_url,
	2947	'uploader': data['display_name'],
	2948	'upload_date': upload_date,
	2949	'title': data['title'],
	2950	'stitle': self._simplify_title(data['title']),
	2951	'ext': ext,
	2952	'format': data['media']['mimeType'],
	2953	'thumbnail': data['thumbnailUrl'],
	2954	'description': data['description'],
	2955	'player_url': data['embedUrl']
	2956	}
	2957	except (ValueError,KeyError), err:
	2958	self._downloader.trouble(u'ERROR: unable to parse video information: %s' % repr(err))
	2959	return
	2960
	2961	try:
	2962	self._downloader.process_info(info)
	2963	except UnavailableVideoError, err:
	2964	self._downloader.trouble(u'\nERROR: unable to download video')
	2965
	2966
	2967	class MyVideoIE(InfoExtractor):
	2968	"""Information Extractor for myvideo.de."""
	2969
	2970	_VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/watch/([0-9]+)/([^?/]+).*'
	2971
	2972	def __init__(self, downloader=None):
	2973	InfoExtractor.__init__(self, downloader)
	2974
	2975	@staticmethod
	2976	def suitable(url):
	2977	return (re.match(MyVideoIE._VALID_URL, url) is not None)
	2978
	2979	def report_download_webpage(self, video_id):
	2980	"""Report webpage download."""
	2981	self._downloader.to_screen(u'[myvideo] %s: Downloading webpage' % video_id)
	2982
	2983	def report_extraction(self, video_id):
	2984	"""Report information extraction."""
	2985	self._downloader.to_screen(u'[myvideo] %s: Extracting information' % video_id)
	2986
	2987	def _real_initialize(self):
	2988	return
	2989
	2990	def _real_extract(self,url):
	2991	mobj = re.match(self._VALID_URL, url)
	2992	if mobj is None:
	2993	self._download.trouble(u'ERROR: invalid URL: %s' % url)
	2994	return
	2995
	2996	video_id = mobj.group(1)
	2997	simple_title = mobj.group(2).decode('utf-8')
	2998	# should actually not be necessary
	2999	simple_title = sanitize_title(simple_title)
	3000	simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', simple_title)
	3001
	3002	# Get video webpage
	3003	request = urllib2.Request('http://www.myvideo.de/watch/%s' % video_id)
	3004	try:
	3005	self.report_download_webpage(video_id)
	3006	webpage = urllib2.urlopen(request).read()
	3007	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	3008	self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
	3009	return
	3010
	3011	self.report_extraction(video_id)
	3012	mobj = re.search(r'<link rel=\'image_src\' href=\'(http://is[0-9].myvideo\.de/de/movie[0-9]+/[a-f0-9]+)/thumbs/[^.]+\.jpg\' />',
	3013	webpage)
	3014	if mobj is None:
	3015	self._downloader.trouble(u'ERROR: unable to extract media URL')
	3016	return
	3017	video_url = mobj.group(1) + ('/%s.flv' % video_id)
	3018
	3019	mobj = re.search('<title>([^<]+)</title>', webpage)
	3020	if mobj is None:
	3021	self._downloader.trouble(u'ERROR: unable to extract title')
	3022	return
	3023
	3024	video_title = mobj.group(1)
	3025	video_title = sanitize_title(video_title)
	3026
	3027	try:
	3028	print(video_url)
	3029	self._downloader.process_info({
	3030	'id': video_id,
	3031	'url': video_url,
	3032	'uploader': u'NA',
	3033	'upload_date': u'NA',
	3034	'title': video_title,
	3035	'stitle': simple_title,
	3036	'ext': u'flv',
	3037	'format': u'NA',
	3038	'player_url': None,
	3039	})
	3040	except UnavailableVideoError:
	3041	self._downloader.trouble(u'\nERROR: Unable to download video')
	3042
	3043	class ComedyCentralIE(InfoExtractor):
	3044	"""Information extractor for The Daily Show and Colbert Report """
	3045
	3046	_VALID_URL = r'^(:(?P<shortname>tds\|thedailyshow\|cr\|colbert\|colbertnation\|colbertreport))\|(https?://)?(www\.)(?P<showname>thedailyshow\|colbertnation)\.com/full-episodes/(?P<episode>.*)$'
	3047
	3048	@staticmethod
	3049	def suitable(url):
	3050	return (re.match(ComedyCentralIE._VALID_URL, url) is not None)
	3051
	3052	def report_extraction(self, episode_id):
	3053	self._downloader.to_screen(u'[comedycentral] %s: Extracting information' % episode_id)
	3054
	3055	def report_config_download(self, episode_id):
	3056	self._downloader.to_screen(u'[comedycentral] %s: Downloading configuration' % episode_id)
	3057
	3058	def report_player_url(self, episode_id):
	3059	self._downloader.to_screen(u'[comedycentral] %s: Determining player URL' % episode_id)
	3060
	3061	def _simplify_title(self, title):
	3062	res = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', title)
	3063	res = res.strip(ur'_')
	3064	return res
	3065
	3066	def _real_extract(self, url):
	3067	mobj = re.match(self._VALID_URL, url)
	3068	if mobj is None:
	3069	self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
	3070	return
	3071
	3072	if mobj.group('shortname'):
	3073	if mobj.group('shortname') in ('tds', 'thedailyshow'):
	3074	url = 'http://www.thedailyshow.com/full-episodes/'
	3075	else:
	3076	url = 'http://www.colbertnation.com/full-episodes/'
	3077	mobj = re.match(self._VALID_URL, url)
	3078	assert mobj is not None
	3079
	3080	dlNewest = not mobj.group('episode')
	3081	if dlNewest:
	3082	epTitle = mobj.group('showname')
	3083	else:
	3084	epTitle = mobj.group('episode')
	3085
	3086	req = urllib2.Request(url)
	3087	self.report_extraction(epTitle)
	3088	try:
	3089	htmlHandle = urllib2.urlopen(req)
	3090	html = htmlHandle.read()
	3091	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	3092	self._downloader.trouble(u'ERROR: unable to download webpage: %s' % unicode(err))
	3093	return
	3094	if dlNewest:
	3095	url = htmlHandle.geturl()
	3096	mobj = re.match(self._VALID_URL, url)
	3097	if mobj is None:
	3098	self._downloader.trouble(u'ERROR: Invalid redirected URL: ' + url)
	3099	return
	3100	if mobj.group('episode') == '':
	3101	self._downloader.trouble(u'ERROR: Redirected URL is still not specific: ' + url)
	3102	return
	3103	epTitle = mobj.group('episode')
	3104
	3105	mMovieParams = re.findall('<param name="movie" value="(http://media.mtvnservices.com/(.?:episode:([^:]):)(.*?))"/>', html)
	3106	if len(mMovieParams) == 0:
	3107	self._downloader.trouble(u'ERROR: unable to find Flash URL in webpage ' + url)
	3108	return
	3109	show_id = mMovieParams[0][2]
	3110	ACT_COUNT = { # TODO: Detect this dynamically
	3111	'thedailyshow.com': 4,
	3112	'colbertnation.com': 3,
	3113	}.get(show_id, 4)
	3114	OFFSET = {
	3115	'thedailyshow.com': 1,
	3116	'colbertnation.com': 1,
	3117	}.get(show_id, 1)
	3118
	3119	first_player_url = mMovieParams[0][0]
	3120	startMediaNum = int(mMovieParams[0][3]) + OFFSET
	3121	movieId = mMovieParams[0][1]
	3122
	3123	playerReq = urllib2.Request(first_player_url)
	3124	self.report_player_url(epTitle)
	3125	try:
	3126	playerResponse = urllib2.urlopen(playerReq)
	3127	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	3128	self._downloader.trouble(u'ERROR: unable to download player: %s' % unicode(err))
	3129	return
	3130	player_url = playerResponse.geturl()
	3131
	3132	for actNum in range(ACT_COUNT):
	3133	mediaNum = startMediaNum + actNum
	3134	mediaId = movieId + str(mediaNum)
	3135	configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' +
	3136	urllib.urlencode({'uri': mediaId}))
	3137	configReq = urllib2.Request(configUrl)
	3138	self.report_config_download(epTitle)
	3139	try:
	3140	configXml = urllib2.urlopen(configReq).read()
	3141	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	3142	self._downloader.trouble(u'ERROR: unable to download webpage: %s' % unicode(err))
	3143	return
	3144
	3145	cdoc = xml.etree.ElementTree.fromstring(configXml)
	3146	turls = []
	3147	for rendition in cdoc.findall('.//rendition'):
	3148	finfo = (rendition.attrib['bitrate'], rendition.findall('./src')[0].text)
	3149	turls.append(finfo)
	3150
	3151	if len(turls) == 0:
	3152	self._downloader.trouble(u'\nERROR: unable to download ' + str(mediaNum) + ': No videos found')
	3153	continue
	3154
	3155	# For now, just pick the highest bitrate
	3156	format,video_url = turls[-1]
	3157
	3158	self._downloader.increment_downloads()
	3159
	3160	effTitle = show_id.replace('.com', '') + '-' + epTitle
	3161	info = {
	3162	'id': str(mediaNum),
	3163	'url': video_url,
	3164	'uploader': show_id,
	3165	'upload_date': 'NA',
	3166	'title': effTitle,
	3167	'stitle': self._simplify_title(effTitle),
	3168	'ext': 'mp4',
	3169	'format': format,
	3170	'thumbnail': None,
	3171	'description': 'TODO: Not yet supported',
	3172	'player_url': player_url
	3173	}
	3174
	3175	try:
	3176	self._downloader.process_info(info)
	3177	except UnavailableVideoError, err:
	3178	self._downloader.trouble(u'\nERROR: unable to download ' + str(mediaNum))
	3179	continue
	3180
	3181
	3182	class PostProcessor(object):
	3183	"""Post Processor class.
	3184
	3185	PostProcessor objects can be added to downloaders with their
	3186	add_post_processor() method. When the downloader has finished a
	3187	successful download, it will take its internal chain of PostProcessors
	3188	and start calling the run() method on each one of them, first with
	3189	an initial argument and then with the returned value of the previous
	3190	PostProcessor.
	3191
	3192	The chain will be stopped if one of them ever returns None or the end
	3193	of the chain is reached.
	3194
	3195	PostProcessor objects follow a "mutual registration" process similar
	3196	to InfoExtractor objects.
	3197	"""
	3198
	3199	_downloader = None
	3200
	3201	def __init__(self, downloader=None):
	3202	self._downloader = downloader
	3203
	3204	def set_downloader(self, downloader):
	3205	"""Sets the downloader for this PP."""
	3206	self._downloader = downloader
	3207
	3208	def run(self, information):
	3209	"""Run the PostProcessor.
	3210
	3211	The "information" argument is a dictionary like the ones
	3212	composed by InfoExtractors. The only difference is that this
	3213	one has an extra field called "filepath" that points to the
	3214	downloaded file.
	3215
	3216	When this method returns None, the postprocessing chain is
	3217	stopped. However, this method may return an information
	3218	dictionary that will be passed to the next postprocessing
	3219	object in the chain. It can be the one it received after
	3220	changing some fields.
	3221
	3222	In addition, this method may raise a PostProcessingError
	3223	exception that will be taken into account by the downloader
	3224	it was called from.
	3225	"""
	3226	return information # by default, do nothing
	3227
	3228
	3229	class FFmpegExtractAudioPP(PostProcessor):
	3230
	3231	def __init__(self, downloader=None, preferredcodec=None):
	3232	PostProcessor.__init__(self, downloader)
	3233	if preferredcodec is None:
	3234	preferredcodec = 'best'
	3235	self._preferredcodec = preferredcodec
	3236
	3237	@staticmethod
	3238	def get_audio_codec(path):
	3239	try:
	3240	cmd = ['ffprobe', '-show_streams', '--', path]
	3241	handle = subprocess.Popen(cmd, stderr=file(os.path.devnull, 'w'), stdout=subprocess.PIPE)
	3242	output = handle.communicate()[0]
	3243	if handle.wait() != 0:
	3244	return None
	3245	except (IOError, OSError):
	3246	return None
	3247	audio_codec = None
	3248	for line in output.split('\n'):
	3249	if line.startswith('codec_name='):
	3250	audio_codec = line.split('=')[1].strip()
	3251	elif line.strip() == 'codec_type=audio' and audio_codec is not None:
	3252	return audio_codec
	3253	return None
	3254
	3255	@staticmethod
	3256	def run_ffmpeg(path, out_path, codec, more_opts):
	3257	try:
	3258	cmd = ['ffmpeg', '-y', '-i', path, '-vn', '-acodec', codec] + more_opts + ['--', out_path]
	3259	ret = subprocess.call(cmd, stdout=file(os.path.devnull, 'w'), stderr=subprocess.STDOUT)
	3260	return (ret == 0)
	3261	except (IOError, OSError):
	3262	return False
	3263
	3264	def run(self, information):
	3265	path = information['filepath']
	3266
	3267	filecodec = self.get_audio_codec(path)
	3268	if filecodec is None:
	3269	self._downloader.to_stderr(u'WARNING: unable to obtain file audio codec with ffprobe')
	3270	return None
	3271
	3272	more_opts = []
	3273	if self._preferredcodec == 'best' or self._preferredcodec == filecodec:
	3274	if filecodec == 'aac' or filecodec == 'mp3':
	3275	# Lossless if possible
	3276	acodec = 'copy'
	3277	extension = filecodec
	3278	if filecodec == 'aac':
	3279	more_opts = ['-f', 'adts']
	3280	else:
	3281	# MP3 otherwise.
	3282	acodec = 'libmp3lame'
	3283	extension = 'mp3'
	3284	more_opts = ['-ab', '128k']
	3285	else:
	3286	# We convert the audio (lossy)
	3287	acodec = {'mp3': 'libmp3lame', 'aac': 'aac'}[self._preferredcodec]
	3288	extension = self._preferredcodec
	3289	more_opts = ['-ab', '128k']
	3290	if self._preferredcodec == 'aac':
	3291	more_opts += ['-f', 'adts']
	3292
	3293	(prefix, ext) = os.path.splitext(path)
	3294	new_path = prefix + '.' + extension
	3295	self._downloader.to_screen(u'[ffmpeg] Destination: %s' % new_path)
	3296	status = self.run_ffmpeg(path, new_path, acodec, more_opts)
	3297
	3298	if not status:
	3299	self._downloader.to_stderr(u'WARNING: error running ffmpeg')
	3300	return None
	3301
	3302	try:
	3303	os.remove(path)
	3304	except (IOError, OSError):
	3305	self._downloader.to_stderr(u'WARNING: Unable to remove downloaded video file')
	3306	return None
	3307
	3308	information['filepath'] = new_path
	3309	return information
	3310
	3311
	3312	def updateSelf(downloader, filename):
	3313	''' Update the program file with the latest version from the repository '''
	3314	# Note: downloader only used for options
	3315	if not os.access(filename, os.W_OK):
	3316	sys.exit('ERROR: no write permissions on %s' % filename)
	3317
	3318	downloader.to_screen('Updating to latest version...')
	3319
	3320	try:
	3321	try:
	3322	urlh = urllib.urlopen(UPDATE_URL)
	3323	newcontent = urlh.read()
	3324	finally:
	3325	urlh.close()
	3326	except (IOError, OSError), err:
	3327	sys.exit('ERROR: unable to download latest version')
	3328
	3329	try:
	3330	outf = open(filename, 'wb')
	3331	try:
	3332	outf.write(newcontent)
	3333	finally:
	3334	outf.close()
	3335	except (IOError, OSError), err:
	3336	sys.exit('ERROR: unable to overwrite current version')
	3337
	3338	downloader.to_screen('Updated youtube-dl. Restart to use the new version.')
	3339
	3340	def parseOpts():
	3341	# Deferred imports
	3342	import getpass
	3343	import optparse
	3344
	3345	def _format_option_string(option):
	3346	''' ('-o', '--option') -> -o, --format METAVAR'''
	3347
	3348	opts = []
	3349
	3350	if option._short_opts: opts.append(option._short_opts[0])
	3351	if option._long_opts: opts.append(option._long_opts[0])
	3352	if len(opts) > 1: opts.insert(1, ', ')
	3353
	3354	if option.takes_value(): opts.append(' %s' % option.metavar)
	3355
	3356	return "".join(opts)
	3357
	3358	def _find_term_columns():
	3359	columns = os.environ.get('COLUMNS', None)
	3360	if columns:
	3361	return int(columns)
	3362
	3363	try:
	3364	sp = subprocess.Popen(['stty', 'size'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
	3365	out,err = sp.communicate()
	3366	return int(out.split()[1])
	3367	except:
	3368	pass
	3369	return None
	3370
	3371	max_width = 80
	3372	max_help_position = 80
	3373
	3374	# No need to wrap help messages if we're on a wide console
	3375	columns = _find_term_columns()
	3376	if columns: max_width = columns
	3377
	3378	fmt = optparse.IndentedHelpFormatter(width=max_width, max_help_position=max_help_position)
	3379	fmt.format_option_strings = _format_option_string
	3380
	3381	kw = {
	3382	'version' : __version__,
	3383	'formatter' : fmt,
	3384	'usage' : '%prog [options] url...',
	3385	'conflict_handler' : 'resolve',
	3386	}
	3387
	3388	parser = optparse.OptionParser(**kw)
	3389
	3390	# option groups
	3391	general = optparse.OptionGroup(parser, 'General Options')
	3392	authentication = optparse.OptionGroup(parser, 'Authentication Options')
	3393	video_format = optparse.OptionGroup(parser, 'Video Format Options')
	3394	postproc = optparse.OptionGroup(parser, 'Post-processing Options')
	3395	filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
	3396	verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
	3397
	3398	general.add_option('-h', '--help',
	3399	action='help', help='print this help text and exit')
	3400	general.add_option('-v', '--version',
	3401	action='version', help='print program version and exit')
	3402	general.add_option('-U', '--update',
	3403	action='store_true', dest='update_self', help='update this program to latest version')
	3404	general.add_option('-i', '--ignore-errors',
	3405	action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
	3406	general.add_option('-r', '--rate-limit',
	3407	dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)')
	3408	general.add_option('-R', '--retries',
	3409	dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10)
	3410	general.add_option('--playlist-start',
	3411	dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1)
	3412	general.add_option('--playlist-end',
	3413	dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)
	3414	general.add_option('--dump-user-agent',
	3415	action='store_true', dest='dump_user_agent',
	3416	help='display the current browser identification', default=False)
	3417
	3418	authentication.add_option('-u', '--username',
	3419	dest='username', metavar='USERNAME', help='account username')
	3420	authentication.add_option('-p', '--password',
	3421	dest='password', metavar='PASSWORD', help='account password')
	3422	authentication.add_option('-n', '--netrc',
	3423	action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
	3424
	3425
	3426	video_format.add_option('-f', '--format',
	3427	action='store', dest='format', metavar='FORMAT', help='video format code')
	3428	video_format.add_option('--all-formats',
	3429	action='store_const', dest='format', help='download all available video formats', const='-1')
	3430	video_format.add_option('--max-quality',
	3431	action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download')
	3432
	3433
	3434	verbosity.add_option('-q', '--quiet',
	3435	action='store_true', dest='quiet', help='activates quiet mode', default=False)
	3436	verbosity.add_option('-s', '--simulate',
	3437	action='store_true', dest='simulate', help='do not download video', default=False)
	3438	verbosity.add_option('-g', '--get-url',
	3439	action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
	3440	verbosity.add_option('-e', '--get-title',
	3441	action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
	3442	verbosity.add_option('--get-thumbnail',
	3443	action='store_true', dest='getthumbnail',
	3444	help='simulate, quiet but print thumbnail URL', default=False)
	3445	verbosity.add_option('--get-description',
	3446	action='store_true', dest='getdescription',
	3447	help='simulate, quiet but print video description', default=False)
	3448	verbosity.add_option('--get-filename',
	3449	action='store_true', dest='getfilename',
	3450	help='simulate, quiet but print output filename', default=False)
	3451	verbosity.add_option('--no-progress',
	3452	action='store_true', dest='noprogress', help='do not print progress bar', default=False)
	3453	verbosity.add_option('--console-title',
	3454	action='store_true', dest='consoletitle',
	3455	help='display progress in console titlebar', default=False)
	3456
	3457
	3458	filesystem.add_option('-t', '--title',
	3459	action='store_true', dest='usetitle', help='use title in file name', default=False)
	3460	filesystem.add_option('-l', '--literal',
	3461	action='store_true', dest='useliteral', help='use literal title in file name', default=False)
	3462	filesystem.add_option('-A', '--auto-number',
	3463	action='store_true', dest='autonumber',
	3464	help='number downloaded files starting from 00000', default=False)
	3465	filesystem.add_option('-o', '--output',
	3466	dest='outtmpl', metavar='TEMPLATE', help='output filename template')
	3467	filesystem.add_option('-a', '--batch-file',
	3468	dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
	3469	filesystem.add_option('-w', '--no-overwrites',
	3470	action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
	3471	filesystem.add_option('-c', '--continue',
	3472	action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False)
	3473	filesystem.add_option('--cookies',
	3474	dest='cookiefile', metavar='FILE', help='file to dump cookie jar to')
	3475	filesystem.add_option('--no-part',
	3476	action='store_true', dest='nopart', help='do not use .part files', default=False)
	3477	filesystem.add_option('--no-mtime',
	3478	action='store_false', dest='updatetime',
	3479	help='do not use the Last-modified header to set the file modification time', default=True)
	3480	filesystem.add_option('--write-description',
	3481	action='store_true', dest='writedescription',
	3482	help='write video description to a .description file', default=False)
	3483	filesystem.add_option('--write-info-json',
	3484	action='store_true', dest='writeinfojson',
	3485	help='write video metadata to a .info.json file', default=False)
	3486
	3487
	3488	postproc.add_option('--extract-audio', action='store_true', dest='extractaudio', default=False,
	3489	help='convert video files to audio-only files (requires ffmpeg and ffprobe)')
	3490	postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best',
	3491	help='"best", "aac" or "mp3"; best by default')
	3492
	3493
	3494	parser.add_option_group(general)
	3495	parser.add_option_group(filesystem)
	3496	parser.add_option_group(verbosity)
	3497	parser.add_option_group(video_format)
	3498	parser.add_option_group(authentication)
	3499	parser.add_option_group(postproc)
	3500
	3501	opts, args = parser.parse_args()
	3502
	3503	return parser, opts, args
	3504
	3505	def main():
	3506	parser, opts, args = parseOpts()
	3507
	3508	# Open appropriate CookieJar
	3509	if opts.cookiefile is None:
	3510	jar = cookielib.CookieJar()
	3511	else:
	3512	try:
	3513	jar = cookielib.MozillaCookieJar(opts.cookiefile)
	3514	if os.path.isfile(opts.cookiefile) and os.access(opts.cookiefile, os.R_OK):
	3515	jar.load()
	3516	except (IOError, OSError), err:
	3517	sys.exit(u'ERROR: unable to open cookie file')
	3518
	3519	# Dump user agent
	3520	if opts.dump_user_agent:
	3521	print std_headers['User-Agent']
	3522	sys.exit(0)
	3523
	3524	# General configuration
	3525	cookie_processor = urllib2.HTTPCookieProcessor(jar)
	3526	opener = urllib2.build_opener(urllib2.ProxyHandler(), cookie_processor, YoutubeDLHandler())
	3527	urllib2.install_opener(opener)
	3528	socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)
	3529
	3530	# Batch file verification
	3531	batchurls = []
	3532	if opts.batchfile is not None:
	3533	try:
	3534	if opts.batchfile == '-':
	3535	batchfd = sys.stdin
	3536	else:
	3537	batchfd = open(opts.batchfile, 'r')
	3538	batchurls = batchfd.readlines()
	3539	batchurls = [x.strip() for x in batchurls]
	3540	batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)]
	3541	except IOError:
	3542	sys.exit(u'ERROR: batch file could not be read')
	3543	all_urls = batchurls + args
	3544
	3545	# Conflicting, missing and erroneous options
	3546	if opts.usenetrc and (opts.username is not None or opts.password is not None):
	3547	parser.error(u'using .netrc conflicts with giving username/password')
	3548	if opts.password is not None and opts.username is None:
	3549	parser.error(u'account username missing')
	3550	if opts.outtmpl is not None and (opts.useliteral or opts.usetitle or opts.autonumber):
	3551	parser.error(u'using output template conflicts with using title, literal title or auto number')
	3552	if opts.usetitle and opts.useliteral:
	3553	parser.error(u'using title conflicts with using literal title')
	3554	if opts.username is not None and opts.password is None:
	3555	opts.password = getpass.getpass(u'Type account password and press return:')
	3556	if opts.ratelimit is not None:
	3557	numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
	3558	if numeric_limit is None:
	3559	parser.error(u'invalid rate limit specified')
	3560	opts.ratelimit = numeric_limit
	3561	if opts.retries is not None:
	3562	try:
	3563	opts.retries = long(opts.retries)
	3564	except (TypeError, ValueError), err:
	3565	parser.error(u'invalid retry count specified')
	3566	try:
	3567	opts.playliststart = int(opts.playliststart)
	3568	if opts.playliststart <= 0:
	3569	raise ValueError(u'Playlist start must be positive')
	3570	except (TypeError, ValueError), err:
	3571	parser.error(u'invalid playlist start number specified')
	3572	try:
	3573	opts.playlistend = int(opts.playlistend)
	3574	if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart):
	3575	raise ValueError(u'Playlist end must be greater than playlist start')
	3576	except (TypeError, ValueError), err:
	3577	parser.error(u'invalid playlist end number specified')
	3578	if opts.extractaudio:
	3579	if opts.audioformat not in ['best', 'aac', 'mp3']:
	3580	parser.error(u'invalid audio format specified')
	3581
	3582	# Information extractors
	3583	youtube_ie = YoutubeIE()
	3584	metacafe_ie = MetacafeIE(youtube_ie)
	3585	dailymotion_ie = DailymotionIE()
	3586	youtube_pl_ie = YoutubePlaylistIE(youtube_ie)
	3587	youtube_user_ie = YoutubeUserIE(youtube_ie)
	3588	youtube_search_ie = YoutubeSearchIE(youtube_ie)
	3589	google_ie = GoogleIE()
	3590	google_search_ie = GoogleSearchIE(google_ie)
	3591	photobucket_ie = PhotobucketIE()
	3592	yahoo_ie = YahooIE()
	3593	yahoo_search_ie = YahooSearchIE(yahoo_ie)
	3594	deposit_files_ie = DepositFilesIE()
	3595	facebook_ie = FacebookIE()
	3596	bliptv_ie = BlipTVIE()
	3597	vimeo_ie = VimeoIE()
	3598	myvideo_ie = MyVideoIE()
	3599	comedycentral_ie = ComedyCentralIE()
	3600
	3601	generic_ie = GenericIE()
	3602
	3603	# File downloader
	3604	fd = FileDownloader({
	3605	'usenetrc': opts.usenetrc,
	3606	'username': opts.username,
	3607	'password': opts.password,
	3608	'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename),
	3609	'forceurl': opts.geturl,
	3610	'forcetitle': opts.gettitle,
	3611	'forcethumbnail': opts.getthumbnail,
	3612	'forcedescription': opts.getdescription,
	3613	'forcefilename': opts.getfilename,
	3614	'simulate': (opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename),
	3615	'format': opts.format,
	3616	'format_limit': opts.format_limit,
	3617	'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
	3618	or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s')
	3619	or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s')
	3620	or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s')
	3621	or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(stitle)s-%(id)s.%(ext)s')
	3622	or (opts.useliteral and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s')
	3623	or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
	3624	or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
	3625	or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
	3626	or u'%(id)s.%(ext)s'),
	3627	'ignoreerrors': opts.ignoreerrors,
	3628	'ratelimit': opts.ratelimit,
	3629	'nooverwrites': opts.nooverwrites,
	3630	'retries': opts.retries,
	3631	'continuedl': opts.continue_dl,
	3632	'noprogress': opts.noprogress,
	3633	'playliststart': opts.playliststart,
	3634	'playlistend': opts.playlistend,
	3635	'logtostderr': opts.outtmpl == '-',
	3636	'consoletitle': opts.consoletitle,
	3637	'nopart': opts.nopart,
	3638	'updatetime': opts.updatetime,
	3639	'writedescription': opts.writedescription,
	3640	'writeinfojson': opts.writeinfojson,
	3641	})
	3642	fd.add_info_extractor(youtube_search_ie)
	3643	fd.add_info_extractor(youtube_pl_ie)
	3644	fd.add_info_extractor(youtube_user_ie)
	3645	fd.add_info_extractor(metacafe_ie)
	3646	fd.add_info_extractor(dailymotion_ie)
	3647	fd.add_info_extractor(youtube_ie)
	3648	fd.add_info_extractor(google_ie)
	3649	fd.add_info_extractor(google_search_ie)
	3650	fd.add_info_extractor(photobucket_ie)
	3651	fd.add_info_extractor(yahoo_ie)
	3652	fd.add_info_extractor(yahoo_search_ie)
	3653	fd.add_info_extractor(deposit_files_ie)
	3654	fd.add_info_extractor(facebook_ie)
	3655	fd.add_info_extractor(bliptv_ie)
	3656	fd.add_info_extractor(vimeo_ie)
	3657	fd.add_info_extractor(myvideo_ie)
	3658	fd.add_info_extractor(comedycentral_ie)
	3659
	3660	# This must come last since it's the
	3661	# fallback if none of the others work
	3662	fd.add_info_extractor(generic_ie)
	3663
	3664	# PostProcessors
	3665	if opts.extractaudio:
	3666	fd.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat))
	3667
	3668	# Update version
	3669	if opts.update_self:
	3670	updateSelf(fd, sys.argv[0])
	3671
	3672	# Maybe do nothing
	3673	if len(all_urls) < 1:
	3674	if not opts.update_self:
	3675	parser.error(u'you must provide at least one URL')
	3676	else:
	3677	sys.exit()
	3678	retcode = fd.download(all_urls)
	3679
	3680	# Dump cookie jar if requested
	3681	if opts.cookiefile is not None:
	3682	try:
	3683	jar.save()
	3684	except (IOError, OSError), err:
	3685	sys.exit(u'ERROR: unable to save cookie jar')
	3686
	3687	sys.exit(retcode)
	3688
	3689
	3690	if __name__ == '__main__':
	3691	try:
	3692	main()
	3693	except DownloadError:
	3694	sys.exit(1)
	3695	except SameFileError:
	3696	sys.exit(u'ERROR: fixed output name but more than one file to download')
	3697	except KeyboardInterrupt:
	3698	sys.exit(u'\nERROR: Interrupted by user')
	3699
	3700	# vim: set ts=4 sw=4 sts=4 noet ai si filetype=python: