jfr.im git - yt-dlp.git/blame_incremental

... / ...

Commit	Line	Data
	1	#!/usr/bin/env python
	2	# -*- coding: utf-8 -*-
	3
	4	__authors__ = (
	5	'Ricardo Garcia Gonzalez',
	6	'Danny Colligan',
	7	'Benjamin Johnson',
	8	'Vasyl\' Vavrychuk',
	9	'Witold Baryluk',
	10	'Paweł Paprota',
	11	'Gergely Imreh',
	12	'Rogério Brito',
	13	'Philipp Hagemeister',
	14	'Sören Schulze',
	15	'Kevin Ngo',
	16	'Ori Avtalion',
	17	'shizeeg',
	18	)
	19
	20	__license__ = 'Public Domain'
	21	__version__ = '2012.02.27'
	22
	23	UPDATE_URL = 'https://raw.github.com/rg3/youtube-dl/master/youtube-dl'
	24
	25
	26	import cookielib
	27	import datetime
	28	import getpass
	29	import gzip
	30	import htmlentitydefs
	31	import HTMLParser
	32	import httplib
	33	import locale
	34	import math
	35	import netrc
	36	import optparse
	37	import os
	38	import os.path
	39	import re
	40	import shlex
	41	import socket
	42	import string
	43	import subprocess
	44	import sys
	45	import time
	46	import urllib
	47	import urllib2
	48	import warnings
	49	import zlib
	50
	51	if os.name == 'nt':
	52	import ctypes
	53
	54	try:
	55	import email.utils
	56	except ImportError: # Python 2.4
	57	import email.Utils
	58	try:
	59	import cStringIO as StringIO
	60	except ImportError:
	61	import StringIO
	62
	63	# parse_qs was moved from the cgi module to the urlparse module recently.
	64	try:
	65	from urlparse import parse_qs
	66	except ImportError:
	67	from cgi import parse_qs
	68
	69	try:
	70	import lxml.etree
	71	except ImportError:
	72	pass # Handled below
	73
	74	try:
	75	import xml.etree.ElementTree
	76	except ImportError: # Python<2.5: Not officially supported, but let it slip
	77	warnings.warn('xml.etree.ElementTree support is missing. Consider upgrading to Python >= 2.5 if you get related errors.')
	78
	79	std_headers = {
	80	'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:5.0.1) Gecko/20100101 Firefox/5.0.1',
	81	'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
	82	'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,/;q=0.8',
	83	'Accept-Encoding': 'gzip, deflate',
	84	'Accept-Language': 'en-us,en;q=0.5',
	85	}
	86
	87	try:
	88	import json
	89	except ImportError: # Python <2.6, use trivialjson (https://github.com/phihag/trivialjson):
	90	import re
	91	class json(object):
	92	@staticmethod
	93	def loads(s):
	94	s = s.decode('UTF-8')
	95	def raiseError(msg, i):
	96	raise ValueError(msg + ' at position ' + str(i) + ' of ' + repr(s) + ': ' + repr(s[i:]))
	97	def skipSpace(i, expectMore=True):
	98	while i < len(s) and s[i] in ' \t\r\n':
	99	i += 1
	100	if expectMore:
	101	if i >= len(s):
	102	raiseError('Premature end', i)
	103	return i
	104	def decodeEscape(match):
	105	esc = match.group(1)
	106	_STATIC = {
	107	'"': '"',
	108	'\\': '\\',
	109	'/': '/',
	110	'b': unichr(0x8),
	111	'f': unichr(0xc),
	112	'n': '\n',
	113	'r': '\r',
	114	't': '\t',
	115	}
	116	if esc in _STATIC:
	117	return _STATIC[esc]
	118	if esc[0] == 'u':
	119	if len(esc) == 1+4:
	120	return unichr(int(esc[1:5], 16))
	121	if len(esc) == 5+6 and esc[5:7] == '\\u':
	122	hi = int(esc[1:5], 16)
	123	low = int(esc[7:11], 16)
	124	return unichr((hi - 0xd800) * 0x400 + low - 0xdc00 + 0x10000)
	125	raise ValueError('Unknown escape ' + str(esc))
	126	def parseString(i):
	127	i += 1
	128	e = i
	129	while True:
	130	e = s.index('"', e)
	131	bslashes = 0
	132	while s[e-bslashes-1] == '\\':
	133	bslashes += 1
	134	if bslashes % 2 == 1:
	135	e += 1
	136	continue
	137	break
	138	rexp = re.compile(r'\\(u[dD][89aAbB][0-9a-fA-F]{2}\\u[0-9a-fA-F]{4}\|u[0-9a-fA-F]{4}\|.\|$)')
	139	stri = rexp.sub(decodeEscape, s[i:e])
	140	return (e+1,stri)
	141	def parseObj(i):
	142	i += 1
	143	res = {}
	144	i = skipSpace(i)
	145	if s[i] == '}': # Empty dictionary
	146	return (i+1,res)
	147	while True:
	148	if s[i] != '"':
	149	raiseError('Expected a string object key', i)
	150	i,key = parseString(i)
	151	i = skipSpace(i)
	152	if i >= len(s) or s[i] != ':':
	153	raiseError('Expected a colon', i)
	154	i,val = parse(i+1)
	155	res[key] = val
	156	i = skipSpace(i)
	157	if s[i] == '}':
	158	return (i+1, res)
	159	if s[i] != ',':
	160	raiseError('Expected comma or closing curly brace', i)
	161	i = skipSpace(i+1)
	162	def parseArray(i):
	163	res = []
	164	i = skipSpace(i+1)
	165	if s[i] == ']': # Empty array
	166	return (i+1,res)
	167	while True:
	168	i,val = parse(i)
	169	res.append(val)
	170	i = skipSpace(i) # Raise exception if premature end
	171	if s[i] == ']':
	172	return (i+1, res)
	173	if s[i] != ',':
	174	raiseError('Expected a comma or closing bracket', i)
	175	i = skipSpace(i+1)
	176	def parseDiscrete(i):
	177	for k,v in {'true': True, 'false': False, 'null': None}.items():
	178	if s.startswith(k, i):
	179	return (i+len(k), v)
	180	raiseError('Not a boolean (or null)', i)
	181	def parseNumber(i):
	182	mobj = re.match('^(-?(0\|[1-9][0-9])(\.[0-9])?([eE][+-]?[0-9]+)?)', s[i:])
	183	if mobj is None:
	184	raiseError('Not a number', i)
	185	nums = mobj.group(1)
	186	if '.' in nums or 'e' in nums or 'E' in nums:
	187	return (i+len(nums), float(nums))
	188	return (i+len(nums), int(nums))
	189	CHARMAP = {'{': parseObj, '[': parseArray, '"': parseString, 't': parseDiscrete, 'f': parseDiscrete, 'n': parseDiscrete}
	190	def parse(i):
	191	i = skipSpace(i)
	192	i,res = CHARMAP.get(s[i], parseNumber)(i)
	193	i = skipSpace(i, False)
	194	return (i,res)
	195	i,res = parse(0)
	196	if i < len(s):
	197	raise ValueError('Extra data at end of input (index ' + str(i) + ' of ' + repr(s) + ': ' + repr(s[i:]) + ')')
	198	return res
	199
	200	def preferredencoding():
	201	"""Get preferred encoding.
	202
	203	Returns the best encoding scheme for the system, based on
	204	locale.getpreferredencoding() and some further tweaks.
	205	"""
	206	def yield_preferredencoding():
	207	try:
	208	pref = locale.getpreferredencoding()
	209	u'TEST'.encode(pref)
	210	except:
	211	pref = 'UTF-8'
	212	while True:
	213	yield pref
	214	return yield_preferredencoding().next()
	215
	216
	217	def htmlentity_transform(matchobj):
	218	"""Transforms an HTML entity to a Unicode character.
	219
	220	This function receives a match object and is intended to be used with
	221	the re.sub() function.
	222	"""
	223	entity = matchobj.group(1)
	224
	225	# Known non-numeric HTML entity
	226	if entity in htmlentitydefs.name2codepoint:
	227	return unichr(htmlentitydefs.name2codepoint[entity])
	228
	229	# Unicode character
	230	mobj = re.match(ur'(?u)#(x?\d+)', entity)
	231	if mobj is not None:
	232	numstr = mobj.group(1)
	233	if numstr.startswith(u'x'):
	234	base = 16
	235	numstr = u'0%s' % numstr
	236	else:
	237	base = 10
	238	return unichr(long(numstr, base))
	239
	240	# Unknown entity in name, return its literal representation
	241	return (u'&%s;' % entity)
	242
	243
	244	def sanitize_title(utitle):
	245	"""Sanitizes a video title so it could be used as part of a filename."""
	246	utitle = re.sub(ur'(?u)&(.+?);', htmlentity_transform, utitle)
	247	return utitle.replace(unicode(os.sep), u'%')
	248
	249
	250	def sanitize_open(filename, open_mode):
	251	"""Try to open the given filename, and slightly tweak it if this fails.
	252
	253	Attempts to open the given filename. If this fails, it tries to change
	254	the filename slightly, step by step, until it's either able to open it
	255	or it fails and raises a final exception, like the standard open()
	256	function.
	257
	258	It returns the tuple (stream, definitive_file_name).
	259	"""
	260	try:
	261	if filename == u'-':
	262	if sys.platform == 'win32':
	263	import msvcrt
	264	msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
	265	return (sys.stdout, filename)
	266	stream = open(_encodeFilename(filename), open_mode)
	267	return (stream, filename)
	268	except (IOError, OSError), err:
	269	# In case of error, try to remove win32 forbidden chars
	270	filename = re.sub(ur'[/<>:"\\|\?\*]', u'#', filename)
	271
	272	# An exception here should be caught in the caller
	273	stream = open(_encodeFilename(filename), open_mode)
	274	return (stream, filename)
	275
	276
	277	def timeconvert(timestr):
	278	"""Convert RFC 2822 defined time string into system timestamp"""
	279	timestamp = None
	280	timetuple = email.utils.parsedate_tz(timestr)
	281	if timetuple is not None:
	282	timestamp = email.utils.mktime_tz(timetuple)
	283	return timestamp
	284
	285	def _simplify_title(title):
	286	expr = re.compile(ur'[^\w\d_\-]+', flags=re.UNICODE)
	287	return expr.sub(u'_', title).strip(u'_')
	288
	289	def _orderedSet(iterable):
	290	""" Remove all duplicates from the input iterable """
	291	res = []
	292	for el in iterable:
	293	if el not in res:
	294	res.append(el)
	295	return res
	296
	297	def _unescapeHTML(s):
	298	"""
	299	@param s a string (of type unicode)
	300	"""
	301	assert type(s) == type(u'')
	302
	303	htmlParser = HTMLParser.HTMLParser()
	304	return htmlParser.unescape(s)
	305
	306	def _encodeFilename(s):
	307	"""
	308	@param s The name of the file (of type unicode)
	309	"""
	310
	311	assert type(s) == type(u'')
	312
	313	if sys.platform == 'win32' and sys.getwindowsversion().major >= 5:
	314	# Pass u'' directly to use Unicode APIs on Windows 2000 and up
	315	# (Detecting Windows NT 4 is tricky because 'major >= 4' would
	316	# match Windows 9x series as well. Besides, NT 4 is obsolete.)
	317	return s
	318	else:
	319	return s.encode(sys.getfilesystemencoding(), 'ignore')
	320
	321	class DownloadError(Exception):
	322	"""Download Error exception.
	323
	324	This exception may be thrown by FileDownloader objects if they are not
	325	configured to continue on errors. They will contain the appropriate
	326	error message.
	327	"""
	328	pass
	329
	330
	331	class SameFileError(Exception):
	332	"""Same File exception.
	333
	334	This exception will be thrown by FileDownloader objects if they detect
	335	multiple files would have to be downloaded to the same file on disk.
	336	"""
	337	pass
	338
	339
	340	class PostProcessingError(Exception):
	341	"""Post Processing exception.
	342
	343	This exception may be raised by PostProcessor's .run() method to
	344	indicate an error in the postprocessing task.
	345	"""
	346	pass
	347
	348	class MaxDownloadsReached(Exception):
	349	""" --max-downloads limit has been reached. """
	350	pass
	351
	352
	353	class UnavailableVideoError(Exception):
	354	"""Unavailable Format exception.
	355
	356	This exception will be thrown when a video is requested
	357	in a format that is not available for that video.
	358	"""
	359	pass
	360
	361
	362	class ContentTooShortError(Exception):
	363	"""Content Too Short exception.
	364
	365	This exception may be raised by FileDownloader objects when a file they
	366	download is too small for what the server announced first, indicating
	367	the connection was probably interrupted.
	368	"""
	369	# Both in bytes
	370	downloaded = None
	371	expected = None
	372
	373	def __init__(self, downloaded, expected):
	374	self.downloaded = downloaded
	375	self.expected = expected
	376
	377
	378	class YoutubeDLHandler(urllib2.HTTPHandler):
	379	"""Handler for HTTP requests and responses.
	380
	381	This class, when installed with an OpenerDirector, automatically adds
	382	the standard headers to every HTTP request and handles gzipped and
	383	deflated responses from web servers. If compression is to be avoided in
	384	a particular request, the original request in the program code only has
	385	to include the HTTP header "Youtubedl-No-Compression", which will be
	386	removed before making the real request.
	387
	388	Part of this code was copied from:
	389
	390	http://techknack.net/python-urllib2-handlers/
	391
	392	Andrew Rowls, the author of that code, agreed to release it to the
	393	public domain.
	394	"""
	395
	396	@staticmethod
	397	def deflate(data):
	398	try:
	399	return zlib.decompress(data, -zlib.MAX_WBITS)
	400	except zlib.error:
	401	return zlib.decompress(data)
	402
	403	@staticmethod
	404	def addinfourl_wrapper(stream, headers, url, code):
	405	if hasattr(urllib2.addinfourl, 'getcode'):
	406	return urllib2.addinfourl(stream, headers, url, code)
	407	ret = urllib2.addinfourl(stream, headers, url)
	408	ret.code = code
	409	return ret
	410
	411	def http_request(self, req):
	412	for h in std_headers:
	413	if h in req.headers:
	414	del req.headers[h]
	415	req.add_header(h, std_headers[h])
	416	if 'Youtubedl-no-compression' in req.headers:
	417	if 'Accept-encoding' in req.headers:
	418	del req.headers['Accept-encoding']
	419	del req.headers['Youtubedl-no-compression']
	420	return req
	421
	422	def http_response(self, req, resp):
	423	old_resp = resp
	424	# gzip
	425	if resp.headers.get('Content-encoding', '') == 'gzip':
	426	gz = gzip.GzipFile(fileobj=StringIO.StringIO(resp.read()), mode='r')
	427	resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
	428	resp.msg = old_resp.msg
	429	# deflate
	430	if resp.headers.get('Content-encoding', '') == 'deflate':
	431	gz = StringIO.StringIO(self.deflate(resp.read()))
	432	resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
	433	resp.msg = old_resp.msg
	434	return resp
	435
	436
	437	class FileDownloader(object):
	438	"""File Downloader class.
	439
	440	File downloader objects are the ones responsible of downloading the
	441	actual video file and writing it to disk if the user has requested
	442	it, among some other tasks. In most cases there should be one per
	443	program. As, given a video URL, the downloader doesn't know how to
	444	extract all the needed information, task that InfoExtractors do, it
	445	has to pass the URL to one of them.
	446
	447	For this, file downloader objects have a method that allows
	448	InfoExtractors to be registered in a given order. When it is passed
	449	a URL, the file downloader handles it to the first InfoExtractor it
	450	finds that reports being able to handle it. The InfoExtractor extracts
	451	all the information about the video or videos the URL refers to, and
	452	asks the FileDownloader to process the video information, possibly
	453	downloading the video.
	454
	455	File downloaders accept a lot of parameters. In order not to saturate
	456	the object constructor with arguments, it receives a dictionary of
	457	options instead. These options are available through the params
	458	attribute for the InfoExtractors to use. The FileDownloader also
	459	registers itself as the downloader in charge for the InfoExtractors
	460	that are added to it, so this is a "mutual registration".
	461
	462	Available options:
	463
	464	username: Username for authentication purposes.
	465	password: Password for authentication purposes.
	466	usenetrc: Use netrc for authentication instead.
	467	quiet: Do not print messages to stdout.
	468	forceurl: Force printing final URL.
	469	forcetitle: Force printing title.
	470	forcethumbnail: Force printing thumbnail URL.
	471	forcedescription: Force printing description.
	472	forcefilename: Force printing final filename.
	473	simulate: Do not download the video files.
	474	format: Video format code.
	475	format_limit: Highest quality format to try.
	476	outtmpl: Template for output names.
	477	ignoreerrors: Do not stop on download errors.
	478	ratelimit: Download speed limit, in bytes/sec.
	479	nooverwrites: Prevent overwriting files.
	480	retries: Number of times to retry for HTTP error 5xx
	481	continuedl: Try to continue downloads if possible.
	482	noprogress: Do not print the progress bar.
	483	playliststart: Playlist item to start at.
	484	playlistend: Playlist item to end at.
	485	matchtitle: Download only matching titles.
	486	rejecttitle: Reject downloads for matching titles.
	487	logtostderr: Log messages to stderr instead of stdout.
	488	consoletitle: Display progress in console window's titlebar.
	489	nopart: Do not use temporary .part files.
	490	updatetime: Use the Last-modified header to set output file timestamps.
	491	writedescription: Write the video description to a .description file
	492	writeinfojson: Write the video description to a .info.json file
	493	writesubtitles: Write the video subtitles to a .srt file
	494	subtitleslang: Language of the subtitles to download
	495	"""
	496
	497	params = None
	498	_ies = []
	499	_pps = []
	500	_download_retcode = None
	501	_num_downloads = None
	502	_screen_file = None
	503
	504	def __init__(self, params):
	505	"""Create a FileDownloader object with the given options."""
	506	self._ies = []
	507	self._pps = []
	508	self._download_retcode = 0
	509	self._num_downloads = 0
	510	self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
	511	self.params = params
	512
	513	@staticmethod
	514	def format_bytes(bytes):
	515	if bytes is None:
	516	return 'N/A'
	517	if type(bytes) is str:
	518	bytes = float(bytes)
	519	if bytes == 0.0:
	520	exponent = 0
	521	else:
	522	exponent = long(math.log(bytes, 1024.0))
	523	suffix = 'bkMGTPEZY'[exponent]
	524	converted = float(bytes) / float(1024 ** exponent)
	525	return '%.2f%s' % (converted, suffix)
	526
	527	@staticmethod
	528	def calc_percent(byte_counter, data_len):
	529	if data_len is None:
	530	return '---.-%'
	531	return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))
	532
	533	@staticmethod
	534	def calc_eta(start, now, total, current):
	535	if total is None:
	536	return '--:--'
	537	dif = now - start
	538	if current == 0 or dif < 0.001: # One millisecond
	539	return '--:--'
	540	rate = float(current) / dif
	541	eta = long((float(total) - float(current)) / rate)
	542	(eta_mins, eta_secs) = divmod(eta, 60)
	543	if eta_mins > 99:
	544	return '--:--'
	545	return '%02d:%02d' % (eta_mins, eta_secs)
	546
	547	@staticmethod
	548	def calc_speed(start, now, bytes):
	549	dif = now - start
	550	if bytes == 0 or dif < 0.001: # One millisecond
	551	return '%10s' % '---b/s'
	552	return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))
	553
	554	@staticmethod
	555	def best_block_size(elapsed_time, bytes):
	556	new_min = max(bytes / 2.0, 1.0)
	557	new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
	558	if elapsed_time < 0.001:
	559	return long(new_max)
	560	rate = bytes / elapsed_time
	561	if rate > new_max:
	562	return long(new_max)
	563	if rate < new_min:
	564	return long(new_min)
	565	return long(rate)
	566
	567	@staticmethod
	568	def parse_bytes(bytestr):
	569	"""Parse a string indicating a byte quantity into a long integer."""
	570	matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
	571	if matchobj is None:
	572	return None
	573	number = float(matchobj.group(1))
	574	multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
	575	return long(round(number * multiplier))
	576
	577	def add_info_extractor(self, ie):
	578	"""Add an InfoExtractor object to the end of the list."""
	579	self._ies.append(ie)
	580	ie.set_downloader(self)
	581
	582	def add_post_processor(self, pp):
	583	"""Add a PostProcessor object to the end of the chain."""
	584	self._pps.append(pp)
	585	pp.set_downloader(self)
	586
	587	def to_screen(self, message, skip_eol=False):
	588	"""Print message to stdout if not in quiet mode."""
	589	assert type(message) == type(u'')
	590	if not self.params.get('quiet', False):
	591	terminator = [u'\n', u''][skip_eol]
	592	output = message + terminator
	593
	594	if 'b' not in self._screen_file.mode or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
	595	output = output.encode(preferredencoding(), 'ignore')
	596	self._screen_file.write(output)
	597	self._screen_file.flush()
	598
	599	def to_stderr(self, message):
	600	"""Print message to stderr."""
	601	print >>sys.stderr, message.encode(preferredencoding())
	602
	603	def to_cons_title(self, message):
	604	"""Set console/terminal window title to message."""
	605	if not self.params.get('consoletitle', False):
	606	return
	607	if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
	608	# c_wchar_p() might not be necessary if `message` is
	609	# already of type unicode()
	610	ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
	611	elif 'TERM' in os.environ:
	612	sys.stderr.write('\033]0;%s\007' % message.encode(preferredencoding()))
	613
	614	def fixed_template(self):
	615	"""Checks if the output template is fixed."""
	616	return (re.search(ur'(?u)%$.+?$s', self.params['outtmpl']) is None)
	617
	618	def trouble(self, message=None):
	619	"""Determine action to take when a download problem appears.
	620
	621	Depending on if the downloader has been configured to ignore
	622	download errors or not, this method may throw an exception or
	623	not when errors are found, after printing the message.
	624	"""
	625	if message is not None:
	626	self.to_stderr(message)
	627	if not self.params.get('ignoreerrors', False):
	628	raise DownloadError(message)
	629	self._download_retcode = 1
	630
	631	def slow_down(self, start_time, byte_counter):
	632	"""Sleep if the download speed is over the rate limit."""
	633	rate_limit = self.params.get('ratelimit', None)
	634	if rate_limit is None or byte_counter == 0:
	635	return
	636	now = time.time()
	637	elapsed = now - start_time
	638	if elapsed <= 0.0:
	639	return
	640	speed = float(byte_counter) / elapsed
	641	if speed > rate_limit:
	642	time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
	643
	644	def temp_name(self, filename):
	645	"""Returns a temporary filename for the given filename."""
	646	if self.params.get('nopart', False) or filename == u'-' or \
	647	(os.path.exists(_encodeFilename(filename)) and not os.path.isfile(_encodeFilename(filename))):
	648	return filename
	649	return filename + u'.part'
	650
	651	def undo_temp_name(self, filename):
	652	if filename.endswith(u'.part'):
	653	return filename[:-len(u'.part')]
	654	return filename
	655
	656	def try_rename(self, old_filename, new_filename):
	657	try:
	658	if old_filename == new_filename:
	659	return
	660	os.rename(_encodeFilename(old_filename), _encodeFilename(new_filename))
	661	except (IOError, OSError), err:
	662	self.trouble(u'ERROR: unable to rename file')
	663
	664	def try_utime(self, filename, last_modified_hdr):
	665	"""Try to set the last-modified time of the given file."""
	666	if last_modified_hdr is None:
	667	return
	668	if not os.path.isfile(_encodeFilename(filename)):
	669	return
	670	timestr = last_modified_hdr
	671	if timestr is None:
	672	return
	673	filetime = timeconvert(timestr)
	674	if filetime is None:
	675	return filetime
	676	try:
	677	os.utime(filename, (time.time(), filetime))
	678	except:
	679	pass
	680	return filetime
	681
	682	def report_writedescription(self, descfn):
	683	""" Report that the description file is being written """
	684	self.to_screen(u'[info] Writing video description to: ' + descfn)
	685
	686	def report_writesubtitles(self, srtfn):
	687	""" Report that the subtitles file is being written """
	688	self.to_screen(u'[info] Writing video subtitles to: ' + srtfn)
	689
	690	def report_writeinfojson(self, infofn):
	691	""" Report that the metadata file has been written """
	692	self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)
	693
	694	def report_destination(self, filename):
	695	"""Report destination filename."""
	696	self.to_screen(u'[download] Destination: ' + filename)
	697
	698	def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
	699	"""Report download progress."""
	700	if self.params.get('noprogress', False):
	701	return
	702	self.to_screen(u'\r[download] %s of %s at %s ETA %s' %
	703	(percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
	704	self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
	705	(percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
	706
	707	def report_resuming_byte(self, resume_len):
	708	"""Report attempt to resume at given byte."""
	709	self.to_screen(u'[download] Resuming download at byte %s' % resume_len)
	710
	711	def report_retry(self, count, retries):
	712	"""Report retry in case of HTTP error 5xx"""
	713	self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))
	714
	715	def report_file_already_downloaded(self, file_name):
	716	"""Report file has already been fully downloaded."""
	717	try:
	718	self.to_screen(u'[download] %s has already been downloaded' % file_name)
	719	except (UnicodeEncodeError), err:
	720	self.to_screen(u'[download] The file has already been downloaded')
	721
	722	def report_unable_to_resume(self):
	723	"""Report it was impossible to resume download."""
	724	self.to_screen(u'[download] Unable to resume')
	725
	726	def report_finish(self):
	727	"""Report download finished."""
	728	if self.params.get('noprogress', False):
	729	self.to_screen(u'[download] Download completed')
	730	else:
	731	self.to_screen(u'')
	732
	733	def increment_downloads(self):
	734	"""Increment the ordinal that assigns a number to each file."""
	735	self._num_downloads += 1
	736
	737	def prepare_filename(self, info_dict):
	738	"""Generate the output filename."""
	739	try:
	740	template_dict = dict(info_dict)
	741	template_dict['epoch'] = unicode(long(time.time()))
	742	template_dict['autonumber'] = unicode('%05d' % self._num_downloads)
	743	filename = self.params['outtmpl'] % template_dict
	744	return filename
	745	except (ValueError, KeyError), err:
	746	self.trouble(u'ERROR: invalid system charset or erroneous output template')
	747	return None
	748
	749	def _match_entry(self, info_dict):
	750	""" Returns None iff the file should be downloaded """
	751
	752	title = info_dict['title']
	753	matchtitle = self.params.get('matchtitle', False)
	754	if matchtitle and not re.search(matchtitle, title, re.IGNORECASE):
	755	return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
	756	rejecttitle = self.params.get('rejecttitle', False)
	757	if rejecttitle and re.search(rejecttitle, title, re.IGNORECASE):
	758	return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
	759	return None
	760
	761	def process_info(self, info_dict):
	762	"""Process a single dictionary returned by an InfoExtractor."""
	763
	764	reason = self._match_entry(info_dict)
	765	if reason is not None:
	766	self.to_screen(u'[download] ' + reason)
	767	return
	768
	769	max_downloads = self.params.get('max_downloads')
	770	if max_downloads is not None:
	771	if self._num_downloads > int(max_downloads):
	772	raise MaxDownloadsReached()
	773
	774	filename = self.prepare_filename(info_dict)
	775
	776	# Forced printings
	777	if self.params.get('forcetitle', False):
	778	print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')
	779	if self.params.get('forceurl', False):
	780	print info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace')
	781	if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
	782	print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace')
	783	if self.params.get('forcedescription', False) and 'description' in info_dict:
	784	print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace')
	785	if self.params.get('forcefilename', False) and filename is not None:
	786	print filename.encode(preferredencoding(), 'xmlcharrefreplace')
	787	if self.params.get('forceformat', False):
	788	print info_dict['format'].encode(preferredencoding(), 'xmlcharrefreplace')
	789
	790	# Do nothing else if in simulate mode
	791	if self.params.get('simulate', False):
	792	return
	793
	794	if filename is None:
	795	return
	796
	797	try:
	798	dn = os.path.dirname(_encodeFilename(filename))
	799	if dn != '' and not os.path.exists(dn): # dn is already encoded
	800	os.makedirs(dn)
	801	except (OSError, IOError), err:
	802	self.trouble(u'ERROR: unable to create directory ' + unicode(err))
	803	return
	804
	805	if self.params.get('writedescription', False):
	806	try:
	807	descfn = filename + u'.description'
	808	self.report_writedescription(descfn)
	809	descfile = open(_encodeFilename(descfn), 'wb')
	810	try:
	811	descfile.write(info_dict['description'].encode('utf-8'))
	812	finally:
	813	descfile.close()
	814	except (OSError, IOError):
	815	self.trouble(u'ERROR: Cannot write description file ' + descfn)
	816	return
	817
	818	if self.params.get('writesubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']:
	819	# subtitles download errors are already managed as troubles in relevant IE
	820	# that way it will silently go on when used with unsupporting IE
	821	try:
	822	srtfn = filename.rsplit('.', 1)[0] + u'.srt'
	823	self.report_writesubtitles(srtfn)
	824	srtfile = open(_encodeFilename(srtfn), 'wb')
	825	try:
	826	srtfile.write(info_dict['subtitles'].encode('utf-8'))
	827	finally:
	828	srtfile.close()
	829	except (OSError, IOError):
	830	self.trouble(u'ERROR: Cannot write subtitles file ' + descfn)
	831	return
	832
	833	if self.params.get('writeinfojson', False):
	834	infofn = filename + u'.info.json'
	835	self.report_writeinfojson(infofn)
	836	try:
	837	json.dump
	838	except (NameError,AttributeError):
	839	self.trouble(u'ERROR: No JSON encoder found. Update to Python 2.6+, setup a json module, or leave out --write-info-json.')
	840	return
	841	try:
	842	infof = open(_encodeFilename(infofn), 'wb')
	843	try:
	844	json_info_dict = dict((k,v) for k,v in info_dict.iteritems() if not k in ('urlhandle',))
	845	json.dump(json_info_dict, infof)
	846	finally:
	847	infof.close()
	848	except (OSError, IOError):
	849	self.trouble(u'ERROR: Cannot write metadata to JSON file ' + infofn)
	850	return
	851
	852	if not self.params.get('skip_download', False):
	853	if self.params.get('nooverwrites', False) and os.path.exists(_encodeFilename(filename)):
	854	success = True
	855	else:
	856	try:
	857	success = self._do_download(filename, info_dict)
	858	except (OSError, IOError), err:
	859	raise UnavailableVideoError
	860	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	861	self.trouble(u'ERROR: unable to download video data: %s' % str(err))
	862	return
	863	except (ContentTooShortError, ), err:
	864	self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
	865	return
	866
	867	if success:
	868	try:
	869	self.post_process(filename, info_dict)
	870	except (PostProcessingError), err:
	871	self.trouble(u'ERROR: postprocessing: %s' % str(err))
	872	return
	873
	874	def download(self, url_list):
	875	"""Download a given list of URLs."""
	876	if len(url_list) > 1 and self.fixed_template():
	877	raise SameFileError(self.params['outtmpl'])
	878
	879	for url in url_list:
	880	suitable_found = False
	881	for ie in self._ies:
	882	# Go to next InfoExtractor if not suitable
	883	if not ie.suitable(url):
	884	continue
	885
	886	# Suitable InfoExtractor found
	887	suitable_found = True
	888
	889	# Extract information from URL and process it
	890	ie.extract(url)
	891
	892	# Suitable InfoExtractor had been found; go to next URL
	893	break
	894
	895	if not suitable_found:
	896	self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url)
	897
	898	return self._download_retcode
	899
	900	def post_process(self, filename, ie_info):
	901	"""Run the postprocessing chain on the given file."""
	902	info = dict(ie_info)
	903	info['filepath'] = filename
	904	for pp in self._pps:
	905	info = pp.run(info)
	906	if info is None:
	907	break
	908
	909	def _download_with_rtmpdump(self, filename, url, player_url):
	910	self.report_destination(filename)
	911	tmpfilename = self.temp_name(filename)
	912
	913	# Check for rtmpdump first
	914	try:
	915	subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
	916	except (OSError, IOError):
	917	self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
	918	return False
	919
	920	# Download using rtmpdump. rtmpdump returns exit code 2 when
	921	# the connection was interrumpted and resuming appears to be
	922	# possible. This is part of rtmpdump's normal usage, AFAIK.
	923	basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename]
	924	args = basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)]
	925	if self.params.get('verbose', False):
	926	try:
	927	import pipes
	928	shell_quote = lambda args: ' '.join(map(pipes.quote, args))
	929	except ImportError:
	930	shell_quote = repr
	931	self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(args))
	932	retval = subprocess.call(args)
	933	while retval == 2 or retval == 1:
	934	prevsize = os.path.getsize(_encodeFilename(tmpfilename))
	935	self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
	936	time.sleep(5.0) # This seems to be needed
	937	retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
	938	cursize = os.path.getsize(_encodeFilename(tmpfilename))
	939	if prevsize == cursize and retval == 1:
	940	break
	941	# Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
	942	if prevsize == cursize and retval == 2 and cursize > 1024:
	943	self.to_screen(u'\r[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
	944	retval = 0
	945	break
	946	if retval == 0:
	947	self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(_encodeFilename(tmpfilename)))
	948	self.try_rename(tmpfilename, filename)
	949	return True
	950	else:
	951	self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval)
	952	return False
	953
	954	def _do_download(self, filename, info_dict):
	955	url = info_dict['url']
	956	player_url = info_dict.get('player_url', None)
	957
	958	# Check file already present
	959	if self.params.get('continuedl', False) and os.path.isfile(_encodeFilename(filename)) and not self.params.get('nopart', False):
	960	self.report_file_already_downloaded(filename)
	961	return True
	962
	963	# Attempt to download using rtmpdump
	964	if url.startswith('rtmp'):
	965	return self._download_with_rtmpdump(filename, url, player_url)
	966
	967	tmpfilename = self.temp_name(filename)
	968	stream = None
	969
	970	# Do not include the Accept-Encoding header
	971	headers = {'Youtubedl-no-compression': 'True'}
	972	basic_request = urllib2.Request(url, None, headers)
	973	request = urllib2.Request(url, None, headers)
	974
	975	# Establish possible resume length
	976	if os.path.isfile(_encodeFilename(tmpfilename)):
	977	resume_len = os.path.getsize(_encodeFilename(tmpfilename))
	978	else:
	979	resume_len = 0
	980
	981	open_mode = 'wb'
	982	if resume_len != 0:
	983	if self.params.get('continuedl', False):
	984	self.report_resuming_byte(resume_len)
	985	request.add_header('Range','bytes=%d-' % resume_len)
	986	open_mode = 'ab'
	987	else:
	988	resume_len = 0
	989
	990	count = 0
	991	retries = self.params.get('retries', 0)
	992	while count <= retries:
	993	# Establish connection
	994	try:
	995	if count == 0 and 'urlhandle' in info_dict:
	996	data = info_dict['urlhandle']
	997	data = urllib2.urlopen(request)
	998	break
	999	except (urllib2.HTTPError, ), err:
	1000	if (err.code < 500 or err.code >= 600) and err.code != 416:
	1001	# Unexpected HTTP error
	1002	raise
	1003	elif err.code == 416:
	1004	# Unable to resume (requested range not satisfiable)
	1005	try:
	1006	# Open the connection again without the range header
	1007	data = urllib2.urlopen(basic_request)
	1008	content_length = data.info()['Content-Length']
	1009	except (urllib2.HTTPError, ), err:
	1010	if err.code < 500 or err.code >= 600:
	1011	raise
	1012	else:
	1013	# Examine the reported length
	1014	if (content_length is not None and
	1015	(resume_len - 100 < long(content_length) < resume_len + 100)):
	1016	# The file had already been fully downloaded.
	1017	# Explanation to the above condition: in issue #175 it was revealed that
	1018	# YouTube sometimes adds or removes a few bytes from the end of the file,
	1019	# changing the file size slightly and causing problems for some users. So
	1020	# I decided to implement a suggested change and consider the file
	1021	# completely downloaded if the file size differs less than 100 bytes from
	1022	# the one in the hard drive.
	1023	self.report_file_already_downloaded(filename)
	1024	self.try_rename(tmpfilename, filename)
	1025	return True
	1026	else:
	1027	# The length does not match, we start the download over
	1028	self.report_unable_to_resume()
	1029	open_mode = 'wb'
	1030	break
	1031	# Retry
	1032	count += 1
	1033	if count <= retries:
	1034	self.report_retry(count, retries)
	1035
	1036	if count > retries:
	1037	self.trouble(u'ERROR: giving up after %s retries' % retries)
	1038	return False
	1039
	1040	data_len = data.info().get('Content-length', None)
	1041	if data_len is not None:
	1042	data_len = long(data_len) + resume_len
	1043	data_len_str = self.format_bytes(data_len)
	1044	byte_counter = 0 + resume_len
	1045	block_size = 1024
	1046	start = time.time()
	1047	while True:
	1048	# Download and write
	1049	before = time.time()
	1050	data_block = data.read(block_size)
	1051	after = time.time()
	1052	if len(data_block) == 0:
	1053	break
	1054	byte_counter += len(data_block)
	1055
	1056	# Open file just in time
	1057	if stream is None:
	1058	try:
	1059	(stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
	1060	assert stream is not None
	1061	filename = self.undo_temp_name(tmpfilename)
	1062	self.report_destination(filename)
	1063	except (OSError, IOError), err:
	1064	self.trouble(u'ERROR: unable to open for writing: %s' % str(err))
	1065	return False
	1066	try:
	1067	stream.write(data_block)
	1068	except (IOError, OSError), err:
	1069	self.trouble(u'\nERROR: unable to write data: %s' % str(err))
	1070	return False
	1071	block_size = self.best_block_size(after - before, len(data_block))
	1072
	1073	# Progress message
	1074	speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len)
	1075	if data_len is None:
	1076	self.report_progress('Unknown %', data_len_str, speed_str, 'Unknown ETA')
	1077	else:
	1078	percent_str = self.calc_percent(byte_counter, data_len)
	1079	eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
	1080	self.report_progress(percent_str, data_len_str, speed_str, eta_str)
	1081
	1082	# Apply rate limit
	1083	self.slow_down(start, byte_counter - resume_len)
	1084
	1085	if stream is None:
	1086	self.trouble(u'\nERROR: Did not get any data blocks')
	1087	return False
	1088	stream.close()
	1089	self.report_finish()
	1090	if data_len is not None and byte_counter != data_len:
	1091	raise ContentTooShortError(byte_counter, long(data_len))
	1092	self.try_rename(tmpfilename, filename)
	1093
	1094	# Update file modification time
	1095	if self.params.get('updatetime', True):
	1096	info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))
	1097
	1098	return True
	1099
	1100
	1101	class InfoExtractor(object):
	1102	"""Information Extractor class.
	1103
	1104	Information extractors are the classes that, given a URL, extract
	1105	information from the video (or videos) the URL refers to. This
	1106	information includes the real video URL, the video title and simplified
	1107	title, author and others. The information is stored in a dictionary
	1108	which is then passed to the FileDownloader. The FileDownloader
	1109	processes this information possibly downloading the video to the file
	1110	system, among other possible outcomes. The dictionaries must include
	1111	the following fields:
	1112
	1113	id: Video identifier.
	1114	url: Final video URL.
	1115	uploader: Nickname of the video uploader.
	1116	title: Literal title.
	1117	stitle: Simplified title.
	1118	ext: Video filename extension.
	1119	format: Video format.
	1120	player_url: SWF Player URL (may be None).
	1121
	1122	The following fields are optional. Their primary purpose is to allow
	1123	youtube-dl to serve as the backend for a video search function, such
	1124	as the one in youtube2mp3. They are only used when their respective
	1125	forced printing functions are called:
	1126
	1127	thumbnail: Full URL to a video thumbnail image.
	1128	description: One-line video description.
	1129
	1130	Subclasses of this one should re-define the _real_initialize() and
	1131	_real_extract() methods and define a _VALID_URL regexp.
	1132	Probably, they should also be added to the list of extractors.
	1133	"""
	1134
	1135	_ready = False
	1136	_downloader = None
	1137
	1138	def __init__(self, downloader=None):
	1139	"""Constructor. Receives an optional downloader."""
	1140	self._ready = False
	1141	self.set_downloader(downloader)
	1142
	1143	def suitable(self, url):
	1144	"""Receives a URL and returns True if suitable for this IE."""
	1145	return re.match(self._VALID_URL, url) is not None
	1146
	1147	def initialize(self):
	1148	"""Initializes an instance (authentication, etc)."""
	1149	if not self._ready:
	1150	self._real_initialize()
	1151	self._ready = True
	1152
	1153	def extract(self, url):
	1154	"""Extracts URL information and returns it in list of dicts."""
	1155	self.initialize()
	1156	return self._real_extract(url)
	1157
	1158	def set_downloader(self, downloader):
	1159	"""Sets the downloader for this IE."""
	1160	self._downloader = downloader
	1161
	1162	def _real_initialize(self):
	1163	"""Real initialization process. Redefine in subclasses."""
	1164	pass
	1165
	1166	def _real_extract(self, url):
	1167	"""Real extraction process. Redefine in subclasses."""
	1168	pass
	1169
	1170
	1171	class YoutubeIE(InfoExtractor):
	1172	"""Information extractor for youtube.com."""
	1173
	1174	_VALID_URL = r'^((?:https?://)?(?:youtu\.be/\|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?!view_play_list\|my_playlists\|artist\|playlist)(?:(?:(?:v\|embed\|e)/)\|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?\|#!?)(?:.+&)?v=))?)?([0-9A-Za-z_-]+)(?(1).+)?$'
	1175	_LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
	1176	_LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en'
	1177	_AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
	1178	_NETRC_MACHINE = 'youtube'
	1179	# Listed in order of quality
	1180	_available_formats = ['38', '37', '22', '45', '35', '44', '34', '18', '43', '6', '5', '17', '13']
	1181	_available_formats_prefer_free = ['38', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '17', '13']
	1182	_video_extensions = {
	1183	'13': '3gp',
	1184	'17': 'mp4',
	1185	'18': 'mp4',
	1186	'22': 'mp4',
	1187	'37': 'mp4',
	1188	'38': 'video', # You actually don't know if this will be MOV, AVI or whatever
	1189	'43': 'webm',
	1190	'44': 'webm',
	1191	'45': 'webm',
	1192	}
	1193	_video_dimensions = {
	1194	'5': '240x400',
	1195	'6': '???',
	1196	'13': '???',
	1197	'17': '144x176',
	1198	'18': '360x640',
	1199	'22': '720x1280',
	1200	'34': '360x640',
	1201	'35': '480x854',
	1202	'37': '1080x1920',
	1203	'38': '3072x4096',
	1204	'43': '360x640',
	1205	'44': '480x854',
	1206	'45': '720x1280',
	1207	}
	1208	IE_NAME = u'youtube'
	1209
	1210	def report_lang(self):
	1211	"""Report attempt to set language."""
	1212	self._downloader.to_screen(u'[youtube] Setting language')
	1213
	1214	def report_login(self):
	1215	"""Report attempt to log in."""
	1216	self._downloader.to_screen(u'[youtube] Logging in')
	1217
	1218	def report_age_confirmation(self):
	1219	"""Report attempt to confirm age."""
	1220	self._downloader.to_screen(u'[youtube] Confirming age')
	1221
	1222	def report_video_webpage_download(self, video_id):
	1223	"""Report attempt to download video webpage."""
	1224	self._downloader.to_screen(u'[youtube] %s: Downloading video webpage' % video_id)
	1225
	1226	def report_video_info_webpage_download(self, video_id):
	1227	"""Report attempt to download video info webpage."""
	1228	self._downloader.to_screen(u'[youtube] %s: Downloading video info webpage' % video_id)
	1229
	1230	def report_video_subtitles_download(self, video_id):
	1231	"""Report attempt to download video info webpage."""
	1232	self._downloader.to_screen(u'[youtube] %s: Downloading video subtitles' % video_id)
	1233
	1234	def report_information_extraction(self, video_id):
	1235	"""Report attempt to extract video information."""
	1236	self._downloader.to_screen(u'[youtube] %s: Extracting video information' % video_id)
	1237
	1238	def report_unavailable_format(self, video_id, format):
	1239	"""Report extracted video URL."""
	1240	self._downloader.to_screen(u'[youtube] %s: Format %s not available' % (video_id, format))
	1241
	1242	def report_rtmp_download(self):
	1243	"""Indicate the download will use the RTMP protocol."""
	1244	self._downloader.to_screen(u'[youtube] RTMP download detected')
	1245
	1246	def _closed_captions_xml_to_srt(self, xml_string):
	1247	srt = ''
	1248	texts = re.findall(r'<text start="([\d\.]+)"( dur="([\d\.]+)")?>([^<]+)</text>', xml_string, re.MULTILINE)
	1249	# TODO parse xml instead of regex
	1250	for n, (start, dur_tag, dur, caption) in enumerate(texts):
	1251	if not dur: dur = '4'
	1252	start = float(start)
	1253	end = start + float(dur)
	1254	start = "%02i:%02i:%02i,%03i" %(start/(6060), start/60%60, start%60, start%11000)
	1255	end = "%02i:%02i:%02i,%03i" %(end/(6060), end/60%60, end%60, end%11000)
	1256	caption = re.sub(ur'(?u)&(.+?);', htmlentity_transform, caption)
	1257	caption = re.sub(ur'(?u)&(.+?);', htmlentity_transform, caption) # double cycle, inentional
	1258	srt += str(n) + '\n'
	1259	srt += start + ' --> ' + end + '\n'
	1260	srt += caption + '\n\n'
	1261	return srt
	1262
	1263	def _print_formats(self, formats):
	1264	print 'Available formats:'
	1265	for x in formats:
	1266	print '%s\t:\t%s\t[%s]' %(x, self._video_extensions.get(x, 'flv'), self._video_dimensions.get(x, '???'))
	1267
	1268	def _real_initialize(self):
	1269	if self._downloader is None:
	1270	return
	1271
	1272	username = None
	1273	password = None
	1274	downloader_params = self._downloader.params
	1275
	1276	# Attempt to use provided username and password or .netrc data
	1277	if downloader_params.get('username', None) is not None:
	1278	username = downloader_params['username']
	1279	password = downloader_params['password']
	1280	elif downloader_params.get('usenetrc', False):
	1281	try:
	1282	info = netrc.netrc().authenticators(self._NETRC_MACHINE)
	1283	if info is not None:
	1284	username = info[0]
	1285	password = info[2]
	1286	else:
	1287	raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
	1288	except (IOError, netrc.NetrcParseError), err:
	1289	self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
	1290	return
	1291
	1292	# Set language
	1293	request = urllib2.Request(self._LANG_URL)
	1294	try:
	1295	self.report_lang()
	1296	urllib2.urlopen(request).read()
	1297	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	1298	self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err))
	1299	return
	1300
	1301	# No authentication to be performed
	1302	if username is None:
	1303	return
	1304
	1305	# Log in
	1306	login_form = {
	1307	'current_form': 'loginForm',
	1308	'next': '/',
	1309	'action_login': 'Log In',
	1310	'username': username,
	1311	'password': password,
	1312	}
	1313	request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form))
	1314	try:
	1315	self.report_login()
	1316	login_results = urllib2.urlopen(request).read()
	1317	if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
	1318	self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
	1319	return
	1320	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	1321	self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
	1322	return
	1323
	1324	# Confirm age
	1325	age_form = {
	1326	'next_url': '/',
	1327	'action_confirm': 'Confirm',
	1328	}
	1329	request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form))
	1330	try:
	1331	self.report_age_confirmation()
	1332	age_results = urllib2.urlopen(request).read()
	1333	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	1334	self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
	1335	return
	1336
	1337	def _real_extract(self, url):
	1338	# Extract video id from URL
	1339	mobj = re.match(self._VALID_URL, url)
	1340	if mobj is None:
	1341	self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
	1342	return
	1343	video_id = mobj.group(2)
	1344
	1345	# Get video webpage
	1346	self.report_video_webpage_download(video_id)
	1347	request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id)
	1348	try:
	1349	video_webpage = urllib2.urlopen(request).read()
	1350	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	1351	self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
	1352	return
	1353
	1354	# Attempt to extract SWF player URL
	1355	mobj = re.search(r'swfConfig.?"(http:\\/\\/.?watch.?-.?\.swf)"', video_webpage)
	1356	if mobj is not None:
	1357	player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
	1358	else:
	1359	player_url = None
	1360
	1361	# Get video info
	1362	self.report_video_info_webpage_download(video_id)
	1363	for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
	1364	video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
	1365	% (video_id, el_type))
	1366	request = urllib2.Request(video_info_url)
	1367	try:
	1368	video_info_webpage = urllib2.urlopen(request).read()
	1369	video_info = parse_qs(video_info_webpage)
	1370	if 'token' in video_info:
	1371	break
	1372	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	1373	self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
	1374	return
	1375	if 'token' not in video_info:
	1376	if 'reason' in video_info:
	1377	self._downloader.trouble(u'ERROR: YouTube said: %s' % video_info['reason'][0].decode('utf-8'))
	1378	else:
	1379	self._downloader.trouble(u'ERROR: "token" parameter not in video info for unknown reason')
	1380	return
	1381
	1382	# Start extracting information
	1383	self.report_information_extraction(video_id)
	1384
	1385	# uploader
	1386	if 'author' not in video_info:
	1387	self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
	1388	return
	1389	video_uploader = urllib.unquote_plus(video_info['author'][0])
	1390
	1391	# title
	1392	if 'title' not in video_info:
	1393	self._downloader.trouble(u'ERROR: unable to extract video title')
	1394	return
	1395	video_title = urllib.unquote_plus(video_info['title'][0])
	1396	video_title = video_title.decode('utf-8')
	1397	video_title = sanitize_title(video_title)
	1398
	1399	# simplified title
	1400	simple_title = _simplify_title(video_title)
	1401
	1402	# thumbnail image
	1403	if 'thumbnail_url' not in video_info:
	1404	self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
	1405	video_thumbnail = ''
	1406	else: # don't panic if we can't find it
	1407	video_thumbnail = urllib.unquote_plus(video_info['thumbnail_url'][0])
	1408
	1409	# upload date
	1410	upload_date = u'NA'
	1411	mobj = re.search(r'id="eow-date.?>(.?)</span>', video_webpage, re.DOTALL)
	1412	if mobj is not None:
	1413	upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
	1414	format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y']
	1415	for expression in format_expressions:
	1416	try:
	1417	upload_date = datetime.datetime.strptime(upload_date, expression).strftime('%Y%m%d')
	1418	except:
	1419	pass
	1420
	1421	# description
	1422	try:
	1423	lxml.etree
	1424	except NameError:
	1425	video_description = u'No description available.'
	1426	mobj = re.search(r'<meta name="description" content="(.*?)">', video_webpage)
	1427	if mobj is not None:
	1428	video_description = mobj.group(1).decode('utf-8')
	1429	else:
	1430	html_parser = lxml.etree.HTMLParser(encoding='utf-8')
	1431	vwebpage_doc = lxml.etree.parse(StringIO.StringIO(video_webpage), html_parser)
	1432	video_description = u''.join(vwebpage_doc.xpath('id("eow-description")//text()'))
	1433	# TODO use another parser
	1434
	1435	# closed captions
	1436	video_subtitles = None
	1437	if self._downloader.params.get('writesubtitles', False):
	1438	self.report_video_subtitles_download(video_id)
	1439	request = urllib2.Request('http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id)
	1440	try:
	1441	srt_list = urllib2.urlopen(request).read()
	1442	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	1443	self._downloader.trouble(u'WARNING: unable to download video subtitles: %s' % str(err))
	1444	else:
	1445	srt_lang_list = re.findall(r'lang_code="([\w\-]+)"', srt_list)
	1446	if srt_lang_list:
	1447	if self._downloader.params.get('subtitleslang', False):
	1448	srt_lang = self._downloader.params.get('subtitleslang')
	1449	elif 'en' in srt_lang_list:
	1450	srt_lang = 'en'
	1451	else:
	1452	srt_lang = srt_lang_list[0]
	1453	if not srt_lang in srt_lang_list:
	1454	self._downloader.trouble(u'WARNING: no closed captions found in the specified language')
	1455	else:
	1456	request = urllib2.Request('http://video.google.com/timedtext?hl=en&lang=%s&v=%s' % (srt_lang, video_id))
	1457	try:
	1458	srt_xml = urllib2.urlopen(request).read()
	1459	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	1460	self._downloader.trouble(u'WARNING: unable to download video subtitles: %s' % str(err))
	1461	else:
	1462	video_subtitles = self._closed_captions_xml_to_srt(srt_xml.decode('utf-8'))
	1463	else:
	1464	self._downloader.trouble(u'WARNING: video has no closed captions')
	1465
	1466	# token
	1467	video_token = urllib.unquote_plus(video_info['token'][0])
	1468
	1469	# Decide which formats to download
	1470	req_format = self._downloader.params.get('format', None)
	1471
	1472	if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
	1473	self.report_rtmp_download()
	1474	video_url_list = [(None, video_info['conn'][0])]
	1475	elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1:
	1476	url_data_strs = video_info['url_encoded_fmt_stream_map'][0].split(',')
	1477	url_data = [parse_qs(uds) for uds in url_data_strs]
	1478	url_data = filter(lambda ud: 'itag' in ud and 'url' in ud, url_data)
	1479	url_map = dict((ud['itag'][0], ud['url'][0]) for ud in url_data)
	1480
	1481	format_limit = self._downloader.params.get('format_limit', None)
	1482	available_formats = self._available_formats_prefer_free if self._downloader.params.get('prefer_free_formats', False) else self._available_formats
	1483	if format_limit is not None and format_limit in available_formats:
	1484	format_list = available_formats[available_formats.index(format_limit):]
	1485	else:
	1486	format_list = available_formats
	1487	existing_formats = [x for x in format_list if x in url_map]
	1488	if len(existing_formats) == 0:
	1489	self._downloader.trouble(u'ERROR: no known formats available for video')
	1490	return
	1491	if self._downloader.params.get('listformats', None):
	1492	self._print_formats(existing_formats)
	1493	return
	1494	if req_format is None or req_format == 'best':
	1495	video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
	1496	elif req_format == 'worst':
	1497	video_url_list = [(existing_formats[len(existing_formats)-1], url_map[existing_formats[len(existing_formats)-1]])] # worst quality
	1498	elif req_format in ('-1', 'all'):
	1499	video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
	1500	else:
	1501	# Specific formats. We pick the first in a slash-delimeted sequence.
	1502	# For example, if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
	1503	req_formats = req_format.split('/')
	1504	video_url_list = None
	1505	for rf in req_formats:
	1506	if rf in url_map:
	1507	video_url_list = [(rf, url_map[rf])]
	1508	break
	1509	if video_url_list is None:
	1510	self._downloader.trouble(u'ERROR: requested format not available')
	1511	return
	1512	else:
	1513	self._downloader.trouble(u'ERROR: no conn or url_encoded_fmt_stream_map information found in video info')
	1514	return
	1515
	1516	for format_param, video_real_url in video_url_list:
	1517	# At this point we have a new video
	1518	self._downloader.increment_downloads()
	1519
	1520	# Extension
	1521	video_extension = self._video_extensions.get(format_param, 'flv')
	1522
	1523	try:
	1524	# Process video information
	1525	self._downloader.process_info({
	1526	'id': video_id.decode('utf-8'),
	1527	'url': video_real_url.decode('utf-8'),
	1528	'uploader': video_uploader.decode('utf-8'),
	1529	'upload_date': upload_date,
	1530	'title': video_title,
	1531	'stitle': simple_title,
	1532	'ext': video_extension.decode('utf-8'),
	1533	'format': (format_param is None and u'NA' or format_param.decode('utf-8')),
	1534	'thumbnail': video_thumbnail.decode('utf-8'),
	1535	'description': video_description,
	1536	'player_url': player_url,
	1537	'subtitles': video_subtitles
	1538	})
	1539	except UnavailableVideoError, err:
	1540	self._downloader.trouble(u'\nERROR: unable to download video')
	1541
	1542
	1543	class MetacafeIE(InfoExtractor):
	1544	"""Information Extractor for metacafe.com."""
	1545
	1546	_VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
	1547	_DISCLAIMER = 'http://www.metacafe.com/family_filter/'
	1548	_FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
	1549	_youtube_ie = None
	1550	IE_NAME = u'metacafe'
	1551
	1552	def __init__(self, youtube_ie, downloader=None):
	1553	InfoExtractor.__init__(self, downloader)
	1554	self._youtube_ie = youtube_ie
	1555
	1556	def report_disclaimer(self):
	1557	"""Report disclaimer retrieval."""
	1558	self._downloader.to_screen(u'[metacafe] Retrieving disclaimer')
	1559
	1560	def report_age_confirmation(self):
	1561	"""Report attempt to confirm age."""
	1562	self._downloader.to_screen(u'[metacafe] Confirming age')
	1563
	1564	def report_download_webpage(self, video_id):
	1565	"""Report webpage download."""
	1566	self._downloader.to_screen(u'[metacafe] %s: Downloading webpage' % video_id)
	1567
	1568	def report_extraction(self, video_id):
	1569	"""Report information extraction."""
	1570	self._downloader.to_screen(u'[metacafe] %s: Extracting information' % video_id)
	1571
	1572	def _real_initialize(self):
	1573	# Retrieve disclaimer
	1574	request = urllib2.Request(self._DISCLAIMER)
	1575	try:
	1576	self.report_disclaimer()
	1577	disclaimer = urllib2.urlopen(request).read()
	1578	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	1579	self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % str(err))
	1580	return
	1581
	1582	# Confirm age
	1583	disclaimer_form = {
	1584	'filters': '0',
	1585	'submit': "Continue - I'm over 18",
	1586	}
	1587	request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form))
	1588	try:
	1589	self.report_age_confirmation()
	1590	disclaimer = urllib2.urlopen(request).read()
	1591	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	1592	self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
	1593	return
	1594
	1595	def _real_extract(self, url):
	1596	# Extract id and simplified title from URL
	1597	mobj = re.match(self._VALID_URL, url)
	1598	if mobj is None:
	1599	self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
	1600	return
	1601
	1602	video_id = mobj.group(1)
	1603
	1604	# Check if video comes from YouTube
	1605	mobj2 = re.match(r'^yt-(.*)$', video_id)
	1606	if mobj2 is not None:
	1607	self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % mobj2.group(1))
	1608	return
	1609
	1610	# At this point we have a new video
	1611	self._downloader.increment_downloads()
	1612
	1613	simple_title = mobj.group(2).decode('utf-8')
	1614
	1615	# Retrieve video webpage to extract further information
	1616	request = urllib2.Request('http://www.metacafe.com/watch/%s/' % video_id)
	1617	try:
	1618	self.report_download_webpage(video_id)
	1619	webpage = urllib2.urlopen(request).read()
	1620	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	1621	self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
	1622	return
	1623
	1624	# Extract URL, uploader and title from webpage
	1625	self.report_extraction(video_id)
	1626	mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
	1627	if mobj is not None:
	1628	mediaURL = urllib.unquote(mobj.group(1))
	1629	video_extension = mediaURL[-3:]
	1630
	1631	# Extract gdaKey if available
	1632	mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage)
	1633	if mobj is None:
	1634	video_url = mediaURL
	1635	else:
	1636	gdaKey = mobj.group(1)
	1637	video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)
	1638	else:
	1639	mobj = re.search(r' name="flashvars" value="(.*?)"', webpage)
	1640	if mobj is None:
	1641	self._downloader.trouble(u'ERROR: unable to extract media URL')
	1642	return
	1643	vardict = parse_qs(mobj.group(1))
	1644	if 'mediaData' not in vardict:
	1645	self._downloader.trouble(u'ERROR: unable to extract media URL')
	1646	return
	1647	mobj = re.search(r'"mediaURL":"(http.?)","key":"(.?)"', vardict['mediaData'][0])
	1648	if mobj is None:
	1649	self._downloader.trouble(u'ERROR: unable to extract media URL')
	1650	return
	1651	mediaURL = mobj.group(1).replace('\\/', '/')
	1652	video_extension = mediaURL[-3:]
	1653	video_url = '%s?__gda__=%s' % (mediaURL, mobj.group(2))
	1654
	1655	mobj = re.search(r'(?im)<title>(.*) - Video</title>', webpage)
	1656	if mobj is None:
	1657	self._downloader.trouble(u'ERROR: unable to extract title')
	1658	return
	1659	video_title = mobj.group(1).decode('utf-8')
	1660	video_title = sanitize_title(video_title)
	1661
	1662	mobj = re.search(r'(?ms)By:\s<a .?>(.+?)<', webpage)
	1663	if mobj is None:
	1664	self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
	1665	return
	1666	video_uploader = mobj.group(1)
	1667
	1668	try:
	1669	# Process video information
	1670	self._downloader.process_info({
	1671	'id': video_id.decode('utf-8'),
	1672	'url': video_url.decode('utf-8'),
	1673	'uploader': video_uploader.decode('utf-8'),
	1674	'upload_date': u'NA',
	1675	'title': video_title,
	1676	'stitle': simple_title,
	1677	'ext': video_extension.decode('utf-8'),
	1678	'format': u'NA',
	1679	'player_url': None,
	1680	})
	1681	except UnavailableVideoError:
	1682	self._downloader.trouble(u'\nERROR: unable to download video')
	1683
	1684
	1685	class DailymotionIE(InfoExtractor):
	1686	"""Information Extractor for Dailymotion"""
	1687
	1688	_VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^_/]+)_([^/]+)'
	1689	IE_NAME = u'dailymotion'
	1690
	1691	def __init__(self, downloader=None):
	1692	InfoExtractor.__init__(self, downloader)
	1693
	1694	def report_download_webpage(self, video_id):
	1695	"""Report webpage download."""
	1696	self._downloader.to_screen(u'[dailymotion] %s: Downloading webpage' % video_id)
	1697
	1698	def report_extraction(self, video_id):
	1699	"""Report information extraction."""
	1700	self._downloader.to_screen(u'[dailymotion] %s: Extracting information' % video_id)
	1701
	1702	def _real_extract(self, url):
	1703	# Extract id and simplified title from URL
	1704	mobj = re.match(self._VALID_URL, url)
	1705	if mobj is None:
	1706	self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
	1707	return
	1708
	1709	# At this point we have a new video
	1710	self._downloader.increment_downloads()
	1711	video_id = mobj.group(1)
	1712
	1713	video_extension = 'flv'
	1714
	1715	# Retrieve video webpage to extract further information
	1716	request = urllib2.Request(url)
	1717	request.add_header('Cookie', 'family_filter=off')
	1718	try:
	1719	self.report_download_webpage(video_id)
	1720	webpage = urllib2.urlopen(request).read()
	1721	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	1722	self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
	1723	return
	1724
	1725	# Extract URL, uploader and title from webpage
	1726	self.report_extraction(video_id)
	1727	mobj = re.search(r'(?i)addVariable$\"sequence\"\s,\s\"([^\"]+?)\"$', webpage)
	1728	if mobj is None:
	1729	self._downloader.trouble(u'ERROR: unable to extract media URL')
	1730	return
	1731	sequence = urllib.unquote(mobj.group(1))
	1732	mobj = re.search(r',\"sdURL\"\:\"([^\"]+?)\",', sequence)
	1733	if mobj is None:
	1734	self._downloader.trouble(u'ERROR: unable to extract media URL')
	1735	return
	1736	mediaURL = urllib.unquote(mobj.group(1)).replace('\\', '')
	1737
	1738	# if needed add http://www.dailymotion.com/ if relative URL
	1739
	1740	video_url = mediaURL
	1741
	1742	mobj = re.search(r'<meta property="og:title" content="(?P<title>[^"]*)" />', webpage)
	1743	if mobj is None:
	1744	self._downloader.trouble(u'ERROR: unable to extract title')
	1745	return
	1746	video_title = _unescapeHTML(mobj.group('title').decode('utf-8'))
	1747	video_title = sanitize_title(video_title)
	1748	simple_title = _simplify_title(video_title)
	1749
	1750	mobj = re.search(r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a></span>', webpage)
	1751	if mobj is None:
	1752	self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
	1753	return
	1754	video_uploader = mobj.group(1)
	1755
	1756	try:
	1757	# Process video information
	1758	self._downloader.process_info({
	1759	'id': video_id.decode('utf-8'),
	1760	'url': video_url.decode('utf-8'),
	1761	'uploader': video_uploader.decode('utf-8'),
	1762	'upload_date': u'NA',
	1763	'title': video_title,
	1764	'stitle': simple_title,
	1765	'ext': video_extension.decode('utf-8'),
	1766	'format': u'NA',
	1767	'player_url': None,
	1768	})
	1769	except UnavailableVideoError:
	1770	self._downloader.trouble(u'\nERROR: unable to download video')
	1771
	1772
	1773	class GoogleIE(InfoExtractor):
	1774	"""Information extractor for video.google.com."""
	1775
	1776	_VALID_URL = r'(?:http://)?video\.google\.(?:com(?:\.au)?\|co\.(?:uk\|jp\|kr\|cr)\|ca\|de\|es\|fr\|it\|nl\|pl)/videoplay\?docid=([^\&]+).*'
	1777	IE_NAME = u'video.google'
	1778
	1779	def __init__(self, downloader=None):
	1780	InfoExtractor.__init__(self, downloader)
	1781
	1782	def report_download_webpage(self, video_id):
	1783	"""Report webpage download."""
	1784	self._downloader.to_screen(u'[video.google] %s: Downloading webpage' % video_id)
	1785
	1786	def report_extraction(self, video_id):
	1787	"""Report information extraction."""
	1788	self._downloader.to_screen(u'[video.google] %s: Extracting information' % video_id)
	1789
	1790	def _real_extract(self, url):
	1791	# Extract id from URL
	1792	mobj = re.match(self._VALID_URL, url)
	1793	if mobj is None:
	1794	self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
	1795	return
	1796
	1797	# At this point we have a new video
	1798	self._downloader.increment_downloads()
	1799	video_id = mobj.group(1)
	1800
	1801	video_extension = 'mp4'
	1802
	1803	# Retrieve video webpage to extract further information
	1804	request = urllib2.Request('http://video.google.com/videoplay?docid=%s&hl=en&oe=utf-8' % video_id)
	1805	try:
	1806	self.report_download_webpage(video_id)
	1807	webpage = urllib2.urlopen(request).read()
	1808	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	1809	self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
	1810	return
	1811
	1812	# Extract URL, uploader, and title from webpage
	1813	self.report_extraction(video_id)
	1814	mobj = re.search(r"download_url:'([^']+)'", webpage)
	1815	if mobj is None:
	1816	video_extension = 'flv'
	1817	mobj = re.search(r"(?i)videoUrl\\x3d(.+?)\\x26", webpage)
	1818	if mobj is None:
	1819	self._downloader.trouble(u'ERROR: unable to extract media URL')
	1820	return
	1821	mediaURL = urllib.unquote(mobj.group(1))
	1822	mediaURL = mediaURL.replace('\\x3d', '\x3d')
	1823	mediaURL = mediaURL.replace('\\x26', '\x26')
	1824
	1825	video_url = mediaURL
	1826
	1827	mobj = re.search(r'<title>(.*)</title>', webpage)
	1828	if mobj is None:
	1829	self._downloader.trouble(u'ERROR: unable to extract title')
	1830	return
	1831	video_title = mobj.group(1).decode('utf-8')
	1832	video_title = sanitize_title(video_title)
	1833	simple_title = _simplify_title(video_title)
	1834
	1835	# Extract video description
	1836	mobj = re.search(r'<span id=short-desc-content>([^<]*)</span>', webpage)
	1837	if mobj is None:
	1838	self._downloader.trouble(u'ERROR: unable to extract video description')
	1839	return
	1840	video_description = mobj.group(1).decode('utf-8')
	1841	if not video_description:
	1842	video_description = 'No description available.'
	1843
	1844	# Extract video thumbnail
	1845	if self._downloader.params.get('forcethumbnail', False):
	1846	request = urllib2.Request('http://video.google.com/videosearch?q=%s+site:video.google.com&hl=en' % abs(int(video_id)))
	1847	try:
	1848	webpage = urllib2.urlopen(request).read()
	1849	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	1850	self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
	1851	return
	1852	mobj = re.search(r'<img class=thumbnail-img (?:.* )?src=(http.*)>', webpage)
	1853	if mobj is None:
	1854	self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
	1855	return
	1856	video_thumbnail = mobj.group(1)
	1857	else: # we need something to pass to process_info
	1858	video_thumbnail = ''
	1859
	1860	try:
	1861	# Process video information
	1862	self._downloader.process_info({
	1863	'id': video_id.decode('utf-8'),
	1864	'url': video_url.decode('utf-8'),
	1865	'uploader': u'NA',
	1866	'upload_date': u'NA',
	1867	'title': video_title,
	1868	'stitle': simple_title,
	1869	'ext': video_extension.decode('utf-8'),
	1870	'format': u'NA',
	1871	'player_url': None,
	1872	})
	1873	except UnavailableVideoError:
	1874	self._downloader.trouble(u'\nERROR: unable to download video')
	1875
	1876
	1877	class PhotobucketIE(InfoExtractor):
	1878	"""Information extractor for photobucket.com."""
	1879
	1880	_VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.[\?\&]current=(.\.flv)'
	1881	IE_NAME = u'photobucket'
	1882
	1883	def __init__(self, downloader=None):
	1884	InfoExtractor.__init__(self, downloader)
	1885
	1886	def report_download_webpage(self, video_id):
	1887	"""Report webpage download."""
	1888	self._downloader.to_screen(u'[photobucket] %s: Downloading webpage' % video_id)
	1889
	1890	def report_extraction(self, video_id):
	1891	"""Report information extraction."""
	1892	self._downloader.to_screen(u'[photobucket] %s: Extracting information' % video_id)
	1893
	1894	def _real_extract(self, url):
	1895	# Extract id from URL
	1896	mobj = re.match(self._VALID_URL, url)
	1897	if mobj is None:
	1898	self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
	1899	return
	1900
	1901	# At this point we have a new video
	1902	self._downloader.increment_downloads()
	1903	video_id = mobj.group(1)
	1904
	1905	video_extension = 'flv'
	1906
	1907	# Retrieve video webpage to extract further information
	1908	request = urllib2.Request(url)
	1909	try:
	1910	self.report_download_webpage(video_id)
	1911	webpage = urllib2.urlopen(request).read()
	1912	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	1913	self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
	1914	return
	1915
	1916	# Extract URL, uploader, and title from webpage
	1917	self.report_extraction(video_id)
	1918	mobj = re.search(r'<link rel="video_src" href=".*\?file=([^"]+)" />', webpage)
	1919	if mobj is None:
	1920	self._downloader.trouble(u'ERROR: unable to extract media URL')
	1921	return
	1922	mediaURL = urllib.unquote(mobj.group(1))
	1923
	1924	video_url = mediaURL
	1925
	1926	mobj = re.search(r'<title>(.) video by (.) - Photobucket</title>', webpage)
	1927	if mobj is None:
	1928	self._downloader.trouble(u'ERROR: unable to extract title')
	1929	return
	1930	video_title = mobj.group(1).decode('utf-8')
	1931	video_title = sanitize_title(video_title)
	1932	simple_title = _simplify_title(vide_title)
	1933
	1934	video_uploader = mobj.group(2).decode('utf-8')
	1935
	1936	try:
	1937	# Process video information
	1938	self._downloader.process_info({
	1939	'id': video_id.decode('utf-8'),
	1940	'url': video_url.decode('utf-8'),
	1941	'uploader': video_uploader,
	1942	'upload_date': u'NA',
	1943	'title': video_title,
	1944	'stitle': simple_title,
	1945	'ext': video_extension.decode('utf-8'),
	1946	'format': u'NA',
	1947	'player_url': None,
	1948	})
	1949	except UnavailableVideoError:
	1950	self._downloader.trouble(u'\nERROR: unable to download video')
	1951
	1952
	1953	class YahooIE(InfoExtractor):
	1954	"""Information extractor for video.yahoo.com."""
	1955
	1956	# _VALID_URL matches all Yahoo! Video URLs
	1957	# _VPAGE_URL matches only the extractable '/watch/' URLs
	1958	_VALID_URL = r'(?:http://)?(?:[a-z]+\.)?video\.yahoo\.com/(?:watch\|network)/([0-9]+)(?:/\|\?v=)([0-9]+)(?:[#\?].*)?'
	1959	_VPAGE_URL = r'(?:http://)?video\.yahoo\.com/watch/([0-9]+)/([0-9]+)(?:[#\?].*)?'
	1960	IE_NAME = u'video.yahoo'
	1961
	1962	def __init__(self, downloader=None):
	1963	InfoExtractor.__init__(self, downloader)
	1964
	1965	def report_download_webpage(self, video_id):
	1966	"""Report webpage download."""
	1967	self._downloader.to_screen(u'[video.yahoo] %s: Downloading webpage' % video_id)
	1968
	1969	def report_extraction(self, video_id):
	1970	"""Report information extraction."""
	1971	self._downloader.to_screen(u'[video.yahoo] %s: Extracting information' % video_id)
	1972
	1973	def _real_extract(self, url, new_video=True):
	1974	# Extract ID from URL
	1975	mobj = re.match(self._VALID_URL, url)
	1976	if mobj is None:
	1977	self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
	1978	return
	1979
	1980	# At this point we have a new video
	1981	self._downloader.increment_downloads()
	1982	video_id = mobj.group(2)
	1983	video_extension = 'flv'
	1984
	1985	# Rewrite valid but non-extractable URLs as
	1986	# extractable English language /watch/ URLs
	1987	if re.match(self._VPAGE_URL, url) is None:
	1988	request = urllib2.Request(url)
	1989	try:
	1990	webpage = urllib2.urlopen(request).read()
	1991	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	1992	self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
	1993	return
	1994
	1995	mobj = re.search(r'$"id", "([0-9]+)"$;', webpage)
	1996	if mobj is None:
	1997	self._downloader.trouble(u'ERROR: Unable to extract id field')
	1998	return
	1999	yahoo_id = mobj.group(1)
	2000
	2001	mobj = re.search(r'$"vid", "([0-9]+)"$;', webpage)
	2002	if mobj is None:
	2003	self._downloader.trouble(u'ERROR: Unable to extract vid field')
	2004	return
	2005	yahoo_vid = mobj.group(1)
	2006
	2007	url = 'http://video.yahoo.com/watch/%s/%s' % (yahoo_vid, yahoo_id)
	2008	return self._real_extract(url, new_video=False)
	2009
	2010	# Retrieve video webpage to extract further information
	2011	request = urllib2.Request(url)
	2012	try:
	2013	self.report_download_webpage(video_id)
	2014	webpage = urllib2.urlopen(request).read()
	2015	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	2016	self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
	2017	return
	2018
	2019	# Extract uploader and title from webpage
	2020	self.report_extraction(video_id)
	2021	mobj = re.search(r'<meta name="title" content="(.*)" />', webpage)
	2022	if mobj is None:
	2023	self._downloader.trouble(u'ERROR: unable to extract video title')
	2024	return
	2025	video_title = mobj.group(1).decode('utf-8')
	2026	simple_title = _simplify_title(video_title)
	2027
	2028	mobj = re.search(r'<h2 class="ti-5"><a href="http://video\.yahoo\.com/(people\|profile)/[0-9]+" beacon=".">(.)</a></h2>', webpage)
	2029	if mobj is None:
	2030	self._downloader.trouble(u'ERROR: unable to extract video uploader')
	2031	return
	2032	video_uploader = mobj.group(1).decode('utf-8')
	2033
	2034	# Extract video thumbnail
	2035	mobj = re.search(r'<link rel="image_src" href="(.*)" />', webpage)
	2036	if mobj is None:
	2037	self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
	2038	return
	2039	video_thumbnail = mobj.group(1).decode('utf-8')
	2040
	2041	# Extract video description
	2042	mobj = re.search(r'<meta name="description" content="(.*)" />', webpage)
	2043	if mobj is None:
	2044	self._downloader.trouble(u'ERROR: unable to extract video description')
	2045	return
	2046	video_description = mobj.group(1).decode('utf-8')
	2047	if not video_description:
	2048	video_description = 'No description available.'
	2049
	2050	# Extract video height and width
	2051	mobj = re.search(r'<meta name="video_height" content="([0-9]+)" />', webpage)
	2052	if mobj is None:
	2053	self._downloader.trouble(u'ERROR: unable to extract video height')
	2054	return
	2055	yv_video_height = mobj.group(1)
	2056
	2057	mobj = re.search(r'<meta name="video_width" content="([0-9]+)" />', webpage)
	2058	if mobj is None:
	2059	self._downloader.trouble(u'ERROR: unable to extract video width')
	2060	return
	2061	yv_video_width = mobj.group(1)
	2062
	2063	# Retrieve video playlist to extract media URL
	2064	# I'm not completely sure what all these options are, but we
	2065	# seem to need most of them, otherwise the server sends a 401.
	2066	yv_lg = 'R0xx6idZnW2zlrKP8xxAIR' # not sure what this represents
	2067	yv_bitrate = '700' # according to Wikipedia this is hard-coded
	2068	request = urllib2.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id +
	2069	'&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height +
	2070	'&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797')
	2071	try:
	2072	self.report_download_webpage(video_id)
	2073	webpage = urllib2.urlopen(request).read()
	2074	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	2075	self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
	2076	return
	2077
	2078	# Extract media URL from playlist XML
	2079	mobj = re.search(r'<STREAM APP="(http://.)" FULLPATH="/?(/.\.flv\?[^"]*)"', webpage)
	2080	if mobj is None:
	2081	self._downloader.trouble(u'ERROR: Unable to extract media URL')
	2082	return
	2083	video_url = urllib.unquote(mobj.group(1) + mobj.group(2)).decode('utf-8')
	2084	video_url = re.sub(r'(?u)&(.+?);', htmlentity_transform, video_url)
	2085
	2086	try:
	2087	# Process video information
	2088	self._downloader.process_info({
	2089	'id': video_id.decode('utf-8'),
	2090	'url': video_url,
	2091	'uploader': video_uploader,
	2092	'upload_date': u'NA',
	2093	'title': video_title,
	2094	'stitle': simple_title,
	2095	'ext': video_extension.decode('utf-8'),
	2096	'thumbnail': video_thumbnail.decode('utf-8'),
	2097	'description': video_description,
	2098	'thumbnail': video_thumbnail,
	2099	'player_url': None,
	2100	})
	2101	except UnavailableVideoError:
	2102	self._downloader.trouble(u'\nERROR: unable to download video')
	2103
	2104
	2105	class VimeoIE(InfoExtractor):
	2106	"""Information extractor for vimeo.com."""
	2107
	2108	# _VALID_URL matches Vimeo URLs
	2109	_VALID_URL = r'(?:https?://)?(?:(?:www\|player).)?vimeo\.com/(?:groups/[^/]+/)?(?:videos?/)?([0-9]+)'
	2110	IE_NAME = u'vimeo'
	2111
	2112	def __init__(self, downloader=None):
	2113	InfoExtractor.__init__(self, downloader)
	2114
	2115	def report_download_webpage(self, video_id):
	2116	"""Report webpage download."""
	2117	self._downloader.to_screen(u'[vimeo] %s: Downloading webpage' % video_id)
	2118
	2119	def report_extraction(self, video_id):
	2120	"""Report information extraction."""
	2121	self._downloader.to_screen(u'[vimeo] %s: Extracting information' % video_id)
	2122
	2123	def _real_extract(self, url, new_video=True):
	2124	# Extract ID from URL
	2125	mobj = re.match(self._VALID_URL, url)
	2126	if mobj is None:
	2127	self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
	2128	return
	2129
	2130	# At this point we have a new video
	2131	self._downloader.increment_downloads()
	2132	video_id = mobj.group(1)
	2133
	2134	# Retrieve video webpage to extract further information
	2135	request = urllib2.Request(url, None, std_headers)
	2136	try:
	2137	self.report_download_webpage(video_id)
	2138	webpage = urllib2.urlopen(request).read()
	2139	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	2140	self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
	2141	return
	2142
	2143	# Now we begin extracting as much information as we can from what we
	2144	# retrieved. First we extract the information common to all extractors,
	2145	# and latter we extract those that are Vimeo specific.
	2146	self.report_extraction(video_id)
	2147
	2148	# Extract the config JSON
	2149	config = webpage.split(' = {config:')[1].split(',assets:')[0]
	2150	try:
	2151	config = json.loads(config)
	2152	except:
	2153	self._downloader.trouble(u'ERROR: unable to extract info section')
	2154	return
	2155
	2156	# Extract title
	2157	video_title = config["video"]["title"]
	2158	simple_title = _simplify_title(video_title)
	2159
	2160	# Extract uploader
	2161	video_uploader = config["video"]["owner"]["name"]
	2162
	2163	# Extract video thumbnail
	2164	video_thumbnail = config["video"]["thumbnail"]
	2165
	2166	# Extract video description
	2167	try:
	2168	lxml.etree
	2169	except NameError:
	2170	video_description = u'No description available.'
	2171	mobj = re.search(r'<meta name="description" content="(.*?)" />', webpage, re.MULTILINE)
	2172	if mobj is not None:
	2173	video_description = mobj.group(1)
	2174	else:
	2175	html_parser = lxml.etree.HTMLParser()
	2176	vwebpage_doc = lxml.etree.parse(StringIO.StringIO(webpage), html_parser)
	2177	video_description = u''.join(vwebpage_doc.xpath('id("description")//text()')).strip()
	2178	# TODO use another parser
	2179
	2180	# Extract upload date
	2181	video_upload_date = u'NA'
	2182	mobj = re.search(r'<span id="clip-date" style="display:none">[^:]: (.?)( $[^\(]*$)?</span>', webpage)
	2183	if mobj is not None:
	2184	video_upload_date = mobj.group(1)
	2185
	2186	# Vimeo specific: extract request signature and timestamp
	2187	sig = config['request']['signature']
	2188	timestamp = config['request']['timestamp']
	2189
	2190	# Vimeo specific: extract video codec and quality information
	2191	# TODO bind to format param
	2192	codecs = [('h264', 'mp4'), ('vp8', 'flv'), ('vp6', 'flv')]
	2193	for codec in codecs:
	2194	if codec[0] in config["video"]["files"]:
	2195	video_codec = codec[0]
	2196	video_extension = codec[1]
	2197	if 'hd' in config["video"]["files"][codec[0]]: quality = 'hd'
	2198	else: quality = 'sd'
	2199	break
	2200	else:
	2201	self._downloader.trouble(u'ERROR: no known codec found')
	2202	return
	2203
	2204	video_url = "http://player.vimeo.com/play_redirect?clip_id=%s&sig=%s&time=%s&quality=%s&codecs=%s&type=moogaloop_local&embed_location=" \
	2205	%(video_id, sig, timestamp, quality, video_codec.upper())
	2206
	2207	try:
	2208	# Process video information
	2209	self._downloader.process_info({
	2210	'id': video_id,
	2211	'url': video_url,
	2212	'uploader': video_uploader,
	2213	'upload_date': video_upload_date,
	2214	'title': video_title,
	2215	'stitle': simple_title,
	2216	'ext': video_extension,
	2217	'thumbnail': video_thumbnail,
	2218	'description': video_description,
	2219	'player_url': None,
	2220	})
	2221	except UnavailableVideoError:
	2222	self._downloader.trouble(u'ERROR: unable to download video')
	2223
	2224
	2225	class GenericIE(InfoExtractor):
	2226	"""Generic last-resort information extractor."""
	2227
	2228	_VALID_URL = r'.*'
	2229	IE_NAME = u'generic'
	2230
	2231	def __init__(self, downloader=None):
	2232	InfoExtractor.__init__(self, downloader)
	2233
	2234	def report_download_webpage(self, video_id):
	2235	"""Report webpage download."""
	2236	self._downloader.to_screen(u'WARNING: Falling back on generic information extractor.')
	2237	self._downloader.to_screen(u'[generic] %s: Downloading webpage' % video_id)
	2238
	2239	def report_extraction(self, video_id):
	2240	"""Report information extraction."""
	2241	self._downloader.to_screen(u'[generic] %s: Extracting information' % video_id)
	2242
	2243	def _real_extract(self, url):
	2244	# At this point we have a new video
	2245	self._downloader.increment_downloads()
	2246
	2247	video_id = url.split('/')[-1]
	2248	request = urllib2.Request(url)
	2249	try:
	2250	self.report_download_webpage(video_id)
	2251	webpage = urllib2.urlopen(request).read()
	2252	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	2253	self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
	2254	return
	2255	except ValueError, err:
	2256	# since this is the last-resort InfoExtractor, if
	2257	# this error is thrown, it'll be thrown here
	2258	self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
	2259	return
	2260
	2261	self.report_extraction(video_id)
	2262	# Start with something easy: JW Player in SWFObject
	2263	mobj = re.search(r'flashvars: [\'"](?:.&)?file=(http[^\'"&])', webpage)
	2264	if mobj is None:
	2265	# Broaden the search a little bit
	2266	mobj = re.search(r'[^A-Za-z0-9]?(?:file\|source)=(http[^\'"&]*)', webpage)
	2267	if mobj is None:
	2268	self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
	2269	return
	2270
	2271	# It's possible that one of the regexes
	2272	# matched, but returned an empty group:
	2273	if mobj.group(1) is None:
	2274	self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
	2275	return
	2276
	2277	video_url = urllib.unquote(mobj.group(1))
	2278	video_id = os.path.basename(video_url)
	2279
	2280	# here's a fun little line of code for you:
	2281	video_extension = os.path.splitext(video_id)[1][1:]
	2282	video_id = os.path.splitext(video_id)[0]
	2283
	2284	# it's tempting to parse this further, but you would
	2285	# have to take into account all the variations like
	2286	# Video Title - Site Name
	2287	# Site Name \| Video Title
	2288	# Video Title - Tagline \| Site Name
	2289	# and so on and so forth; it's just not practical
	2290	mobj = re.search(r'<title>(.*)</title>', webpage)
	2291	if mobj is None:
	2292	self._downloader.trouble(u'ERROR: unable to extract title')
	2293	return
	2294	video_title = mobj.group(1).decode('utf-8')
	2295	video_title = sanitize_title(video_title)
	2296	simple_title = _simplify_title(video_title)
	2297
	2298	# video uploader is domain name
	2299	mobj = re.match(r'(?:https?://)?([^/])/.', url)
	2300	if mobj is None:
	2301	self._downloader.trouble(u'ERROR: unable to extract title')
	2302	return
	2303	video_uploader = mobj.group(1).decode('utf-8')
	2304
	2305	try:
	2306	# Process video information
	2307	self._downloader.process_info({
	2308	'id': video_id.decode('utf-8'),
	2309	'url': video_url.decode('utf-8'),
	2310	'uploader': video_uploader,
	2311	'upload_date': u'NA',
	2312	'title': video_title,
	2313	'stitle': simple_title,
	2314	'ext': video_extension.decode('utf-8'),
	2315	'format': u'NA',
	2316	'player_url': None,
	2317	})
	2318	except UnavailableVideoError, err:
	2319	self._downloader.trouble(u'\nERROR: unable to download video')
	2320
	2321
	2322	class YoutubeSearchIE(InfoExtractor):
	2323	"""Information Extractor for YouTube search queries."""
	2324	_VALID_URL = r'ytsearch(\d+\|all)?:[\s\S]+'
	2325	_API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
	2326	_youtube_ie = None
	2327	_max_youtube_results = 1000
	2328	IE_NAME = u'youtube:search'
	2329
	2330	def __init__(self, youtube_ie, downloader=None):
	2331	InfoExtractor.__init__(self, downloader)
	2332	self._youtube_ie = youtube_ie
	2333
	2334	def report_download_page(self, query, pagenum):
	2335	"""Report attempt to download playlist page with given number."""
	2336	query = query.decode(preferredencoding())
	2337	self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
	2338
	2339	def _real_initialize(self):
	2340	self._youtube_ie.initialize()
	2341
	2342	def _real_extract(self, query):
	2343	mobj = re.match(self._VALID_URL, query)
	2344	if mobj is None:
	2345	self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
	2346	return
	2347
	2348	prefix, query = query.split(':')
	2349	prefix = prefix[8:]
	2350	query = query.encode('utf-8')
	2351	if prefix == '':
	2352	self._download_n_results(query, 1)
	2353	return
	2354	elif prefix == 'all':
	2355	self._download_n_results(query, self._max_youtube_results)
	2356	return
	2357	else:
	2358	try:
	2359	n = long(prefix)
	2360	if n <= 0:
	2361	self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
	2362	return
	2363	elif n > self._max_youtube_results:
	2364	self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)' % (self._max_youtube_results, n))
	2365	n = self._max_youtube_results
	2366	self._download_n_results(query, n)
	2367	return
	2368	except ValueError: # parsing prefix as integer fails
	2369	self._download_n_results(query, 1)
	2370	return
	2371
	2372	def _download_n_results(self, query, n):
	2373	"""Downloads a specified number of results for a query"""
	2374
	2375	video_ids = []
	2376	pagenum = 0
	2377	limit = n
	2378
	2379	while (50 * pagenum) < limit:
	2380	self.report_download_page(query, pagenum+1)
	2381	result_url = self._API_URL % (urllib.quote_plus(query), (50*pagenum)+1)
	2382	request = urllib2.Request(result_url)
	2383	try:
	2384	data = urllib2.urlopen(request).read()
	2385	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	2386	self._downloader.trouble(u'ERROR: unable to download API page: %s' % str(err))
	2387	return
	2388	api_response = json.loads(data)['data']
	2389
	2390	new_ids = list(video['id'] for video in api_response['items'])
	2391	video_ids += new_ids
	2392
	2393	limit = min(n, api_response['totalItems'])
	2394	pagenum += 1
	2395
	2396	if len(video_ids) > n:
	2397	video_ids = video_ids[:n]
	2398	for id in video_ids:
	2399	self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
	2400	return
	2401
	2402
	2403	class GoogleSearchIE(InfoExtractor):
	2404	"""Information Extractor for Google Video search queries."""
	2405	_VALID_URL = r'gvsearch(\d+\|all)?:[\s\S]+'
	2406	_TEMPLATE_URL = 'http://video.google.com/videosearch?q=%s+site:video.google.com&start=%s&hl=en'
	2407	_VIDEO_INDICATOR = r'<a href="http://video\.google\.com/videoplay\?docid=([^"\&]+)'
	2408	_MORE_PAGES_INDICATOR = r'class="pn" id="pnnext"'
	2409	_google_ie = None
	2410	_max_google_results = 1000
	2411	IE_NAME = u'video.google:search'
	2412
	2413	def __init__(self, google_ie, downloader=None):
	2414	InfoExtractor.__init__(self, downloader)
	2415	self._google_ie = google_ie
	2416
	2417	def report_download_page(self, query, pagenum):
	2418	"""Report attempt to download playlist page with given number."""
	2419	query = query.decode(preferredencoding())
	2420	self._downloader.to_screen(u'[video.google] query "%s": Downloading page %s' % (query, pagenum))
	2421
	2422	def _real_initialize(self):
	2423	self._google_ie.initialize()
	2424
	2425	def _real_extract(self, query):
	2426	mobj = re.match(self._VALID_URL, query)
	2427	if mobj is None:
	2428	self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
	2429	return
	2430
	2431	prefix, query = query.split(':')
	2432	prefix = prefix[8:]
	2433	query = query.encode('utf-8')
	2434	if prefix == '':
	2435	self._download_n_results(query, 1)
	2436	return
	2437	elif prefix == 'all':
	2438	self._download_n_results(query, self._max_google_results)
	2439	return
	2440	else:
	2441	try:
	2442	n = long(prefix)
	2443	if n <= 0:
	2444	self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
	2445	return
	2446	elif n > self._max_google_results:
	2447	self._downloader.to_stderr(u'WARNING: gvsearch returns max %i results (you requested %i)' % (self._max_google_results, n))
	2448	n = self._max_google_results
	2449	self._download_n_results(query, n)
	2450	return
	2451	except ValueError: # parsing prefix as integer fails
	2452	self._download_n_results(query, 1)
	2453	return
	2454
	2455	def _download_n_results(self, query, n):
	2456	"""Downloads a specified number of results for a query"""
	2457
	2458	video_ids = []
	2459	pagenum = 0
	2460
	2461	while True:
	2462	self.report_download_page(query, pagenum)
	2463	result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum*10)
	2464	request = urllib2.Request(result_url)
	2465	try:
	2466	page = urllib2.urlopen(request).read()
	2467	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	2468	self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
	2469	return
	2470
	2471	# Extract video identifiers
	2472	for mobj in re.finditer(self._VIDEO_INDICATOR, page):
	2473	video_id = mobj.group(1)
	2474	if video_id not in video_ids:
	2475	video_ids.append(video_id)
	2476	if len(video_ids) == n:
	2477	# Specified n videos reached
	2478	for id in video_ids:
	2479	self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
	2480	return
	2481
	2482	if re.search(self._MORE_PAGES_INDICATOR, page) is None:
	2483	for id in video_ids:
	2484	self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
	2485	return
	2486
	2487	pagenum = pagenum + 1
	2488
	2489
	2490	class YahooSearchIE(InfoExtractor):
	2491	"""Information Extractor for Yahoo! Video search queries."""
	2492	_VALID_URL = r'yvsearch(\d+\|all)?:[\s\S]+'
	2493	_TEMPLATE_URL = 'http://video.yahoo.com/search/?p=%s&o=%s'
	2494	_VIDEO_INDICATOR = r'href="http://video\.yahoo\.com/watch/([0-9]+/[0-9]+)"'
	2495	_MORE_PAGES_INDICATOR = r'\s*Next'
	2496	_yahoo_ie = None
	2497	_max_yahoo_results = 1000
	2498	IE_NAME = u'video.yahoo:search'
	2499
	2500	def __init__(self, yahoo_ie, downloader=None):
	2501	InfoExtractor.__init__(self, downloader)
	2502	self._yahoo_ie = yahoo_ie
	2503
	2504	def report_download_page(self, query, pagenum):
	2505	"""Report attempt to download playlist page with given number."""
	2506	query = query.decode(preferredencoding())
	2507	self._downloader.to_screen(u'[video.yahoo] query "%s": Downloading page %s' % (query, pagenum))
	2508
	2509	def _real_initialize(self):
	2510	self._yahoo_ie.initialize()
	2511
	2512	def _real_extract(self, query):
	2513	mobj = re.match(self._VALID_URL, query)
	2514	if mobj is None:
	2515	self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
	2516	return
	2517
	2518	prefix, query = query.split(':')
	2519	prefix = prefix[8:]
	2520	query = query.encode('utf-8')
	2521	if prefix == '':
	2522	self._download_n_results(query, 1)
	2523	return
	2524	elif prefix == 'all':
	2525	self._download_n_results(query, self._max_yahoo_results)
	2526	return
	2527	else:
	2528	try:
	2529	n = long(prefix)
	2530	if n <= 0:
	2531	self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
	2532	return
	2533	elif n > self._max_yahoo_results:
	2534	self._downloader.to_stderr(u'WARNING: yvsearch returns max %i results (you requested %i)' % (self._max_yahoo_results, n))
	2535	n = self._max_yahoo_results
	2536	self._download_n_results(query, n)
	2537	return
	2538	except ValueError: # parsing prefix as integer fails
	2539	self._download_n_results(query, 1)
	2540	return
	2541
	2542	def _download_n_results(self, query, n):
	2543	"""Downloads a specified number of results for a query"""
	2544
	2545	video_ids = []
	2546	already_seen = set()
	2547	pagenum = 1
	2548
	2549	while True:
	2550	self.report_download_page(query, pagenum)
	2551	result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
	2552	request = urllib2.Request(result_url)
	2553	try:
	2554	page = urllib2.urlopen(request).read()
	2555	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	2556	self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
	2557	return
	2558
	2559	# Extract video identifiers
	2560	for mobj in re.finditer(self._VIDEO_INDICATOR, page):
	2561	video_id = mobj.group(1)
	2562	if video_id not in already_seen:
	2563	video_ids.append(video_id)
	2564	already_seen.add(video_id)
	2565	if len(video_ids) == n:
	2566	# Specified n videos reached
	2567	for id in video_ids:
	2568	self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
	2569	return
	2570
	2571	if re.search(self._MORE_PAGES_INDICATOR, page) is None:
	2572	for id in video_ids:
	2573	self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
	2574	return
	2575
	2576	pagenum = pagenum + 1
	2577
	2578
	2579	class YoutubePlaylistIE(InfoExtractor):
	2580	"""Information Extractor for YouTube playlists."""
	2581
	2582	_VALID_URL = r'(?:https?://)?(?:\w+\.)?youtube\.com/(?:(?:course\|view_play_list\|my_playlists\|artist\|playlist)\?.?(p\|a\|list)=\|user/.?/user/\|p/\|user/.?#[pg]/c/)(?:PL)?([0-9A-Za-z-_]+)(?:/.?/([0-9A-Za-z_-]+))?.*'
	2583	_TEMPLATE_URL = 'http://www.youtube.com/%s?%s=%s&page=%s&gl=US&hl=en'
	2584	_VIDEO_INDICATOR_TEMPLATE = r'/watch\?v=(.+?)&list=PL%s&'
	2585	_MORE_PAGES_INDICATOR = r'(?m)>\sNext\s</a>'
	2586	_youtube_ie = None
	2587	IE_NAME = u'youtube:playlist'
	2588
	2589	def __init__(self, youtube_ie, downloader=None):
	2590	InfoExtractor.__init__(self, downloader)
	2591	self._youtube_ie = youtube_ie
	2592
	2593	def report_download_page(self, playlist_id, pagenum):
	2594	"""Report attempt to download playlist page with given number."""
	2595	self._downloader.to_screen(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum))
	2596
	2597	def _real_initialize(self):
	2598	self._youtube_ie.initialize()
	2599
	2600	def _real_extract(self, url):
	2601	# Extract playlist id
	2602	mobj = re.match(self._VALID_URL, url)
	2603	if mobj is None:
	2604	self._downloader.trouble(u'ERROR: invalid url: %s' % url)
	2605	return
	2606
	2607	# Single video case
	2608	if mobj.group(3) is not None:
	2609	self._youtube_ie.extract(mobj.group(3))
	2610	return
	2611
	2612	# Download playlist pages
	2613	# prefix is 'p' as default for playlists but there are other types that need extra care
	2614	playlist_prefix = mobj.group(1)
	2615	if playlist_prefix == 'a':
	2616	playlist_access = 'artist'
	2617	else:
	2618	playlist_prefix = 'p'
	2619	playlist_access = 'view_play_list'
	2620	playlist_id = mobj.group(2)
	2621	video_ids = []
	2622	pagenum = 1
	2623
	2624	while True:
	2625	self.report_download_page(playlist_id, pagenum)
	2626	url = self._TEMPLATE_URL % (playlist_access, playlist_prefix, playlist_id, pagenum)
	2627	request = urllib2.Request(url)
	2628	try:
	2629	page = urllib2.urlopen(request).read()
	2630	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	2631	self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
	2632	return
	2633
	2634	# Extract video identifiers
	2635	ids_in_page = []
	2636	for mobj in re.finditer(self._VIDEO_INDICATOR_TEMPLATE % playlist_id, page):
	2637	if mobj.group(1) not in ids_in_page:
	2638	ids_in_page.append(mobj.group(1))
	2639	video_ids.extend(ids_in_page)
	2640
	2641	if re.search(self._MORE_PAGES_INDICATOR, page) is None:
	2642	break
	2643	pagenum = pagenum + 1
	2644
	2645	playliststart = self._downloader.params.get('playliststart', 1) - 1
	2646	playlistend = self._downloader.params.get('playlistend', -1)
	2647	if playlistend == -1:
	2648	video_ids = video_ids[playliststart:]
	2649	else:
	2650	video_ids = video_ids[playliststart:playlistend]
	2651
	2652	for id in video_ids:
	2653	self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
	2654	return
	2655
	2656
	2657	class YoutubeUserIE(InfoExtractor):
	2658	"""Information Extractor for YouTube users."""
	2659
	2660	_VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/user/)\|ytuser:)([A-Za-z0-9_-]+)'
	2661	_TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
	2662	_GDATA_PAGE_SIZE = 50
	2663	_GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d'
	2664	_VIDEO_INDICATOR = r'/watch\?v=(.+?)[\<&]'
	2665	_youtube_ie = None
	2666	IE_NAME = u'youtube:user'
	2667
	2668	def __init__(self, youtube_ie, downloader=None):
	2669	InfoExtractor.__init__(self, downloader)
	2670	self._youtube_ie = youtube_ie
	2671
	2672	def report_download_page(self, username, start_index):
	2673	"""Report attempt to download user page."""
	2674	self._downloader.to_screen(u'[youtube] user %s: Downloading video ids from %d to %d' %
	2675	(username, start_index, start_index + self._GDATA_PAGE_SIZE))
	2676
	2677	def _real_initialize(self):
	2678	self._youtube_ie.initialize()
	2679
	2680	def _real_extract(self, url):
	2681	# Extract username
	2682	mobj = re.match(self._VALID_URL, url)
	2683	if mobj is None:
	2684	self._downloader.trouble(u'ERROR: invalid url: %s' % url)
	2685	return
	2686
	2687	username = mobj.group(1)
	2688
	2689	# Download video ids using YouTube Data API. Result size per
	2690	# query is limited (currently to 50 videos) so we need to query
	2691	# page by page until there are no video ids - it means we got
	2692	# all of them.
	2693
	2694	video_ids = []
	2695	pagenum = 0
	2696
	2697	while True:
	2698	start_index = pagenum * self._GDATA_PAGE_SIZE + 1
	2699	self.report_download_page(username, start_index)
	2700
	2701	request = urllib2.Request(self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index))
	2702
	2703	try:
	2704	page = urllib2.urlopen(request).read()
	2705	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	2706	self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
	2707	return
	2708
	2709	# Extract video identifiers
	2710	ids_in_page = []
	2711
	2712	for mobj in re.finditer(self._VIDEO_INDICATOR, page):
	2713	if mobj.group(1) not in ids_in_page:
	2714	ids_in_page.append(mobj.group(1))
	2715
	2716	video_ids.extend(ids_in_page)
	2717
	2718	# A little optimization - if current page is not
	2719	# "full", ie. does not contain PAGE_SIZE video ids then
	2720	# we can assume that this page is the last one - there
	2721	# are no more ids on further pages - no need to query
	2722	# again.
	2723
	2724	if len(ids_in_page) < self._GDATA_PAGE_SIZE:
	2725	break
	2726
	2727	pagenum += 1
	2728
	2729	all_ids_count = len(video_ids)
	2730	playliststart = self._downloader.params.get('playliststart', 1) - 1
	2731	playlistend = self._downloader.params.get('playlistend', -1)
	2732
	2733	if playlistend == -1:
	2734	video_ids = video_ids[playliststart:]
	2735	else:
	2736	video_ids = video_ids[playliststart:playlistend]
	2737
	2738	self._downloader.to_screen(u"[youtube] user %s: Collected %d video ids (downloading %d of them)" %
	2739	(username, all_ids_count, len(video_ids)))
	2740
	2741	for video_id in video_ids:
	2742	self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % video_id)
	2743
	2744
	2745	class DepositFilesIE(InfoExtractor):
	2746	"""Information extractor for depositfiles.com"""
	2747
	2748	_VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles\.com/(?:../(?#locale))?files/(.+)'
	2749	IE_NAME = u'DepositFiles'
	2750
	2751	def __init__(self, downloader=None):
	2752	InfoExtractor.__init__(self, downloader)
	2753
	2754	def report_download_webpage(self, file_id):
	2755	"""Report webpage download."""
	2756	self._downloader.to_screen(u'[DepositFiles] %s: Downloading webpage' % file_id)
	2757
	2758	def report_extraction(self, file_id):
	2759	"""Report information extraction."""
	2760	self._downloader.to_screen(u'[DepositFiles] %s: Extracting information' % file_id)
	2761
	2762	def _real_extract(self, url):
	2763	# At this point we have a new file
	2764	self._downloader.increment_downloads()
	2765
	2766	file_id = url.split('/')[-1]
	2767	# Rebuild url in english locale
	2768	url = 'http://depositfiles.com/en/files/' + file_id
	2769
	2770	# Retrieve file webpage with 'Free download' button pressed
	2771	free_download_indication = { 'gateway_result' : '1' }
	2772	request = urllib2.Request(url, urllib.urlencode(free_download_indication))
	2773	try:
	2774	self.report_download_webpage(file_id)
	2775	webpage = urllib2.urlopen(request).read()
	2776	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	2777	self._downloader.trouble(u'ERROR: Unable to retrieve file webpage: %s' % str(err))
	2778	return
	2779
	2780	# Search for the real file URL
	2781	mobj = re.search(r'<form action="(http://fileshare.+?)"', webpage)
	2782	if (mobj is None) or (mobj.group(1) is None):
	2783	# Try to figure out reason of the error.
	2784	mobj = re.search(r'<strong>(Attention.*?)</strong>', webpage, re.DOTALL)
	2785	if (mobj is not None) and (mobj.group(1) is not None):
	2786	restriction_message = re.sub('\s+', ' ', mobj.group(1)).strip()
	2787	self._downloader.trouble(u'ERROR: %s' % restriction_message)
	2788	else:
	2789	self._downloader.trouble(u'ERROR: unable to extract download URL from: %s' % url)
	2790	return
	2791
	2792	file_url = mobj.group(1)
	2793	file_extension = os.path.splitext(file_url)[1][1:]
	2794
	2795	# Search for file title
	2796	mobj = re.search(r'<b title="(.*?)">', webpage)
	2797	if mobj is None:
	2798	self._downloader.trouble(u'ERROR: unable to extract title')
	2799	return
	2800	file_title = mobj.group(1).decode('utf-8')
	2801
	2802	try:
	2803	# Process file information
	2804	self._downloader.process_info({
	2805	'id': file_id.decode('utf-8'),
	2806	'url': file_url.decode('utf-8'),
	2807	'uploader': u'NA',
	2808	'upload_date': u'NA',
	2809	'title': file_title,
	2810	'stitle': file_title,
	2811	'ext': file_extension.decode('utf-8'),
	2812	'format': u'NA',
	2813	'player_url': None,
	2814	})
	2815	except UnavailableVideoError, err:
	2816	self._downloader.trouble(u'ERROR: unable to download file')
	2817
	2818
	2819	class FacebookIE(InfoExtractor):
	2820	"""Information Extractor for Facebook"""
	2821
	2822	_VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook\.com/(?:video/video\|photo)\.php\?(?:.?)v=(?P<ID>\d+)(?:.)'
	2823	_LOGIN_URL = 'https://login.facebook.com/login.php?m&next=http%3A%2F%2Fm.facebook.com%2Fhome.php&'
	2824	_NETRC_MACHINE = 'facebook'
	2825	_available_formats = ['video', 'highqual', 'lowqual']
	2826	_video_extensions = {
	2827	'video': 'mp4',
	2828	'highqual': 'mp4',
	2829	'lowqual': 'mp4',
	2830	}
	2831	IE_NAME = u'facebook'
	2832
	2833	def __init__(self, downloader=None):
	2834	InfoExtractor.__init__(self, downloader)
	2835
	2836	def _reporter(self, message):
	2837	"""Add header and report message."""
	2838	self._downloader.to_screen(u'[facebook] %s' % message)
	2839
	2840	def report_login(self):
	2841	"""Report attempt to log in."""
	2842	self._reporter(u'Logging in')
	2843
	2844	def report_video_webpage_download(self, video_id):
	2845	"""Report attempt to download video webpage."""
	2846	self._reporter(u'%s: Downloading video webpage' % video_id)
	2847
	2848	def report_information_extraction(self, video_id):
	2849	"""Report attempt to extract video information."""
	2850	self._reporter(u'%s: Extracting video information' % video_id)
	2851
	2852	def _parse_page(self, video_webpage):
	2853	"""Extract video information from page"""
	2854	# General data
	2855	data = {'title': r'$"video_title", "(.*?)"$',
	2856	'description': r'<div class="datawrap">(.*?)</div>',
	2857	'owner': r'$"video_owner_name", "(.*?)"$',
	2858	'thumbnail': r'$"thumb_url", "(?P<THUMB>.*?)"$',
	2859	}
	2860	video_info = {}
	2861	for piece in data.keys():
	2862	mobj = re.search(data[piece], video_webpage)
	2863	if mobj is not None:
	2864	video_info[piece] = urllib.unquote_plus(mobj.group(1).decode("unicode_escape"))
	2865
	2866	# Video urls
	2867	video_urls = {}
	2868	for fmt in self._available_formats:
	2869	mobj = re.search(r'$"%s_src\", "(.+?)"$' % fmt, video_webpage)
	2870	if mobj is not None:
	2871	# URL is in a Javascript segment inside an escaped Unicode format within
	2872	# the generally utf-8 page
	2873	video_urls[fmt] = urllib.unquote_plus(mobj.group(1).decode("unicode_escape"))
	2874	video_info['video_urls'] = video_urls
	2875
	2876	return video_info
	2877
	2878	def _real_initialize(self):
	2879	if self._downloader is None:
	2880	return
	2881
	2882	useremail = None
	2883	password = None
	2884	downloader_params = self._downloader.params
	2885
	2886	# Attempt to use provided username and password or .netrc data
	2887	if downloader_params.get('username', None) is not None:
	2888	useremail = downloader_params['username']
	2889	password = downloader_params['password']
	2890	elif downloader_params.get('usenetrc', False):
	2891	try:
	2892	info = netrc.netrc().authenticators(self._NETRC_MACHINE)
	2893	if info is not None:
	2894	useremail = info[0]
	2895	password = info[2]
	2896	else:
	2897	raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
	2898	except (IOError, netrc.NetrcParseError), err:
	2899	self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
	2900	return
	2901
	2902	if useremail is None:
	2903	return
	2904
	2905	# Log in
	2906	login_form = {
	2907	'email': useremail,
	2908	'pass': password,
	2909	'login': 'Log+In'
	2910	}
	2911	request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form))
	2912	try:
	2913	self.report_login()
	2914	login_results = urllib2.urlopen(request).read()
	2915	if re.search(r'<form(.)name="login"(.)</form>', login_results) is not None:
	2916	self._downloader.to_stderr(u'WARNING: unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
	2917	return
	2918	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	2919	self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
	2920	return
	2921
	2922	def _real_extract(self, url):
	2923	mobj = re.match(self._VALID_URL, url)
	2924	if mobj is None:
	2925	self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
	2926	return
	2927	video_id = mobj.group('ID')
	2928
	2929	# Get video webpage
	2930	self.report_video_webpage_download(video_id)
	2931	request = urllib2.Request('https://www.facebook.com/video/video.php?v=%s' % video_id)
	2932	try:
	2933	page = urllib2.urlopen(request)
	2934	video_webpage = page.read()
	2935	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	2936	self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
	2937	return
	2938
	2939	# Start extracting information
	2940	self.report_information_extraction(video_id)
	2941
	2942	# Extract information
	2943	video_info = self._parse_page(video_webpage)
	2944
	2945	# uploader
	2946	if 'owner' not in video_info:
	2947	self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
	2948	return
	2949	video_uploader = video_info['owner']
	2950
	2951	# title
	2952	if 'title' not in video_info:
	2953	self._downloader.trouble(u'ERROR: unable to extract video title')
	2954	return
	2955	video_title = video_info['title']
	2956	video_title = video_title.decode('utf-8')
	2957	video_title = sanitize_title(video_title)
	2958
	2959	simple_title = _simplify_title(video_title)
	2960
	2961	# thumbnail image
	2962	if 'thumbnail' not in video_info:
	2963	self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
	2964	video_thumbnail = ''
	2965	else:
	2966	video_thumbnail = video_info['thumbnail']
	2967
	2968	# upload date
	2969	upload_date = u'NA'
	2970	if 'upload_date' in video_info:
	2971	upload_time = video_info['upload_date']
	2972	timetuple = email.utils.parsedate_tz(upload_time)
	2973	if timetuple is not None:
	2974	try:
	2975	upload_date = time.strftime('%Y%m%d', timetuple[0:9])
	2976	except:
	2977	pass
	2978
	2979	# description
	2980	video_description = video_info.get('description', 'No description available.')
	2981
	2982	url_map = video_info['video_urls']
	2983	if len(url_map.keys()) > 0:
	2984	# Decide which formats to download
	2985	req_format = self._downloader.params.get('format', None)
	2986	format_limit = self._downloader.params.get('format_limit', None)
	2987
	2988	if format_limit is not None and format_limit in self._available_formats:
	2989	format_list = self._available_formats[self._available_formats.index(format_limit):]
	2990	else:
	2991	format_list = self._available_formats
	2992	existing_formats = [x for x in format_list if x in url_map]
	2993	if len(existing_formats) == 0:
	2994	self._downloader.trouble(u'ERROR: no known formats available for video')
	2995	return
	2996	if req_format is None:
	2997	video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
	2998	elif req_format == 'worst':
	2999	video_url_list = [(existing_formats[len(existing_formats)-1], url_map[existing_formats[len(existing_formats)-1]])] # worst quality
	3000	elif req_format == '-1':
	3001	video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
	3002	else:
	3003	# Specific format
	3004	if req_format not in url_map:
	3005	self._downloader.trouble(u'ERROR: requested format not available')
	3006	return
	3007	video_url_list = [(req_format, url_map[req_format])] # Specific format
	3008
	3009	for format_param, video_real_url in video_url_list:
	3010
	3011	# At this point we have a new video
	3012	self._downloader.increment_downloads()
	3013
	3014	# Extension
	3015	video_extension = self._video_extensions.get(format_param, 'mp4')
	3016
	3017	try:
	3018	# Process video information
	3019	self._downloader.process_info({
	3020	'id': video_id.decode('utf-8'),
	3021	'url': video_real_url.decode('utf-8'),
	3022	'uploader': video_uploader.decode('utf-8'),
	3023	'upload_date': upload_date,
	3024	'title': video_title,
	3025	'stitle': simple_title,
	3026	'ext': video_extension.decode('utf-8'),
	3027	'format': (format_param is None and u'NA' or format_param.decode('utf-8')),
	3028	'thumbnail': video_thumbnail.decode('utf-8'),
	3029	'description': video_description.decode('utf-8'),
	3030	'player_url': None,
	3031	})
	3032	except UnavailableVideoError, err:
	3033	self._downloader.trouble(u'\nERROR: unable to download video')
	3034
	3035	class BlipTVIE(InfoExtractor):
	3036	"""Information extractor for blip.tv"""
	3037
	3038	_VALID_URL = r'^(?:https?://)?(?:\w+\.)?blip\.tv(/.+)$'
	3039	_URL_EXT = r'^.*\.([a-z0-9]+)$'
	3040	IE_NAME = u'blip.tv'
	3041
	3042	def report_extraction(self, file_id):
	3043	"""Report information extraction."""
	3044	self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, file_id))
	3045
	3046	def report_direct_download(self, title):
	3047	"""Report information extraction."""
	3048	self._downloader.to_screen(u'[%s] %s: Direct download detected' % (self.IE_NAME, title))
	3049
	3050	def _real_extract(self, url):
	3051	mobj = re.match(self._VALID_URL, url)
	3052	if mobj is None:
	3053	self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
	3054	return
	3055
	3056	if '?' in url:
	3057	cchar = '&'
	3058	else:
	3059	cchar = '?'
	3060	json_url = url + cchar + 'skin=json&version=2&no_wrap=1'
	3061	request = urllib2.Request(json_url)
	3062	self.report_extraction(mobj.group(1))
	3063	info = None
	3064	try:
	3065	urlh = urllib2.urlopen(request)
	3066	if urlh.headers.get('Content-Type', '').startswith('video/'): # Direct download
	3067	basename = url.split('/')[-1]
	3068	title,ext = os.path.splitext(basename)
	3069	title = title.decode('UTF-8')
	3070	ext = ext.replace('.', '')
	3071	self.report_direct_download(title)
	3072	info = {
	3073	'id': title,
	3074	'url': url,
	3075	'title': title,
	3076	'stitle': _simplify_title(title),
	3077	'ext': ext,
	3078	'urlhandle': urlh
	3079	}
	3080	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	3081	self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
	3082	return
	3083	if info is None: # Regular URL
	3084	try:
	3085	json_code = urlh.read()
	3086	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	3087	self._downloader.trouble(u'ERROR: unable to read video info webpage: %s' % str(err))
	3088	return
	3089
	3090	try:
	3091	json_data = json.loads(json_code)
	3092	if 'Post' in json_data:
	3093	data = json_data['Post']
	3094	else:
	3095	data = json_data
	3096
	3097	upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d')
	3098	video_url = data['media']['url']
	3099	umobj = re.match(self._URL_EXT, video_url)
	3100	if umobj is None:
	3101	raise ValueError('Can not determine filename extension')
	3102	ext = umobj.group(1)
	3103
	3104	info = {
	3105	'id': data['item_id'],
	3106	'url': video_url,
	3107	'uploader': data['display_name'],
	3108	'upload_date': upload_date,
	3109	'title': data['title'],
	3110	'stitle': _simplify_title(data['title']),
	3111	'ext': ext,
	3112	'format': data['media']['mimeType'],
	3113	'thumbnail': data['thumbnailUrl'],
	3114	'description': data['description'],
	3115	'player_url': data['embedUrl']
	3116	}
	3117	except (ValueError,KeyError), err:
	3118	self._downloader.trouble(u'ERROR: unable to parse video information: %s' % repr(err))
	3119	return
	3120
	3121	self._downloader.increment_downloads()
	3122
	3123	try:
	3124	self._downloader.process_info(info)
	3125	except UnavailableVideoError, err:
	3126	self._downloader.trouble(u'\nERROR: unable to download video')
	3127
	3128
	3129	class MyVideoIE(InfoExtractor):
	3130	"""Information Extractor for myvideo.de."""
	3131
	3132	_VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/watch/([0-9]+)/([^?/]+).*'
	3133	IE_NAME = u'myvideo'
	3134
	3135	def __init__(self, downloader=None):
	3136	InfoExtractor.__init__(self, downloader)
	3137
	3138	def report_download_webpage(self, video_id):
	3139	"""Report webpage download."""
	3140	self._downloader.to_screen(u'[myvideo] %s: Downloading webpage' % video_id)
	3141
	3142	def report_extraction(self, video_id):
	3143	"""Report information extraction."""
	3144	self._downloader.to_screen(u'[myvideo] %s: Extracting information' % video_id)
	3145
	3146	def _real_extract(self,url):
	3147	mobj = re.match(self._VALID_URL, url)
	3148	if mobj is None:
	3149	self._download.trouble(u'ERROR: invalid URL: %s' % url)
	3150	return
	3151
	3152	video_id = mobj.group(1)
	3153
	3154	# Get video webpage
	3155	request = urllib2.Request('http://www.myvideo.de/watch/%s' % video_id)
	3156	try:
	3157	self.report_download_webpage(video_id)
	3158	webpage = urllib2.urlopen(request).read()
	3159	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	3160	self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
	3161	return
	3162
	3163	self.report_extraction(video_id)
	3164	mobj = re.search(r'<link rel=\'image_src\' href=\'(http://is[0-9].myvideo\.de/de/movie[0-9]+/[a-f0-9]+)/thumbs/[^.]+\.jpg\' />',
	3165	webpage)
	3166	if mobj is None:
	3167	self._downloader.trouble(u'ERROR: unable to extract media URL')
	3168	return
	3169	video_url = mobj.group(1) + ('/%s.flv' % video_id)
	3170
	3171	mobj = re.search('<title>([^<]+)</title>', webpage)
	3172	if mobj is None:
	3173	self._downloader.trouble(u'ERROR: unable to extract title')
	3174	return
	3175
	3176	video_title = mobj.group(1)
	3177	video_title = sanitize_title(video_title)
	3178
	3179	simple_title = _simplify_title(video_title)
	3180
	3181	try:
	3182	self._downloader.process_info({
	3183	'id': video_id,
	3184	'url': video_url,
	3185	'uploader': u'NA',
	3186	'upload_date': u'NA',
	3187	'title': video_title,
	3188	'stitle': simple_title,
	3189	'ext': u'flv',
	3190	'format': u'NA',
	3191	'player_url': None,
	3192	})
	3193	except UnavailableVideoError:
	3194	self._downloader.trouble(u'\nERROR: Unable to download video')
	3195
	3196	class ComedyCentralIE(InfoExtractor):
	3197	"""Information extractor for The Daily Show and Colbert Report """
	3198
	3199	_VALID_URL = r'^(:(?P<shortname>tds\|thedailyshow\|cr\|colbert\|colbertnation\|colbertreport))\|(https?://)?(www\.)?(?P<showname>thedailyshow\|colbertnation)\.com/full-episodes/(?P<episode>.*)$'
	3200	IE_NAME = u'comedycentral'
	3201
	3202	def report_extraction(self, episode_id):
	3203	self._downloader.to_screen(u'[comedycentral] %s: Extracting information' % episode_id)
	3204
	3205	def report_config_download(self, episode_id):
	3206	self._downloader.to_screen(u'[comedycentral] %s: Downloading configuration' % episode_id)
	3207
	3208	def report_index_download(self, episode_id):
	3209	self._downloader.to_screen(u'[comedycentral] %s: Downloading show index' % episode_id)
	3210
	3211	def report_player_url(self, episode_id):
	3212	self._downloader.to_screen(u'[comedycentral] %s: Determining player URL' % episode_id)
	3213
	3214	def _real_extract(self, url):
	3215	mobj = re.match(self._VALID_URL, url)
	3216	if mobj is None:
	3217	self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
	3218	return
	3219
	3220	if mobj.group('shortname'):
	3221	if mobj.group('shortname') in ('tds', 'thedailyshow'):
	3222	url = u'http://www.thedailyshow.com/full-episodes/'
	3223	else:
	3224	url = u'http://www.colbertnation.com/full-episodes/'
	3225	mobj = re.match(self._VALID_URL, url)
	3226	assert mobj is not None
	3227
	3228	dlNewest = not mobj.group('episode')
	3229	if dlNewest:
	3230	epTitle = mobj.group('showname')
	3231	else:
	3232	epTitle = mobj.group('episode')
	3233
	3234	req = urllib2.Request(url)
	3235	self.report_extraction(epTitle)
	3236	try:
	3237	htmlHandle = urllib2.urlopen(req)
	3238	html = htmlHandle.read()
	3239	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	3240	self._downloader.trouble(u'ERROR: unable to download webpage: %s' % unicode(err))
	3241	return
	3242	if dlNewest:
	3243	url = htmlHandle.geturl()
	3244	mobj = re.match(self._VALID_URL, url)
	3245	if mobj is None:
	3246	self._downloader.trouble(u'ERROR: Invalid redirected URL: ' + url)
	3247	return
	3248	if mobj.group('episode') == '':
	3249	self._downloader.trouble(u'ERROR: Redirected URL is still not specific: ' + url)
	3250	return
	3251	epTitle = mobj.group('episode')
	3252
	3253	mMovieParams = re.findall('(?:<param name="movie" value="\|var url = ")(http://media.mtvnservices.com/([^"]episode.?:.*?))"', html)
	3254	if len(mMovieParams) == 0:
	3255	self._downloader.trouble(u'ERROR: unable to find Flash URL in webpage ' + url)
	3256	return
	3257
	3258	playerUrl_raw = mMovieParams[0][0]
	3259	self.report_player_url(epTitle)
	3260	try:
	3261	urlHandle = urllib2.urlopen(playerUrl_raw)
	3262	playerUrl = urlHandle.geturl()
	3263	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	3264	self._downloader.trouble(u'ERROR: unable to find out player URL: ' + unicode(err))
	3265	return
	3266
	3267	uri = mMovieParams[0][1]
	3268	indexUrl = 'http://shadow.comedycentral.com/feeds/video_player/mrss/?' + urllib.urlencode({'uri': uri})
	3269	self.report_index_download(epTitle)
	3270	try:
	3271	indexXml = urllib2.urlopen(indexUrl).read()
	3272	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	3273	self._downloader.trouble(u'ERROR: unable to download episode index: ' + unicode(err))
	3274	return
	3275
	3276	idoc = xml.etree.ElementTree.fromstring(indexXml)
	3277	itemEls = idoc.findall('.//item')
	3278	for itemEl in itemEls:
	3279	mediaId = itemEl.findall('./guid')[0].text
	3280	shortMediaId = mediaId.split(':')[-1]
	3281	showId = mediaId.split(':')[-2].replace('.com', '')
	3282	officialTitle = itemEl.findall('./title')[0].text
	3283	officialDate = itemEl.findall('./pubDate')[0].text
	3284
	3285	configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' +
	3286	urllib.urlencode({'uri': mediaId}))
	3287	configReq = urllib2.Request(configUrl)
	3288	self.report_config_download(epTitle)
	3289	try:
	3290	configXml = urllib2.urlopen(configReq).read()
	3291	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	3292	self._downloader.trouble(u'ERROR: unable to download webpage: %s' % unicode(err))
	3293	return
	3294
	3295	cdoc = xml.etree.ElementTree.fromstring(configXml)
	3296	turls = []
	3297	for rendition in cdoc.findall('.//rendition'):
	3298	finfo = (rendition.attrib['bitrate'], rendition.findall('./src')[0].text)
	3299	turls.append(finfo)
	3300
	3301	if len(turls) == 0:
	3302	self._downloader.trouble(u'\nERROR: unable to download ' + mediaId + ': No videos found')
	3303	continue
	3304
	3305	# For now, just pick the highest bitrate
	3306	format,video_url = turls[-1]
	3307
	3308	self._downloader.increment_downloads()
	3309
	3310	effTitle = showId + u'-' + epTitle
	3311	info = {
	3312	'id': shortMediaId,
	3313	'url': video_url,
	3314	'uploader': showId,
	3315	'upload_date': officialDate,
	3316	'title': effTitle,
	3317	'stitle': _simplify_title(effTitle),
	3318	'ext': 'mp4',
	3319	'format': format,
	3320	'thumbnail': None,
	3321	'description': officialTitle,
	3322	'player_url': playerUrl
	3323	}
	3324
	3325	try:
	3326	self._downloader.process_info(info)
	3327	except UnavailableVideoError, err:
	3328	self._downloader.trouble(u'\nERROR: unable to download ' + mediaId)
	3329	continue
	3330
	3331
	3332	class EscapistIE(InfoExtractor):
	3333	"""Information extractor for The Escapist """
	3334
	3335	_VALID_URL = r'^(https?://)?(www\.)?escapistmagazine\.com/videos/view/(?P<showname>[^/]+)/(?P<episode>[^/?]+)[/?]?.*$'
	3336	IE_NAME = u'escapist'
	3337
	3338	def report_extraction(self, showName):
	3339	self._downloader.to_screen(u'[escapist] %s: Extracting information' % showName)
	3340
	3341	def report_config_download(self, showName):
	3342	self._downloader.to_screen(u'[escapist] %s: Downloading configuration' % showName)
	3343
	3344	def _real_extract(self, url):
	3345	htmlParser = HTMLParser.HTMLParser()
	3346
	3347	mobj = re.match(self._VALID_URL, url)
	3348	if mobj is None:
	3349	self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
	3350	return
	3351	showName = mobj.group('showname')
	3352	videoId = mobj.group('episode')
	3353
	3354	self.report_extraction(showName)
	3355	try:
	3356	webPage = urllib2.urlopen(url).read()
	3357	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	3358	self._downloader.trouble(u'ERROR: unable to download webpage: ' + unicode(err))
	3359	return
	3360
	3361	descMatch = re.search('<meta name="description" content="([^"]*)"', webPage)
	3362	description = htmlParser.unescape(descMatch.group(1))
	3363	imgMatch = re.search('<meta property="og:image" content="([^"]*)"', webPage)
	3364	imgUrl = htmlParser.unescape(imgMatch.group(1))
	3365	playerUrlMatch = re.search('<meta property="og:video" content="([^"]*)"', webPage)
	3366	playerUrl = htmlParser.unescape(playerUrlMatch.group(1))
	3367	configUrlMatch = re.search('config=(.*)$', playerUrl)
	3368	configUrl = urllib2.unquote(configUrlMatch.group(1))
	3369
	3370	self.report_config_download(showName)
	3371	try:
	3372	configJSON = urllib2.urlopen(configUrl).read()
	3373	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	3374	self._downloader.trouble(u'ERROR: unable to download configuration: ' + unicode(err))
	3375	return
	3376
	3377	# Technically, it's JavaScript, not JSON
	3378	configJSON = configJSON.replace("'", '"')
	3379
	3380	try:
	3381	config = json.loads(configJSON)
	3382	except (ValueError,), err:
	3383	self._downloader.trouble(u'ERROR: Invalid JSON in configuration file: ' + unicode(err))
	3384	return
	3385
	3386	playlist = config['playlist']
	3387	videoUrl = playlist[1]['url']
	3388
	3389	self._downloader.increment_downloads()
	3390	info = {
	3391	'id': videoId,
	3392	'url': videoUrl,
	3393	'uploader': showName,
	3394	'upload_date': None,
	3395	'title': showName,
	3396	'stitle': _simplify_title(showName),
	3397	'ext': 'flv',
	3398	'format': 'flv',
	3399	'thumbnail': imgUrl,
	3400	'description': description,
	3401	'player_url': playerUrl,
	3402	}
	3403
	3404	try:
	3405	self._downloader.process_info(info)
	3406	except UnavailableVideoError, err:
	3407	self._downloader.trouble(u'\nERROR: unable to download ' + videoId)
	3408
	3409
	3410	class CollegeHumorIE(InfoExtractor):
	3411	"""Information extractor for collegehumor.com"""
	3412
	3413	_VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/video/(?P<videoid>[0-9]+)/(?P<shorttitle>.*)$'
	3414	IE_NAME = u'collegehumor'
	3415
	3416	def report_webpage(self, video_id):
	3417	"""Report information extraction."""
	3418	self._downloader.to_screen(u'[%s] %s: Downloading webpage' % (self.IE_NAME, video_id))
	3419
	3420	def report_extraction(self, video_id):
	3421	"""Report information extraction."""
	3422	self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id))
	3423
	3424	def _real_extract(self, url):
	3425	htmlParser = HTMLParser.HTMLParser()
	3426
	3427	mobj = re.match(self._VALID_URL, url)
	3428	if mobj is None:
	3429	self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
	3430	return
	3431	video_id = mobj.group('videoid')
	3432
	3433	self.report_webpage(video_id)
	3434	request = urllib2.Request(url)
	3435	try:
	3436	webpage = urllib2.urlopen(request).read()
	3437	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	3438	self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
	3439	return
	3440
	3441	m = re.search(r'id="video:(?P<internalvideoid>[0-9]+)"', webpage)
	3442	if m is None:
	3443	self._downloader.trouble(u'ERROR: Cannot extract internal video ID')
	3444	return
	3445	internal_video_id = m.group('internalvideoid')
	3446
	3447	info = {
	3448	'id': video_id,
	3449	'internal_id': internal_video_id,
	3450	}
	3451
	3452	self.report_extraction(video_id)
	3453	xmlUrl = 'http://www.collegehumor.com/moogaloop/video:' + internal_video_id
	3454	try:
	3455	metaXml = urllib2.urlopen(xmlUrl).read()
	3456	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	3457	self._downloader.trouble(u'ERROR: unable to download video info XML: %s' % str(err))
	3458	return
	3459
	3460	mdoc = xml.etree.ElementTree.fromstring(metaXml)
	3461	try:
	3462	videoNode = mdoc.findall('./video')[0]
	3463	info['description'] = videoNode.findall('./description')[0].text
	3464	info['title'] = videoNode.findall('./caption')[0].text
	3465	info['stitle'] = _simplify_title(info['title'])
	3466	info['url'] = videoNode.findall('./file')[0].text
	3467	info['thumbnail'] = videoNode.findall('./thumbnail')[0].text
	3468	info['ext'] = info['url'].rpartition('.')[2]
	3469	info['format'] = info['ext']
	3470	except IndexError:
	3471	self._downloader.trouble(u'\nERROR: Invalid metadata XML file')
	3472	return
	3473
	3474	self._downloader.increment_downloads()
	3475
	3476	try:
	3477	self._downloader.process_info(info)
	3478	except UnavailableVideoError, err:
	3479	self._downloader.trouble(u'\nERROR: unable to download video')
	3480
	3481
	3482	class XVideosIE(InfoExtractor):
	3483	"""Information extractor for xvideos.com"""
	3484
	3485	_VALID_URL = r'^(?:https?://)?(?:www\.)?xvideos\.com/video([0-9]+)(?:.*)'
	3486	IE_NAME = u'xvideos'
	3487
	3488	def report_webpage(self, video_id):
	3489	"""Report information extraction."""
	3490	self._downloader.to_screen(u'[%s] %s: Downloading webpage' % (self.IE_NAME, video_id))
	3491
	3492	def report_extraction(self, video_id):
	3493	"""Report information extraction."""
	3494	self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id))
	3495
	3496	def _real_extract(self, url):
	3497	htmlParser = HTMLParser.HTMLParser()
	3498
	3499	mobj = re.match(self._VALID_URL, url)
	3500	if mobj is None:
	3501	self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
	3502	return
	3503	video_id = mobj.group(1).decode('utf-8')
	3504
	3505	self.report_webpage(video_id)
	3506
	3507	request = urllib2.Request(r'http://www.xvideos.com/video' + video_id)
	3508	try:
	3509	webpage = urllib2.urlopen(request).read()
	3510	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	3511	self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
	3512	return
	3513
	3514	self.report_extraction(video_id)
	3515
	3516
	3517	# Extract video URL
	3518	mobj = re.search(r'flv_url=(.+?)&', webpage)
	3519	if mobj is None:
	3520	self._downloader.trouble(u'ERROR: unable to extract video url')
	3521	return
	3522	video_url = urllib2.unquote(mobj.group(1).decode('utf-8'))
	3523
	3524
	3525	# Extract title
	3526	mobj = re.search(r'<title>(.*?)\s+-\s+XVID', webpage)
	3527	if mobj is None:
	3528	self._downloader.trouble(u'ERROR: unable to extract video title')
	3529	return
	3530	video_title = mobj.group(1).decode('utf-8')
	3531
	3532
	3533	# Extract video thumbnail
	3534	mobj = re.search(r'http://(?:img.*?\.)xvideos.com/videos/thumbs/[a-fA-F0-9]/[a-fA-F0-9]/[a-fA-F0-9]/([a-fA-F0-9.]+jpg)', webpage)
	3535	if mobj is None:
	3536	self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
	3537	return
	3538	video_thumbnail = mobj.group(1).decode('utf-8')
	3539
	3540
	3541
	3542	self._downloader.increment_downloads()
	3543	info = {
	3544	'id': video_id,
	3545	'url': video_url,
	3546	'uploader': None,
	3547	'upload_date': None,
	3548	'title': video_title,
	3549	'stitle': _simplify_title(video_title),
	3550	'ext': 'flv',
	3551	'format': 'flv',
	3552	'thumbnail': video_thumbnail,
	3553	'description': None,
	3554	'player_url': None,
	3555	}
	3556
	3557	try:
	3558	self._downloader.process_info(info)
	3559	except UnavailableVideoError, err:
	3560	self._downloader.trouble(u'\nERROR: unable to download ' + video_id)
	3561
	3562
	3563	class SoundcloudIE(InfoExtractor):
	3564	"""Information extractor for soundcloud.com
	3565	To access the media, the uid of the song and a stream token
	3566	must be extracted from the page source and the script must make
	3567	a request to media.soundcloud.com/crossdomain.xml. Then
	3568	the media can be grabbed by requesting from an url composed
	3569	of the stream token and uid
	3570	"""
	3571
	3572	_VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)'
	3573	IE_NAME = u'soundcloud'
	3574
	3575	def __init__(self, downloader=None):
	3576	InfoExtractor.__init__(self, downloader)
	3577
	3578	def report_webpage(self, video_id):
	3579	"""Report information extraction."""
	3580	self._downloader.to_screen(u'[%s] %s: Downloading webpage' % (self.IE_NAME, video_id))
	3581
	3582	def report_extraction(self, video_id):
	3583	"""Report information extraction."""
	3584	self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id))
	3585
	3586	def _real_extract(self, url):
	3587	htmlParser = HTMLParser.HTMLParser()
	3588
	3589	mobj = re.match(self._VALID_URL, url)
	3590	if mobj is None:
	3591	self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
	3592	return
	3593
	3594	# extract uploader (which is in the url)
	3595	uploader = mobj.group(1).decode('utf-8')
	3596	# extract simple title (uploader + slug of song title)
	3597	slug_title = mobj.group(2).decode('utf-8')
	3598	simple_title = uploader + '-' + slug_title
	3599
	3600	self.report_webpage('%s/%s' % (uploader, slug_title))
	3601
	3602	request = urllib2.Request('http://soundcloud.com/%s/%s' % (uploader, slug_title))
	3603	try:
	3604	webpage = urllib2.urlopen(request).read()
	3605	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	3606	self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
	3607	return
	3608
	3609	self.report_extraction('%s/%s' % (uploader, slug_title))
	3610
	3611	# extract uid and stream token that soundcloud hands out for access
	3612	mobj = re.search('"uid":"([\w\d]+?)".*?stream_token=([\w\d]+)', webpage)
	3613	if mobj:
	3614	video_id = mobj.group(1)
	3615	stream_token = mobj.group(2)
	3616
	3617	# extract unsimplified title
	3618	mobj = re.search('"title":"(.*?)",', webpage)
	3619	if mobj:
	3620	title = mobj.group(1)
	3621
	3622	# construct media url (with uid/token)
	3623	mediaURL = "http://media.soundcloud.com/stream/%s?stream_token=%s"
	3624	mediaURL = mediaURL % (video_id, stream_token)
	3625
	3626	# description
	3627	description = u'No description available'
	3628	mobj = re.search('track-description-value"><p>(.*?)</p>', webpage)
	3629	if mobj:
	3630	description = mobj.group(1)
	3631
	3632	# upload date
	3633	upload_date = None
	3634	mobj = re.search("pretty-date'>on ([\w]+ [\d]+, [\d]+ \d+:\d+)</abbr></h2>", webpage)
	3635	if mobj:
	3636	try:
	3637	upload_date = datetime.datetime.strptime(mobj.group(1), '%B %d, %Y %H:%M').strftime('%Y%m%d')
	3638	except Exception, e:
	3639	print str(e)
	3640
	3641	# for soundcloud, a request to a cross domain is required for cookies
	3642	request = urllib2.Request('http://media.soundcloud.com/crossdomain.xml', std_headers)
	3643
	3644	try:
	3645	self._downloader.process_info({
	3646	'id': video_id.decode('utf-8'),
	3647	'url': mediaURL,
	3648	'uploader': uploader.decode('utf-8'),
	3649	'upload_date': upload_date,
	3650	'title': simple_title.decode('utf-8'),
	3651	'stitle': simple_title.decode('utf-8'),
	3652	'ext': u'mp3',
	3653	'format': u'NA',
	3654	'player_url': None,
	3655	'description': description.decode('utf-8')
	3656	})
	3657	except UnavailableVideoError:
	3658	self._downloader.trouble(u'\nERROR: unable to download video')
	3659
	3660
	3661	class InfoQIE(InfoExtractor):
	3662	"""Information extractor for infoq.com"""
	3663
	3664	_VALID_URL = r'^(?:https?://)?(?:www\.)?infoq\.com/[^/]+/[^/]+$'
	3665	IE_NAME = u'infoq'
	3666
	3667	def report_webpage(self, video_id):
	3668	"""Report information extraction."""
	3669	self._downloader.to_screen(u'[%s] %s: Downloading webpage' % (self.IE_NAME, video_id))
	3670
	3671	def report_extraction(self, video_id):
	3672	"""Report information extraction."""
	3673	self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id))
	3674
	3675	def _real_extract(self, url):
	3676	htmlParser = HTMLParser.HTMLParser()
	3677
	3678	mobj = re.match(self._VALID_URL, url)
	3679	if mobj is None:
	3680	self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
	3681	return
	3682
	3683	self.report_webpage(url)
	3684
	3685	request = urllib2.Request(url)
	3686	try:
	3687	webpage = urllib2.urlopen(request).read()
	3688	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	3689	self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
	3690	return
	3691
	3692	self.report_extraction(url)
	3693
	3694
	3695	# Extract video URL
	3696	mobj = re.search(r"jsclassref='([^']*)'", webpage)
	3697	if mobj is None:
	3698	self._downloader.trouble(u'ERROR: unable to extract video url')
	3699	return
	3700	video_url = 'rtmpe://video.infoq.com/cfx/st/' + urllib2.unquote(mobj.group(1).decode('base64'))
	3701
	3702
	3703	# Extract title
	3704	mobj = re.search(r'contentTitle = "(.*?)";', webpage)
	3705	if mobj is None:
	3706	self._downloader.trouble(u'ERROR: unable to extract video title')
	3707	return
	3708	video_title = mobj.group(1).decode('utf-8')
	3709
	3710	# Extract description
	3711	video_description = u'No description available.'
	3712	mobj = re.search(r'<meta name="description" content="(.)"(?:\s/)?>', webpage)
	3713	if mobj is not None:
	3714	video_description = mobj.group(1).decode('utf-8')
	3715
	3716	video_filename = video_url.split('/')[-1]
	3717	video_id, extension = video_filename.split('.')
	3718
	3719	self._downloader.increment_downloads()
	3720	info = {
	3721	'id': video_id,
	3722	'url': video_url,
	3723	'uploader': None,
	3724	'upload_date': None,
	3725	'title': video_title,
	3726	'stitle': _simplify_title(video_title),
	3727	'ext': extension,
	3728	'format': extension, # Extension is always(?) mp4, but seems to be flv
	3729	'thumbnail': None,
	3730	'description': video_description,
	3731	'player_url': None,
	3732	}
	3733
	3734	try:
	3735	self._downloader.process_info(info)
	3736	except UnavailableVideoError, err:
	3737	self._downloader.trouble(u'\nERROR: unable to download ' + video_url)
	3738
	3739	class MixcloudIE(InfoExtractor):
	3740	"""Information extractor for www.mixcloud.com"""
	3741	_VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([\w\d-]+)/([\w\d-]+)'
	3742	IE_NAME = u'mixcloud'
	3743
	3744	def __init__(self, downloader=None):
	3745	InfoExtractor.__init__(self, downloader)
	3746
	3747	def report_download_json(self, file_id):
	3748	"""Report JSON download."""
	3749	self._downloader.to_screen(u'[%s] Downloading json' % self.IE_NAME)
	3750
	3751	def report_extraction(self, file_id):
	3752	"""Report information extraction."""
	3753	self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, file_id))
	3754
	3755	def get_urls(self, jsonData, fmt, bitrate='best'):
	3756	"""Get urls from 'audio_formats' section in json"""
	3757	file_url = None
	3758	try:
	3759	bitrate_list = jsonData[fmt]
	3760	if bitrate is None or bitrate == 'best' or bitrate not in bitrate_list:
	3761	bitrate = max(bitrate_list) # select highest
	3762
	3763	url_list = jsonData[fmt][bitrate]
	3764	except TypeError: # we have no bitrate info.
	3765	url_list = jsonData[fmt]
	3766
	3767	return url_list
	3768
	3769	def check_urls(self, url_list):
	3770	"""Returns 1st active url from list"""
	3771	for url in url_list:
	3772	try:
	3773	urllib2.urlopen(url)
	3774	return url
	3775	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	3776	url = None
	3777
	3778	return None
	3779
	3780	def _print_formats(self, formats):
	3781	print 'Available formats:'
	3782	for fmt in formats.keys():
	3783	for b in formats[fmt]:
	3784	try:
	3785	ext = formats[fmt][b][0]
	3786	print '%s\t%s\t[%s]' % (fmt, b, ext.split('.')[-1])
	3787	except TypeError: # we have no bitrate info
	3788	ext = formats[fmt][0]
	3789	print '%s\t%s\t[%s]' % (fmt, '??', ext.split('.')[-1])
	3790	break
	3791
	3792	def _real_extract(self, url):
	3793	mobj = re.match(self._VALID_URL, url)
	3794	if mobj is None:
	3795	self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
	3796	return
	3797	# extract uploader & filename from url
	3798	uploader = mobj.group(1).decode('utf-8')
	3799	file_id = uploader + "-" + mobj.group(2).decode('utf-8')
	3800
	3801	# construct API request
	3802	file_url = 'http://www.mixcloud.com/api/1/cloudcast/' + '/'.join(url.split('/')[-3:-1]) + '.json'
	3803	# retrieve .json file with links to files
	3804	request = urllib2.Request(file_url)
	3805	try:
	3806	self.report_download_json(file_url)
	3807	jsonData = urllib2.urlopen(request).read()
	3808	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	3809	self._downloader.trouble(u'ERROR: Unable to retrieve file: %s' % str(err))
	3810	return
	3811
	3812	# parse JSON
	3813	json_data = json.loads(jsonData)
	3814	player_url = json_data['player_swf_url']
	3815	formats = dict(json_data['audio_formats'])
	3816
	3817	req_format = self._downloader.params.get('format', None)
	3818	bitrate = None
	3819
	3820	if self._downloader.params.get('listformats', None):
	3821	self._print_formats(formats)
	3822	return
	3823
	3824	if req_format is None or req_format == 'best':
	3825	for format_param in formats.keys():
	3826	url_list = self.get_urls(formats, format_param)
	3827	# check urls
	3828	file_url = self.check_urls(url_list)
	3829	if file_url is not None:
	3830	break # got it!
	3831	else:
	3832	if req_format not in formats.keys():
	3833	self._downloader.trouble(u'ERROR: format is not available')
	3834	return
	3835
	3836	url_list = self.get_urls(formats, req_format)
	3837	file_url = self.check_urls(url_list)
	3838	format_param = req_format
	3839
	3840	# We have audio
	3841	self._downloader.increment_downloads()
	3842	try:
	3843	# Process file information
	3844	self._downloader.process_info({
	3845	'id': file_id.decode('utf-8'),
	3846	'url': file_url.decode('utf-8'),
	3847	'uploader': uploader.decode('utf-8'),
	3848	'upload_date': u'NA',
	3849	'title': json_data['name'],
	3850	'stitle': _simplify_title(json_data['name']),
	3851	'ext': file_url.split('.')[-1].decode('utf-8'),
	3852	'format': (format_param is None and u'NA' or format_param.decode('utf-8')),
	3853	'thumbnail': json_data['thumbnail_url'],
	3854	'description': json_data['description'],
	3855	'player_url': player_url.decode('utf-8'),
	3856	})
	3857	except UnavailableVideoError, err:
	3858	self._downloader.trouble(u'ERROR: unable to download file')
	3859
	3860	class StanfordOpenClassroomIE(InfoExtractor):
	3861	"""Information extractor for Stanford's Open ClassRoom"""
	3862
	3863	_VALID_URL = r'^(?:https?://)?openclassroom.stanford.edu(?P<path>/?\|(/MainFolder/(?:HomePage\|CoursePage\|VideoPage)\.php([?]course=(?P<course>[^&]+)(&video=(?P<video>[^&]+))?(&.*)?)?))$'
	3864	IE_NAME = u'stanfordoc'
	3865
	3866	def report_download_webpage(self, objid):
	3867	"""Report information extraction."""
	3868	self._downloader.to_screen(u'[%s] %s: Downloading webpage' % (self.IE_NAME, objid))
	3869
	3870	def report_extraction(self, video_id):
	3871	"""Report information extraction."""
	3872	self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id))
	3873
	3874	def _real_extract(self, url):
	3875	mobj = re.match(self._VALID_URL, url)
	3876	if mobj is None:
	3877	self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
	3878	return
	3879
	3880	if mobj.group('course') and mobj.group('video'): # A specific video
	3881	course = mobj.group('course')
	3882	video = mobj.group('video')
	3883	info = {
	3884	'id': _simplify_title(course + '_' + video),
	3885	}
	3886
	3887	self.report_extraction(info['id'])
	3888	baseUrl = 'http://openclassroom.stanford.edu/MainFolder/courses/' + course + '/videos/'
	3889	xmlUrl = baseUrl + video + '.xml'
	3890	try:
	3891	metaXml = urllib2.urlopen(xmlUrl).read()
	3892	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	3893	self._downloader.trouble(u'ERROR: unable to download video info XML: %s' % unicode(err))
	3894	return
	3895	mdoc = xml.etree.ElementTree.fromstring(metaXml)
	3896	try:
	3897	info['title'] = mdoc.findall('./title')[0].text
	3898	info['url'] = baseUrl + mdoc.findall('./videoFile')[0].text
	3899	except IndexError:
	3900	self._downloader.trouble(u'\nERROR: Invalid metadata XML file')
	3901	return
	3902	info['stitle'] = _simplify_title(info['title'])
	3903	info['ext'] = info['url'].rpartition('.')[2]
	3904	info['format'] = info['ext']
	3905	self._downloader.increment_downloads()
	3906	try:
	3907	self._downloader.process_info(info)
	3908	except UnavailableVideoError, err:
	3909	self._downloader.trouble(u'\nERROR: unable to download video')
	3910	elif mobj.group('course'): # A course page
	3911	unescapeHTML = HTMLParser.HTMLParser().unescape
	3912
	3913	course = mobj.group('course')
	3914	info = {
	3915	'id': _simplify_title(course),
	3916	'type': 'playlist',
	3917	}
	3918
	3919	self.report_download_webpage(info['id'])
	3920	try:
	3921	coursepage = urllib2.urlopen(url).read()
	3922	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	3923	self._downloader.trouble(u'ERROR: unable to download course info page: ' + unicode(err))
	3924	return
	3925
	3926	m = re.search('<h1>([^<]+)</h1>', coursepage)
	3927	if m:
	3928	info['title'] = unescapeHTML(m.group(1))
	3929	else:
	3930	info['title'] = info['id']
	3931	info['stitle'] = _simplify_title(info['title'])
	3932
	3933	m = re.search('<description>([^<]+)</description>', coursepage)
	3934	if m:
	3935	info['description'] = unescapeHTML(m.group(1))
	3936
	3937	links = _orderedSet(re.findall('<a href="(VideoPage.php\?[^"]+)">', coursepage))
	3938	info['list'] = [
	3939	{
	3940	'type': 'reference',
	3941	'url': 'http://openclassroom.stanford.edu/MainFolder/' + unescapeHTML(vpage),
	3942	}
	3943	for vpage in links]
	3944
	3945	for entry in info['list']:
	3946	assert entry['type'] == 'reference'
	3947	self.extract(entry['url'])
	3948	else: # Root page
	3949	unescapeHTML = HTMLParser.HTMLParser().unescape
	3950
	3951	info = {
	3952	'id': 'Stanford OpenClassroom',
	3953	'type': 'playlist',
	3954	}
	3955
	3956	self.report_download_webpage(info['id'])
	3957	rootURL = 'http://openclassroom.stanford.edu/MainFolder/HomePage.php'
	3958	try:
	3959	rootpage = urllib2.urlopen(rootURL).read()
	3960	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	3961	self._downloader.trouble(u'ERROR: unable to download course info page: ' + unicode(err))
	3962	return
	3963
	3964	info['title'] = info['id']
	3965	info['stitle'] = _simplify_title(info['title'])
	3966
	3967	links = _orderedSet(re.findall('<a href="(CoursePage.php\?[^"]+)">', rootpage))
	3968	info['list'] = [
	3969	{
	3970	'type': 'reference',
	3971	'url': 'http://openclassroom.stanford.edu/MainFolder/' + unescapeHTML(cpage),
	3972	}
	3973	for cpage in links]
	3974
	3975	for entry in info['list']:
	3976	assert entry['type'] == 'reference'
	3977	self.extract(entry['url'])
	3978
	3979	class MTVIE(InfoExtractor):
	3980	"""Information extractor for MTV.com"""
	3981
	3982	_VALID_URL = r'^(?P<proto>https?://)?(?:www\.)?mtv\.com/videos/[^/]+/(?P<videoid>[0-9]+)/[^/]+$'
	3983	IE_NAME = u'mtv'
	3984
	3985	def report_webpage(self, video_id):
	3986	"""Report information extraction."""
	3987	self._downloader.to_screen(u'[%s] %s: Downloading webpage' % (self.IE_NAME, video_id))
	3988
	3989	def report_extraction(self, video_id):
	3990	"""Report information extraction."""
	3991	self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id))
	3992
	3993	def _real_extract(self, url):
	3994	mobj = re.match(self._VALID_URL, url)
	3995	if mobj is None:
	3996	self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
	3997	return
	3998	if not mobj.group('proto'):
	3999	url = 'http://' + url
	4000	video_id = mobj.group('videoid')
	4001	self.report_webpage(video_id)
	4002
	4003	request = urllib2.Request(url)
	4004	try:
	4005	webpage = urllib2.urlopen(request).read()
	4006	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	4007	self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
	4008	return
	4009
	4010	mobj = re.search(r'<meta name="mtv_vt" content="([^"]+)"/>', webpage)
	4011	if mobj is None:
	4012	self._downloader.trouble(u'ERROR: unable to extract song name')
	4013	return
	4014	song_name = _unescapeHTML(mobj.group(1).decode('iso-8859-1'))
	4015	mobj = re.search(r'<meta name="mtv_an" content="([^"]+)"/>', webpage)
	4016	if mobj is None:
	4017	self._downloader.trouble(u'ERROR: unable to extract performer')
	4018	return
	4019	performer = _unescapeHTML(mobj.group(1).decode('iso-8859-1'))
	4020	video_title = performer + ' - ' + song_name
	4021
	4022	mobj = re.search(r'<meta name="mtvn_uri" content="([^"]+)"/>', webpage)
	4023	if mobj is None:
	4024	self._downloader.trouble(u'ERROR: unable to mtvn_uri')
	4025	return
	4026	mtvn_uri = mobj.group(1)
	4027
	4028	mobj = re.search(r'MTVN.Player.defaultPlaylistId = ([0-9]+);', webpage)
	4029	if mobj is None:
	4030	self._downloader.trouble(u'ERROR: unable to extract content id')
	4031	return
	4032	content_id = mobj.group(1)
	4033
	4034	videogen_url = 'http://www.mtv.com/player/includes/mediaGen.jhtml?uri=' + mtvn_uri + '&id=' + content_id + '&vid=' + video_id + '&ref=www.mtvn.com&viewUri=' + mtvn_uri
	4035	self.report_extraction(video_id)
	4036	request = urllib2.Request(videogen_url)
	4037	try:
	4038	metadataXml = urllib2.urlopen(request).read()
	4039	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	4040	self._downloader.trouble(u'ERROR: unable to download video metadata: %s' % str(err))
	4041	return
	4042
	4043	mdoc = xml.etree.ElementTree.fromstring(metadataXml)
	4044	renditions = mdoc.findall('.//rendition')
	4045
	4046	# For now, always pick the highest quality.
	4047	rendition = renditions[-1]
	4048
	4049	try:
	4050	_,_,ext = rendition.attrib['type'].partition('/')
	4051	format = ext + '-' + rendition.attrib['width'] + 'x' + rendition.attrib['height'] + '_' + rendition.attrib['bitrate']
	4052	video_url = rendition.find('./src').text
	4053	except KeyError:
	4054	self._downloader.trouble('Invalid rendition field.')
	4055	return
	4056
	4057	self._downloader.increment_downloads()
	4058	info = {
	4059	'id': video_id,
	4060	'url': video_url,
	4061	'uploader': performer,
	4062	'title': video_title,
	4063	'stitle': _simplify_title(video_title),
	4064	'ext': ext,
	4065	'format': format,
	4066	}
	4067
	4068	try:
	4069	self._downloader.process_info(info)
	4070	except UnavailableVideoError, err:
	4071	self._downloader.trouble(u'\nERROR: unable to download ' + video_id)
	4072
	4073
	class PostProcessor(object):
		"""Base class for post-processing steps.

		A downloader keeps a chain of PostProcessor objects, registered
		through its add_post_processor() method. After a successful
		download it calls run() on each of them in turn: the first one
		receives the initial information dictionary, every following one
		receives whatever the previous run() returned.

		The chain ends when it is exhausted or as soon as one run() call
		returns None.

		Like InfoExtractor objects, a PostProcessor and its downloader
		register with each other ("mutual registration").
		"""

		# Downloader this post processor is attached to (None until set).
		_downloader = None

		def __init__(self, downloader=None):
			"""Create the post processor, optionally bound to a downloader."""
			self._downloader = downloader

		def set_downloader(self, downloader):
			"""Bind this post processor to the given downloader."""
			self._downloader = downloader

		def run(self, information):
			"""Process one finished download.

			"information" is a dictionary like those built by the
			InfoExtractors, with one additional "filepath" entry that
			points to the downloaded file.

			Return None to stop the post-processing chain, or an
			information dictionary (the received one, possibly with some
			fields changed) to hand on to the next PostProcessor.

			A PostProcessingError may be raised; the calling downloader
			takes it into account.
			"""
			# The base implementation passes the dictionary on untouched.
			return information
	4119
	class AudioConversionError(Exception):
		"""Raised when extracting or converting audio fails.

		The human-readable reason is kept in the "message" attribute.
		Deriving from Exception (not BaseException) lets ordinary
		"except Exception" handlers catch it, like any other program error.
		"""
		def __init__(self, message):
			Exception.__init__(self, message)
			self.message = message
	4123
	class FFmpegExtractAudioPP(PostProcessor):
		"""Post processor that extracts the audio track of a download.

		ffprobe is used to find the codec of the audio stream and ffmpeg to
		copy or transcode it into a new file next to the original one.
		"""

		def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, keepvideo=False):
			"""Store the conversion preferences.

			preferredcodec defaults to 'best'; preferredquality, when given, is
			passed to ffmpeg as the '-ab' value; keepvideo prevents the
			original file from being removed after the conversion.
			"""
			PostProcessor.__init__(self, downloader)
			if preferredcodec is None:
				preferredcodec = 'best'
			self._preferredcodec = preferredcodec
			self._preferredquality = preferredquality
			self._keepvideo = keepvideo

		@staticmethod
		def get_audio_codec(path):
			"""Return the codec name of the audio stream in path, or None.

			None is returned when ffprobe cannot be run, exits with an error
			or reports no audio stream.
			"""
			try:
				cmd = ['ffprobe', '-show_streams', '--', _encodeFilename(path)]
				# ffprobe's stderr is discarded; close the sink afterwards so
				# the file handle is not leaked.
				devnull = open(os.path.devnull, 'w')
				try:
					handle = subprocess.Popen(cmd, stderr=devnull, stdout=subprocess.PIPE)
					output = handle.communicate()[0]
				finally:
					devnull.close()
				if handle.wait() != 0:
					return None
			except (IOError, OSError):
				return None
			# In each stream section "codec_name=" comes before "codec_type=",
			# so remember the last name seen and return it once the stream
			# turns out to be an audio one.
			audio_codec = None
			for line in output.split('\n'):
				if line.startswith('codec_name='):
					audio_codec = line.split('=')[1].strip()
				elif line.strip() == 'codec_type=audio' and audio_codec is not None:
					return audio_codec
			return None

		@staticmethod
		def run_ffmpeg(path, out_path, codec, more_opts):
			"""Run ffmpeg to write the audio of path to out_path.

			codec is the '-acodec' value (None leaves the choice to ffmpeg) and
			more_opts a list of extra command line options. Raises
			AudioConversionError when ffmpeg is missing or exits with an error;
			other IOError/OSError exceptions are re-raised unchanged.
			"""
			if codec is None:
				acodec_opts = []
			else:
				acodec_opts = ['-acodec', codec]
			cmd = ['ffmpeg', '-y', '-i', _encodeFilename(path), '-vn'] + acodec_opts + more_opts + ['--', _encodeFilename(out_path)]
			try:
				p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
				stdout,stderr = p.communicate()
			except (IOError, OSError):
				e = sys.exc_info()[1]
				# errno 2 is ENOENT: the ffmpeg executable could not be found.
				if isinstance(e, OSError) and e.errno == 2:
					raise AudioConversionError('ffmpeg not found. Please install ffmpeg.')
				else:
					raise # keep the original traceback
			if p.returncode != 0:
				# The last line of ffmpeg's stderr is used as the error message.
				msg = stderr.strip().split('\n')[-1]
				raise AudioConversionError(msg)

		def run(self, information):
			"""Extract the audio of information['filepath'].

			Returns the information dictionary with 'filepath' pointing to the
			audio file, or None (which stops the post-processing chain) when
			the codec cannot be determined, the conversion fails or the
			original file cannot be removed.
			"""
			path = information['filepath']

			filecodec = self.get_audio_codec(path)
			if filecodec is None:
				self._downloader.to_stderr(u'WARNING: unable to obtain file audio codec with ffprobe')
				return None

			more_opts = []
			if self._preferredcodec == 'best' or self._preferredcodec == filecodec or (self._preferredcodec == 'm4a' and filecodec == 'aac'):
				if self._preferredcodec == 'm4a' and filecodec == 'aac':
					# Lossless, but in another container
					acodec = 'copy'
					extension = self._preferredcodec
					more_opts = ['-absf', 'aac_adtstoasc']
				elif filecodec in ['aac', 'mp3', 'vorbis']:
					# Lossless if possible
					acodec = 'copy'
					extension = filecodec
					if filecodec == 'aac':
						more_opts = ['-f', 'adts']
					if filecodec == 'vorbis':
						extension = 'ogg'
				else:
					# MP3 otherwise.
					acodec = 'libmp3lame'
					extension = 'mp3'
					more_opts = []
					if self._preferredquality is not None:
						more_opts += ['-ab', self._preferredquality]
			else:
				# We convert the audio (lossy)
				acodec = {'mp3': 'libmp3lame', 'aac': 'aac', 'm4a': 'aac', 'vorbis': 'libvorbis', 'wav': None}[self._preferredcodec]
				extension = self._preferredcodec
				more_opts = []
				if self._preferredquality is not None:
					more_opts += ['-ab', self._preferredquality]
				if self._preferredcodec == 'aac':
					more_opts += ['-f', 'adts']
				if self._preferredcodec == 'm4a':
					more_opts += ['-absf', 'aac_adtstoasc']
				if self._preferredcodec == 'vorbis':
					extension = 'ogg'
				if self._preferredcodec == 'wav':
					extension = 'wav'
					more_opts += ['-f', 'wav']

			prefix, sep, ext = path.rpartition(u'.') # not os.path.splitext, since the latter does not work on unicode in all setups
			if not sep:
				# No dot in the path: rpartition() put everything into "ext",
				# so append the new extension to the full name instead.
				prefix, sep = path, u'.'
			new_path = prefix + sep + extension

			if new_path == path:
				# Source and destination are the same file: converting would
				# make ffmpeg overwrite its own input, and the "remove the
				# video" step below would then delete the only copy.
				self._downloader.to_screen(u'[ffmpeg] Destination is the downloaded file itself, skipping: ' + new_path)
				return information

			self._downloader.to_screen(u'[ffmpeg] Destination: ' + new_path)
			try:
				self.run_ffmpeg(path, new_path, acodec, more_opts)
			except AudioConversionError:
				e = sys.exc_info()[1]
				self._downloader.to_stderr(u'ERROR: audio conversion failed: ' + e.message)
				return None
			except Exception:
				self._downloader.to_stderr(u'ERROR: error running ffmpeg')
				return None

			# Try to update the date time for extracted audio file.
			if information.get('filetime') is not None:
				try:
					os.utime(_encodeFilename(new_path), (time.time(), information['filetime']))
				except Exception:
					self._downloader.to_stderr(u'WARNING: Cannot update utime of audio file')

			if not self._keepvideo:
				try:
					os.remove(_encodeFilename(path))
				except (IOError, OSError):
					self._downloader.to_stderr(u'WARNING: Unable to remove downloaded video file')
					return None

			information['filepath'] = new_path
			return information
	4248
	4249
	def updateSelf(downloader, filename):
		''' Update the program file with the latest version from the repository

		The content found at UPDATE_URL is written over filename, unless the
		version string it contains equals the running __version__. The
		program exits (sys.exit) when filename is not writable, the download
		fails or the file cannot be overwritten.
		'''
		# Note: downloader only used for options
		if not os.access(filename, os.W_OK):
			sys.exit('ERROR: no write permissions on %s' % filename)

		downloader.to_screen(u'Updating to latest version...')

		try:
			# Open the URL before entering the try/finally: if opening fails
			# there is nothing to close, and the finally clause must not hide
			# the real error behind a NameError on an unbound handle.
			urlh = urllib.urlopen(UPDATE_URL)
			try:
				newcontent = urlh.read()
			finally:
				urlh.close()
		except (IOError, OSError):
			sys.exit('ERROR: unable to download latest version')

		vmatch = re.search("__version__ = '([^']+)'", newcontent)
		if vmatch is not None and vmatch.group(1) == __version__:
			downloader.to_screen(u'youtube-dl is up-to-date (' + __version__ + ')')
			return

		try:
			outf = open(filename, 'wb')
			try:
				outf.write(newcontent)
			finally:
				outf.close()
		except (IOError, OSError):
			sys.exit('ERROR: unable to overwrite current version')

		downloader.to_screen(u'Updated youtube-dl. Restart youtube-dl to use the new version.')
	4282
	4283	def parseOpts():
	4284	def _readOptions(filename_bytes):
	4285	try:
	4286	optionf = open(filename_bytes)
	4287	except IOError:
	4288	return [] # silently skip if file is not present
	4289	try:
	4290	res = []
	4291	for l in optionf:
	4292	res += shlex.split(l, comments=True)
	4293	finally:
	4294	optionf.close()
	4295	return res
	4296
	4297	def _format_option_string(option):
	4298	''' ('-o', '--option') -> -o, --format METAVAR'''
	4299
	4300	opts = []
	4301
	4302	if option._short_opts: opts.append(option._short_opts[0])
	4303	if option._long_opts: opts.append(option._long_opts[0])
	4304	if len(opts) > 1: opts.insert(1, ', ')
	4305
	4306	if option.takes_value(): opts.append(' %s' % option.metavar)
	4307
	4308	return "".join(opts)
	4309
	4310	def _find_term_columns():
	4311	columns = os.environ.get('COLUMNS', None)
	4312	if columns:
	4313	return int(columns)
	4314
	4315	try:
	4316	sp = subprocess.Popen(['stty', 'size'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
	4317	out,err = sp.communicate()
	4318	return int(out.split()[1])
	4319	except:
	4320	pass
	4321	return None
	4322
	4323	max_width = 80
	4324	max_help_position = 80
	4325
	4326	# No need to wrap help messages if we're on a wide console
	4327	columns = _find_term_columns()
	4328	if columns: max_width = columns
	4329
	4330	fmt = optparse.IndentedHelpFormatter(width=max_width, max_help_position=max_help_position)
	4331	fmt.format_option_strings = _format_option_string
	4332
	4333	kw = {
	4334	'version' : __version__,
	4335	'formatter' : fmt,
	4336	'usage' : '%prog [options] url [url...]',
	4337	'conflict_handler' : 'resolve',
	4338	}
	4339
	4340	parser = optparse.OptionParser(**kw)
	4341
	4342	# option groups
	4343	general = optparse.OptionGroup(parser, 'General Options')
	4344	selection = optparse.OptionGroup(parser, 'Video Selection')
	4345	authentication = optparse.OptionGroup(parser, 'Authentication Options')
	4346	video_format = optparse.OptionGroup(parser, 'Video Format Options')
	4347	postproc = optparse.OptionGroup(parser, 'Post-processing Options')
	4348	filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
	4349	verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
	4350
	4351	general.add_option('-h', '--help',
	4352	action='help', help='print this help text and exit')
	4353	general.add_option('-v', '--version',
	4354	action='version', help='print program version and exit')
	4355	general.add_option('-U', '--update',
	4356	action='store_true', dest='update_self', help='update this program to latest version')
	4357	general.add_option('-i', '--ignore-errors',
	4358	action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
	4359	general.add_option('-r', '--rate-limit',
	4360	dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)')
	4361	general.add_option('-R', '--retries',
	4362	dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10)
	4363	general.add_option('--dump-user-agent',
	4364	action='store_true', dest='dump_user_agent',
	4365	help='display the current browser identification', default=False)
	4366	general.add_option('--list-extractors',
	4367	action='store_true', dest='list_extractors',
	4368	help='List all supported extractors and the URLs they would handle', default=False)
	4369
	4370	selection.add_option('--playlist-start',
	4371	dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1)
	4372	selection.add_option('--playlist-end',
	4373	dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)
	4374	selection.add_option('--match-title', dest='matchtitle', metavar='REGEX',help='download only matching titles (regex or caseless sub-string)')
	4375	selection.add_option('--reject-title', dest='rejecttitle', metavar='REGEX',help='skip download for matching titles (regex or caseless sub-string)')
	4376	selection.add_option('--max-downloads', metavar='NUMBER', dest='max_downloads', help='Abort after downloading NUMBER files', default=None)
	4377
	4378	authentication.add_option('-u', '--username',
	4379	dest='username', metavar='USERNAME', help='account username')
	4380	authentication.add_option('-p', '--password',
	4381	dest='password', metavar='PASSWORD', help='account password')
	4382	authentication.add_option('-n', '--netrc',
	4383	action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
	4384
	4385
	4386	video_format.add_option('-f', '--format',
	4387	action='store', dest='format', metavar='FORMAT', help='video format code')
	4388	video_format.add_option('--all-formats',
	4389	action='store_const', dest='format', help='download all available video formats', const='all')
	4390	video_format.add_option('--prefer-free-formats',
	4391	action='store_true', dest='prefer_free_formats', default=False, help='prefer free video formats unless a specific one is requested')
	4392	video_format.add_option('--max-quality',
	4393	action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download')
	4394	video_format.add_option('-F', '--list-formats',
	4395	action='store_true', dest='listformats', help='list all available formats (currently youtube only)')
	4396	video_format.add_option('--write-srt',
	4397	action='store_true', dest='writesubtitles',
	4398	help='write video closed captions to a .srt file (currently youtube only)', default=False)
	4399	video_format.add_option('--srt-lang',
	4400	action='store', dest='subtitleslang', metavar='LANG',
	4401	help='language of the closed captions to download (optional) use IETF language tags like \'en\'')
	4402
	4403
	4404	verbosity.add_option('-q', '--quiet',
	4405	action='store_true', dest='quiet', help='activates quiet mode', default=False)
	4406	verbosity.add_option('-s', '--simulate',
	4407	action='store_true', dest='simulate', help='do not download the video and do not write anything to disk', default=False)
	4408	verbosity.add_option('--skip-download',
	4409	action='store_true', dest='skip_download', help='do not download the video', default=False)
	4410	verbosity.add_option('-g', '--get-url',
	4411	action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
	4412	verbosity.add_option('-e', '--get-title',
	4413	action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
	4414	verbosity.add_option('--get-thumbnail',
	4415	action='store_true', dest='getthumbnail',
	4416	help='simulate, quiet but print thumbnail URL', default=False)
	4417	verbosity.add_option('--get-description',
	4418	action='store_true', dest='getdescription',
	4419	help='simulate, quiet but print video description', default=False)
	4420	verbosity.add_option('--get-filename',
	4421	action='store_true', dest='getfilename',
	4422	help='simulate, quiet but print output filename', default=False)
	4423	verbosity.add_option('--get-format',
	4424	action='store_true', dest='getformat',
	4425	help='simulate, quiet but print output format', default=False)
	4426	verbosity.add_option('--no-progress',
	4427	action='store_true', dest='noprogress', help='do not print progress bar', default=False)
	4428	verbosity.add_option('--console-title',
	4429	action='store_true', dest='consoletitle',
	4430	help='display progress in console titlebar', default=False)
	4431	verbosity.add_option('-v', '--verbose',
	4432	action='store_true', dest='verbose', help='print various debugging information', default=False)
	4433
	4434
	4435	filesystem.add_option('-t', '--title',
	4436	action='store_true', dest='usetitle', help='use title in file name', default=False)
	4437	filesystem.add_option('-l', '--literal',
	4438	action='store_true', dest='useliteral', help='use literal title in file name', default=False)
	4439	filesystem.add_option('-A', '--auto-number',
	4440	action='store_true', dest='autonumber',
	4441	help='number downloaded files starting from 00000', default=False)
	4442	filesystem.add_option('-o', '--output',
	4443	dest='outtmpl', metavar='TEMPLATE', help='output filename template. Use %(stitle)s to get the title, %(uploader)s for the uploader name, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(upload_date)s for the upload date (YYYYMMDD), and %% for a literal percent. Use - to output to stdout.')
	4444	filesystem.add_option('-a', '--batch-file',
	4445	dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
	4446	filesystem.add_option('-w', '--no-overwrites',
	4447	action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
	4448	filesystem.add_option('-c', '--continue',
	4449	action='store_true', dest='continue_dl', help='resume partially downloaded files', default=True)
	4450	filesystem.add_option('--no-continue',
	4451	action='store_false', dest='continue_dl',
	4452	help='do not resume partially downloaded files (restart from beginning)')
	4453	filesystem.add_option('--cookies',
	4454	dest='cookiefile', metavar='FILE', help='file to read cookies from and dump cookie jar in')
	4455	filesystem.add_option('--no-part',
	4456	action='store_true', dest='nopart', help='do not use .part files', default=False)
	4457	filesystem.add_option('--no-mtime',
	4458	action='store_false', dest='updatetime',
	4459	help='do not use the Last-modified header to set the file modification time', default=True)
	4460	filesystem.add_option('--write-description',
	4461	action='store_true', dest='writedescription',
	4462	help='write video description to a .description file', default=False)
	4463	filesystem.add_option('--write-info-json',
	4464	action='store_true', dest='writeinfojson',
	4465	help='write video metadata to a .info.json file', default=False)
	4466
	4467
	4468	postproc.add_option('--extract-audio', action='store_true', dest='extractaudio', default=False,
	4469	help='convert video files to audio-only files (requires ffmpeg and ffprobe)')
	4470	postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best',
	4471	help='"best", "aac", "vorbis", "mp3", "m4a", or "wav"; best by default')
	4472	postproc.add_option('--audio-quality', metavar='QUALITY', dest='audioquality', default='128K',
	4473	help='ffmpeg audio bitrate specification, 128k by default')
	4474	postproc.add_option('-k', '--keep-video', action='store_true', dest='keepvideo', default=False,
	4475	help='keeps the video file on disk after the post-processing; the video is erased by default')
	4476
	4477
	4478	parser.add_option_group(general)
	4479	parser.add_option_group(selection)
	4480	parser.add_option_group(filesystem)
	4481	parser.add_option_group(verbosity)
	4482	parser.add_option_group(video_format)
	4483	parser.add_option_group(authentication)
	4484	parser.add_option_group(postproc)
	4485
	4486	xdg_config_home = os.environ.get('XDG_CONFIG_HOME')
	4487	if xdg_config_home:
	4488	userConf = os.path.join(xdg_config_home, 'youtube-dl.conf')
	4489	else:
	4490	userConf = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf')
	4491	argv = _readOptions('/etc/youtube-dl.conf') + _readOptions(userConf) + sys.argv[1:]
	4492	opts, args = parser.parse_args(argv)
	4493
	4494	return parser, opts, args
	4495
	4496	def gen_extractors():
	4497	""" Return a list of an instance of every supported extractor.
	4498	The order does matter; the first extractor matched is the one handling the URL.
	4499	"""
	4500	youtube_ie = YoutubeIE()
	4501	google_ie = GoogleIE()
	4502	yahoo_ie = YahooIE()
	4503	return [
	4504	YoutubePlaylistIE(youtube_ie),
	4505	YoutubeUserIE(youtube_ie),
	4506	YoutubeSearchIE(youtube_ie),
	4507	youtube_ie,
	4508	MetacafeIE(youtube_ie),
	4509	DailymotionIE(),
	4510	google_ie,
	4511	GoogleSearchIE(google_ie),
	4512	PhotobucketIE(),
	4513	yahoo_ie,
	4514	YahooSearchIE(yahoo_ie),
	4515	DepositFilesIE(),
	4516	FacebookIE(),
	4517	BlipTVIE(),
	4518	VimeoIE(),
	4519	MyVideoIE(),
	4520	ComedyCentralIE(),
	4521	EscapistIE(),
	4522	CollegeHumorIE(),
	4523	XVideosIE(),
	4524	SoundcloudIE(),
	4525	InfoQIE(),
	4526	MixcloudIE(),
	4527	StanfordOpenClassroomIE(),
	4528	MTVIE(),
	4529
	4530	GenericIE()
	4531	]
	4532
	4533	def _real_main():
	4534	parser, opts, args = parseOpts()
	4535
	4536	# Open appropriate CookieJar
	4537	if opts.cookiefile is None:
	4538	jar = cookielib.CookieJar()
	4539	else:
	4540	try:
	4541	jar = cookielib.MozillaCookieJar(opts.cookiefile)
	4542	if os.path.isfile(opts.cookiefile) and os.access(opts.cookiefile, os.R_OK):
	4543	jar.load()
	4544	except (IOError, OSError), err:
	4545	sys.exit(u'ERROR: unable to open cookie file')
	4546
	4547	# Dump user agent
	4548	if opts.dump_user_agent:
	4549	print std_headers['User-Agent']
	4550	sys.exit(0)
	4551
	4552	# Batch file verification
	4553	batchurls = []
	4554	if opts.batchfile is not None:
	4555	try:
	4556	if opts.batchfile == '-':
	4557	batchfd = sys.stdin
	4558	else:
	4559	batchfd = open(opts.batchfile, 'r')
	4560	batchurls = batchfd.readlines()
	4561	batchurls = [x.strip() for x in batchurls]
	4562	batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)]
	4563	except IOError:
	4564	sys.exit(u'ERROR: batch file could not be read')
	4565	all_urls = batchurls + args
	4566
	4567	# General configuration
	4568	cookie_processor = urllib2.HTTPCookieProcessor(jar)
	4569	proxy_handler = urllib2.ProxyHandler()
	4570	opener = urllib2.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler())
	4571	urllib2.install_opener(opener)
	4572	socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)
	4573
	4574	if opts.verbose:
	4575	print(u'[debug] Proxy map: ' + str(proxy_handler.proxies))
	4576
	4577	extractors = gen_extractors()
	4578
	4579	if opts.list_extractors:
	4580	for ie in extractors:
	4581	print(ie.IE_NAME)
	4582	matchedUrls = filter(lambda url: ie.suitable(url), all_urls)
	4583	all_urls = filter(lambda url: url not in matchedUrls, all_urls)
	4584	for mu in matchedUrls:
	4585	print(u' ' + mu)
	4586	sys.exit(0)
	4587
	4588	# Conflicting, missing and erroneous options
	4589	if opts.usenetrc and (opts.username is not None or opts.password is not None):
	4590	parser.error(u'using .netrc conflicts with giving username/password')
	4591	if opts.password is not None and opts.username is None:
	4592	parser.error(u'account username missing')
	4593	if opts.outtmpl is not None and (opts.useliteral or opts.usetitle or opts.autonumber):
	4594	parser.error(u'using output template conflicts with using title, literal title or auto number')
	4595	if opts.usetitle and opts.useliteral:
	4596	parser.error(u'using title conflicts with using literal title')
	4597	if opts.username is not None and opts.password is None:
	4598	opts.password = getpass.getpass(u'Type account password and press return:')
	4599	if opts.ratelimit is not None:
	4600	numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
	4601	if numeric_limit is None:
	4602	parser.error(u'invalid rate limit specified')
	4603	opts.ratelimit = numeric_limit
	4604	if opts.retries is not None:
	4605	try:
	4606	opts.retries = long(opts.retries)
	4607	except (TypeError, ValueError), err:
	4608	parser.error(u'invalid retry count specified')
	4609	try:
	4610	opts.playliststart = int(opts.playliststart)
	4611	if opts.playliststart <= 0:
	4612	raise ValueError(u'Playlist start must be positive')
	4613	except (TypeError, ValueError), err:
	4614	parser.error(u'invalid playlist start number specified')
	4615	try:
	4616	opts.playlistend = int(opts.playlistend)
	4617	if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart):
	4618	raise ValueError(u'Playlist end must be greater than playlist start')
	4619	except (TypeError, ValueError), err:
	4620	parser.error(u'invalid playlist end number specified')
	4621	if opts.extractaudio:
	4622	if opts.audioformat not in ['best', 'aac', 'mp3', 'vorbis', 'm4a', 'wav']:
	4623	parser.error(u'invalid audio format specified')
	4624
	4625	# File downloader
	4626	fd = FileDownloader({
	4627	'usenetrc': opts.usenetrc,
	4628	'username': opts.username,
	4629	'password': opts.password,
	4630	'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat),
	4631	'forceurl': opts.geturl,
	4632	'forcetitle': opts.gettitle,
	4633	'forcethumbnail': opts.getthumbnail,
	4634	'forcedescription': opts.getdescription,
	4635	'forcefilename': opts.getfilename,
	4636	'forceformat': opts.getformat,
	4637	'simulate': opts.simulate,
	4638	'skip_download': (opts.skip_download or opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat),
	4639	'format': opts.format,
	4640	'format_limit': opts.format_limit,
	4641	'listformats': opts.listformats,
	4642	'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
	4643	or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s')
	4644	or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s')
	4645	or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s')
	4646	or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(stitle)s-%(id)s.%(ext)s')
	4647	or (opts.useliteral and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s')
	4648	or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
	4649	or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
	4650	or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
	4651	or u'%(id)s.%(ext)s'),
	4652	'ignoreerrors': opts.ignoreerrors,
	4653	'ratelimit': opts.ratelimit,
	4654	'nooverwrites': opts.nooverwrites,
	4655	'retries': opts.retries,
	4656	'continuedl': opts.continue_dl,
	4657	'noprogress': opts.noprogress,
	4658	'playliststart': opts.playliststart,
	4659	'playlistend': opts.playlistend,
	4660	'logtostderr': opts.outtmpl == '-',
	4661	'consoletitle': opts.consoletitle,
	4662	'nopart': opts.nopart,
	4663	'updatetime': opts.updatetime,
	4664	'writedescription': opts.writedescription,
	4665	'writeinfojson': opts.writeinfojson,
	4666	'writesubtitles': opts.writesubtitles,
	4667	'subtitleslang': opts.subtitleslang,
	4668	'matchtitle': opts.matchtitle,
	4669	'rejecttitle': opts.rejecttitle,
	4670	'max_downloads': opts.max_downloads,
	4671	'prefer_free_formats': opts.prefer_free_formats,
	4672	'verbose': opts.verbose,
	4673	})
	4674	for extractor in extractors:
	4675	fd.add_info_extractor(extractor)
	4676
	4677	# PostProcessors
	4678	if opts.extractaudio:
	4679	fd.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, keepvideo=opts.keepvideo))
	4680
	4681	# Update version
	4682	if opts.update_self:
	4683	updateSelf(fd, sys.argv[0])
	4684
	4685	# Maybe do nothing
	4686	if len(all_urls) < 1:
	4687	if not opts.update_self:
	4688	parser.error(u'you must provide at least one URL')
	4689	else:
	4690	sys.exit()
	4691
	4692	try:
	4693	retcode = fd.download(all_urls)
	4694	except MaxDownloadsReached:
	4695	fd.to_screen(u'--max-download limit reached, aborting.')
	4696	retcode = 101
	4697
	4698	# Dump cookie jar if requested
	4699	if opts.cookiefile is not None:
	4700	try:
	4701	jar.save()
	4702	except (IOError, OSError), err:
	4703	sys.exit(u'ERROR: unable to save cookie jar')
	4704
	4705	sys.exit(retcode)
	4706
	4707	def main():
	4708	try:
	4709	_real_main()
	4710	except DownloadError:
	4711	sys.exit(1)
	4712	except SameFileError:
	4713	sys.exit(u'ERROR: fixed output name but more than one file to download')
	4714	except KeyboardInterrupt:
	4715	sys.exit(u'\nERROR: Interrupted by user')
	4716
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
	4719
	4720	# vim: set ts=4 sw=4 sts=4 noet ai si filetype=python: