jfr.im git - yt-dlp.git/blame_incremental

... / ...

Commit	Line	Data
	1	#!/usr/bin/env python
	2	# -- coding: utf-8 --
	3
	4	__author__ = (
	5	'Ricardo Garcia Gonzalez',
	6	'Danny Colligan',
	7	'Benjamin Johnson',
	8	'Vasyl\' Vavrychuk',
	9	'Witold Baryluk',
	10	'Paweł Paprota',
	11	'Gergely Imreh',
	12	'Rogério Brito',
	13	'Philipp Hagemeister',
	14	'Sören Schulze',
	15	)
	16
	17	__license__ = 'Public Domain'
	18	__version__ = '2011.09.15'
	19
	20	UPDATE_URL = 'https://raw.github.com/rg3/youtube-dl/master/youtube-dl'
	21
	22	import cookielib
	23	import datetime
	24	import gzip
	25	import htmlentitydefs
	26	import HTMLParser
	27	import httplib
	28	import locale
	29	import math
	30	import netrc
	31	import os
	32	import os.path
	33	import re
	34	import socket
	35	import string
	36	import subprocess
	37	import sys
	38	import time
	39	import urllib
	40	import urllib2
	41	import warnings
	42	import zlib
	43
	44	if os.name == 'nt':
	45	import ctypes
	46
	47	try:
	48	import email.utils
	49	except ImportError: # Python 2.4
	50	import email.Utils
	51	try:
	52	import cStringIO as StringIO
	53	except ImportError:
	54	import StringIO
	55
	56	# parse_qs was moved from the cgi module to the urlparse module recently.
	57	try:
	58	from urlparse import parse_qs
	59	except ImportError:
	60	from cgi import parse_qs
	61
	62	try:
	63	import lxml.etree
	64	except ImportError:
	65	pass # Handled below
	66
	67	try:
	68	import xml.etree.ElementTree
	69	except ImportError: # Python<2.5
	70	pass # Not officially supported, but let it slip
	71
	72	std_headers = {
	73	'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:5.0.1) Gecko/20100101 Firefox/5.0.1',
	74	'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
	75	'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,/;q=0.8',
	76	'Accept-Encoding': 'gzip, deflate',
	77	'Accept-Language': 'en-us,en;q=0.5',
	78	}
	79
	80	simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii')
	81
	82	try:
	83	import json
	84	except ImportError: # Python <2.6, use trivialjson (https://github.com/phihag/trivialjson):
	85	import re
	86	class json(object):
	87	@staticmethod
	88	def loads(s):
	89	s = s.decode('UTF-8')
	90	def raiseError(msg, i):
	91	raise ValueError(msg + ' at position ' + str(i) + ' of ' + repr(s) + ': ' + repr(s[i:]))
	92	def skipSpace(i, expectMore=True):
	93	while i < len(s) and s[i] in ' \t\r\n':
	94	i += 1
	95	if expectMore:
	96	if i >= len(s):
	97	raiseError('Premature end', i)
	98	return i
	99	def decodeEscape(match):
	100	esc = match.group(1)
	101	_STATIC = {
	102	'"': '"',
	103	'\\': '\\',
	104	'/': '/',
	105	'b': unichr(0x8),
	106	'f': unichr(0xc),
	107	'n': '\n',
	108	'r': '\r',
	109	't': '\t',
	110	}
	111	if esc in _STATIC:
	112	return _STATIC[esc]
	113	if esc[0] == 'u':
	114	if len(esc) == 1+4:
	115	return unichr(int(esc[1:5], 16))
	116	if len(esc) == 5+6 and esc[5:7] == '\\u':
	117	hi = int(esc[1:5], 16)
	118	low = int(esc[7:11], 16)
	119	return unichr((hi - 0xd800) * 0x400 + low - 0xdc00 + 0x10000)
	120	raise ValueError('Unknown escape ' + str(esc))
	121	def parseString(i):
	122	i += 1
	123	e = i
	124	while True:
	125	e = s.index('"', e)
	126	bslashes = 0
	127	while s[e-bslashes-1] == '\\':
	128	bslashes += 1
	129	if bslashes % 2 == 1:
	130	e += 1
	131	continue
	132	break
	133	rexp = re.compile(r'\\(u[dD][89aAbB][0-9a-fA-F]{2}\\u[0-9a-fA-F]{4}\|u[0-9a-fA-F]{4}\|.\|$)')
	134	stri = rexp.sub(decodeEscape, s[i:e])
	135	return (e+1,stri)
	136	def parseObj(i):
	137	i += 1
	138	res = {}
	139	i = skipSpace(i)
	140	if s[i] == '}': # Empty dictionary
	141	return (i+1,res)
	142	while True:
	143	if s[i] != '"':
	144	raiseError('Expected a string object key', i)
	145	i,key = parseString(i)
	146	i = skipSpace(i)
	147	if i >= len(s) or s[i] != ':':
	148	raiseError('Expected a colon', i)
	149	i,val = parse(i+1)
	150	res[key] = val
	151	i = skipSpace(i)
	152	if s[i] == '}':
	153	return (i+1, res)
	154	if s[i] != ',':
	155	raiseError('Expected comma or closing curly brace', i)
	156	i = skipSpace(i+1)
	157	def parseArray(i):
	158	res = []
	159	i = skipSpace(i+1)
	160	if s[i] == ']': # Empty array
	161	return (i+1,res)
	162	while True:
	163	i,val = parse(i)
	164	res.append(val)
	165	i = skipSpace(i) # Raise exception if premature end
	166	if s[i] == ']':
	167	return (i+1, res)
	168	if s[i] != ',':
	169	raiseError('Expected a comma or closing bracket', i)
	170	i = skipSpace(i+1)
	171	def parseDiscrete(i):
	172	for k,v in {'true': True, 'false': False, 'null': None}.items():
	173	if s.startswith(k, i):
	174	return (i+len(k), v)
	175	raiseError('Not a boolean (or null)', i)
	176	def parseNumber(i):
	177	mobj = re.match('^(-?(0\|[1-9][0-9])(\.[0-9])?([eE][+-]?[0-9]+)?)', s[i:])
	178	if mobj is None:
	179	raiseError('Not a number', i)
	180	nums = mobj.group(1)
	181	if '.' in nums or 'e' in nums or 'E' in nums:
	182	return (i+len(nums), float(nums))
	183	return (i+len(nums), int(nums))
	184	CHARMAP = {'{': parseObj, '[': parseArray, '"': parseString, 't': parseDiscrete, 'f': parseDiscrete, 'n': parseDiscrete}
	185	def parse(i):
	186	i = skipSpace(i)
	187	i,res = CHARMAP.get(s[i], parseNumber)(i)
	188	i = skipSpace(i, False)
	189	return (i,res)
	190	i,res = parse(0)
	191	if i < len(s):
	192	raise ValueError('Extra data at end of input (index ' + str(i) + ' of ' + repr(s) + ': ' + repr(s[i:]) + ')')
	193	return res
	194
	195	def preferredencoding():
	196	"""Get preferred encoding.
	197
	198	Returns the best encoding scheme for the system, based on
	199	locale.getpreferredencoding() and some further tweaks.
	200	"""
	201	def yield_preferredencoding():
	202	try:
	203	pref = locale.getpreferredencoding()
	204	u'TEST'.encode(pref)
	205	except:
	206	pref = 'UTF-8'
	207	while True:
	208	yield pref
	209	return yield_preferredencoding().next()
	210
	211
	212	def htmlentity_transform(matchobj):
	213	"""Transforms an HTML entity to a Unicode character.
	214
	215	This function receives a match object and is intended to be used with
	216	the re.sub() function.
	217	"""
	218	entity = matchobj.group(1)
	219
	220	# Known non-numeric HTML entity
	221	if entity in htmlentitydefs.name2codepoint:
	222	return unichr(htmlentitydefs.name2codepoint[entity])
	223
	224	# Unicode character
	225	mobj = re.match(ur'(?u)#(x?\d+)', entity)
	226	if mobj is not None:
	227	numstr = mobj.group(1)
	228	if numstr.startswith(u'x'):
	229	base = 16
	230	numstr = u'0%s' % numstr
	231	else:
	232	base = 10
	233	return unichr(long(numstr, base))
	234
	235	# Unknown entity in name, return its literal representation
	236	return (u'&%s;' % entity)
	237
	238
	239	def sanitize_title(utitle):
	240	"""Sanitizes a video title so it could be used as part of a filename."""
	241	utitle = re.sub(ur'(?u)&(.+?);', htmlentity_transform, utitle)
	242	return utitle.replace(unicode(os.sep), u'%')
	243
	244
	245	def sanitize_open(filename, open_mode):
	246	"""Try to open the given filename, and slightly tweak it if this fails.
	247
	248	Attempts to open the given filename. If this fails, it tries to change
	249	the filename slightly, step by step, until it's either able to open it
	250	or it fails and raises a final exception, like the standard open()
	251	function.
	252
	253	It returns the tuple (stream, definitive_file_name).
	254	"""
	255	try:
	256	if filename == u'-':
	257	if sys.platform == 'win32':
	258	import msvcrt
	259	msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
	260	return (sys.stdout, filename)
	261	stream = open(filename, open_mode)
	262	return (stream, filename)
	263	except (IOError, OSError), err:
	264	# In case of error, try to remove win32 forbidden chars
	265	filename = re.sub(ur'[/<>:"\\|\?\*]', u'#', filename)
	266
	267	# An exception here should be caught in the caller
	268	stream = open(filename, open_mode)
	269	return (stream, filename)
	270
	271
	272	def timeconvert(timestr):
	273	"""Convert RFC 2822 defined time string into system timestamp"""
	274	timestamp = None
	275	timetuple = email.utils.parsedate_tz(timestr)
	276	if timetuple is not None:
	277	timestamp = email.utils.mktime_tz(timetuple)
	278	return timestamp
	279
	280
	281	class DownloadError(Exception):
	282	"""Download Error exception.
	283
	284	This exception may be thrown by FileDownloader objects if they are not
	285	configured to continue on errors. They will contain the appropriate
	286	error message.
	287	"""
	288	pass
	289
	290
	291	class SameFileError(Exception):
	292	"""Same File exception.
	293
	294	This exception will be thrown by FileDownloader objects if they detect
	295	multiple files would have to be downloaded to the same file on disk.
	296	"""
	297	pass
	298
	299
	300	class PostProcessingError(Exception):
	301	"""Post Processing exception.
	302
	303	This exception may be raised by PostProcessor's .run() method to
	304	indicate an error in the postprocessing task.
	305	"""
	306	pass
	307
	308
	309	class UnavailableVideoError(Exception):
	310	"""Unavailable Format exception.
	311
	312	This exception will be thrown when a video is requested
	313	in a format that is not available for that video.
	314	"""
	315	pass
	316
	317
	318	class ContentTooShortError(Exception):
	319	"""Content Too Short exception.
	320
	321	This exception may be raised by FileDownloader objects when a file they
	322	download is too small for what the server announced first, indicating
	323	the connection was probably interrupted.
	324	"""
	325	# Both in bytes
	326	downloaded = None
	327	expected = None
	328
	329	def __init__(self, downloaded, expected):
	330	self.downloaded = downloaded
	331	self.expected = expected
	332
	333
	334	class YoutubeDLHandler(urllib2.HTTPHandler):
	335	"""Handler for HTTP requests and responses.
	336
	337	This class, when installed with an OpenerDirector, automatically adds
	338	the standard headers to every HTTP request and handles gzipped and
	339	deflated responses from web servers. If compression is to be avoided in
	340	a particular request, the original request in the program code only has
	341	to include the HTTP header "Youtubedl-No-Compression", which will be
	342	removed before making the real request.
	343
	344	Part of this code was copied from:
	345
	346	http://techknack.net/python-urllib2-handlers/
	347
	348	Andrew Rowls, the author of that code, agreed to release it to the
	349	public domain.
	350	"""
	351
	352	@staticmethod
	353	def deflate(data):
	354	try:
	355	return zlib.decompress(data, -zlib.MAX_WBITS)
	356	except zlib.error:
	357	return zlib.decompress(data)
	358
	359	@staticmethod
	360	def addinfourl_wrapper(stream, headers, url, code):
	361	if hasattr(urllib2.addinfourl, 'getcode'):
	362	return urllib2.addinfourl(stream, headers, url, code)
	363	ret = urllib2.addinfourl(stream, headers, url)
	364	ret.code = code
	365	return ret
	366
	367	def http_request(self, req):
	368	for h in std_headers:
	369	if h in req.headers:
	370	del req.headers[h]
	371	req.add_header(h, std_headers[h])
	372	if 'Youtubedl-no-compression' in req.headers:
	373	if 'Accept-encoding' in req.headers:
	374	del req.headers['Accept-encoding']
	375	del req.headers['Youtubedl-no-compression']
	376	return req
	377
	378	def http_response(self, req, resp):
	379	old_resp = resp
	380	# gzip
	381	if resp.headers.get('Content-encoding', '') == 'gzip':
	382	gz = gzip.GzipFile(fileobj=StringIO.StringIO(resp.read()), mode='r')
	383	resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
	384	resp.msg = old_resp.msg
	385	# deflate
	386	if resp.headers.get('Content-encoding', '') == 'deflate':
	387	gz = StringIO.StringIO(self.deflate(resp.read()))
	388	resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
	389	resp.msg = old_resp.msg
	390	return resp
	391
	392
	393	class FileDownloader(object):
	394	"""File Downloader class.
	395
	396	File downloader objects are the ones responsible of downloading the
	397	actual video file and writing it to disk if the user has requested
	398	it, among some other tasks. In most cases there should be one per
	399	program. As, given a video URL, the downloader doesn't know how to
	400	extract all the needed information, task that InfoExtractors do, it
	401	has to pass the URL to one of them.
	402
	403	For this, file downloader objects have a method that allows
	404	InfoExtractors to be registered in a given order. When it is passed
	405	a URL, the file downloader handles it to the first InfoExtractor it
	406	finds that reports being able to handle it. The InfoExtractor extracts
	407	all the information about the video or videos the URL refers to, and
	408	asks the FileDownloader to process the video information, possibly
	409	downloading the video.
	410
	411	File downloaders accept a lot of parameters. In order not to saturate
	412	the object constructor with arguments, it receives a dictionary of
	413	options instead. These options are available through the params
	414	attribute for the InfoExtractors to use. The FileDownloader also
	415	registers itself as the downloader in charge for the InfoExtractors
	416	that are added to it, so this is a "mutual registration".
	417
	418	Available options:
	419
	420	username: Username for authentication purposes.
	421	password: Password for authentication purposes.
	422	usenetrc: Use netrc for authentication instead.
	423	quiet: Do not print messages to stdout.
	424	forceurl: Force printing final URL.
	425	forcetitle: Force printing title.
	426	forcethumbnail: Force printing thumbnail URL.
	427	forcedescription: Force printing description.
	428	forcefilename: Force printing final filename.
	429	simulate: Do not download the video files.
	430	format: Video format code.
	431	format_limit: Highest quality format to try.
	432	outtmpl: Template for output names.
	433	ignoreerrors: Do not stop on download errors.
	434	ratelimit: Download speed limit, in bytes/sec.
	435	nooverwrites: Prevent overwriting files.
	436	retries: Number of times to retry for HTTP error 5xx
	437	continuedl: Try to continue downloads if possible.
	438	noprogress: Do not print the progress bar.
	439	playliststart: Playlist item to start at.
	440	playlistend: Playlist item to end at.
	441	matchtitle: Download only matching titles.
	442	rejecttitle: Reject downloads for matching titles.
	443	logtostderr: Log messages to stderr instead of stdout.
	444	consoletitle: Display progress in console window's titlebar.
	445	nopart: Do not use temporary .part files.
	446	updatetime: Use the Last-modified header to set output file timestamps.
	447	writedescription: Write the video description to a .description file
	448	writeinfojson: Write the video description to a .info.json file
	449	"""
	450
	451	params = None
	452	_ies = []
	453	_pps = []
	454	_download_retcode = None
	455	_num_downloads = None
	456	_screen_file = None
	457
	458	def __init__(self, params):
	459	"""Create a FileDownloader object with the given options."""
	460	self._ies = []
	461	self._pps = []
	462	self._download_retcode = 0
	463	self._num_downloads = 0
	464	self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
	465	self.params = params
	466
	467	@staticmethod
	468	def format_bytes(bytes):
	469	if bytes is None:
	470	return 'N/A'
	471	if type(bytes) is str:
	472	bytes = float(bytes)
	473	if bytes == 0.0:
	474	exponent = 0
	475	else:
	476	exponent = long(math.log(bytes, 1024.0))
	477	suffix = 'bkMGTPEZY'[exponent]
	478	converted = float(bytes) / float(1024 ** exponent)
	479	return '%.2f%s' % (converted, suffix)
	480
	481	@staticmethod
	482	def calc_percent(byte_counter, data_len):
	483	if data_len is None:
	484	return '---.-%'
	485	return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))
	486
	487	@staticmethod
	488	def calc_eta(start, now, total, current):
	489	if total is None:
	490	return '--:--'
	491	dif = now - start
	492	if current == 0 or dif < 0.001: # One millisecond
	493	return '--:--'
	494	rate = float(current) / dif
	495	eta = long((float(total) - float(current)) / rate)
	496	(eta_mins, eta_secs) = divmod(eta, 60)
	497	if eta_mins > 99:
	498	return '--:--'
	499	return '%02d:%02d' % (eta_mins, eta_secs)
	500
	501	@staticmethod
	502	def calc_speed(start, now, bytes):
	503	dif = now - start
	504	if bytes == 0 or dif < 0.001: # One millisecond
	505	return '%10s' % '---b/s'
	506	return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))
	507
	508	@staticmethod
	509	def best_block_size(elapsed_time, bytes):
	510	new_min = max(bytes / 2.0, 1.0)
	511	new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
	512	if elapsed_time < 0.001:
	513	return long(new_max)
	514	rate = bytes / elapsed_time
	515	if rate > new_max:
	516	return long(new_max)
	517	if rate < new_min:
	518	return long(new_min)
	519	return long(rate)
	520
	521	@staticmethod
	522	def parse_bytes(bytestr):
	523	"""Parse a string indicating a byte quantity into a long integer."""
	524	matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
	525	if matchobj is None:
	526	return None
	527	number = float(matchobj.group(1))
	528	multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
	529	return long(round(number * multiplier))
	530
	531	def add_info_extractor(self, ie):
	532	"""Add an InfoExtractor object to the end of the list."""
	533	self._ies.append(ie)
	534	ie.set_downloader(self)
	535
	536	def add_post_processor(self, pp):
	537	"""Add a PostProcessor object to the end of the chain."""
	538	self._pps.append(pp)
	539	pp.set_downloader(self)
	540
	541	def to_screen(self, message, skip_eol=False, ignore_encoding_errors=False):
	542	"""Print message to stdout if not in quiet mode."""
	543	try:
	544	if not self.params.get('quiet', False):
	545	terminator = [u'\n', u''][skip_eol]
	546	print >>self._screen_file, (u'%s%s' % (message, terminator)).encode(preferredencoding()),
	547	self._screen_file.flush()
	548	except (UnicodeEncodeError), err:
	549	if not ignore_encoding_errors:
	550	raise
	551
	552	def to_stderr(self, message):
	553	"""Print message to stderr."""
	554	print >>sys.stderr, message.encode(preferredencoding())
	555
	556	def to_cons_title(self, message):
	557	"""Set console/terminal window title to message."""
	558	if not self.params.get('consoletitle', False):
	559	return
	560	if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
	561	# c_wchar_p() might not be necessary if `message` is
	562	# already of type unicode()
	563	ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
	564	elif 'TERM' in os.environ:
	565	sys.stderr.write('\033]0;%s\007' % message.encode(preferredencoding()))
	566
	567	def fixed_template(self):
	568	"""Checks if the output template is fixed."""
	569	return (re.search(ur'(?u)%$.+?$s', self.params['outtmpl']) is None)
	570
	571	def trouble(self, message=None):
	572	"""Determine action to take when a download problem appears.
	573
	574	Depending on if the downloader has been configured to ignore
	575	download errors or not, this method may throw an exception or
	576	not when errors are found, after printing the message.
	577	"""
	578	if message is not None:
	579	self.to_stderr(message)
	580	if not self.params.get('ignoreerrors', False):
	581	raise DownloadError(message)
	582	self._download_retcode = 1
	583
	584	def slow_down(self, start_time, byte_counter):
	585	"""Sleep if the download speed is over the rate limit."""
	586	rate_limit = self.params.get('ratelimit', None)
	587	if rate_limit is None or byte_counter == 0:
	588	return
	589	now = time.time()
	590	elapsed = now - start_time
	591	if elapsed <= 0.0:
	592	return
	593	speed = float(byte_counter) / elapsed
	594	if speed > rate_limit:
	595	time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
	596
	597	def temp_name(self, filename):
	598	"""Returns a temporary filename for the given filename."""
	599	if self.params.get('nopart', False) or filename == u'-' or \
	600	(os.path.exists(filename) and not os.path.isfile(filename)):
	601	return filename
	602	return filename + u'.part'
	603
	604	def undo_temp_name(self, filename):
	605	if filename.endswith(u'.part'):
	606	return filename[:-len(u'.part')]
	607	return filename
	608
	609	def try_rename(self, old_filename, new_filename):
	610	try:
	611	if old_filename == new_filename:
	612	return
	613	os.rename(old_filename, new_filename)
	614	except (IOError, OSError), err:
	615	self.trouble(u'ERROR: unable to rename file')
	616
	617	def try_utime(self, filename, last_modified_hdr):
	618	"""Try to set the last-modified time of the given file."""
	619	if last_modified_hdr is None:
	620	return
	621	if not os.path.isfile(filename):
	622	return
	623	timestr = last_modified_hdr
	624	if timestr is None:
	625	return
	626	filetime = timeconvert(timestr)
	627	if filetime is None:
	628	return
	629	try:
	630	os.utime(filename, (time.time(), filetime))
	631	except:
	632	pass
	633
	634	def report_writedescription(self, descfn):
	635	""" Report that the description file is being written """
	636	self.to_screen(u'[info] Writing video description to: %s' % descfn, ignore_encoding_errors=True)
	637
	638	def report_writeinfojson(self, infofn):
	639	""" Report that the metadata file has been written """
	640	self.to_screen(u'[info] Video description metadata as JSON to: %s' % infofn, ignore_encoding_errors=True)
	641
	642	def report_destination(self, filename):
	643	"""Report destination filename."""
	644	self.to_screen(u'[download] Destination: %s' % filename, ignore_encoding_errors=True)
	645
	646	def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
	647	"""Report download progress."""
	648	if self.params.get('noprogress', False):
	649	return
	650	self.to_screen(u'\r[download] %s of %s at %s ETA %s' %
	651	(percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
	652	self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
	653	(percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
	654
	655	def report_resuming_byte(self, resume_len):
	656	"""Report attempt to resume at given byte."""
	657	self.to_screen(u'[download] Resuming download at byte %s' % resume_len)
	658
	659	def report_retry(self, count, retries):
	660	"""Report retry in case of HTTP error 5xx"""
	661	self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))
	662
	663	def report_file_already_downloaded(self, file_name):
	664	"""Report file has already been fully downloaded."""
	665	try:
	666	self.to_screen(u'[download] %s has already been downloaded' % file_name)
	667	except (UnicodeEncodeError), err:
	668	self.to_screen(u'[download] The file has already been downloaded')
	669
	670	def report_unable_to_resume(self):
	671	"""Report it was impossible to resume download."""
	672	self.to_screen(u'[download] Unable to resume')
	673
	674	def report_finish(self):
	675	"""Report download finished."""
	676	if self.params.get('noprogress', False):
	677	self.to_screen(u'[download] Download completed')
	678	else:
	679	self.to_screen(u'')
	680
	681	def increment_downloads(self):
	682	"""Increment the ordinal that assigns a number to each file."""
	683	self._num_downloads += 1
	684
	685	def prepare_filename(self, info_dict):
	686	"""Generate the output filename."""
	687	try:
	688	template_dict = dict(info_dict)
	689	template_dict['epoch'] = unicode(long(time.time()))
	690	template_dict['autonumber'] = unicode('%05d' % self._num_downloads)
	691	filename = self.params['outtmpl'] % template_dict
	692	return filename
	693	except (ValueError, KeyError), err:
	694	self.trouble(u'ERROR: invalid system charset or erroneous output template')
	695	return None
	696
	697	def process_info(self, info_dict):
	698	"""Process a single dictionary returned by an InfoExtractor."""
	699	filename = self.prepare_filename(info_dict)
	700	# Do nothing else if in simulate mode
	701	if self.params.get('simulate', False):
	702	# Forced printings
	703	if self.params.get('forcetitle', False):
	704	print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')
	705	if self.params.get('forceurl', False):
	706	print info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace')
	707	if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
	708	print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace')
	709	if self.params.get('forcedescription', False) and 'description' in info_dict:
	710	print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace')
	711	if self.params.get('forcefilename', False) and filename is not None:
	712	print filename.encode(preferredencoding(), 'xmlcharrefreplace')
	713
	714	return
	715
	716	if filename is None:
	717	return
	718
	719	matchtitle=self.params.get('matchtitle',False)
	720	rejecttitle=self.params.get('rejecttitle',False)
	721	title=info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')
	722	if matchtitle and not re.search(matchtitle, title, re.IGNORECASE):
	723	self.to_screen(u'[download] "%s" title did not match pattern "%s"' % (title, matchtitle))
	724	return
	725	if rejecttitle and re.search(rejecttitle, title, re.IGNORECASE):
	726	self.to_screen(u'[download] "%s" title matched reject pattern "%s"' % (title, rejecttitle))
	727	return
	728
	729	if self.params.get('nooverwrites', False) and os.path.exists(filename):
	730	self.to_stderr(u'WARNING: file exists and will be skipped')
	731	return
	732
	733	try:
	734	dn = os.path.dirname(filename)
	735	if dn != '' and not os.path.exists(dn):
	736	os.makedirs(dn)
	737	except (OSError, IOError), err:
	738	self.trouble(u'ERROR: unable to create directory ' + unicode(err))
	739	return
	740
	741	if self.params.get('writedescription', False):
	742	try:
	743	descfn = filename + '.description'
	744	self.report_writedescription(descfn)
	745	descfile = open(descfn, 'wb')
	746	try:
	747	descfile.write(info_dict['description'].encode('utf-8'))
	748	finally:
	749	descfile.close()
	750	except (OSError, IOError):
	751	self.trouble(u'ERROR: Cannot write description file ' + descfn)
	752	return
	753
	754	if self.params.get('writeinfojson', False):
	755	infofn = filename + '.info.json'
	756	self.report_writeinfojson(infofn)
	757	try:
	758	json.dump
	759	except (NameError,AttributeError):
	760	self.trouble(u'ERROR: No JSON encoder found. Update to Python 2.6+, setup a json module, or leave out --write-info-json.')
	761	return
	762	try:
	763	infof = open(infofn, 'wb')
	764	try:
	765	json.dump(info_dict, infof)
	766	finally:
	767	infof.close()
	768	except (OSError, IOError):
	769	self.trouble(u'ERROR: Cannot write metadata to JSON file ' + infofn)
	770	return
	771
	772	try:
	773	success = self._do_download(filename, info_dict['url'].encode('utf-8'), info_dict.get('player_url', None))
	774	except (OSError, IOError), err:
	775	raise UnavailableVideoError
	776	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	777	self.trouble(u'ERROR: unable to download video data: %s' % str(err))
	778	return
	779	except (ContentTooShortError, ), err:
	780	self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
	781	return
	782
	783	if success:
	784	try:
	785	self.post_process(filename, info_dict)
	786	except (PostProcessingError), err:
	787	self.trouble(u'ERROR: postprocessing: %s' % str(err))
	788	return
	789
	790	def download(self, url_list):
	791	"""Download a given list of URLs."""
	792	if len(url_list) > 1 and self.fixed_template():
	793	raise SameFileError(self.params['outtmpl'])
	794
	795	for url in url_list:
	796	suitable_found = False
	797	for ie in self._ies:
	798	# Go to next InfoExtractor if not suitable
	799	if not ie.suitable(url):
	800	continue
	801
	802	# Suitable InfoExtractor found
	803	suitable_found = True
	804
	805	# Extract information from URL and process it
	806	ie.extract(url)
	807
	808	# Suitable InfoExtractor had been found; go to next URL
	809	break
	810
	811	if not suitable_found:
	812	self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url)
	813
	814	return self._download_retcode
	815
	816	def post_process(self, filename, ie_info):
	817	"""Run the postprocessing chain on the given file."""
	818	info = dict(ie_info)
	819	info['filepath'] = filename
	820	for pp in self._pps:
	821	info = pp.run(info)
	822	if info is None:
	823	break
	824
	825	def _download_with_rtmpdump(self, filename, url, player_url):
	826	self.report_destination(filename)
	827	tmpfilename = self.temp_name(filename)
	828
	829	# Check for rtmpdump first
	830	try:
	831	subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
	832	except (OSError, IOError):
	833	self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
	834	return False
	835
	836	# Download using rtmpdump. rtmpdump returns exit code 2 when
	837	# the connection was interrumpted and resuming appears to be
	838	# possible. This is part of rtmpdump's normal usage, AFAIK.
	839	basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename]
	840	retval = subprocess.call(basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)])
	841	while retval == 2 or retval == 1:
	842	prevsize = os.path.getsize(tmpfilename)
	843	self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
	844	time.sleep(5.0) # This seems to be needed
	845	retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
	846	cursize = os.path.getsize(tmpfilename)
	847	if prevsize == cursize and retval == 1:
	848	break
	849	# Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
	850	if prevsize == cursize and retval == 2 and cursize > 1024:
	851	self.to_screen(u'\r[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
	852	retval = 0
	853	break
	854	if retval == 0:
	855	self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(tmpfilename))
	856	self.try_rename(tmpfilename, filename)
	857	return True
	858	else:
	859	self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval)
	860	return False
	861
	862	def _do_download(self, filename, url, player_url):
	863	# Check file already present
	864	if self.params.get('continuedl', False) and os.path.isfile(filename) and not self.params.get('nopart', False):
	865	self.report_file_already_downloaded(filename)
	866	return True
	867
	868	# Attempt to download using rtmpdump
	869	if url.startswith('rtmp'):
	870	return self._download_with_rtmpdump(filename, url, player_url)
	871
	872	tmpfilename = self.temp_name(filename)
	873	stream = None
	874	open_mode = 'wb'
	875
	876	# Do not include the Accept-Encoding header
	877	headers = {'Youtubedl-no-compression': 'True'}
	878	basic_request = urllib2.Request(url, None, headers)
	879	request = urllib2.Request(url, None, headers)
	880
	881	# Establish possible resume length
	882	if os.path.isfile(tmpfilename):
	883	resume_len = os.path.getsize(tmpfilename)
	884	else:
	885	resume_len = 0
	886
	887	# Request parameters in case of being able to resume
	888	if self.params.get('continuedl', False) and resume_len != 0:
	889	self.report_resuming_byte(resume_len)
	890	request.add_header('Range', 'bytes=%d-' % resume_len)
	891	open_mode = 'ab'
	892
	893	count = 0
	894	retries = self.params.get('retries', 0)
	895	while count <= retries:
	896	# Establish connection
	897	try:
	898	data = urllib2.urlopen(request)
	899	break
	900	except (urllib2.HTTPError, ), err:
	901	if (err.code < 500 or err.code >= 600) and err.code != 416:
	902	# Unexpected HTTP error
	903	raise
	904	elif err.code == 416:
	905	# Unable to resume (requested range not satisfiable)
	906	try:
	907	# Open the connection again without the range header
	908	data = urllib2.urlopen(basic_request)
	909	content_length = data.info()['Content-Length']
	910	except (urllib2.HTTPError, ), err:
	911	if err.code < 500 or err.code >= 600:
	912	raise
	913	else:
	914	# Examine the reported length
	915	if (content_length is not None and
	916	(resume_len - 100 < long(content_length) < resume_len + 100)):
	917	# The file had already been fully downloaded.
	918	# Explanation to the above condition: in issue #175 it was revealed that
	919	# YouTube sometimes adds or removes a few bytes from the end of the file,
	920	# changing the file size slightly and causing problems for some users. So
	921	# I decided to implement a suggested change and consider the file
	922	# completely downloaded if the file size differs less than 100 bytes from
	923	# the one in the hard drive.
	924	self.report_file_already_downloaded(filename)
	925	self.try_rename(tmpfilename, filename)
	926	return True
	927	else:
	928	# The length does not match, we start the download over
	929	self.report_unable_to_resume()
	930	open_mode = 'wb'
	931	break
	932	# Retry
	933	count += 1
	934	if count <= retries:
	935	self.report_retry(count, retries)
	936
	937	if count > retries:
	938	self.trouble(u'ERROR: giving up after %s retries' % retries)
	939	return False
	940
	941	data_len = data.info().get('Content-length', None)
	942	if data_len is not None:
	943	data_len = long(data_len) + resume_len
	944	data_len_str = self.format_bytes(data_len)
	945	byte_counter = 0 + resume_len
	946	block_size = 1024
	947	start = time.time()
	948	while True:
	949	# Download and write
	950	before = time.time()
	951	data_block = data.read(block_size)
	952	after = time.time()
	953	if len(data_block) == 0:
	954	break
	955	byte_counter += len(data_block)
	956
	957	# Open file just in time
	958	if stream is None:
	959	try:
	960	(stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
	961	assert stream is not None
	962	filename = self.undo_temp_name(tmpfilename)
	963	self.report_destination(filename)
	964	except (OSError, IOError), err:
	965	self.trouble(u'ERROR: unable to open for writing: %s' % str(err))
	966	return False
	967	try:
	968	stream.write(data_block)
	969	except (IOError, OSError), err:
	970	self.trouble(u'\nERROR: unable to write data: %s' % str(err))
	971	return False
	972	block_size = self.best_block_size(after - before, len(data_block))
	973
	974	# Progress message
	975	percent_str = self.calc_percent(byte_counter, data_len)
	976	eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
	977	speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len)
	978	self.report_progress(percent_str, data_len_str, speed_str, eta_str)
	979
	980	# Apply rate limit
	981	self.slow_down(start, byte_counter - resume_len)
	982
	983	if stream is None:
	984	self.trouble(u'\nERROR: Did not get any data blocks')
	985	return False
	986	stream.close()
	987	self.report_finish()
	988	if data_len is not None and byte_counter != data_len:
	989	raise ContentTooShortError(byte_counter, long(data_len))
	990	self.try_rename(tmpfilename, filename)
	991
	992	# Update file modification time
	993	if self.params.get('updatetime', True):
	994	self.try_utime(filename, data.info().get('last-modified', None))
	995
	996	return True
	997
	998
	999	class InfoExtractor(object):
	1000	"""Information Extractor class.
	1001
	1002	Information extractors are the classes that, given a URL, extract
	1003	information from the video (or videos) the URL refers to. This
	1004	information includes the real video URL, the video title and simplified
	1005	title, author and others. The information is stored in a dictionary
	1006	which is then passed to the FileDownloader. The FileDownloader
	1007	processes this information possibly downloading the video to the file
	1008	system, among other possible outcomes. The dictionaries must include
	1009	the following fields:
	1010
	1011	id: Video identifier.
	1012	url: Final video URL.
	1013	uploader: Nickname of the video uploader.
	1014	title: Literal title.
	1015	stitle: Simplified title.
	1016	ext: Video filename extension.
	1017	format: Video format.
	1018	player_url: SWF Player URL (may be None).
	1019
	1020	The following fields are optional. Their primary purpose is to allow
	1021	youtube-dl to serve as the backend for a video search function, such
	1022	as the one in youtube2mp3. They are only used when their respective
	1023	forced printing functions are called:
	1024
	1025	thumbnail: Full URL to a video thumbnail image.
	1026	description: One-line video description.
	1027
	1028	Subclasses of this one should re-define the _real_initialize() and
	1029	_real_extract() methods, as well as the suitable() static method.
	1030	Probably, they should also be instantiated and added to the main
	1031	downloader.
	1032	"""
	1033
	1034	_ready = False
	1035	_downloader = None
	1036
	1037	def __init__(self, downloader=None):
	1038	"""Constructor. Receives an optional downloader."""
	1039	self._ready = False
	1040	self.set_downloader(downloader)
	1041
	1042	@staticmethod
	1043	def suitable(url):
	1044	"""Receives a URL and returns True if suitable for this IE."""
	1045	return False
	1046
	1047	def initialize(self):
	1048	"""Initializes an instance (authentication, etc)."""
	1049	if not self._ready:
	1050	self._real_initialize()
	1051	self._ready = True
	1052
	1053	def extract(self, url):
	1054	"""Extracts URL information and returns it in list of dicts."""
	1055	self.initialize()
	1056	return self._real_extract(url)
	1057
	1058	def set_downloader(self, downloader):
	1059	"""Sets the downloader for this IE."""
	1060	self._downloader = downloader
	1061
	1062	def _real_initialize(self):
	1063	"""Real initialization process. Redefine in subclasses."""
	1064	pass
	1065
	1066	def _real_extract(self, url):
	1067	"""Real extraction process. Redefine in subclasses."""
	1068	pass
	1069
	1070
	1071	class YoutubeIE(InfoExtractor):
	1072	"""Information extractor for youtube.com."""
	1073
	1074	_VALID_URL = r'^((?:https?://)?(?:youtu\.be/\|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?:(?:(?:v\|embed\|e)/)\|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?\|#!?)(?:.+&)?v=))?)?([0-9A-Za-z_-]+)(?(1).+)?$'
	1075	_LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
	1076	_LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en'
	1077	_AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
	1078	_NETRC_MACHINE = 'youtube'
	1079	# Listed in order of quality
	1080	_available_formats = ['38', '37', '45', '22', '43', '35', '34', '18', '6', '5', '17', '13']
	1081	_video_extensions = {
	1082	'13': '3gp',
	1083	'17': 'mp4',
	1084	'18': 'mp4',
	1085	'22': 'mp4',
	1086	'37': 'mp4',
	1087	'38': 'video', # You actually don't know if this will be MOV, AVI or whatever
	1088	'43': 'webm',
	1089	'45': 'webm',
	1090	}
	1091
	1092	@staticmethod
	1093	def suitable(url):
	1094	return (re.match(YoutubeIE._VALID_URL, url) is not None)
	1095
	1096	def report_lang(self):
	1097	"""Report attempt to set language."""
	1098	self._downloader.to_screen(u'[youtube] Setting language')
	1099
	1100	def report_login(self):
	1101	"""Report attempt to log in."""
	1102	self._downloader.to_screen(u'[youtube] Logging in')
	1103
	1104	def report_age_confirmation(self):
	1105	"""Report attempt to confirm age."""
	1106	self._downloader.to_screen(u'[youtube] Confirming age')
	1107
	1108	def report_video_webpage_download(self, video_id):
	1109	"""Report attempt to download video webpage."""
	1110	self._downloader.to_screen(u'[youtube] %s: Downloading video webpage' % video_id)
	1111
	1112	def report_video_info_webpage_download(self, video_id):
	1113	"""Report attempt to download video info webpage."""
	1114	self._downloader.to_screen(u'[youtube] %s: Downloading video info webpage' % video_id)
	1115
	1116	def report_information_extraction(self, video_id):
	1117	"""Report attempt to extract video information."""
	1118	self._downloader.to_screen(u'[youtube] %s: Extracting video information' % video_id)
	1119
	1120	def report_unavailable_format(self, video_id, format):
	1121	"""Report extracted video URL."""
	1122	self._downloader.to_screen(u'[youtube] %s: Format %s not available' % (video_id, format))
	1123
	1124	def report_rtmp_download(self):
	1125	"""Indicate the download will use the RTMP protocol."""
	1126	self._downloader.to_screen(u'[youtube] RTMP download detected')
	1127
	1128	def _real_initialize(self):
	1129	if self._downloader is None:
	1130	return
	1131
	1132	username = None
	1133	password = None
	1134	downloader_params = self._downloader.params
	1135
	1136	# Attempt to use provided username and password or .netrc data
	1137	if downloader_params.get('username', None) is not None:
	1138	username = downloader_params['username']
	1139	password = downloader_params['password']
	1140	elif downloader_params.get('usenetrc', False):
	1141	try:
	1142	info = netrc.netrc().authenticators(self._NETRC_MACHINE)
	1143	if info is not None:
	1144	username = info[0]
	1145	password = info[2]
	1146	else:
	1147	raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
	1148	except (IOError, netrc.NetrcParseError), err:
	1149	self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
	1150	return
	1151
	1152	# Set language
	1153	request = urllib2.Request(self._LANG_URL)
	1154	try:
	1155	self.report_lang()
	1156	urllib2.urlopen(request).read()
	1157	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	1158	self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err))
	1159	return
	1160
	1161	# No authentication to be performed
	1162	if username is None:
	1163	return
	1164
	1165	# Log in
	1166	login_form = {
	1167	'current_form': 'loginForm',
	1168	'next': '/',
	1169	'action_login': 'Log In',
	1170	'username': username,
	1171	'password': password,
	1172	}
	1173	request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form))
	1174	try:
	1175	self.report_login()
	1176	login_results = urllib2.urlopen(request).read()
	1177	if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
	1178	self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
	1179	return
	1180	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	1181	self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
	1182	return
	1183
	1184	# Confirm age
	1185	age_form = {
	1186	'next_url': '/',
	1187	'action_confirm': 'Confirm',
	1188	}
	1189	request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form))
	1190	try:
	1191	self.report_age_confirmation()
	1192	age_results = urllib2.urlopen(request).read()
	1193	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	1194	self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
	1195	return
	1196
	1197	def _real_extract(self, url):
	1198	# Extract video id from URL
	1199	mobj = re.match(self._VALID_URL, url)
	1200	if mobj is None:
	1201	self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
	1202	return
	1203	video_id = mobj.group(2)
	1204
	1205	# Get video webpage
	1206	self.report_video_webpage_download(video_id)
	1207	request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id)
	1208	try:
	1209	video_webpage = urllib2.urlopen(request).read()
	1210	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	1211	self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
	1212	return
	1213
	1214	# Attempt to extract SWF player URL
	1215	mobj = re.search(r'swfConfig.?"(http:\\/\\/.?watch.?-.?\.swf)"', video_webpage)
	1216	if mobj is not None:
	1217	player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
	1218	else:
	1219	player_url = None
	1220
	1221	# Get video info
	1222	self.report_video_info_webpage_download(video_id)
	1223	for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
	1224	video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
	1225	% (video_id, el_type))
	1226	request = urllib2.Request(video_info_url)
	1227	try:
	1228	video_info_webpage = urllib2.urlopen(request).read()
	1229	video_info = parse_qs(video_info_webpage)
	1230	if 'token' in video_info:
	1231	break
	1232	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	1233	self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
	1234	return
	1235	if 'token' not in video_info:
	1236	if 'reason' in video_info:
	1237	self._downloader.trouble(u'ERROR: YouTube said: %s' % video_info['reason'][0].decode('utf-8'))
	1238	else:
	1239	self._downloader.trouble(u'ERROR: "token" parameter not in video info for unknown reason')
	1240	return
	1241
	1242	# Start extracting information
	1243	self.report_information_extraction(video_id)
	1244
	1245	# uploader
	1246	if 'author' not in video_info:
	1247	self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
	1248	return
	1249	video_uploader = urllib.unquote_plus(video_info['author'][0])
	1250
	1251	# title
	1252	if 'title' not in video_info:
	1253	self._downloader.trouble(u'ERROR: unable to extract video title')
	1254	return
	1255	video_title = urllib.unquote_plus(video_info['title'][0])
	1256	video_title = video_title.decode('utf-8')
	1257	video_title = sanitize_title(video_title)
	1258
	1259	# simplified title
	1260	simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
	1261	simple_title = simple_title.strip(ur'_')
	1262
	1263	# thumbnail image
	1264	if 'thumbnail_url' not in video_info:
	1265	self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
	1266	video_thumbnail = ''
	1267	else: # don't panic if we can't find it
	1268	video_thumbnail = urllib.unquote_plus(video_info['thumbnail_url'][0])
	1269
	1270	# upload date
	1271	upload_date = u'NA'
	1272	mobj = re.search(r'id="eow-date.?>(.?)</span>', video_webpage, re.DOTALL)
	1273	if mobj is not None:
	1274	upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
	1275	format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y']
	1276	for expression in format_expressions:
	1277	try:
	1278	upload_date = datetime.datetime.strptime(upload_date, expression).strftime('%Y%m%d')
	1279	except:
	1280	pass
	1281
	1282	# description
	1283	try:
	1284	lxml.etree
	1285	except NameError:
	1286	video_description = u'No description available.'
	1287	if self._downloader.params.get('forcedescription', False) or self._downloader.params.get('writedescription', False):
	1288	mobj = re.search(r'<meta name="description" content="(.)"(?:\s/)?>', video_webpage)
	1289	if mobj is not None:
	1290	video_description = mobj.group(1).decode('utf-8')
	1291	else:
	1292	html_parser = lxml.etree.HTMLParser(encoding='utf-8')
	1293	vwebpage_doc = lxml.etree.parse(StringIO.StringIO(video_webpage), html_parser)
	1294	video_description = u''.join(vwebpage_doc.xpath('id("eow-description")//text()'))
	1295	# TODO use another parser
	1296
	1297	# token
	1298	video_token = urllib.unquote_plus(video_info['token'][0])
	1299
	1300	# Decide which formats to download
	1301	req_format = self._downloader.params.get('format', None)
	1302
	1303	if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
	1304	self.report_rtmp_download()
	1305	video_url_list = [(None, video_info['conn'][0])]
	1306	elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1:
	1307	url_data_strs = video_info['url_encoded_fmt_stream_map'][0].split(',')
	1308	url_data = [parse_qs(uds) for uds in url_data_strs]
	1309	url_data = filter(lambda ud: 'itag' in ud and 'url' in ud, url_data)
	1310	url_map = dict((ud['itag'][0], ud['url'][0]) for ud in url_data)
	1311
	1312	format_limit = self._downloader.params.get('format_limit', None)
	1313	if format_limit is not None and format_limit in self._available_formats:
	1314	format_list = self._available_formats[self._available_formats.index(format_limit):]
	1315	else:
	1316	format_list = self._available_formats
	1317	existing_formats = [x for x in format_list if x in url_map]
	1318	if len(existing_formats) == 0:
	1319	self._downloader.trouble(u'ERROR: no known formats available for video')
	1320	return
	1321	if req_format is None:
	1322	video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
	1323	elif req_format == '-1':
	1324	video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
	1325	else:
	1326	# Specific format
	1327	if req_format not in url_map:
	1328	self._downloader.trouble(u'ERROR: requested format not available')
	1329	return
	1330	video_url_list = [(req_format, url_map[req_format])] # Specific format
	1331	else:
	1332	self._downloader.trouble(u'ERROR: no conn or url_encoded_fmt_stream_map information found in video info')
	1333	return
	1334
	1335	for format_param, video_real_url in video_url_list:
	1336	# At this point we have a new video
	1337	self._downloader.increment_downloads()
	1338
	1339	# Extension
	1340	video_extension = self._video_extensions.get(format_param, 'flv')
	1341
	1342	try:
	1343	# Process video information
	1344	self._downloader.process_info({
	1345	'id': video_id.decode('utf-8'),
	1346	'url': video_real_url.decode('utf-8'),
	1347	'uploader': video_uploader.decode('utf-8'),
	1348	'upload_date': upload_date,
	1349	'title': video_title,
	1350	'stitle': simple_title,
	1351	'ext': video_extension.decode('utf-8'),
	1352	'format': (format_param is None and u'NA' or format_param.decode('utf-8')),
	1353	'thumbnail': video_thumbnail.decode('utf-8'),
	1354	'description': video_description,
	1355	'player_url': player_url,
	1356	})
	1357	except UnavailableVideoError, err:
	1358	self._downloader.trouble(u'\nERROR: unable to download video')
	1359
	1360
	1361	class MetacafeIE(InfoExtractor):
	1362	"""Information Extractor for metacafe.com."""
	1363
	1364	_VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
	1365	_DISCLAIMER = 'http://www.metacafe.com/family_filter/'
	1366	_FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
	1367	_youtube_ie = None
	1368
	1369	def __init__(self, youtube_ie, downloader=None):
	1370	InfoExtractor.__init__(self, downloader)
	1371	self._youtube_ie = youtube_ie
	1372
	1373	@staticmethod
	1374	def suitable(url):
	1375	return (re.match(MetacafeIE._VALID_URL, url) is not None)
	1376
	1377	def report_disclaimer(self):
	1378	"""Report disclaimer retrieval."""
	1379	self._downloader.to_screen(u'[metacafe] Retrieving disclaimer')
	1380
	1381	def report_age_confirmation(self):
	1382	"""Report attempt to confirm age."""
	1383	self._downloader.to_screen(u'[metacafe] Confirming age')
	1384
	1385	def report_download_webpage(self, video_id):
	1386	"""Report webpage download."""
	1387	self._downloader.to_screen(u'[metacafe] %s: Downloading webpage' % video_id)
	1388
	1389	def report_extraction(self, video_id):
	1390	"""Report information extraction."""
	1391	self._downloader.to_screen(u'[metacafe] %s: Extracting information' % video_id)
	1392
	1393	def _real_initialize(self):
	1394	# Retrieve disclaimer
	1395	request = urllib2.Request(self._DISCLAIMER)
	1396	try:
	1397	self.report_disclaimer()
	1398	disclaimer = urllib2.urlopen(request).read()
	1399	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	1400	self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % str(err))
	1401	return
	1402
	1403	# Confirm age
	1404	disclaimer_form = {
	1405	'filters': '0',
	1406	'submit': "Continue - I'm over 18",
	1407	}
	1408	request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form))
	1409	try:
	1410	self.report_age_confirmation()
	1411	disclaimer = urllib2.urlopen(request).read()
	1412	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	1413	self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
	1414	return
	1415
	1416	def _real_extract(self, url):
	1417	# Extract id and simplified title from URL
	1418	mobj = re.match(self._VALID_URL, url)
	1419	if mobj is None:
	1420	self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
	1421	return
	1422
	1423	video_id = mobj.group(1)
	1424
	1425	# Check if video comes from YouTube
	1426	mobj2 = re.match(r'^yt-(.*)$', video_id)
	1427	if mobj2 is not None:
	1428	self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % mobj2.group(1))
	1429	return
	1430
	1431	# At this point we have a new video
	1432	self._downloader.increment_downloads()
	1433
	1434	simple_title = mobj.group(2).decode('utf-8')
	1435
	1436	# Retrieve video webpage to extract further information
	1437	request = urllib2.Request('http://www.metacafe.com/watch/%s/' % video_id)
	1438	try:
	1439	self.report_download_webpage(video_id)
	1440	webpage = urllib2.urlopen(request).read()
	1441	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	1442	self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
	1443	return
	1444
	1445	# Extract URL, uploader and title from webpage
	1446	self.report_extraction(video_id)
	1447	mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
	1448	if mobj is not None:
	1449	mediaURL = urllib.unquote(mobj.group(1))
	1450	video_extension = mediaURL[-3:]
	1451
	1452	# Extract gdaKey if available
	1453	mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage)
	1454	if mobj is None:
	1455	video_url = mediaURL
	1456	else:
	1457	gdaKey = mobj.group(1)
	1458	video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)
	1459	else:
	1460	mobj = re.search(r' name="flashvars" value="(.*?)"', webpage)
	1461	if mobj is None:
	1462	self._downloader.trouble(u'ERROR: unable to extract media URL')
	1463	return
	1464	vardict = parse_qs(mobj.group(1))
	1465	if 'mediaData' not in vardict:
	1466	self._downloader.trouble(u'ERROR: unable to extract media URL')
	1467	return
	1468	mobj = re.search(r'"mediaURL":"(http.?)","key":"(.?)"', vardict['mediaData'][0])
	1469	if mobj is None:
	1470	self._downloader.trouble(u'ERROR: unable to extract media URL')
	1471	return
	1472	mediaURL = mobj.group(1).replace('\\/', '/')
	1473	video_extension = mediaURL[-3:]
	1474	video_url = '%s?__gda__=%s' % (mediaURL, mobj.group(2))
	1475
	1476	mobj = re.search(r'(?im)<title>(.*) - Video</title>', webpage)
	1477	if mobj is None:
	1478	self._downloader.trouble(u'ERROR: unable to extract title')
	1479	return
	1480	video_title = mobj.group(1).decode('utf-8')
	1481	video_title = sanitize_title(video_title)
	1482
	1483	mobj = re.search(r'(?ms)By:\s<a .?>(.+?)<', webpage)
	1484	if mobj is None:
	1485	self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
	1486	return
	1487	video_uploader = mobj.group(1)
	1488
	1489	try:
	1490	# Process video information
	1491	self._downloader.process_info({
	1492	'id': video_id.decode('utf-8'),
	1493	'url': video_url.decode('utf-8'),
	1494	'uploader': video_uploader.decode('utf-8'),
	1495	'upload_date': u'NA',
	1496	'title': video_title,
	1497	'stitle': simple_title,
	1498	'ext': video_extension.decode('utf-8'),
	1499	'format': u'NA',
	1500	'player_url': None,
	1501	})
	1502	except UnavailableVideoError:
	1503	self._downloader.trouble(u'\nERROR: unable to download video')
	1504
	1505
	1506	class DailymotionIE(InfoExtractor):
	1507	"""Information Extractor for Dailymotion"""
	1508
	1509	_VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^_/]+)_([^/]+)'
	1510
	1511	def __init__(self, downloader=None):
	1512	InfoExtractor.__init__(self, downloader)
	1513
	1514	@staticmethod
	1515	def suitable(url):
	1516	return (re.match(DailymotionIE._VALID_URL, url) is not None)
	1517
	1518	def report_download_webpage(self, video_id):
	1519	"""Report webpage download."""
	1520	self._downloader.to_screen(u'[dailymotion] %s: Downloading webpage' % video_id)
	1521
	1522	def report_extraction(self, video_id):
	1523	"""Report information extraction."""
	1524	self._downloader.to_screen(u'[dailymotion] %s: Extracting information' % video_id)
	1525
	1526	def _real_initialize(self):
	1527	return
	1528
	1529	def _real_extract(self, url):
	1530	# Extract id and simplified title from URL
	1531	mobj = re.match(self._VALID_URL, url)
	1532	if mobj is None:
	1533	self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
	1534	return
	1535
	1536	# At this point we have a new video
	1537	self._downloader.increment_downloads()
	1538	video_id = mobj.group(1)
	1539
	1540	simple_title = mobj.group(2).decode('utf-8')
	1541	video_extension = 'flv'
	1542
	1543	# Retrieve video webpage to extract further information
	1544	request = urllib2.Request(url)
	1545	request.add_header('Cookie', 'family_filter=off')
	1546	try:
	1547	self.report_download_webpage(video_id)
	1548	webpage = urllib2.urlopen(request).read()
	1549	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	1550	self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
	1551	return
	1552
	1553	# Extract URL, uploader and title from webpage
	1554	self.report_extraction(video_id)
	1555	mobj = re.search(r'(?i)addVariable$\"sequence\"\s,\s\"([^\"]+?)\"$', webpage)
	1556	if mobj is None:
	1557	self._downloader.trouble(u'ERROR: unable to extract media URL')
	1558	return
	1559	sequence = urllib.unquote(mobj.group(1))
	1560	mobj = re.search(r',\"sdURL\"\:\"([^\"]+?)\",', sequence)
	1561	if mobj is None:
	1562	self._downloader.trouble(u'ERROR: unable to extract media URL')
	1563	return
	1564	mediaURL = urllib.unquote(mobj.group(1)).replace('\\', '')
	1565
	1566	# if needed add http://www.dailymotion.com/ if relative URL
	1567
	1568	video_url = mediaURL
	1569
	1570	mobj = re.search(r'(?im)<title>Dailymotion\s-\s(.+)\s-\s[^<]+?</title>', webpage)
	1571	if mobj is None:
	1572	self._downloader.trouble(u'ERROR: unable to extract title')
	1573	return
	1574	video_title = mobj.group(1).decode('utf-8')
	1575	video_title = sanitize_title(video_title)
	1576
	1577	mobj = re.search(r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a></span>', webpage)
	1578	if mobj is None:
	1579	self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
	1580	return
	1581	video_uploader = mobj.group(1)
	1582
	1583	try:
	1584	# Process video information
	1585	self._downloader.process_info({
	1586	'id': video_id.decode('utf-8'),
	1587	'url': video_url.decode('utf-8'),
	1588	'uploader': video_uploader.decode('utf-8'),
	1589	'upload_date': u'NA',
	1590	'title': video_title,
	1591	'stitle': simple_title,
	1592	'ext': video_extension.decode('utf-8'),
	1593	'format': u'NA',
	1594	'player_url': None,
	1595	})
	1596	except UnavailableVideoError:
	1597	self._downloader.trouble(u'\nERROR: unable to download video')
	1598
	1599
	1600	class GoogleIE(InfoExtractor):
	1601	"""Information extractor for video.google.com."""
	1602
	1603	_VALID_URL = r'(?:http://)?video\.google\.(?:com(?:\.au)?\|co\.(?:uk\|jp\|kr\|cr)\|ca\|de\|es\|fr\|it\|nl\|pl)/videoplay\?docid=([^\&]+).*'
	1604
	1605	def __init__(self, downloader=None):
	1606	InfoExtractor.__init__(self, downloader)
	1607
	1608	@staticmethod
	1609	def suitable(url):
	1610	return (re.match(GoogleIE._VALID_URL, url) is not None)
	1611
	1612	def report_download_webpage(self, video_id):
	1613	"""Report webpage download."""
	1614	self._downloader.to_screen(u'[video.google] %s: Downloading webpage' % video_id)
	1615
	1616	def report_extraction(self, video_id):
	1617	"""Report information extraction."""
	1618	self._downloader.to_screen(u'[video.google] %s: Extracting information' % video_id)
	1619
	1620	def _real_initialize(self):
	1621	return
	1622
	1623	def _real_extract(self, url):
	1624	# Extract id from URL
	1625	mobj = re.match(self._VALID_URL, url)
	1626	if mobj is None:
	1627	self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
	1628	return
	1629
	1630	# At this point we have a new video
	1631	self._downloader.increment_downloads()
	1632	video_id = mobj.group(1)
	1633
	1634	video_extension = 'mp4'
	1635
	1636	# Retrieve video webpage to extract further information
	1637	request = urllib2.Request('http://video.google.com/videoplay?docid=%s&hl=en&oe=utf-8' % video_id)
	1638	try:
	1639	self.report_download_webpage(video_id)
	1640	webpage = urllib2.urlopen(request).read()
	1641	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	1642	self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
	1643	return
	1644
	1645	# Extract URL, uploader, and title from webpage
	1646	self.report_extraction(video_id)
	1647	mobj = re.search(r"download_url:'([^']+)'", webpage)
	1648	if mobj is None:
	1649	video_extension = 'flv'
	1650	mobj = re.search(r"(?i)videoUrl\\x3d(.+?)\\x26", webpage)
	1651	if mobj is None:
	1652	self._downloader.trouble(u'ERROR: unable to extract media URL')
	1653	return
	1654	mediaURL = urllib.unquote(mobj.group(1))
	1655	mediaURL = mediaURL.replace('\\x3d', '\x3d')
	1656	mediaURL = mediaURL.replace('\\x26', '\x26')
	1657
	1658	video_url = mediaURL
	1659
	1660	mobj = re.search(r'<title>(.*)</title>', webpage)
	1661	if mobj is None:
	1662	self._downloader.trouble(u'ERROR: unable to extract title')
	1663	return
	1664	video_title = mobj.group(1).decode('utf-8')
	1665	video_title = sanitize_title(video_title)
	1666	simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
	1667
	1668	# Extract video description
	1669	mobj = re.search(r'<span id=short-desc-content>([^<]*)</span>', webpage)
	1670	if mobj is None:
	1671	self._downloader.trouble(u'ERROR: unable to extract video description')
	1672	return
	1673	video_description = mobj.group(1).decode('utf-8')
	1674	if not video_description:
	1675	video_description = 'No description available.'
	1676
	1677	# Extract video thumbnail
	1678	if self._downloader.params.get('forcethumbnail', False):
	1679	request = urllib2.Request('http://video.google.com/videosearch?q=%s+site:video.google.com&hl=en' % abs(int(video_id)))
	1680	try:
	1681	webpage = urllib2.urlopen(request).read()
	1682	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	1683	self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
	1684	return
	1685	mobj = re.search(r'<img class=thumbnail-img (?:.* )?src=(http.*)>', webpage)
	1686	if mobj is None:
	1687	self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
	1688	return
	1689	video_thumbnail = mobj.group(1)
	1690	else: # we need something to pass to process_info
	1691	video_thumbnail = ''
	1692
	1693	try:
	1694	# Process video information
	1695	self._downloader.process_info({
	1696	'id': video_id.decode('utf-8'),
	1697	'url': video_url.decode('utf-8'),
	1698	'uploader': u'NA',
	1699	'upload_date': u'NA',
	1700	'title': video_title,
	1701	'stitle': simple_title,
	1702	'ext': video_extension.decode('utf-8'),
	1703	'format': u'NA',
	1704	'player_url': None,
	1705	})
	1706	except UnavailableVideoError:
	1707	self._downloader.trouble(u'\nERROR: unable to download video')
	1708
	1709
	1710	class PhotobucketIE(InfoExtractor):
	1711	"""Information extractor for photobucket.com."""
	1712
	1713	_VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.[\?\&]current=(.\.flv)'
	1714
	1715	def __init__(self, downloader=None):
	1716	InfoExtractor.__init__(self, downloader)
	1717
	1718	@staticmethod
	1719	def suitable(url):
	1720	return (re.match(PhotobucketIE._VALID_URL, url) is not None)
	1721
	1722	def report_download_webpage(self, video_id):
	1723	"""Report webpage download."""
	1724	self._downloader.to_screen(u'[photobucket] %s: Downloading webpage' % video_id)
	1725
	1726	def report_extraction(self, video_id):
	1727	"""Report information extraction."""
	1728	self._downloader.to_screen(u'[photobucket] %s: Extracting information' % video_id)
	1729
	1730	def _real_initialize(self):
	1731	return
	1732
	1733	def _real_extract(self, url):
	1734	# Extract id from URL
	1735	mobj = re.match(self._VALID_URL, url)
	1736	if mobj is None:
	1737	self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
	1738	return
	1739
	1740	# At this point we have a new video
	1741	self._downloader.increment_downloads()
	1742	video_id = mobj.group(1)
	1743
	1744	video_extension = 'flv'
	1745
	1746	# Retrieve video webpage to extract further information
	1747	request = urllib2.Request(url)
	1748	try:
	1749	self.report_download_webpage(video_id)
	1750	webpage = urllib2.urlopen(request).read()
	1751	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	1752	self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
	1753	return
	1754
	1755	# Extract URL, uploader, and title from webpage
	1756	self.report_extraction(video_id)
	1757	mobj = re.search(r'<link rel="video_src" href=".*\?file=([^"]+)" />', webpage)
	1758	if mobj is None:
	1759	self._downloader.trouble(u'ERROR: unable to extract media URL')
	1760	return
	1761	mediaURL = urllib.unquote(mobj.group(1))
	1762
	1763	video_url = mediaURL
	1764
	1765	mobj = re.search(r'<title>(.) video by (.) - Photobucket</title>', webpage)
	1766	if mobj is None:
	1767	self._downloader.trouble(u'ERROR: unable to extract title')
	1768	return
	1769	video_title = mobj.group(1).decode('utf-8')
	1770	video_title = sanitize_title(video_title)
	1771	simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
	1772
	1773	video_uploader = mobj.group(2).decode('utf-8')
	1774
	1775	try:
	1776	# Process video information
	1777	self._downloader.process_info({
	1778	'id': video_id.decode('utf-8'),
	1779	'url': video_url.decode('utf-8'),
	1780	'uploader': video_uploader,
	1781	'upload_date': u'NA',
	1782	'title': video_title,
	1783	'stitle': simple_title,
	1784	'ext': video_extension.decode('utf-8'),
	1785	'format': u'NA',
	1786	'player_url': None,
	1787	})
	1788	except UnavailableVideoError:
	1789	self._downloader.trouble(u'\nERROR: unable to download video')
	1790
	1791
	1792	class YahooIE(InfoExtractor):
	1793	"""Information extractor for video.yahoo.com."""
	1794
	1795	# _VALID_URL matches all Yahoo! Video URLs
	1796	# _VPAGE_URL matches only the extractable '/watch/' URLs
	1797	_VALID_URL = r'(?:http://)?(?:[a-z]+\.)?video\.yahoo\.com/(?:watch\|network)/([0-9]+)(?:/\|\?v=)([0-9]+)(?:[#\?].*)?'
	1798	_VPAGE_URL = r'(?:http://)?video\.yahoo\.com/watch/([0-9]+)/([0-9]+)(?:[#\?].*)?'
	1799
	1800	def __init__(self, downloader=None):
	1801	InfoExtractor.__init__(self, downloader)
	1802
	1803	@staticmethod
	1804	def suitable(url):
	1805	return (re.match(YahooIE._VALID_URL, url) is not None)
	1806
	1807	def report_download_webpage(self, video_id):
	1808	"""Report webpage download."""
	1809	self._downloader.to_screen(u'[video.yahoo] %s: Downloading webpage' % video_id)
	1810
	1811	def report_extraction(self, video_id):
	1812	"""Report information extraction."""
	1813	self._downloader.to_screen(u'[video.yahoo] %s: Extracting information' % video_id)
	1814
	1815	def _real_initialize(self):
	1816	return
	1817
	1818	def _real_extract(self, url, new_video=True):
	1819	# Extract ID from URL
	1820	mobj = re.match(self._VALID_URL, url)
	1821	if mobj is None:
	1822	self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
	1823	return
	1824
	1825	# At this point we have a new video
	1826	self._downloader.increment_downloads()
	1827	video_id = mobj.group(2)
	1828	video_extension = 'flv'
	1829
	1830	# Rewrite valid but non-extractable URLs as
	1831	# extractable English language /watch/ URLs
	1832	if re.match(self._VPAGE_URL, url) is None:
	1833	request = urllib2.Request(url)
	1834	try:
	1835	webpage = urllib2.urlopen(request).read()
	1836	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	1837	self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
	1838	return
	1839
	1840	mobj = re.search(r'$"id", "([0-9]+)"$;', webpage)
	1841	if mobj is None:
	1842	self._downloader.trouble(u'ERROR: Unable to extract id field')
	1843	return
	1844	yahoo_id = mobj.group(1)
	1845
	1846	mobj = re.search(r'$"vid", "([0-9]+)"$;', webpage)
	1847	if mobj is None:
	1848	self._downloader.trouble(u'ERROR: Unable to extract vid field')
	1849	return
	1850	yahoo_vid = mobj.group(1)
	1851
	1852	url = 'http://video.yahoo.com/watch/%s/%s' % (yahoo_vid, yahoo_id)
	1853	return self._real_extract(url, new_video=False)
	1854
	1855	# Retrieve video webpage to extract further information
	1856	request = urllib2.Request(url)
	1857	try:
	1858	self.report_download_webpage(video_id)
	1859	webpage = urllib2.urlopen(request).read()
	1860	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	1861	self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
	1862	return
	1863
	1864	# Extract uploader and title from webpage
	1865	self.report_extraction(video_id)
	1866	mobj = re.search(r'<meta name="title" content="(.*)" />', webpage)
	1867	if mobj is None:
	1868	self._downloader.trouble(u'ERROR: unable to extract video title')
	1869	return
	1870	video_title = mobj.group(1).decode('utf-8')
	1871	simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
	1872
	1873	mobj = re.search(r'<h2 class="ti-5"><a href="http://video\.yahoo\.com/(people\|profile)/[0-9]+" beacon=".">(.)</a></h2>', webpage)
	1874	if mobj is None:
	1875	self._downloader.trouble(u'ERROR: unable to extract video uploader')
	1876	return
	1877	video_uploader = mobj.group(1).decode('utf-8')
	1878
	1879	# Extract video thumbnail
	1880	mobj = re.search(r'<link rel="image_src" href="(.*)" />', webpage)
	1881	if mobj is None:
	1882	self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
	1883	return
	1884	video_thumbnail = mobj.group(1).decode('utf-8')
	1885
	1886	# Extract video description
	1887	mobj = re.search(r'<meta name="description" content="(.*)" />', webpage)
	1888	if mobj is None:
	1889	self._downloader.trouble(u'ERROR: unable to extract video description')
	1890	return
	1891	video_description = mobj.group(1).decode('utf-8')
	1892	if not video_description:
	1893	video_description = 'No description available.'
	1894
	1895	# Extract video height and width
	1896	mobj = re.search(r'<meta name="video_height" content="([0-9]+)" />', webpage)
	1897	if mobj is None:
	1898	self._downloader.trouble(u'ERROR: unable to extract video height')
	1899	return
	1900	yv_video_height = mobj.group(1)
	1901
	1902	mobj = re.search(r'<meta name="video_width" content="([0-9]+)" />', webpage)
	1903	if mobj is None:
	1904	self._downloader.trouble(u'ERROR: unable to extract video width')
	1905	return
	1906	yv_video_width = mobj.group(1)
	1907
	1908	# Retrieve video playlist to extract media URL
	1909	# I'm not completely sure what all these options are, but we
	1910	# seem to need most of them, otherwise the server sends a 401.
	1911	yv_lg = 'R0xx6idZnW2zlrKP8xxAIR' # not sure what this represents
	1912	yv_bitrate = '700' # according to Wikipedia this is hard-coded
	1913	request = urllib2.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id +
	1914	'&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height +
	1915	'&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797')
	1916	try:
	1917	self.report_download_webpage(video_id)
	1918	webpage = urllib2.urlopen(request).read()
	1919	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	1920	self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
	1921	return
	1922
	1923	# Extract media URL from playlist XML
	1924	mobj = re.search(r'<STREAM APP="(http://.)" FULLPATH="/?(/.\.flv\?[^"]*)"', webpage)
	1925	if mobj is None:
	1926	self._downloader.trouble(u'ERROR: Unable to extract media URL')
	1927	return
	1928	video_url = urllib.unquote(mobj.group(1) + mobj.group(2)).decode('utf-8')
	1929	video_url = re.sub(r'(?u)&(.+?);', htmlentity_transform, video_url)
	1930
	1931	try:
	1932	# Process video information
	1933	self._downloader.process_info({
	1934	'id': video_id.decode('utf-8'),
	1935	'url': video_url,
	1936	'uploader': video_uploader,
	1937	'upload_date': u'NA',
	1938	'title': video_title,
	1939	'stitle': simple_title,
	1940	'ext': video_extension.decode('utf-8'),
	1941	'thumbnail': video_thumbnail.decode('utf-8'),
	1942	'description': video_description,
	1943	'thumbnail': video_thumbnail,
	1944	'player_url': None,
	1945	})
	1946	except UnavailableVideoError:
	1947	self._downloader.trouble(u'\nERROR: unable to download video')
	1948
	1949
	1950	class VimeoIE(InfoExtractor):
	1951	"""Information extractor for vimeo.com."""
	1952
	1953	# _VALID_URL matches Vimeo URLs
	1954	_VALID_URL = r'(?:https?://)?(?:(?:www\|player).)?vimeo\.com/(?:groups/[^/]+/)?(?:videos?/)?([0-9]+)'
	1955
	1956	def __init__(self, downloader=None):
	1957	InfoExtractor.__init__(self, downloader)
	1958
	1959	@staticmethod
	1960	def suitable(url):
	1961	return (re.match(VimeoIE._VALID_URL, url) is not None)
	1962
	1963	def report_download_webpage(self, video_id):
	1964	"""Report webpage download."""
	1965	self._downloader.to_screen(u'[vimeo] %s: Downloading webpage' % video_id)
	1966
	1967	def report_extraction(self, video_id):
	1968	"""Report information extraction."""
	1969	self._downloader.to_screen(u'[vimeo] %s: Extracting information' % video_id)
	1970
	1971	def _real_initialize(self):
	1972	return
	1973
	1974	def _real_extract(self, url, new_video=True):
	1975	# Extract ID from URL
	1976	mobj = re.match(self._VALID_URL, url)
	1977	if mobj is None:
	1978	self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
	1979	return
	1980
	1981	# At this point we have a new video
	1982	self._downloader.increment_downloads()
	1983	video_id = mobj.group(1)
	1984
	1985	# Retrieve video webpage to extract further information
	1986	request = urllib2.Request("http://vimeo.com/moogaloop/load/clip:%s" % video_id, None, std_headers)
	1987	try:
	1988	self.report_download_webpage(video_id)
	1989	webpage = urllib2.urlopen(request).read()
	1990	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	1991	self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
	1992	return
	1993
	1994	# Now we begin extracting as much information as we can from what we
	1995	# retrieved. First we extract the information common to all extractors,
	1996	# and latter we extract those that are Vimeo specific.
	1997	self.report_extraction(video_id)
	1998
	1999	# Extract title
	2000	mobj = re.search(r'<caption>(.*?)</caption>', webpage)
	2001	if mobj is None:
	2002	self._downloader.trouble(u'ERROR: unable to extract video title')
	2003	return
	2004	video_title = mobj.group(1).decode('utf-8')
	2005	simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
	2006
	2007	# Extract uploader
	2008	mobj = re.search(r'<uploader_url>http://vimeo.com/(.*?)</uploader_url>', webpage)
	2009	if mobj is None:
	2010	self._downloader.trouble(u'ERROR: unable to extract video uploader')
	2011	return
	2012	video_uploader = mobj.group(1).decode('utf-8')
	2013
	2014	# Extract video thumbnail
	2015	mobj = re.search(r'<thumbnail>(.*?)</thumbnail>', webpage)
	2016	if mobj is None:
	2017	self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
	2018	return
	2019	video_thumbnail = mobj.group(1).decode('utf-8')
	2020
	2021	# # Extract video description
	2022	# mobj = re.search(r'<meta property="og:description" content="(.*)" />', webpage)
	2023	# if mobj is None:
	2024	# self._downloader.trouble(u'ERROR: unable to extract video description')
	2025	# return
	2026	# video_description = mobj.group(1).decode('utf-8')
	2027	# if not video_description: video_description = 'No description available.'
	2028	video_description = 'Foo.'
	2029
	2030	# Vimeo specific: extract request signature
	2031	mobj = re.search(r'<request_signature>(.*?)</request_signature>', webpage)
	2032	if mobj is None:
	2033	self._downloader.trouble(u'ERROR: unable to extract request signature')
	2034	return
	2035	sig = mobj.group(1).decode('utf-8')
	2036
	2037	# Vimeo specific: Extract request signature expiration
	2038	mobj = re.search(r'<request_signature_expires>(.*?)</request_signature_expires>', webpage)
	2039	if mobj is None:
	2040	self._downloader.trouble(u'ERROR: unable to extract request signature expiration')
	2041	return
	2042	sig_exp = mobj.group(1).decode('utf-8')
	2043
	2044	video_url = "http://vimeo.com/moogaloop/play/clip:%s/%s/%s" % (video_id, sig, sig_exp)
	2045
	2046	try:
	2047	# Process video information
	2048	self._downloader.process_info({
	2049	'id': video_id.decode('utf-8'),
	2050	'url': video_url,
	2051	'uploader': video_uploader,
	2052	'upload_date': u'NA',
	2053	'title': video_title,
	2054	'stitle': simple_title,
	2055	'ext': u'mp4',
	2056	'thumbnail': video_thumbnail.decode('utf-8'),
	2057	'description': video_description,
	2058	'thumbnail': video_thumbnail,
	2059	'description': video_description,
	2060	'player_url': None,
	2061	})
	2062	except UnavailableVideoError:
	2063	self._downloader.trouble(u'ERROR: unable to download video')
	2064
	2065
	2066	class GenericIE(InfoExtractor):
	2067	"""Generic last-resort information extractor."""
	2068
	2069	def __init__(self, downloader=None):
	2070	InfoExtractor.__init__(self, downloader)
	2071
	2072	@staticmethod
	2073	def suitable(url):
	2074	return True
	2075
	2076	def report_download_webpage(self, video_id):
	2077	"""Report webpage download."""
	2078	self._downloader.to_screen(u'WARNING: Falling back on generic information extractor.')
	2079	self._downloader.to_screen(u'[generic] %s: Downloading webpage' % video_id)
	2080
	2081	def report_extraction(self, video_id):
	2082	"""Report information extraction."""
	2083	self._downloader.to_screen(u'[generic] %s: Extracting information' % video_id)
	2084
	2085	def _real_initialize(self):
	2086	return
	2087
	2088	def _real_extract(self, url):
	2089	# At this point we have a new video
	2090	self._downloader.increment_downloads()
	2091
	2092	video_id = url.split('/')[-1]
	2093	request = urllib2.Request(url)
	2094	try:
	2095	self.report_download_webpage(video_id)
	2096	webpage = urllib2.urlopen(request).read()
	2097	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	2098	self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
	2099	return
	2100	except ValueError, err:
	2101	# since this is the last-resort InfoExtractor, if
	2102	# this error is thrown, it'll be thrown here
	2103	self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
	2104	return
	2105
	2106	self.report_extraction(video_id)
	2107	# Start with something easy: JW Player in SWFObject
	2108	mobj = re.search(r'flashvars: [\'"](?:.&)?file=(http[^\'"&])', webpage)
	2109	if mobj is None:
	2110	# Broaden the search a little bit
	2111	mobj = re.search(r'[^A-Za-z0-9]?(?:file\|source)=(http[^\'"&]*)', webpage)
	2112	if mobj is None:
	2113	self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
	2114	return
	2115
	2116	# It's possible that one of the regexes
	2117	# matched, but returned an empty group:
	2118	if mobj.group(1) is None:
	2119	self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
	2120	return
	2121
	2122	video_url = urllib.unquote(mobj.group(1))
	2123	video_id = os.path.basename(video_url)
	2124
	2125	# here's a fun little line of code for you:
	2126	video_extension = os.path.splitext(video_id)[1][1:]
	2127	video_id = os.path.splitext(video_id)[0]
	2128
	2129	# it's tempting to parse this further, but you would
	2130	# have to take into account all the variations like
	2131	# Video Title - Site Name
	2132	# Site Name \| Video Title
	2133	# Video Title - Tagline \| Site Name
	2134	# and so on and so forth; it's just not practical
	2135	mobj = re.search(r'<title>(.*)</title>', webpage)
	2136	if mobj is None:
	2137	self._downloader.trouble(u'ERROR: unable to extract title')
	2138	return
	2139	video_title = mobj.group(1).decode('utf-8')
	2140	video_title = sanitize_title(video_title)
	2141	simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
	2142
	2143	# video uploader is domain name
	2144	mobj = re.match(r'(?:https?://)?([^/])/.', url)
	2145	if mobj is None:
	2146	self._downloader.trouble(u'ERROR: unable to extract title')
	2147	return
	2148	video_uploader = mobj.group(1).decode('utf-8')
	2149
	2150	try:
	2151	# Process video information
	2152	self._downloader.process_info({
	2153	'id': video_id.decode('utf-8'),
	2154	'url': video_url.decode('utf-8'),
	2155	'uploader': video_uploader,
	2156	'upload_date': u'NA',
	2157	'title': video_title,
	2158	'stitle': simple_title,
	2159	'ext': video_extension.decode('utf-8'),
	2160	'format': u'NA',
	2161	'player_url': None,
	2162	})
	2163	except UnavailableVideoError, err:
	2164	self._downloader.trouble(u'\nERROR: unable to download video')
	2165
	2166
	2167	class YoutubeSearchIE(InfoExtractor):
	2168	"""Information Extractor for YouTube search queries."""
	2169	_VALID_QUERY = r'ytsearch(\d+\|all)?:[\s\S]+'
	2170	_TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en'
	2171	_VIDEO_INDICATOR = r'href="/watch\?v=.+?"'
	2172	_MORE_PAGES_INDICATOR = r'(?m)>\sNext\s</a>'
	2173	_youtube_ie = None
	2174	_max_youtube_results = 1000
	2175
	2176	def __init__(self, youtube_ie, downloader=None):
	2177	InfoExtractor.__init__(self, downloader)
	2178	self._youtube_ie = youtube_ie
	2179
	2180	@staticmethod
	2181	def suitable(url):
	2182	return (re.match(YoutubeSearchIE._VALID_QUERY, url) is not None)
	2183
	2184	def report_download_page(self, query, pagenum):
	2185	"""Report attempt to download playlist page with given number."""
	2186	query = query.decode(preferredencoding())
	2187	self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
	2188
	2189	def _real_initialize(self):
	2190	self._youtube_ie.initialize()
	2191
	2192	def _real_extract(self, query):
	2193	mobj = re.match(self._VALID_QUERY, query)
	2194	if mobj is None:
	2195	self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
	2196	return
	2197
	2198	prefix, query = query.split(':')
	2199	prefix = prefix[8:]
	2200	query = query.encode('utf-8')
	2201	if prefix == '':
	2202	self._download_n_results(query, 1)
	2203	return
	2204	elif prefix == 'all':
	2205	self._download_n_results(query, self._max_youtube_results)
	2206	return
	2207	else:
	2208	try:
	2209	n = long(prefix)
	2210	if n <= 0:
	2211	self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
	2212	return
	2213	elif n > self._max_youtube_results:
	2214	self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)' % (self._max_youtube_results, n))
	2215	n = self._max_youtube_results
	2216	self._download_n_results(query, n)
	2217	return
	2218	except ValueError: # parsing prefix as integer fails
	2219	self._download_n_results(query, 1)
	2220	return
	2221
	2222	def _download_n_results(self, query, n):
	2223	"""Downloads a specified number of results for a query"""
	2224
	2225	video_ids = []
	2226	already_seen = set()
	2227	pagenum = 1
	2228
	2229	while True:
	2230	self.report_download_page(query, pagenum)
	2231	result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
	2232	request = urllib2.Request(result_url)
	2233	try:
	2234	page = urllib2.urlopen(request).read()
	2235	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	2236	self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
	2237	return
	2238
	2239	# Extract video identifiers
	2240	for mobj in re.finditer(self._VIDEO_INDICATOR, page):
	2241	video_id = page[mobj.span()[0]:mobj.span()[1]].split('=')[2][:-1]
	2242	if video_id not in already_seen:
	2243	video_ids.append(video_id)
	2244	already_seen.add(video_id)
	2245	if len(video_ids) == n:
	2246	# Specified n videos reached
	2247	for id in video_ids:
	2248	self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
	2249	return
	2250
	2251	if re.search(self._MORE_PAGES_INDICATOR, page) is None:
	2252	for id in video_ids:
	2253	self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
	2254	return
	2255
	2256	pagenum = pagenum + 1
	2257
	2258
	2259	class GoogleSearchIE(InfoExtractor):
	2260	"""Information Extractor for Google Video search queries."""
	2261	_VALID_QUERY = r'gvsearch(\d+\|all)?:[\s\S]+'
	2262	_TEMPLATE_URL = 'http://video.google.com/videosearch?q=%s+site:video.google.com&start=%s&hl=en'
	2263	_VIDEO_INDICATOR = r'videoplay\?docid=([^\&>]+)\&'
	2264	_MORE_PAGES_INDICATOR = r'<span>Next</span>'
	2265	_google_ie = None
	2266	_max_google_results = 1000
	2267
	2268	def __init__(self, google_ie, downloader=None):
	2269	InfoExtractor.__init__(self, downloader)
	2270	self._google_ie = google_ie
	2271
	2272	@staticmethod
	2273	def suitable(url):
	2274	return (re.match(GoogleSearchIE._VALID_QUERY, url) is not None)
	2275
	2276	def report_download_page(self, query, pagenum):
	2277	"""Report attempt to download playlist page with given number."""
	2278	query = query.decode(preferredencoding())
	2279	self._downloader.to_screen(u'[video.google] query "%s": Downloading page %s' % (query, pagenum))
	2280
	2281	def _real_initialize(self):
	2282	self._google_ie.initialize()
	2283
	2284	def _real_extract(self, query):
	2285	mobj = re.match(self._VALID_QUERY, query)
	2286	if mobj is None:
	2287	self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
	2288	return
	2289
	2290	prefix, query = query.split(':')
	2291	prefix = prefix[8:]
	2292	query = query.encode('utf-8')
	2293	if prefix == '':
	2294	self._download_n_results(query, 1)
	2295	return
	2296	elif prefix == 'all':
	2297	self._download_n_results(query, self._max_google_results)
	2298	return
	2299	else:
	2300	try:
	2301	n = long(prefix)
	2302	if n <= 0:
	2303	self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
	2304	return
	2305	elif n > self._max_google_results:
	2306	self._downloader.to_stderr(u'WARNING: gvsearch returns max %i results (you requested %i)' % (self._max_google_results, n))
	2307	n = self._max_google_results
	2308	self._download_n_results(query, n)
	2309	return
	2310	except ValueError: # parsing prefix as integer fails
	2311	self._download_n_results(query, 1)
	2312	return
	2313
	2314	def _download_n_results(self, query, n):
	2315	"""Downloads a specified number of results for a query"""
	2316
	2317	video_ids = []
	2318	already_seen = set()
	2319	pagenum = 1
	2320
	2321	while True:
	2322	self.report_download_page(query, pagenum)
	2323	result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
	2324	request = urllib2.Request(result_url)
	2325	try:
	2326	page = urllib2.urlopen(request).read()
	2327	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	2328	self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
	2329	return
	2330
	2331	# Extract video identifiers
	2332	for mobj in re.finditer(self._VIDEO_INDICATOR, page):
	2333	video_id = mobj.group(1)
	2334	if video_id not in already_seen:
	2335	video_ids.append(video_id)
	2336	already_seen.add(video_id)
	2337	if len(video_ids) == n:
	2338	# Specified n videos reached
	2339	for id in video_ids:
	2340	self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
	2341	return
	2342
	2343	if re.search(self._MORE_PAGES_INDICATOR, page) is None:
	2344	for id in video_ids:
	2345	self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
	2346	return
	2347
	2348	pagenum = pagenum + 1
	2349
	2350
	2351	class YahooSearchIE(InfoExtractor):
	2352	"""Information Extractor for Yahoo! Video search queries."""
	2353	_VALID_QUERY = r'yvsearch(\d+\|all)?:[\s\S]+'
	2354	_TEMPLATE_URL = 'http://video.yahoo.com/search/?p=%s&o=%s'
	2355	_VIDEO_INDICATOR = r'href="http://video\.yahoo\.com/watch/([0-9]+/[0-9]+)"'
	2356	_MORE_PAGES_INDICATOR = r'\s*Next'
	2357	_yahoo_ie = None
	2358	_max_yahoo_results = 1000
	2359
	2360	def __init__(self, yahoo_ie, downloader=None):
	2361	InfoExtractor.__init__(self, downloader)
	2362	self._yahoo_ie = yahoo_ie
	2363
	2364	@staticmethod
	2365	def suitable(url):
	2366	return (re.match(YahooSearchIE._VALID_QUERY, url) is not None)
	2367
	2368	def report_download_page(self, query, pagenum):
	2369	"""Report attempt to download playlist page with given number."""
	2370	query = query.decode(preferredencoding())
	2371	self._downloader.to_screen(u'[video.yahoo] query "%s": Downloading page %s' % (query, pagenum))
	2372
	2373	def _real_initialize(self):
	2374	self._yahoo_ie.initialize()
	2375
	2376	def _real_extract(self, query):
	2377	mobj = re.match(self._VALID_QUERY, query)
	2378	if mobj is None:
	2379	self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
	2380	return
	2381
	2382	prefix, query = query.split(':')
	2383	prefix = prefix[8:]
	2384	query = query.encode('utf-8')
	2385	if prefix == '':
	2386	self._download_n_results(query, 1)
	2387	return
	2388	elif prefix == 'all':
	2389	self._download_n_results(query, self._max_yahoo_results)
	2390	return
	2391	else:
	2392	try:
	2393	n = long(prefix)
	2394	if n <= 0:
	2395	self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
	2396	return
	2397	elif n > self._max_yahoo_results:
	2398	self._downloader.to_stderr(u'WARNING: yvsearch returns max %i results (you requested %i)' % (self._max_yahoo_results, n))
	2399	n = self._max_yahoo_results
	2400	self._download_n_results(query, n)
	2401	return
	2402	except ValueError: # parsing prefix as integer fails
	2403	self._download_n_results(query, 1)
	2404	return
	2405
	2406	def _download_n_results(self, query, n):
	2407	"""Downloads a specified number of results for a query"""
	2408
	2409	video_ids = []
	2410	already_seen = set()
	2411	pagenum = 1
	2412
	2413	while True:
	2414	self.report_download_page(query, pagenum)
	2415	result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
	2416	request = urllib2.Request(result_url)
	2417	try:
	2418	page = urllib2.urlopen(request).read()
	2419	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	2420	self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
	2421	return
	2422
	2423	# Extract video identifiers
	2424	for mobj in re.finditer(self._VIDEO_INDICATOR, page):
	2425	video_id = mobj.group(1)
	2426	if video_id not in already_seen:
	2427	video_ids.append(video_id)
	2428	already_seen.add(video_id)
	2429	if len(video_ids) == n:
	2430	# Specified n videos reached
	2431	for id in video_ids:
	2432	self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
	2433	return
	2434
	2435	if re.search(self._MORE_PAGES_INDICATOR, page) is None:
	2436	for id in video_ids:
	2437	self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
	2438	return
	2439
	2440	pagenum = pagenum + 1
	2441
	2442
	2443	class YoutubePlaylistIE(InfoExtractor):
	2444	"""Information Extractor for YouTube playlists."""
	2445
	2446	_VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list\|my_playlists\|artist\|playlist)\?.?(p\|a\|list)=\|user/.?/user/\|p/\|user/.?#[pg]/c/)([0-9A-Za-z]+)(?:/.?/([0-9A-Za-z_-]+))?.*'
	2447	_TEMPLATE_URL = 'http://www.youtube.com/%s?%s=%s&page=%s&gl=US&hl=en'
	2448	_VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
	2449	_MORE_PAGES_INDICATOR = r'(?m)>\sNext\s</a>'
	2450	_youtube_ie = None
	2451
	2452	def __init__(self, youtube_ie, downloader=None):
	2453	InfoExtractor.__init__(self, downloader)
	2454	self._youtube_ie = youtube_ie
	2455
	2456	@staticmethod
	2457	def suitable(url):
	2458	return (re.match(YoutubePlaylistIE._VALID_URL, url) is not None)
	2459
	2460	def report_download_page(self, playlist_id, pagenum):
	2461	"""Report attempt to download playlist page with given number."""
	2462	self._downloader.to_screen(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum))
	2463
	2464	def _real_initialize(self):
	2465	self._youtube_ie.initialize()
	2466
	2467	def _real_extract(self, url):
	2468	# Extract playlist id
	2469	mobj = re.match(self._VALID_URL, url)
	2470	if mobj is None:
	2471	self._downloader.trouble(u'ERROR: invalid url: %s' % url)
	2472	return
	2473
	2474	# Single video case
	2475	if mobj.group(3) is not None:
	2476	self._youtube_ie.extract(mobj.group(3))
	2477	return
	2478
	2479	# Download playlist pages
	2480	# prefix is 'p' as default for playlists but there are other types that need extra care
	2481	playlist_prefix = mobj.group(1)
	2482	if playlist_prefix == 'a':
	2483	playlist_access = 'artist'
	2484	else:
	2485	playlist_prefix = 'p'
	2486	playlist_access = 'view_play_list'
	2487	playlist_id = mobj.group(2)
	2488	video_ids = []
	2489	pagenum = 1
	2490
	2491	while True:
	2492	self.report_download_page(playlist_id, pagenum)
	2493	request = urllib2.Request(self._TEMPLATE_URL % (playlist_access, playlist_prefix, playlist_id, pagenum))
	2494	try:
	2495	page = urllib2.urlopen(request).read()
	2496	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	2497	self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
	2498	return
	2499
	2500	# Extract video identifiers
	2501	ids_in_page = []
	2502	for mobj in re.finditer(self._VIDEO_INDICATOR, page):
	2503	if mobj.group(1) not in ids_in_page:
	2504	ids_in_page.append(mobj.group(1))
	2505	video_ids.extend(ids_in_page)
	2506
	2507	if re.search(self._MORE_PAGES_INDICATOR, page) is None:
	2508	break
	2509	pagenum = pagenum + 1
	2510
	2511	playliststart = self._downloader.params.get('playliststart', 1) - 1
	2512	playlistend = self._downloader.params.get('playlistend', -1)
	2513	video_ids = video_ids[playliststart:playlistend]
	2514
	2515	for id in video_ids:
	2516	self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
	2517	return
	2518
	2519
	2520	class YoutubeUserIE(InfoExtractor):
	2521	"""Information Extractor for YouTube users."""
	2522
	2523	_VALID_URL = r'(?:(?:(?:http://)?(?:\w+\.)?youtube.com/user/)\|ytuser:)([A-Za-z0-9_-]+)'
	2524	_TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
	2525	_GDATA_PAGE_SIZE = 50
	2526	_GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d'
	2527	_VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
	2528	_youtube_ie = None
	2529
	2530	def __init__(self, youtube_ie, downloader=None):
	2531	InfoExtractor.__init__(self, downloader)
	2532	self._youtube_ie = youtube_ie
	2533
	2534	@staticmethod
	2535	def suitable(url):
	2536	return (re.match(YoutubeUserIE._VALID_URL, url) is not None)
	2537
	2538	def report_download_page(self, username, start_index):
	2539	"""Report attempt to download user page."""
	2540	self._downloader.to_screen(u'[youtube] user %s: Downloading video ids from %d to %d' %
	2541	(username, start_index, start_index + self._GDATA_PAGE_SIZE))
	2542
	2543	def _real_initialize(self):
	2544	self._youtube_ie.initialize()
	2545
	2546	def _real_extract(self, url):
	2547	# Extract username
	2548	mobj = re.match(self._VALID_URL, url)
	2549	if mobj is None:
	2550	self._downloader.trouble(u'ERROR: invalid url: %s' % url)
	2551	return
	2552
	2553	username = mobj.group(1)
	2554
	2555	# Download video ids using YouTube Data API. Result size per
	2556	# query is limited (currently to 50 videos) so we need to query
	2557	# page by page until there are no video ids - it means we got
	2558	# all of them.
	2559
	2560	video_ids = []
	2561	pagenum = 0
	2562
	2563	while True:
	2564	start_index = pagenum * self._GDATA_PAGE_SIZE + 1
	2565	self.report_download_page(username, start_index)
	2566
	2567	request = urllib2.Request(self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index))
	2568
	2569	try:
	2570	page = urllib2.urlopen(request).read()
	2571	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	2572	self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
	2573	return
	2574
	2575	# Extract video identifiers
	2576	ids_in_page = []
	2577
	2578	for mobj in re.finditer(self._VIDEO_INDICATOR, page):
	2579	if mobj.group(1) not in ids_in_page:
	2580	ids_in_page.append(mobj.group(1))
	2581
	2582	video_ids.extend(ids_in_page)
	2583
	2584	# A little optimization - if current page is not
	2585	# "full", ie. does not contain PAGE_SIZE video ids then
	2586	# we can assume that this page is the last one - there
	2587	# are no more ids on further pages - no need to query
	2588	# again.
	2589
	2590	if len(ids_in_page) < self._GDATA_PAGE_SIZE:
	2591	break
	2592
	2593	pagenum += 1
	2594
	2595	all_ids_count = len(video_ids)
	2596	playliststart = self._downloader.params.get('playliststart', 1) - 1
	2597	playlistend = self._downloader.params.get('playlistend', -1)
	2598
	2599	if playlistend == -1:
	2600	video_ids = video_ids[playliststart:]
	2601	else:
	2602	video_ids = video_ids[playliststart:playlistend]
	2603
	2604	self._downloader.to_screen("[youtube] user %s: Collected %d video ids (downloading %d of them)" %
	2605	(username, all_ids_count, len(video_ids)))
	2606
	2607	for video_id in video_ids:
	2608	self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % video_id)
	2609
	2610
	2611	class DepositFilesIE(InfoExtractor):
	2612	"""Information extractor for depositfiles.com"""
	2613
	2614	_VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles.com/(?:../(?#locale))?files/(.+)'
	2615
	2616	def __init__(self, downloader=None):
	2617	InfoExtractor.__init__(self, downloader)
	2618
	2619	@staticmethod
	2620	def suitable(url):
	2621	return (re.match(DepositFilesIE._VALID_URL, url) is not None)
	2622
	2623	def report_download_webpage(self, file_id):
	2624	"""Report webpage download."""
	2625	self._downloader.to_screen(u'[DepositFiles] %s: Downloading webpage' % file_id)
	2626
	2627	def report_extraction(self, file_id):
	2628	"""Report information extraction."""
	2629	self._downloader.to_screen(u'[DepositFiles] %s: Extracting information' % file_id)
	2630
	2631	def _real_initialize(self):
	2632	return
	2633
	2634	def _real_extract(self, url):
	2635	# At this point we have a new file
	2636	self._downloader.increment_downloads()
	2637
	2638	file_id = url.split('/')[-1]
	2639	# Rebuild url in english locale
	2640	url = 'http://depositfiles.com/en/files/' + file_id
	2641
	2642	# Retrieve file webpage with 'Free download' button pressed
	2643	free_download_indication = { 'gateway_result' : '1' }
	2644	request = urllib2.Request(url, urllib.urlencode(free_download_indication))
	2645	try:
	2646	self.report_download_webpage(file_id)
	2647	webpage = urllib2.urlopen(request).read()
	2648	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	2649	self._downloader.trouble(u'ERROR: Unable to retrieve file webpage: %s' % str(err))
	2650	return
	2651
	2652	# Search for the real file URL
	2653	mobj = re.search(r'<form action="(http://fileshare.+?)"', webpage)
	2654	if (mobj is None) or (mobj.group(1) is None):
	2655	# Try to figure out reason of the error.
	2656	mobj = re.search(r'<strong>(Attention.*?)</strong>', webpage, re.DOTALL)
	2657	if (mobj is not None) and (mobj.group(1) is not None):
	2658	restriction_message = re.sub('\s+', ' ', mobj.group(1)).strip()
	2659	self._downloader.trouble(u'ERROR: %s' % restriction_message)
	2660	else:
	2661	self._downloader.trouble(u'ERROR: unable to extract download URL from: %s' % url)
	2662	return
	2663
	2664	file_url = mobj.group(1)
	2665	file_extension = os.path.splitext(file_url)[1][1:]
	2666
	2667	# Search for file title
	2668	mobj = re.search(r'<b title="(.*?)">', webpage)
	2669	if mobj is None:
	2670	self._downloader.trouble(u'ERROR: unable to extract title')
	2671	return
	2672	file_title = mobj.group(1).decode('utf-8')
	2673
	2674	try:
	2675	# Process file information
	2676	self._downloader.process_info({
	2677	'id': file_id.decode('utf-8'),
	2678	'url': file_url.decode('utf-8'),
	2679	'uploader': u'NA',
	2680	'upload_date': u'NA',
	2681	'title': file_title,
	2682	'stitle': file_title,
	2683	'ext': file_extension.decode('utf-8'),
	2684	'format': u'NA',
	2685	'player_url': None,
	2686	})
	2687	except UnavailableVideoError, err:
	2688	self._downloader.trouble(u'ERROR: unable to download file')
	2689
	2690
	2691	class FacebookIE(InfoExtractor):
	2692	"""Information Extractor for Facebook"""
	2693
	2694	_VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook.com/video/video.php\?(?:.?)v=(?P<ID>\d+)(?:.)'
	2695	_LOGIN_URL = 'https://login.facebook.com/login.php?m&next=http%3A%2F%2Fm.facebook.com%2Fhome.php&'
	2696	_NETRC_MACHINE = 'facebook'
	2697	_available_formats = ['highqual', 'lowqual']
	2698	_video_extensions = {
	2699	'highqual': 'mp4',
	2700	'lowqual': 'mp4',
	2701	}
	2702
	2703	def __init__(self, downloader=None):
	2704	InfoExtractor.__init__(self, downloader)
	2705
	2706	@staticmethod
	2707	def suitable(url):
	2708	return (re.match(FacebookIE._VALID_URL, url) is not None)
	2709
	2710	def _reporter(self, message):
	2711	"""Add header and report message."""
	2712	self._downloader.to_screen(u'[facebook] %s' % message)
	2713
	2714	def report_login(self):
	2715	"""Report attempt to log in."""
	2716	self._reporter(u'Logging in')
	2717
	2718	def report_video_webpage_download(self, video_id):
	2719	"""Report attempt to download video webpage."""
	2720	self._reporter(u'%s: Downloading video webpage' % video_id)
	2721
	2722	def report_information_extraction(self, video_id):
	2723	"""Report attempt to extract video information."""
	2724	self._reporter(u'%s: Extracting video information' % video_id)
	2725
	2726	def _parse_page(self, video_webpage):
	2727	"""Extract video information from page"""
	2728	# General data
	2729	data = {'title': r'class="video_title datawrap">(.*?)</',
	2730	'description': r'<div class="datawrap">(.*?)</div>',
	2731	'owner': r'$"video_owner_name", "(.*?)"$',
	2732	'upload_date': r'data-date="(.*?)"',
	2733	'thumbnail': r'$"thumb_url", "(?P<THUMB>.*?)"$',
	2734	}
	2735	video_info = {}
	2736	for piece in data.keys():
	2737	mobj = re.search(data[piece], video_webpage)
	2738	if mobj is not None:
	2739	video_info[piece] = urllib.unquote_plus(mobj.group(1).decode("unicode_escape"))
	2740
	2741	# Video urls
	2742	video_urls = {}
	2743	for fmt in self._available_formats:
	2744	mobj = re.search(r'$"%s_src\", "(.+?)"$' % fmt, video_webpage)
	2745	if mobj is not None:
	2746	# URL is in a Javascript segment inside an escaped Unicode format within
	2747	# the generally utf-8 page
	2748	video_urls[fmt] = urllib.unquote_plus(mobj.group(1).decode("unicode_escape"))
	2749	video_info['video_urls'] = video_urls
	2750
	2751	return video_info
	2752
	2753	def _real_initialize(self):
	2754	if self._downloader is None:
	2755	return
	2756
	2757	useremail = None
	2758	password = None
	2759	downloader_params = self._downloader.params
	2760
	2761	# Attempt to use provided username and password or .netrc data
	2762	if downloader_params.get('username', None) is not None:
	2763	useremail = downloader_params['username']
	2764	password = downloader_params['password']
	2765	elif downloader_params.get('usenetrc', False):
	2766	try:
	2767	info = netrc.netrc().authenticators(self._NETRC_MACHINE)
	2768	if info is not None:
	2769	useremail = info[0]
	2770	password = info[2]
	2771	else:
	2772	raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
	2773	except (IOError, netrc.NetrcParseError), err:
	2774	self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
	2775	return
	2776
	2777	if useremail is None:
	2778	return
	2779
	2780	# Log in
	2781	login_form = {
	2782	'email': useremail,
	2783	'pass': password,
	2784	'login': 'Log+In'
	2785	}
	2786	request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form))
	2787	try:
	2788	self.report_login()
	2789	login_results = urllib2.urlopen(request).read()
	2790	if re.search(r'<form(.)name="login"(.)</form>', login_results) is not None:
	2791	self._downloader.to_stderr(u'WARNING: unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
	2792	return
	2793	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	2794	self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
	2795	return
	2796
	2797	def _real_extract(self, url):
	2798	mobj = re.match(self._VALID_URL, url)
	2799	if mobj is None:
	2800	self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
	2801	return
	2802	video_id = mobj.group('ID')
	2803
	2804	# Get video webpage
	2805	self.report_video_webpage_download(video_id)
	2806	request = urllib2.Request('https://www.facebook.com/video/video.php?v=%s' % video_id)
	2807	try:
	2808	page = urllib2.urlopen(request)
	2809	video_webpage = page.read()
	2810	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	2811	self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
	2812	return
	2813
	2814	# Start extracting information
	2815	self.report_information_extraction(video_id)
	2816
	2817	# Extract information
	2818	video_info = self._parse_page(video_webpage)
	2819
	2820	# uploader
	2821	if 'owner' not in video_info:
	2822	self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
	2823	return
	2824	video_uploader = video_info['owner']
	2825
	2826	# title
	2827	if 'title' not in video_info:
	2828	self._downloader.trouble(u'ERROR: unable to extract video title')
	2829	return
	2830	video_title = video_info['title']
	2831	video_title = video_title.decode('utf-8')
	2832	video_title = sanitize_title(video_title)
	2833
	2834	# simplified title
	2835	simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
	2836	simple_title = simple_title.strip(ur'_')
	2837
	2838	# thumbnail image
	2839	if 'thumbnail' not in video_info:
	2840	self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
	2841	video_thumbnail = ''
	2842	else:
	2843	video_thumbnail = video_info['thumbnail']
	2844
	2845	# upload date
	2846	upload_date = u'NA'
	2847	if 'upload_date' in video_info:
	2848	upload_time = video_info['upload_date']
	2849	timetuple = email.utils.parsedate_tz(upload_time)
	2850	if timetuple is not None:
	2851	try:
	2852	upload_date = time.strftime('%Y%m%d', timetuple[0:9])
	2853	except:
	2854	pass
	2855
	2856	# description
	2857	video_description = video_info.get('description', 'No description available.')
	2858
	2859	url_map = video_info['video_urls']
	2860	if len(url_map.keys()) > 0:
	2861	# Decide which formats to download
	2862	req_format = self._downloader.params.get('format', None)
	2863	format_limit = self._downloader.params.get('format_limit', None)
	2864
	2865	if format_limit is not None and format_limit in self._available_formats:
	2866	format_list = self._available_formats[self._available_formats.index(format_limit):]
	2867	else:
	2868	format_list = self._available_formats
	2869	existing_formats = [x for x in format_list if x in url_map]
	2870	if len(existing_formats) == 0:
	2871	self._downloader.trouble(u'ERROR: no known formats available for video')
	2872	return
	2873	if req_format is None:
	2874	video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
	2875	elif req_format == '-1':
	2876	video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
	2877	else:
	2878	# Specific format
	2879	if req_format not in url_map:
	2880	self._downloader.trouble(u'ERROR: requested format not available')
	2881	return
	2882	video_url_list = [(req_format, url_map[req_format])] # Specific format
	2883
	2884	for format_param, video_real_url in video_url_list:
	2885
	2886	# At this point we have a new video
	2887	self._downloader.increment_downloads()
	2888
	2889	# Extension
	2890	video_extension = self._video_extensions.get(format_param, 'mp4')
	2891
	2892	try:
	2893	# Process video information
	2894	self._downloader.process_info({
	2895	'id': video_id.decode('utf-8'),
	2896	'url': video_real_url.decode('utf-8'),
	2897	'uploader': video_uploader.decode('utf-8'),
	2898	'upload_date': upload_date,
	2899	'title': video_title,
	2900	'stitle': simple_title,
	2901	'ext': video_extension.decode('utf-8'),
	2902	'format': (format_param is None and u'NA' or format_param.decode('utf-8')),
	2903	'thumbnail': video_thumbnail.decode('utf-8'),
	2904	'description': video_description.decode('utf-8'),
	2905	'player_url': None,
	2906	})
	2907	except UnavailableVideoError, err:
	2908	self._downloader.trouble(u'\nERROR: unable to download video')
	2909
	2910	class BlipTVIE(InfoExtractor):
	2911	"""Information extractor for blip.tv"""
	2912
	2913	_VALID_URL = r'^(?:https?://)?(?:\w+\.)?blip\.tv(/.+)$'
	2914	_URL_EXT = r'^.*\.([a-z0-9]+)$'
	2915
	2916	@staticmethod
	2917	def suitable(url):
	2918	return (re.match(BlipTVIE._VALID_URL, url) is not None)
	2919
	2920	def report_extraction(self, file_id):
	2921	"""Report information extraction."""
	2922	self._downloader.to_screen(u'[blip.tv] %s: Extracting information' % file_id)
	2923
	2924	def _simplify_title(self, title):
	2925	res = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', title)
	2926	res = res.strip(ur'_')
	2927	return res
	2928
	2929	def _real_extract(self, url):
	2930	mobj = re.match(self._VALID_URL, url)
	2931	if mobj is None:
	2932	self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
	2933	return
	2934
	2935	if '?' in url:
	2936	cchar = '&'
	2937	else:
	2938	cchar = '?'
	2939	json_url = url + cchar + 'skin=json&version=2&no_wrap=1'
	2940	request = urllib2.Request(json_url)
	2941	self.report_extraction(mobj.group(1))
	2942	try:
	2943	json_code = urllib2.urlopen(request).read()
	2944	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	2945	self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
	2946	return
	2947	try:
	2948	json_data = json.loads(json_code)
	2949	if 'Post' in json_data:
	2950	data = json_data['Post']
	2951	else:
	2952	data = json_data
	2953
	2954	upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d')
	2955	video_url = data['media']['url']
	2956	umobj = re.match(self._URL_EXT, video_url)
	2957	if umobj is None:
	2958	raise ValueError('Can not determine filename extension')
	2959	ext = umobj.group(1)
	2960
	2961	self._downloader.increment_downloads()
	2962
	2963	info = {
	2964	'id': data['item_id'],
	2965	'url': video_url,
	2966	'uploader': data['display_name'],
	2967	'upload_date': upload_date,
	2968	'title': data['title'],
	2969	'stitle': self._simplify_title(data['title']),
	2970	'ext': ext,
	2971	'format': data['media']['mimeType'],
	2972	'thumbnail': data['thumbnailUrl'],
	2973	'description': data['description'],
	2974	'player_url': data['embedUrl']
	2975	}
	2976	except (ValueError,KeyError), err:
	2977	self._downloader.trouble(u'ERROR: unable to parse video information: %s' % repr(err))
	2978	return
	2979
	2980	try:
	2981	self._downloader.process_info(info)
	2982	except UnavailableVideoError, err:
	2983	self._downloader.trouble(u'\nERROR: unable to download video')
	2984
	2985
	2986	class MyVideoIE(InfoExtractor):
	2987	"""Information Extractor for myvideo.de."""
	2988
	2989	_VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/watch/([0-9]+)/([^?/]+).*'
	2990
	2991	def __init__(self, downloader=None):
	2992	InfoExtractor.__init__(self, downloader)
	2993
	2994	@staticmethod
	2995	def suitable(url):
	2996	return (re.match(MyVideoIE._VALID_URL, url) is not None)
	2997
	2998	def report_download_webpage(self, video_id):
	2999	"""Report webpage download."""
	3000	self._downloader.to_screen(u'[myvideo] %s: Downloading webpage' % video_id)
	3001
	3002	def report_extraction(self, video_id):
	3003	"""Report information extraction."""
	3004	self._downloader.to_screen(u'[myvideo] %s: Extracting information' % video_id)
	3005
	3006	def _real_initialize(self):
	3007	return
	3008
	3009	def _real_extract(self,url):
	3010	mobj = re.match(self._VALID_URL, url)
	3011	if mobj is None:
	3012	self._download.trouble(u'ERROR: invalid URL: %s' % url)
	3013	return
	3014
	3015	video_id = mobj.group(1)
	3016	simple_title = mobj.group(2).decode('utf-8')
	3017	# should actually not be necessary
	3018	simple_title = sanitize_title(simple_title)
	3019	simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', simple_title)
	3020
	3021	# Get video webpage
	3022	request = urllib2.Request('http://www.myvideo.de/watch/%s' % video_id)
	3023	try:
	3024	self.report_download_webpage(video_id)
	3025	webpage = urllib2.urlopen(request).read()
	3026	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	3027	self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
	3028	return
	3029
	3030	self.report_extraction(video_id)
	3031	mobj = re.search(r'<link rel=\'image_src\' href=\'(http://is[0-9].myvideo\.de/de/movie[0-9]+/[a-f0-9]+)/thumbs/[^.]+\.jpg\' />',
	3032	webpage)
	3033	if mobj is None:
	3034	self._downloader.trouble(u'ERROR: unable to extract media URL')
	3035	return
	3036	video_url = mobj.group(1) + ('/%s.flv' % video_id)
	3037
	3038	mobj = re.search('<title>([^<]+)</title>', webpage)
	3039	if mobj is None:
	3040	self._downloader.trouble(u'ERROR: unable to extract title')
	3041	return
	3042
	3043	video_title = mobj.group(1)
	3044	video_title = sanitize_title(video_title)
	3045
	3046	try:
	3047	print(video_url)
	3048	self._downloader.process_info({
	3049	'id': video_id,
	3050	'url': video_url,
	3051	'uploader': u'NA',
	3052	'upload_date': u'NA',
	3053	'title': video_title,
	3054	'stitle': simple_title,
	3055	'ext': u'flv',
	3056	'format': u'NA',
	3057	'player_url': None,
	3058	})
	3059	except UnavailableVideoError:
	3060	self._downloader.trouble(u'\nERROR: Unable to download video')
	3061
	3062	class ComedyCentralIE(InfoExtractor):
	3063	"""Information extractor for The Daily Show and Colbert Report """
	3064
	3065	_VALID_URL = r'^(:(?P<shortname>tds\|thedailyshow\|cr\|colbert\|colbertnation\|colbertreport))\|(https?://)?(www\.)(?P<showname>thedailyshow\|colbertnation)\.com/full-episodes/(?P<episode>.*)$'
	3066
	3067	@staticmethod
	3068	def suitable(url):
	3069	return (re.match(ComedyCentralIE._VALID_URL, url) is not None)
	3070
	3071	def report_extraction(self, episode_id):
	3072	self._downloader.to_screen(u'[comedycentral] %s: Extracting information' % episode_id)
	3073
	3074	def report_config_download(self, episode_id):
	3075	self._downloader.to_screen(u'[comedycentral] %s: Downloading configuration' % episode_id)
	3076
	3077	def report_index_download(self, episode_id):
	3078	self._downloader.to_screen(u'[comedycentral] %s: Downloading show index' % episode_id)
	3079
	3080	def report_player_url(self, episode_id):
	3081	self._downloader.to_screen(u'[comedycentral] %s: Determining player URL' % episode_id)
	3082
	3083	def _simplify_title(self, title):
	3084	res = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', title)
	3085	res = res.strip(ur'_')
	3086	return res
	3087
	3088	def _real_extract(self, url):
	3089	mobj = re.match(self._VALID_URL, url)
	3090	if mobj is None:
	3091	self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
	3092	return
	3093
	3094	if mobj.group('shortname'):
	3095	if mobj.group('shortname') in ('tds', 'thedailyshow'):
	3096	url = 'http://www.thedailyshow.com/full-episodes/'
	3097	else:
	3098	url = 'http://www.colbertnation.com/full-episodes/'
	3099	mobj = re.match(self._VALID_URL, url)
	3100	assert mobj is not None
	3101
	3102	dlNewest = not mobj.group('episode')
	3103	if dlNewest:
	3104	epTitle = mobj.group('showname')
	3105	else:
	3106	epTitle = mobj.group('episode')
	3107
	3108	req = urllib2.Request(url)
	3109	self.report_extraction(epTitle)
	3110	try:
	3111	htmlHandle = urllib2.urlopen(req)
	3112	html = htmlHandle.read()
	3113	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	3114	self._downloader.trouble(u'ERROR: unable to download webpage: %s' % unicode(err))
	3115	return
	3116	if dlNewest:
	3117	url = htmlHandle.geturl()
	3118	mobj = re.match(self._VALID_URL, url)
	3119	if mobj is None:
	3120	self._downloader.trouble(u'ERROR: Invalid redirected URL: ' + url)
	3121	return
	3122	if mobj.group('episode') == '':
	3123	self._downloader.trouble(u'ERROR: Redirected URL is still not specific: ' + url)
	3124	return
	3125	epTitle = mobj.group('episode')
	3126
	3127	mMovieParams = re.findall('<param name="movie" value="(http://media.mtvnservices.com/([^"]episode.?:.*?))"/>', html)
	3128	if len(mMovieParams) == 0:
	3129	self._downloader.trouble(u'ERROR: unable to find Flash URL in webpage ' + url)
	3130	return
	3131
	3132	playerUrl_raw = mMovieParams[0][0]
	3133	self.report_player_url(epTitle)
	3134	try:
	3135	urlHandle = urllib2.urlopen(playerUrl_raw)
	3136	playerUrl = urlHandle.geturl()
	3137	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	3138	self._downloader.trouble(u'ERROR: unable to find out player URL: ' + unicode(err))
	3139	return
	3140
	3141	uri = mMovieParams[0][1]
	3142	indexUrl = 'http://shadow.comedycentral.com/feeds/video_player/mrss/?' + urllib.urlencode({'uri': uri})
	3143	self.report_index_download(epTitle)
	3144	try:
	3145	indexXml = urllib2.urlopen(indexUrl).read()
	3146	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	3147	self._downloader.trouble(u'ERROR: unable to download episode index: ' + unicode(err))
	3148	return
	3149
	3150	idoc = xml.etree.ElementTree.fromstring(indexXml)
	3151	itemEls = idoc.findall('.//item')
	3152	for itemEl in itemEls:
	3153	mediaId = itemEl.findall('./guid')[0].text
	3154	shortMediaId = mediaId.split(':')[-1]
	3155	showId = mediaId.split(':')[-2].replace('.com', '')
	3156	officialTitle = itemEl.findall('./title')[0].text
	3157	officialDate = itemEl.findall('./pubDate')[0].text
	3158
	3159	configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' +
	3160	urllib.urlencode({'uri': mediaId}))
	3161	configReq = urllib2.Request(configUrl)
	3162	self.report_config_download(epTitle)
	3163	try:
	3164	configXml = urllib2.urlopen(configReq).read()
	3165	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	3166	self._downloader.trouble(u'ERROR: unable to download webpage: %s' % unicode(err))
	3167	return
	3168
	3169	cdoc = xml.etree.ElementTree.fromstring(configXml)
	3170	turls = []
	3171	for rendition in cdoc.findall('.//rendition'):
	3172	finfo = (rendition.attrib['bitrate'], rendition.findall('./src')[0].text)
	3173	turls.append(finfo)
	3174
	3175	if len(turls) == 0:
	3176	self._downloader.trouble(u'\nERROR: unable to download ' + mediaId + ': No videos found')
	3177	continue
	3178
	3179	# For now, just pick the highest bitrate
	3180	format,video_url = turls[-1]
	3181
	3182	self._downloader.increment_downloads()
	3183
	3184	effTitle = showId + '-' + epTitle
	3185	info = {
	3186	'id': shortMediaId,
	3187	'url': video_url,
	3188	'uploader': showId,
	3189	'upload_date': officialDate,
	3190	'title': effTitle,
	3191	'stitle': self._simplify_title(effTitle),
	3192	'ext': 'mp4',
	3193	'format': format,
	3194	'thumbnail': None,
	3195	'description': officialTitle,
	3196	'player_url': playerUrl
	3197	}
	3198
	3199	try:
	3200	self._downloader.process_info(info)
	3201	except UnavailableVideoError, err:
	3202	self._downloader.trouble(u'\nERROR: unable to download ' + mediaId)
	3203	continue
	3204
	3205
	3206	class EscapistIE(InfoExtractor):
	3207	"""Information extractor for The Escapist """
	3208
	3209	_VALID_URL = r'^(https?://)?(www\.)escapistmagazine.com/videos/view/(?P<showname>[^/]+)/(?P<episode>[^/?]+)[/?].*$'
	3210
	3211	@staticmethod
	3212	def suitable(url):
	3213	return (re.match(EscapistIE._VALID_URL, url) is not None)
	3214
	3215	def report_extraction(self, showName):
	3216	self._downloader.to_screen(u'[escapist] %s: Extracting information' % showName)
	3217
	3218	def report_config_download(self, showName):
	3219	self._downloader.to_screen(u'[escapist] %s: Downloading configuration' % showName)
	3220
	3221	def _simplify_title(self, title):
	3222	res = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', title)
	3223	res = res.strip(ur'_')
	3224	return res
	3225
	3226	def _real_extract(self, url):
	3227	htmlParser = HTMLParser.HTMLParser()
	3228
	3229	mobj = re.match(self._VALID_URL, url)
	3230	if mobj is None:
	3231	self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
	3232	return
	3233	showName = mobj.group('showname')
	3234	videoId = mobj.group('episode')
	3235
	3236	self.report_extraction(showName)
	3237	try:
	3238	webPage = urllib2.urlopen(url).read()
	3239	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	3240	self._downloader.trouble(u'ERROR: unable to download webpage: ' + unicode(err))
	3241	return
	3242
	3243	descMatch = re.search('<meta name="description" content="([^"]*)"', webPage)
	3244	description = htmlParser.unescape(descMatch.group(1))
	3245	imgMatch = re.search('<meta property="og:image" content="([^"]*)"', webPage)
	3246	imgUrl = htmlParser.unescape(imgMatch.group(1))
	3247	playerUrlMatch = re.search('<meta property="og:video" content="([^"]*)"', webPage)
	3248	playerUrl = htmlParser.unescape(playerUrlMatch.group(1))
	3249	configUrlMatch = re.search('config=(.*)$', playerUrl)
	3250	configUrl = urllib2.unquote(configUrlMatch.group(1))
	3251
	3252	self.report_config_download(showName)
	3253	try:
	3254	configJSON = urllib2.urlopen(configUrl).read()
	3255	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	3256	self._downloader.trouble(u'ERROR: unable to download configuration: ' + unicode(err))
	3257	return
	3258
	3259	# Technically, it's JavaScript, not JSON
	3260	configJSON = configJSON.replace("'", '"')
	3261
	3262	try:
	3263	config = json.loads(configJSON)
	3264	except (ValueError,), err:
	3265	self._downloader.trouble(u'ERROR: Invalid JSON in configuration file: ' + unicode(err))
	3266	return
	3267
	3268	playlist = config['playlist']
	3269	videoUrl = playlist[1]['url']
	3270
	3271	self._downloader.increment_downloads()
	3272	info = {
	3273	'id': videoId,
	3274	'url': videoUrl,
	3275	'uploader': showName,
	3276	'upload_date': None,
	3277	'title': showName,
	3278	'stitle': self._simplify_title(showName),
	3279	'ext': 'flv',
	3280	'format': 'flv',
	3281	'thumbnail': imgUrl,
	3282	'description': description,
	3283	'player_url': playerUrl,
	3284	}
	3285
	3286	try:
	3287	self._downloader.process_info(info)
	3288	except UnavailableVideoError, err:
	3289	self._downloader.trouble(u'\nERROR: unable to download ' + videoId)
	3290
	3291
	3292
	3293	class PostProcessor(object):
	3294	"""Post Processor class.
	3295
	3296	PostProcessor objects can be added to downloaders with their
	3297	add_post_processor() method. When the downloader has finished a
	3298	successful download, it will take its internal chain of PostProcessors
	3299	and start calling the run() method on each one of them, first with
	3300	an initial argument and then with the returned value of the previous
	3301	PostProcessor.
	3302
	3303	The chain will be stopped if one of them ever returns None or the end
	3304	of the chain is reached.
	3305
	3306	PostProcessor objects follow a "mutual registration" process similar
	3307	to InfoExtractor objects.
	3308	"""
	3309
	3310	_downloader = None
	3311
	3312	def __init__(self, downloader=None):
	3313	self._downloader = downloader
	3314
	3315	def set_downloader(self, downloader):
	3316	"""Sets the downloader for this PP."""
	3317	self._downloader = downloader
	3318
	3319	def run(self, information):
	3320	"""Run the PostProcessor.
	3321
	3322	The "information" argument is a dictionary like the ones
	3323	composed by InfoExtractors. The only difference is that this
	3324	one has an extra field called "filepath" that points to the
	3325	downloaded file.
	3326
	3327	When this method returns None, the postprocessing chain is
	3328	stopped. However, this method may return an information
	3329	dictionary that will be passed to the next postprocessing
	3330	object in the chain. It can be the one it received after
	3331	changing some fields.
	3332
	3333	In addition, this method may raise a PostProcessingError
	3334	exception that will be taken into account by the downloader
	3335	it was called from.
	3336	"""
	3337	return information # by default, do nothing
	3338
	3339
	3340	class FFmpegExtractAudioPP(PostProcessor):
	3341
	3342	def __init__(self, downloader=None, preferredcodec=None):
	3343	PostProcessor.__init__(self, downloader)
	3344	if preferredcodec is None:
	3345	preferredcodec = 'best'
	3346	self._preferredcodec = preferredcodec
	3347
	3348	@staticmethod
	3349	def get_audio_codec(path):
	3350	try:
	3351	cmd = ['ffprobe', '-show_streams', '--', path]
	3352	handle = subprocess.Popen(cmd, stderr=file(os.path.devnull, 'w'), stdout=subprocess.PIPE)
	3353	output = handle.communicate()[0]
	3354	if handle.wait() != 0:
	3355	return None
	3356	except (IOError, OSError):
	3357	return None
	3358	audio_codec = None
	3359	for line in output.split('\n'):
	3360	if line.startswith('codec_name='):
	3361	audio_codec = line.split('=')[1].strip()
	3362	elif line.strip() == 'codec_type=audio' and audio_codec is not None:
	3363	return audio_codec
	3364	return None
	3365
	3366	@staticmethod
	3367	def run_ffmpeg(path, out_path, codec, more_opts):
	3368	try:
	3369	cmd = ['ffmpeg', '-y', '-i', path, '-vn', '-acodec', codec] + more_opts + ['--', out_path]
	3370	ret = subprocess.call(cmd, stdout=file(os.path.devnull, 'w'), stderr=subprocess.STDOUT)
	3371	return (ret == 0)
	3372	except (IOError, OSError):
	3373	return False
	3374
	3375	def run(self, information):
	3376	path = information['filepath']
	3377
	3378	filecodec = self.get_audio_codec(path)
	3379	if filecodec is None:
	3380	self._downloader.to_stderr(u'WARNING: unable to obtain file audio codec with ffprobe')
	3381	return None
	3382
	3383	more_opts = []
	3384	if self._preferredcodec == 'best' or self._preferredcodec == filecodec:
	3385	if filecodec == 'aac' or filecodec == 'mp3':
	3386	# Lossless if possible
	3387	acodec = 'copy'
	3388	extension = filecodec
	3389	if filecodec == 'aac':
	3390	more_opts = ['-f', 'adts']
	3391	else:
	3392	# MP3 otherwise.
	3393	acodec = 'libmp3lame'
	3394	extension = 'mp3'
	3395	more_opts = ['-ab', '128k']
	3396	else:
	3397	# We convert the audio (lossy)
	3398	acodec = {'mp3': 'libmp3lame', 'aac': 'aac'}[self._preferredcodec]
	3399	extension = self._preferredcodec
	3400	more_opts = ['-ab', '128k']
	3401	if self._preferredcodec == 'aac':
	3402	more_opts += ['-f', 'adts']
	3403
	3404	(prefix, ext) = os.path.splitext(path)
	3405	new_path = prefix + '.' + extension
	3406	self._downloader.to_screen(u'[ffmpeg] Destination: %s' % new_path)
	3407	status = self.run_ffmpeg(path, new_path, acodec, more_opts)
	3408
	3409	if not status:
	3410	self._downloader.to_stderr(u'WARNING: error running ffmpeg')
	3411	return None
	3412
	3413	try:
	3414	os.remove(path)
	3415	except (IOError, OSError):
	3416	self._downloader.to_stderr(u'WARNING: Unable to remove downloaded video file')
	3417	return None
	3418
	3419	information['filepath'] = new_path
	3420	return information
	3421
	3422
	3423	def updateSelf(downloader, filename):
	3424	''' Update the program file with the latest version from the repository '''
	3425	# Note: downloader only used for options
	3426	if not os.access(filename, os.W_OK):
	3427	sys.exit('ERROR: no write permissions on %s' % filename)
	3428
	3429	downloader.to_screen('Updating to latest version...')
	3430
	3431	try:
	3432	try:
	3433	urlh = urllib.urlopen(UPDATE_URL)
	3434	newcontent = urlh.read()
	3435	finally:
	3436	urlh.close()
	3437	except (IOError, OSError), err:
	3438	sys.exit('ERROR: unable to download latest version')
	3439
	3440	try:
	3441	outf = open(filename, 'wb')
	3442	try:
	3443	outf.write(newcontent)
	3444	finally:
	3445	outf.close()
	3446	except (IOError, OSError), err:
	3447	sys.exit('ERROR: unable to overwrite current version')
	3448
	3449	downloader.to_screen('Updated youtube-dl. Restart to use the new version.')
	3450
	3451	def parseOpts():
	3452	# Deferred imports
	3453	import getpass
	3454	import optparse
	3455
	3456	def _format_option_string(option):
	3457	''' ('-o', '--option') -> -o, --format METAVAR'''
	3458
	3459	opts = []
	3460
	3461	if option._short_opts: opts.append(option._short_opts[0])
	3462	if option._long_opts: opts.append(option._long_opts[0])
	3463	if len(opts) > 1: opts.insert(1, ', ')
	3464
	3465	if option.takes_value(): opts.append(' %s' % option.metavar)
	3466
	3467	return "".join(opts)
	3468
	3469	def _find_term_columns():
	3470	columns = os.environ.get('COLUMNS', None)
	3471	if columns:
	3472	return int(columns)
	3473
	3474	try:
	3475	sp = subprocess.Popen(['stty', 'size'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
	3476	out,err = sp.communicate()
	3477	return int(out.split()[1])
	3478	except:
	3479	pass
	3480	return None
	3481
	3482	max_width = 80
	3483	max_help_position = 80
	3484
	3485	# No need to wrap help messages if we're on a wide console
	3486	columns = _find_term_columns()
	3487	if columns: max_width = columns
	3488
	3489	fmt = optparse.IndentedHelpFormatter(width=max_width, max_help_position=max_help_position)
	3490	fmt.format_option_strings = _format_option_string
	3491
	3492	kw = {
	3493	'version' : __version__,
	3494	'formatter' : fmt,
	3495	'usage' : '%prog [options] url [url...]',
	3496	'conflict_handler' : 'resolve',
	3497	}
	3498
	3499	parser = optparse.OptionParser(**kw)
	3500
	3501	# option groups
	3502	general = optparse.OptionGroup(parser, 'General Options')
	3503	selection = optparse.OptionGroup(parser, 'Video Selection')
	3504	authentication = optparse.OptionGroup(parser, 'Authentication Options')
	3505	video_format = optparse.OptionGroup(parser, 'Video Format Options')
	3506	postproc = optparse.OptionGroup(parser, 'Post-processing Options')
	3507	filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
	3508	verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
	3509
	3510	general.add_option('-h', '--help',
	3511	action='help', help='print this help text and exit')
	3512	general.add_option('-v', '--version',
	3513	action='version', help='print program version and exit')
	3514	general.add_option('-U', '--update',
	3515	action='store_true', dest='update_self', help='update this program to latest version')
	3516	general.add_option('-i', '--ignore-errors',
	3517	action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
	3518	general.add_option('-r', '--rate-limit',
	3519	dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)')
	3520	general.add_option('-R', '--retries',
	3521	dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10)
	3522	general.add_option('--dump-user-agent',
	3523	action='store_true', dest='dump_user_agent',
	3524	help='display the current browser identification', default=False)
	3525
	3526	selection.add_option('--playlist-start',
	3527	dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1)
	3528	selection.add_option('--playlist-end',
	3529	dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)
	3530	selection.add_option('--match-title', dest='matchtitle', metavar='REGEX',help='download only matching titles (regex or caseless sub-string)')
	3531	selection.add_option('--reject-title', dest='rejecttitle', metavar='REGEX',help='skip download for matching titles (regex or caseless sub-string)')
	3532
	3533	authentication.add_option('-u', '--username',
	3534	dest='username', metavar='USERNAME', help='account username')
	3535	authentication.add_option('-p', '--password',
	3536	dest='password', metavar='PASSWORD', help='account password')
	3537	authentication.add_option('-n', '--netrc',
	3538	action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
	3539
	3540
	3541	video_format.add_option('-f', '--format',
	3542	action='store', dest='format', metavar='FORMAT', help='video format code')
	3543	video_format.add_option('--all-formats',
	3544	action='store_const', dest='format', help='download all available video formats', const='-1')
	3545	video_format.add_option('--max-quality',
	3546	action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download')
	3547
	3548
	3549	verbosity.add_option('-q', '--quiet',
	3550	action='store_true', dest='quiet', help='activates quiet mode', default=False)
	3551	verbosity.add_option('-s', '--simulate',
	3552	action='store_true', dest='simulate', help='do not download video', default=False)
	3553	verbosity.add_option('-g', '--get-url',
	3554	action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
	3555	verbosity.add_option('-e', '--get-title',
	3556	action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
	3557	verbosity.add_option('--get-thumbnail',
	3558	action='store_true', dest='getthumbnail',
	3559	help='simulate, quiet but print thumbnail URL', default=False)
	3560	verbosity.add_option('--get-description',
	3561	action='store_true', dest='getdescription',
	3562	help='simulate, quiet but print video description', default=False)
	3563	verbosity.add_option('--get-filename',
	3564	action='store_true', dest='getfilename',
	3565	help='simulate, quiet but print output filename', default=False)
	3566	verbosity.add_option('--no-progress',
	3567	action='store_true', dest='noprogress', help='do not print progress bar', default=False)
	3568	verbosity.add_option('--console-title',
	3569	action='store_true', dest='consoletitle',
	3570	help='display progress in console titlebar', default=False)
	3571
	3572
	3573	filesystem.add_option('-t', '--title',
	3574	action='store_true', dest='usetitle', help='use title in file name', default=False)
	3575	filesystem.add_option('-l', '--literal',
	3576	action='store_true', dest='useliteral', help='use literal title in file name', default=False)
	3577	filesystem.add_option('-A', '--auto-number',
	3578	action='store_true', dest='autonumber',
	3579	help='number downloaded files starting from 00000', default=False)
	3580	filesystem.add_option('-o', '--output',
	3581	dest='outtmpl', metavar='TEMPLATE', help='output filename template')
	3582	filesystem.add_option('-a', '--batch-file',
	3583	dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
	3584	filesystem.add_option('-w', '--no-overwrites',
	3585	action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
	3586	filesystem.add_option('-c', '--continue',
	3587	action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False)
	3588	filesystem.add_option('--cookies',
	3589	dest='cookiefile', metavar='FILE', help='file to dump cookie jar to')
	3590	filesystem.add_option('--no-part',
	3591	action='store_true', dest='nopart', help='do not use .part files', default=False)
	3592	filesystem.add_option('--no-mtime',
	3593	action='store_false', dest='updatetime',
	3594	help='do not use the Last-modified header to set the file modification time', default=True)
	3595	filesystem.add_option('--write-description',
	3596	action='store_true', dest='writedescription',
	3597	help='write video description to a .description file', default=False)
	3598	filesystem.add_option('--write-info-json',
	3599	action='store_true', dest='writeinfojson',
	3600	help='write video metadata to a .info.json file', default=False)
	3601
	3602
	3603	postproc.add_option('--extract-audio', action='store_true', dest='extractaudio', default=False,
	3604	help='convert video files to audio-only files (requires ffmpeg and ffprobe)')
	3605	postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best',
	3606	help='"best", "aac" or "mp3"; best by default')
	3607
	3608
	3609	parser.add_option_group(general)
	3610	parser.add_option_group(selection)
	3611	parser.add_option_group(filesystem)
	3612	parser.add_option_group(verbosity)
	3613	parser.add_option_group(video_format)
	3614	parser.add_option_group(authentication)
	3615	parser.add_option_group(postproc)
	3616
	3617	opts, args = parser.parse_args()
	3618
	3619	return parser, opts, args
	3620
	3621	def main():
	3622	parser, opts, args = parseOpts()
	3623
	3624	# Open appropriate CookieJar
	3625	if opts.cookiefile is None:
	3626	jar = cookielib.CookieJar()
	3627	else:
	3628	try:
	3629	jar = cookielib.MozillaCookieJar(opts.cookiefile)
	3630	if os.path.isfile(opts.cookiefile) and os.access(opts.cookiefile, os.R_OK):
	3631	jar.load()
	3632	except (IOError, OSError), err:
	3633	sys.exit(u'ERROR: unable to open cookie file')
	3634
	3635	# Dump user agent
	3636	if opts.dump_user_agent:
	3637	print std_headers['User-Agent']
	3638	sys.exit(0)
	3639
	3640	# General configuration
	3641	cookie_processor = urllib2.HTTPCookieProcessor(jar)
	3642	opener = urllib2.build_opener(urllib2.ProxyHandler(), cookie_processor, YoutubeDLHandler())
	3643	urllib2.install_opener(opener)
	3644	socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)
	3645
	3646	# Batch file verification
	3647	batchurls = []
	3648	if opts.batchfile is not None:
	3649	try:
	3650	if opts.batchfile == '-':
	3651	batchfd = sys.stdin
	3652	else:
	3653	batchfd = open(opts.batchfile, 'r')
	3654	batchurls = batchfd.readlines()
	3655	batchurls = [x.strip() for x in batchurls]
	3656	batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)]
	3657	except IOError:
	3658	sys.exit(u'ERROR: batch file could not be read')
	3659	all_urls = batchurls + args
	3660
	3661	# Conflicting, missing and erroneous options
	3662	if opts.usenetrc and (opts.username is not None or opts.password is not None):
	3663	parser.error(u'using .netrc conflicts with giving username/password')
	3664	if opts.password is not None and opts.username is None:
	3665	parser.error(u'account username missing')
	3666	if opts.outtmpl is not None and (opts.useliteral or opts.usetitle or opts.autonumber):
	3667	parser.error(u'using output template conflicts with using title, literal title or auto number')
	3668	if opts.usetitle and opts.useliteral:
	3669	parser.error(u'using title conflicts with using literal title')
	3670	if opts.username is not None and opts.password is None:
	3671	opts.password = getpass.getpass(u'Type account password and press return:')
	3672	if opts.ratelimit is not None:
	3673	numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
	3674	if numeric_limit is None:
	3675	parser.error(u'invalid rate limit specified')
	3676	opts.ratelimit = numeric_limit
	3677	if opts.retries is not None:
	3678	try:
	3679	opts.retries = long(opts.retries)
	3680	except (TypeError, ValueError), err:
	3681	parser.error(u'invalid retry count specified')
	3682	try:
	3683	opts.playliststart = int(opts.playliststart)
	3684	if opts.playliststart <= 0:
	3685	raise ValueError(u'Playlist start must be positive')
	3686	except (TypeError, ValueError), err:
	3687	parser.error(u'invalid playlist start number specified')
	3688	try:
	3689	opts.playlistend = int(opts.playlistend)
	3690	if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart):
	3691	raise ValueError(u'Playlist end must be greater than playlist start')
	3692	except (TypeError, ValueError), err:
	3693	parser.error(u'invalid playlist end number specified')
	3694	if opts.extractaudio:
	3695	if opts.audioformat not in ['best', 'aac', 'mp3']:
	3696	parser.error(u'invalid audio format specified')
	3697
	3698	# Information extractors
	3699	youtube_ie = YoutubeIE()
	3700	google_ie = GoogleIE()
	3701	yahoo_ie = YahooIE()
	3702	extractors = [ # Order does matter
	3703	youtube_ie,
	3704	MetacafeIE(youtube_ie),
	3705	DailymotionIE(),
	3706	YoutubePlaylistIE(youtube_ie),
	3707	YoutubeUserIE(youtube_ie),
	3708	YoutubeSearchIE(youtube_ie),
	3709	google_ie,
	3710	GoogleSearchIE(google_ie),
	3711	PhotobucketIE(),
	3712	yahoo_ie,
	3713	YahooSearchIE(yahoo_ie),
	3714	DepositFilesIE(),
	3715	FacebookIE(),
	3716	BlipTVIE(),
	3717	VimeoIE(),
	3718	MyVideoIE(),
	3719	ComedyCentralIE(),
	3720	EscapistIE(),
	3721
	3722	GenericIE()
	3723	]
	3724
	3725	# File downloader
	3726	fd = FileDownloader({
	3727	'usenetrc': opts.usenetrc,
	3728	'username': opts.username,
	3729	'password': opts.password,
	3730	'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename),
	3731	'forceurl': opts.geturl,
	3732	'forcetitle': opts.gettitle,
	3733	'forcethumbnail': opts.getthumbnail,
	3734	'forcedescription': opts.getdescription,
	3735	'forcefilename': opts.getfilename,
	3736	'simulate': (opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename),
	3737	'format': opts.format,
	3738	'format_limit': opts.format_limit,
	3739	'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
	3740	or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s')
	3741	or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s')
	3742	or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s')
	3743	or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(stitle)s-%(id)s.%(ext)s')
	3744	or (opts.useliteral and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s')
	3745	or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
	3746	or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
	3747	or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
	3748	or u'%(id)s.%(ext)s'),
	3749	'ignoreerrors': opts.ignoreerrors,
	3750	'ratelimit': opts.ratelimit,
	3751	'nooverwrites': opts.nooverwrites,
	3752	'retries': opts.retries,
	3753	'continuedl': opts.continue_dl,
	3754	'noprogress': opts.noprogress,
	3755	'playliststart': opts.playliststart,
	3756	'playlistend': opts.playlistend,
	3757	'logtostderr': opts.outtmpl == '-',
	3758	'consoletitle': opts.consoletitle,
	3759	'nopart': opts.nopart,
	3760	'updatetime': opts.updatetime,
	3761	'writedescription': opts.writedescription,
	3762	'writeinfojson': opts.writeinfojson,
	3763	'matchtitle': opts.matchtitle,
	3764	'rejecttitle': opts.rejecttitle,
	3765	})
	3766	for extractor in extractors:
	3767	fd.add_info_extractor(extractor)
	3768
	3769	# PostProcessors
	3770	if opts.extractaudio:
	3771	fd.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat))
	3772
	3773	# Update version
	3774	if opts.update_self:
	3775	updateSelf(fd, sys.argv[0])
	3776
	3777	# Maybe do nothing
	3778	if len(all_urls) < 1:
	3779	if not opts.update_self:
	3780	parser.error(u'you must provide at least one URL')
	3781	else:
	3782	sys.exit()
	3783	retcode = fd.download(all_urls)
	3784
	3785	# Dump cookie jar if requested
	3786	if opts.cookiefile is not None:
	3787	try:
	3788	jar.save()
	3789	except (IOError, OSError), err:
	3790	sys.exit(u'ERROR: unable to save cookie jar')
	3791
	3792	sys.exit(retcode)
	3793
	3794
	3795	if __name__ == '__main__':
	3796	try:
	3797	main()
	3798	except DownloadError:
	3799	sys.exit(1)
	3800	except SameFileError:
	3801	sys.exit(u'ERROR: fixed output name but more than one file to download')
	3802	except KeyboardInterrupt:
	3803	sys.exit(u'\nERROR: Interrupted by user')
	3804
	3805	# vim: set ts=4 sw=4 sts=4 noet ai si filetype=python: