jfr.im git - yt-dlp.git/blame_incremental

... / ...

Commit	Line	Data
	1	#!/usr/bin/env python
	2	# -*- coding: utf-8 -*-
	3
	4	__author__ = (
	5	'Ricardo Garcia Gonzalez',
	6	'Danny Colligan',
	7	'Benjamin Johnson',
	8	'Vasyl\' Vavrychuk',
	9	'Witold Baryluk',
	10	'Paweł Paprota',
	11	'Gergely Imreh',
	12	'Rogério Brito',
	13	'Philipp Hagemeister',
	14	'Sören Schulze',
	15	'Kevin Ngo',
	16	'Ori Avtalion',
	17	'shizeeg',
	18	)
	19
	20	__license__ = 'Public Domain'
	21	__version__ = '2011.11.23'
	22
	23	UPDATE_URL = 'https://raw.github.com/rg3/youtube-dl/master/youtube-dl'
	24
	25	import cookielib
	26	import datetime
	27	import gzip
	28	import htmlentitydefs
	29	import HTMLParser
	30	import httplib
	31	import locale
	32	import math
	33	import netrc
	34	import os
	35	import os.path
	36	import re
	37	import socket
	38	import string
	39	import subprocess
	40	import sys
	41	import time
	42	import urllib
	43	import urllib2
	44	import warnings
	45	import zlib
	46
	47	if os.name == 'nt':
	48	import ctypes
	49
	50	try:
	51	import email.utils
	52	except ImportError: # Python 2.4
	53	import email.Utils
	54	try:
	55	import cStringIO as StringIO
	56	except ImportError:
	57	import StringIO
	58
	59	# parse_qs was moved from the cgi module to the urlparse module recently.
	60	try:
	61	from urlparse import parse_qs
	62	except ImportError:
	63	from cgi import parse_qs
	64
	65	try:
	66	import lxml.etree
	67	except ImportError:
	68	pass # Handled below
	69
	70	try:
	71	import xml.etree.ElementTree
	72	except ImportError: # Python<2.5: Not officially supported, but let it slip
	73	warnings.warn('xml.etree.ElementTree support is missing. Consider upgrading to Python >= 2.5 if you get related errors.')
	74
	75	std_headers = {
	76	'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:5.0.1) Gecko/20100101 Firefox/5.0.1',
	77	'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
	78	'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,/;q=0.8',
	79	'Accept-Encoding': 'gzip, deflate',
	80	'Accept-Language': 'en-us,en;q=0.5',
	81	}
	82
	83	try:
	84	import json
	85	except ImportError: # Python <2.6, use trivialjson (https://github.com/phihag/trivialjson):
	86	import re
	87	class json(object):
	88	@staticmethod
	89	def loads(s):
	90	s = s.decode('UTF-8')
	91	def raiseError(msg, i):
	92	raise ValueError(msg + ' at position ' + str(i) + ' of ' + repr(s) + ': ' + repr(s[i:]))
	93	def skipSpace(i, expectMore=True):
	94	while i < len(s) and s[i] in ' \t\r\n':
	95	i += 1
	96	if expectMore:
	97	if i >= len(s):
	98	raiseError('Premature end', i)
	99	return i
	100	def decodeEscape(match):
	101	esc = match.group(1)
	102	_STATIC = {
	103	'"': '"',
	104	'\\': '\\',
	105	'/': '/',
	106	'b': unichr(0x8),
	107	'f': unichr(0xc),
	108	'n': '\n',
	109	'r': '\r',
	110	't': '\t',
	111	}
	112	if esc in _STATIC:
	113	return _STATIC[esc]
	114	if esc[0] == 'u':
	115	if len(esc) == 1+4:
	116	return unichr(int(esc[1:5], 16))
	117	if len(esc) == 5+6 and esc[5:7] == '\\u':
	118	hi = int(esc[1:5], 16)
	119	low = int(esc[7:11], 16)
	120	return unichr((hi - 0xd800) * 0x400 + low - 0xdc00 + 0x10000)
	121	raise ValueError('Unknown escape ' + str(esc))
	122	def parseString(i):
	123	i += 1
	124	e = i
	125	while True:
	126	e = s.index('"', e)
	127	bslashes = 0
	128	while s[e-bslashes-1] == '\\':
	129	bslashes += 1
	130	if bslashes % 2 == 1:
	131	e += 1
	132	continue
	133	break
	134	rexp = re.compile(r'\\(u[dD][89aAbB][0-9a-fA-F]{2}\\u[0-9a-fA-F]{4}\|u[0-9a-fA-F]{4}\|.\|$)')
	135	stri = rexp.sub(decodeEscape, s[i:e])
	136	return (e+1,stri)
	137	def parseObj(i):
	138	i += 1
	139	res = {}
	140	i = skipSpace(i)
	141	if s[i] == '}': # Empty dictionary
	142	return (i+1,res)
	143	while True:
	144	if s[i] != '"':
	145	raiseError('Expected a string object key', i)
	146	i,key = parseString(i)
	147	i = skipSpace(i)
	148	if i >= len(s) or s[i] != ':':
	149	raiseError('Expected a colon', i)
	150	i,val = parse(i+1)
	151	res[key] = val
	152	i = skipSpace(i)
	153	if s[i] == '}':
	154	return (i+1, res)
	155	if s[i] != ',':
	156	raiseError('Expected comma or closing curly brace', i)
	157	i = skipSpace(i+1)
	158	def parseArray(i):
	159	res = []
	160	i = skipSpace(i+1)
	161	if s[i] == ']': # Empty array
	162	return (i+1,res)
	163	while True:
	164	i,val = parse(i)
	165	res.append(val)
	166	i = skipSpace(i) # Raise exception if premature end
	167	if s[i] == ']':
	168	return (i+1, res)
	169	if s[i] != ',':
	170	raiseError('Expected a comma or closing bracket', i)
	171	i = skipSpace(i+1)
	172	def parseDiscrete(i):
	173	for k,v in {'true': True, 'false': False, 'null': None}.items():
	174	if s.startswith(k, i):
	175	return (i+len(k), v)
	176	raiseError('Not a boolean (or null)', i)
	177	def parseNumber(i):
	178	mobj = re.match('^(-?(0\|[1-9][0-9])(\.[0-9])?([eE][+-]?[0-9]+)?)', s[i:])
	179	if mobj is None:
	180	raiseError('Not a number', i)
	181	nums = mobj.group(1)
	182	if '.' in nums or 'e' in nums or 'E' in nums:
	183	return (i+len(nums), float(nums))
	184	return (i+len(nums), int(nums))
	185	CHARMAP = {'{': parseObj, '[': parseArray, '"': parseString, 't': parseDiscrete, 'f': parseDiscrete, 'n': parseDiscrete}
	186	def parse(i):
	187	i = skipSpace(i)
	188	i,res = CHARMAP.get(s[i], parseNumber)(i)
	189	i = skipSpace(i, False)
	190	return (i,res)
	191	i,res = parse(0)
	192	if i < len(s):
	193	raise ValueError('Extra data at end of input (index ' + str(i) + ' of ' + repr(s) + ': ' + repr(s[i:]) + ')')
	194	return res
	195
	196	def preferredencoding():
	197	"""Get preferred encoding.
	198
	199	Returns the best encoding scheme for the system, based on
	200	locale.getpreferredencoding() and some further tweaks.
	201	"""
	202	def yield_preferredencoding():
	203	try:
	204	pref = locale.getpreferredencoding()
	205	u'TEST'.encode(pref)
	206	except:
	207	pref = 'UTF-8'
	208	while True:
	209	yield pref
	210	return yield_preferredencoding().next()
	211
	212
	213	def htmlentity_transform(matchobj):
	214	"""Transforms an HTML entity to a Unicode character.
	215
	216	This function receives a match object and is intended to be used with
	217	the re.sub() function.
	218	"""
	219	entity = matchobj.group(1)
	220
	221	# Known non-numeric HTML entity
	222	if entity in htmlentitydefs.name2codepoint:
	223	return unichr(htmlentitydefs.name2codepoint[entity])
	224
	225	# Unicode character
	226	mobj = re.match(ur'(?u)#(x?\d+)', entity)
	227	if mobj is not None:
	228	numstr = mobj.group(1)
	229	if numstr.startswith(u'x'):
	230	base = 16
	231	numstr = u'0%s' % numstr
	232	else:
	233	base = 10
	234	return unichr(long(numstr, base))
	235
	236	# Unknown entity in name, return its literal representation
	237	return (u'&%s;' % entity)
	238
	239
	240	def sanitize_title(utitle):
	241	"""Sanitizes a video title so it could be used as part of a filename."""
	242	utitle = re.sub(ur'(?u)&(.+?);', htmlentity_transform, utitle)
	243	return utitle.replace(unicode(os.sep), u'%')
	244
	245
	246	def sanitize_open(filename, open_mode):
	247	"""Try to open the given filename, and slightly tweak it if this fails.
	248
	249	Attempts to open the given filename. If this fails, it tries to change
	250	the filename slightly, step by step, until it's either able to open it
	251	or it fails and raises a final exception, like the standard open()
	252	function.
	253
	254	It returns the tuple (stream, definitive_file_name).
	255	"""
	256	try:
	257	if filename == u'-':
	258	if sys.platform == 'win32':
	259	import msvcrt
	260	msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
	261	return (sys.stdout, filename)
	262	stream = open(filename, open_mode)
	263	return (stream, filename)
	264	except (IOError, OSError), err:
	265	# In case of error, try to remove win32 forbidden chars
	266	filename = re.sub(ur'[/<>:"\\|\?\*]', u'#', filename)
	267
	268	# An exception here should be caught in the caller
	269	stream = open(filename, open_mode)
	270	return (stream, filename)
	271
	272
	273	def timeconvert(timestr):
	274	"""Convert RFC 2822 defined time string into system timestamp"""
	275	timestamp = None
	276	timetuple = email.utils.parsedate_tz(timestr)
	277	if timetuple is not None:
	278	timestamp = email.utils.mktime_tz(timetuple)
	279	return timestamp
	280
	281	def _simplify_title(title):
	282	expr = re.compile(ur'[^\w\d_\-]+', flags=re.UNICODE)
	283	return expr.sub(u'_', title).strip(u'_')
	284
	285	class DownloadError(Exception):
	286	"""Download Error exception.
	287
	288	This exception may be thrown by FileDownloader objects if they are not
	289	configured to continue on errors. They will contain the appropriate
	290	error message.
	291	"""
	292	pass
	293
	294
	295	class SameFileError(Exception):
	296	"""Same File exception.
	297
	298	This exception will be thrown by FileDownloader objects if they detect
	299	multiple files would have to be downloaded to the same file on disk.
	300	"""
	301	pass
	302
	303
	304	class PostProcessingError(Exception):
	305	"""Post Processing exception.
	306
	307	This exception may be raised by PostProcessor's .run() method to
	308	indicate an error in the postprocessing task.
	309	"""
	310	pass
	311
	312
	313	class UnavailableVideoError(Exception):
	314	"""Unavailable Format exception.
	315
	316	This exception will be thrown when a video is requested
	317	in a format that is not available for that video.
	318	"""
	319	pass
	320
	321
	322	class ContentTooShortError(Exception):
	323	"""Content Too Short exception.
	324
	325	This exception may be raised by FileDownloader objects when a file they
	326	download is too small for what the server announced first, indicating
	327	the connection was probably interrupted.
	328	"""
	329	# Both in bytes
	330	downloaded = None
	331	expected = None
	332
	333	def __init__(self, downloaded, expected):
	334	self.downloaded = downloaded
	335	self.expected = expected
	336
	337
	338	class YoutubeDLHandler(urllib2.HTTPHandler):
	339	"""Handler for HTTP requests and responses.
	340
	341	This class, when installed with an OpenerDirector, automatically adds
	342	the standard headers to every HTTP request and handles gzipped and
	343	deflated responses from web servers. If compression is to be avoided in
	344	a particular request, the original request in the program code only has
	345	to include the HTTP header "Youtubedl-No-Compression", which will be
	346	removed before making the real request.
	347
	348	Part of this code was copied from:
	349
	350	http://techknack.net/python-urllib2-handlers/
	351
	352	Andrew Rowls, the author of that code, agreed to release it to the
	353	public domain.
	354	"""
	355
	356	@staticmethod
	357	def deflate(data):
	358	try:
	359	return zlib.decompress(data, -zlib.MAX_WBITS)
	360	except zlib.error:
	361	return zlib.decompress(data)
	362
	363	@staticmethod
	364	def addinfourl_wrapper(stream, headers, url, code):
	365	if hasattr(urllib2.addinfourl, 'getcode'):
	366	return urllib2.addinfourl(stream, headers, url, code)
	367	ret = urllib2.addinfourl(stream, headers, url)
	368	ret.code = code
	369	return ret
	370
	371	def http_request(self, req):
	372	for h in std_headers:
	373	if h in req.headers:
	374	del req.headers[h]
	375	req.add_header(h, std_headers[h])
	376	if 'Youtubedl-no-compression' in req.headers:
	377	if 'Accept-encoding' in req.headers:
	378	del req.headers['Accept-encoding']
	379	del req.headers['Youtubedl-no-compression']
	380	return req
	381
	382	def http_response(self, req, resp):
	383	old_resp = resp
	384	# gzip
	385	if resp.headers.get('Content-encoding', '') == 'gzip':
	386	gz = gzip.GzipFile(fileobj=StringIO.StringIO(resp.read()), mode='r')
	387	resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
	388	resp.msg = old_resp.msg
	389	# deflate
	390	if resp.headers.get('Content-encoding', '') == 'deflate':
	391	gz = StringIO.StringIO(self.deflate(resp.read()))
	392	resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
	393	resp.msg = old_resp.msg
	394	return resp
	395
	396
	397	class FileDownloader(object):
	398	"""File Downloader class.
	399
	400	File downloader objects are the ones responsible of downloading the
	401	actual video file and writing it to disk if the user has requested
	402	it, among some other tasks. In most cases there should be one per
	403	program. As, given a video URL, the downloader doesn't know how to
	404	extract all the needed information, task that InfoExtractors do, it
	405	has to pass the URL to one of them.
	406
	407	For this, file downloader objects have a method that allows
	408	InfoExtractors to be registered in a given order. When it is passed
	409	a URL, the file downloader handles it to the first InfoExtractor it
	410	finds that reports being able to handle it. The InfoExtractor extracts
	411	all the information about the video or videos the URL refers to, and
	412	asks the FileDownloader to process the video information, possibly
	413	downloading the video.
	414
	415	File downloaders accept a lot of parameters. In order not to saturate
	416	the object constructor with arguments, it receives a dictionary of
	417	options instead. These options are available through the params
	418	attribute for the InfoExtractors to use. The FileDownloader also
	419	registers itself as the downloader in charge for the InfoExtractors
	420	that are added to it, so this is a "mutual registration".
	421
	422	Available options:
	423
	424	username: Username for authentication purposes.
	425	password: Password for authentication purposes.
	426	usenetrc: Use netrc for authentication instead.
	427	quiet: Do not print messages to stdout.
	428	forceurl: Force printing final URL.
	429	forcetitle: Force printing title.
	430	forcethumbnail: Force printing thumbnail URL.
	431	forcedescription: Force printing description.
	432	forcefilename: Force printing final filename.
	433	simulate: Do not download the video files.
	434	format: Video format code.
	435	format_limit: Highest quality format to try.
	436	outtmpl: Template for output names.
	437	ignoreerrors: Do not stop on download errors.
	438	ratelimit: Download speed limit, in bytes/sec.
	439	nooverwrites: Prevent overwriting files.
	440	retries: Number of times to retry for HTTP error 5xx
	441	continuedl: Try to continue downloads if possible.
	442	noprogress: Do not print the progress bar.
	443	playliststart: Playlist item to start at.
	444	playlistend: Playlist item to end at.
	445	matchtitle: Download only matching titles.
	446	rejecttitle: Reject downloads for matching titles.
	447	logtostderr: Log messages to stderr instead of stdout.
	448	consoletitle: Display progress in console window's titlebar.
	449	nopart: Do not use temporary .part files.
	450	updatetime: Use the Last-modified header to set output file timestamps.
	451	writedescription: Write the video description to a .description file
	452	writeinfojson: Write the video metadata to a .info.json file
	453	"""
	454
	455	params = None
	456	_ies = []
	457	_pps = []
	458	_download_retcode = None
	459	_num_downloads = None
	460	_screen_file = None
	461
	462	def __init__(self, params):
	463	"""Create a FileDownloader object with the given options."""
	464	self._ies = []
	465	self._pps = []
	466	self._download_retcode = 0
	467	self._num_downloads = 0
	468	self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
	469	self.params = params
	470
	471	@staticmethod
	472	def format_bytes(bytes):
	473	if bytes is None:
	474	return 'N/A'
	475	if type(bytes) is str:
	476	bytes = float(bytes)
	477	if bytes == 0.0:
	478	exponent = 0
	479	else:
	480	exponent = long(math.log(bytes, 1024.0))
	481	suffix = 'bkMGTPEZY'[exponent]
	482	converted = float(bytes) / float(1024 ** exponent)
	483	return '%.2f%s' % (converted, suffix)
	484
	485	@staticmethod
	486	def calc_percent(byte_counter, data_len):
	487	if data_len is None:
	488	return '---.-%'
	489	return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))
	490
	491	@staticmethod
	492	def calc_eta(start, now, total, current):
	493	if total is None:
	494	return '--:--'
	495	dif = now - start
	496	if current == 0 or dif < 0.001: # One millisecond
	497	return '--:--'
	498	rate = float(current) / dif
	499	eta = long((float(total) - float(current)) / rate)
	500	(eta_mins, eta_secs) = divmod(eta, 60)
	501	if eta_mins > 99:
	502	return '--:--'
	503	return '%02d:%02d' % (eta_mins, eta_secs)
	504
	505	@staticmethod
	506	def calc_speed(start, now, bytes):
	507	dif = now - start
	508	if bytes == 0 or dif < 0.001: # One millisecond
	509	return '%10s' % '---b/s'
	510	return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))
	511
	512	@staticmethod
	513	def best_block_size(elapsed_time, bytes):
	514	new_min = max(bytes / 2.0, 1.0)
	515	new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
	516	if elapsed_time < 0.001:
	517	return long(new_max)
	518	rate = bytes / elapsed_time
	519	if rate > new_max:
	520	return long(new_max)
	521	if rate < new_min:
	522	return long(new_min)
	523	return long(rate)
	524
	525	@staticmethod
	526	def parse_bytes(bytestr):
	527	"""Parse a string indicating a byte quantity into a long integer."""
	528	matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
	529	if matchobj is None:
	530	return None
	531	number = float(matchobj.group(1))
	532	multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
	533	return long(round(number * multiplier))
	534
	535	def add_info_extractor(self, ie):
	536	"""Add an InfoExtractor object to the end of the list."""
	537	self._ies.append(ie)
	538	ie.set_downloader(self)
	539
	540	def add_post_processor(self, pp):
	541	"""Add a PostProcessor object to the end of the chain."""
	542	self._pps.append(pp)
	543	pp.set_downloader(self)
	544
	545	def to_screen(self, message, skip_eol=False, ignore_encoding_errors=False):
	546	"""Print message to stdout if not in quiet mode."""
	547	try:
	548	if not self.params.get('quiet', False):
	549	terminator = [u'\n', u''][skip_eol]
	550	print >>self._screen_file, (u'%s%s' % (message, terminator)).encode(preferredencoding()),
	551	self._screen_file.flush()
	552	except (UnicodeEncodeError), err:
	553	if not ignore_encoding_errors:
	554	raise
	555
	556	def to_stderr(self, message):
	557	"""Print message to stderr."""
	558	print >>sys.stderr, message.encode(preferredencoding())
	559
	560	def to_cons_title(self, message):
	561	"""Set console/terminal window title to message."""
	562	if not self.params.get('consoletitle', False):
	563	return
	564	if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
	565	# c_wchar_p() might not be necessary if `message` is
	566	# already of type unicode()
	567	ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
	568	elif 'TERM' in os.environ:
	569	sys.stderr.write('\033]0;%s\007' % message.encode(preferredencoding()))
	570
	571	def fixed_template(self):
	572	"""Checks if the output template is fixed."""
	573	return (re.search(ur'(?u)%$.+?$s', self.params['outtmpl']) is None)
	574
	575	def trouble(self, message=None):
	576	"""Determine action to take when a download problem appears.
	577
	578	Depending on if the downloader has been configured to ignore
	579	download errors or not, this method may throw an exception or
	580	not when errors are found, after printing the message.
	581	"""
	582	if message is not None:
	583	self.to_stderr(message)
	584	if not self.params.get('ignoreerrors', False):
	585	raise DownloadError(message)
	586	self._download_retcode = 1
	587
	588	def slow_down(self, start_time, byte_counter):
	589	"""Sleep if the download speed is over the rate limit."""
	590	rate_limit = self.params.get('ratelimit', None)
	591	if rate_limit is None or byte_counter == 0:
	592	return
	593	now = time.time()
	594	elapsed = now - start_time
	595	if elapsed <= 0.0:
	596	return
	597	speed = float(byte_counter) / elapsed
	598	if speed > rate_limit:
	599	time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
	600
	601	def temp_name(self, filename):
	602	"""Returns a temporary filename for the given filename."""
	603	if self.params.get('nopart', False) or filename == u'-' or \
	604	(os.path.exists(filename) and not os.path.isfile(filename)):
	605	return filename
	606	return filename + u'.part'
	607
	608	def undo_temp_name(self, filename):
	609	if filename.endswith(u'.part'):
	610	return filename[:-len(u'.part')]
	611	return filename
	612
	613	def try_rename(self, old_filename, new_filename):
	614	try:
	615	if old_filename == new_filename:
	616	return
	617	os.rename(old_filename, new_filename)
	618	except (IOError, OSError), err:
	619	self.trouble(u'ERROR: unable to rename file')
	620
	621	def try_utime(self, filename, last_modified_hdr):
	622	"""Try to set the last-modified time of the given file."""
	623	if last_modified_hdr is None:
	624	return
	625	if not os.path.isfile(filename):
	626	return
	627	timestr = last_modified_hdr
	628	if timestr is None:
	629	return
	630	filetime = timeconvert(timestr)
	631	if filetime is None:
	632	return filetime
	633	try:
	634	os.utime(filename, (time.time(), filetime))
	635	except:
	636	pass
	637	return filetime
	638
	639	def report_writedescription(self, descfn):
	640	""" Report that the description file is being written """
	641	self.to_screen(u'[info] Writing video description to: %s' % descfn, ignore_encoding_errors=True)
	642
	643	def report_writeinfojson(self, infofn):
	644	""" Report that the metadata file has been written """
	645	self.to_screen(u'[info] Video description metadata as JSON to: %s' % infofn, ignore_encoding_errors=True)
	646
	647	def report_destination(self, filename):
	648	"""Report destination filename."""
	649	self.to_screen(u'[download] Destination: %s' % filename, ignore_encoding_errors=True)
	650
	651	def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
	652	"""Report download progress."""
	653	if self.params.get('noprogress', False):
	654	return
	655	self.to_screen(u'\r[download] %s of %s at %s ETA %s' %
	656	(percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
	657	self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
	658	(percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
	659
	660	def report_resuming_byte(self, resume_len):
	661	"""Report attempt to resume at given byte."""
	662	self.to_screen(u'[download] Resuming download at byte %s' % resume_len)
	663
	664	def report_retry(self, count, retries):
	665	"""Report retry in case of HTTP error 5xx"""
	666	self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))
	667
	668	def report_file_already_downloaded(self, file_name):
	669	"""Report file has already been fully downloaded."""
	670	try:
	671	self.to_screen(u'[download] %s has already been downloaded' % file_name)
	672	except (UnicodeEncodeError), err:
	673	self.to_screen(u'[download] The file has already been downloaded')
	674
	675	def report_unable_to_resume(self):
	676	"""Report it was impossible to resume download."""
	677	self.to_screen(u'[download] Unable to resume')
	678
	679	def report_finish(self):
	680	"""Report download finished."""
	681	if self.params.get('noprogress', False):
	682	self.to_screen(u'[download] Download completed')
	683	else:
	684	self.to_screen(u'')
	685
	686	def increment_downloads(self):
	687	"""Increment the ordinal that assigns a number to each file."""
	688	self._num_downloads += 1
	689
	690	def prepare_filename(self, info_dict):
	691	"""Generate the output filename."""
	692	try:
	693	template_dict = dict(info_dict)
	694	template_dict['epoch'] = unicode(long(time.time()))
	695	template_dict['autonumber'] = unicode('%05d' % self._num_downloads)
	696	filename = self.params['outtmpl'] % template_dict
	697	return filename
	698	except (ValueError, KeyError), err:
	699	self.trouble(u'ERROR: invalid system charset or erroneous output template')
	700	return None
	701
	702	def _match_entry(self, info_dict):
	703	""" Returns None iff the file should be downloaded """
	704
	705	title = info_dict['title']
	706	matchtitle = self.params.get('matchtitle', False)
	707	if matchtitle and not re.search(matchtitle, title, re.IGNORECASE):
	708	return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
	709	rejecttitle = self.params.get('rejecttitle', False)
	710	if rejecttitle and re.search(rejecttitle, title, re.IGNORECASE):
	711	return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
	712	return None
	713
def process_info(self, info_dict):
    """Process a single dictionary returned by an InfoExtractor.

    Steps, in order: skip entries filtered out by title or by the
    'max_downloads' option, build the output filename, print whatever
    the force* options request, then (unless simulating) create the
    target directory, write the optional .description and .info.json
    side files, download the video and run the postprocessing chain.
    Most failures are reported through self.trouble(); an OSError or
    IOError from the download itself is turned into
    UnavailableVideoError.
    """

    # Title filters: a non-None reason means the entry is skipped
    reason = self._match_entry(info_dict)
    if reason is not None:
        self.to_screen(u'[download] ' + reason)
        return

    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None:
        if self._num_downloads > int(max_downloads):
            self.to_screen(u'[download] Maximum number of downloads reached. Skipping ' + info_dict['title'])
            return

    # May be None when the output template could not be filled
    filename = self.prepare_filename(info_dict)

    # Forced printings
    if self.params.get('forcetitle', False):
        print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')
    if self.params.get('forceurl', False):
        print info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace')
    if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
        print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace')
    if self.params.get('forcedescription', False) and 'description' in info_dict:
        print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace')
    if self.params.get('forcefilename', False) and filename is not None:
        print filename.encode(preferredencoding(), 'xmlcharrefreplace')
    if self.params.get('forceformat', False):
        print info_dict['format'].encode(preferredencoding(), 'xmlcharrefreplace')

    # Do nothing else if in simulate mode
    if self.params.get('simulate', False):
        return

    if filename is None:
        return

    if self.params.get('nooverwrites', False) and os.path.exists(filename):
        self.to_stderr(u'WARNING: file exists and will be skipped')
        return

    # Create the directory part of the filename if it does not exist yet
    try:
        dn = os.path.dirname(filename)
        if dn != '' and not os.path.exists(dn):
            os.makedirs(dn)
    except (OSError, IOError), err:
        self.trouble(u'ERROR: unable to create directory ' + unicode(err))
        return

    if self.params.get('writedescription', False):
        try:
            descfn = filename + '.description'
            self.report_writedescription(descfn)
            descfile = open(descfn, 'wb')
            try:
                descfile.write(info_dict['description'].encode('utf-8'))
            finally:
                descfile.close()
        except (OSError, IOError):
            self.trouble(u'ERROR: Cannot write description file ' + descfn)
            return

    if self.params.get('writeinfojson', False):
        infofn = filename + '.info.json'
        self.report_writeinfojson(infofn)
        # Probe for an encoder: the bare attribute access fails when the
        # json object in scope has no dump()
        try:
            json.dump
        except (NameError,AttributeError):
            self.trouble(u'ERROR: No JSON encoder found. Update to Python 2.6+, setup a json module, or leave out --write-info-json.')
            return
        try:
            infof = open(infofn, 'wb')
            try:
                # The 'urlhandle' entry is left out of the dump
                json_info_dict = dict((k,v) for k,v in info_dict.iteritems() if not k in ('urlhandle',))
                json.dump(json_info_dict, infof)
            finally:
                infof.close()
        except (OSError, IOError):
            self.trouble(u'ERROR: Cannot write metadata to JSON file ' + infofn)
            return

    if not self.params.get('skip_download', False):
        try:
            success = self._do_download(filename, info_dict)
        except (OSError, IOError), err:
            raise UnavailableVideoError
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self.trouble(u'ERROR: unable to download video data: %s' % str(err))
            return
        except (ContentTooShortError, ), err:
            self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
            return

        # Postprocess only what was actually downloaded
        if success:
            try:
                self.post_process(filename, info_dict)
            except (PostProcessingError), err:
                self.trouble(u'ERROR: postprocessing: %s' % str(err))
                return
	813
	814	def download(self, url_list):
	815	"""Download a given list of URLs."""
	816	if len(url_list) > 1 and self.fixed_template():
	817	raise SameFileError(self.params['outtmpl'])
	818
	819	for url in url_list:
	820	suitable_found = False
	821	for ie in self._ies:
	822	# Go to next InfoExtractor if not suitable
	823	if not ie.suitable(url):
	824	continue
	825
	826	# Suitable InfoExtractor found
	827	suitable_found = True
	828
	829	# Extract information from URL and process it
	830	ie.extract(url)
	831
	832	# Suitable InfoExtractor had been found; go to next URL
	833	break
	834
	835	if not suitable_found:
	836	self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url)
	837
	838	return self._download_retcode
	839
	840	def post_process(self, filename, ie_info):
	841	"""Run the postprocessing chain on the given file."""
	842	info = dict(ie_info)
	843	info['filepath'] = filename
	844	for pp in self._pps:
	845	info = pp.run(info)
	846	if info is None:
	847	break
	848
	849	def _download_with_rtmpdump(self, filename, url, player_url):
	850	self.report_destination(filename)
	851	tmpfilename = self.temp_name(filename)
	852
	853	# Check for rtmpdump first
	854	try:
	855	subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
	856	except (OSError, IOError):
	857	self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
	858	return False
	859
	860	# Download using rtmpdump. rtmpdump returns exit code 2 when
	861	# the connection was interrumpted and resuming appears to be
	862	# possible. This is part of rtmpdump's normal usage, AFAIK.
	863	basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename]
	864	retval = subprocess.call(basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)])
	865	while retval == 2 or retval == 1:
	866	prevsize = os.path.getsize(tmpfilename)
	867	self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
	868	time.sleep(5.0) # This seems to be needed
	869	retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
	870	cursize = os.path.getsize(tmpfilename)
	871	if prevsize == cursize and retval == 1:
	872	break
	873	# Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
	874	if prevsize == cursize and retval == 2 and cursize > 1024:
	875	self.to_screen(u'\r[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
	876	retval = 0
	877	break
	878	if retval == 0:
	879	self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(tmpfilename))
	880	self.try_rename(tmpfilename, filename)
	881	return True
	882	else:
	883	self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval)
	884	return False
	885
	886	def _do_download(self, filename, info_dict):
	887	url = info_dict['url']
	888	player_url = info_dict.get('player_url', None)
	889
	890	# Check file already present
	891	if self.params.get('continuedl', False) and os.path.isfile(filename) and not self.params.get('nopart', False):
	892	self.report_file_already_downloaded(filename)
	893	return True
	894
	895	# Attempt to download using rtmpdump
	896	if url.startswith('rtmp'):
	897	return self._download_with_rtmpdump(filename, url, player_url)
	898
	899	tmpfilename = self.temp_name(filename)
	900	stream = None
	901
	902	# Do not include the Accept-Encoding header
	903	headers = {'Youtubedl-no-compression': 'True'}
	904	basic_request = urllib2.Request(url, None, headers)
	905	request = urllib2.Request(url, None, headers)
	906
	907	# Establish possible resume length
	908	if os.path.isfile(tmpfilename):
	909	resume_len = os.path.getsize(tmpfilename)
	910	else:
	911	resume_len = 0
	912
	913	open_mode = 'wb'
	914	if resume_len != 0:
	915	if self.params.get('continuedl', False):
	916	self.report_resuming_byte(resume_len)
	917	request.add_header('Range','bytes=%d-' % resume_len)
	918	open_mode = 'ab'
	919	else:
	920	resume_len = 0
	921
	922	count = 0
	923	retries = self.params.get('retries', 0)
	924	while count <= retries:
	925	# Establish connection
	926	try:
	927	if count == 0 and 'urlhandle' in info_dict:
	928	data = info_dict['urlhandle']
	929	data = urllib2.urlopen(request)
	930	break
	931	except (urllib2.HTTPError, ), err:
	932	if (err.code < 500 or err.code >= 600) and err.code != 416:
	933	# Unexpected HTTP error
	934	raise
	935	elif err.code == 416:
	936	# Unable to resume (requested range not satisfiable)
	937	try:
	938	# Open the connection again without the range header
	939	data = urllib2.urlopen(basic_request)
	940	content_length = data.info()['Content-Length']
	941	except (urllib2.HTTPError, ), err:
	942	if err.code < 500 or err.code >= 600:
	943	raise
	944	else:
	945	# Examine the reported length
	946	if (content_length is not None and
	947	(resume_len - 100 < long(content_length) < resume_len + 100)):
	948	# The file had already been fully downloaded.
	949	# Explanation to the above condition: in issue #175 it was revealed that
	950	# YouTube sometimes adds or removes a few bytes from the end of the file,
	951	# changing the file size slightly and causing problems for some users. So
	952	# I decided to implement a suggested change and consider the file
	953	# completely downloaded if the file size differs less than 100 bytes from
	954	# the one in the hard drive.
	955	self.report_file_already_downloaded(filename)
	956	self.try_rename(tmpfilename, filename)
	957	return True
	958	else:
	959	# The length does not match, we start the download over
	960	self.report_unable_to_resume()
	961	open_mode = 'wb'
	962	break
	963	# Retry
	964	count += 1
	965	if count <= retries:
	966	self.report_retry(count, retries)
	967
	968	if count > retries:
	969	self.trouble(u'ERROR: giving up after %s retries' % retries)
	970	return False
	971
	972	data_len = data.info().get('Content-length', None)
	973	if data_len is not None:
	974	data_len = long(data_len) + resume_len
	975	data_len_str = self.format_bytes(data_len)
	976	byte_counter = 0 + resume_len
	977	block_size = 1024
	978	start = time.time()
	979	while True:
	980	# Download and write
	981	before = time.time()
	982	data_block = data.read(block_size)
	983	after = time.time()
	984	if len(data_block) == 0:
	985	break
	986	byte_counter += len(data_block)
	987
	988	# Open file just in time
	989	if stream is None:
	990	try:
	991	(stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
	992	assert stream is not None
	993	filename = self.undo_temp_name(tmpfilename)
	994	self.report_destination(filename)
	995	except (OSError, IOError), err:
	996	self.trouble(u'ERROR: unable to open for writing: %s' % str(err))
	997	return False
	998	try:
	999	stream.write(data_block)
	1000	except (IOError, OSError), err:
	1001	self.trouble(u'\nERROR: unable to write data: %s' % str(err))
	1002	return False
	1003	block_size = self.best_block_size(after - before, len(data_block))
	1004
	1005	# Progress message
	1006	speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len)
	1007	if data_len is None:
	1008	self.report_progress('Unknown %', data_len_str, speed_str, 'Unknown ETA')
	1009	else:
	1010	percent_str = self.calc_percent(byte_counter, data_len)
	1011	eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
	1012	self.report_progress(percent_str, data_len_str, speed_str, eta_str)
	1013
	1014	# Apply rate limit
	1015	self.slow_down(start, byte_counter - resume_len)
	1016
	1017	if stream is None:
	1018	self.trouble(u'\nERROR: Did not get any data blocks')
	1019	return False
	1020	stream.close()
	1021	self.report_finish()
	1022	if data_len is not None and byte_counter != data_len:
	1023	raise ContentTooShortError(byte_counter, long(data_len))
	1024	self.try_rename(tmpfilename, filename)
	1025
	1026	# Update file modification time
	1027	if self.params.get('updatetime', True):
	1028	info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))
	1029
	1030	return True
	1031
	1032
	1033	class InfoExtractor(object):
	1034	"""Information Extractor class.
	1035
	1036	Information extractors are the classes that, given a URL, extract
	1037	information from the video (or videos) the URL refers to. This
	1038	information includes the real video URL, the video title and simplified
	1039	title, author and others. The information is stored in a dictionary
	1040	which is then passed to the FileDownloader. The FileDownloader
	1041	processes this information possibly downloading the video to the file
	1042	system, among other possible outcomes. The dictionaries must include
	1043	the following fields:
	1044
	1045	id: Video identifier.
	1046	url: Final video URL.
	1047	uploader: Nickname of the video uploader.
	1048	title: Literal title.
	1049	stitle: Simplified title.
	1050	ext: Video filename extension.
	1051	format: Video format.
	1052	player_url: SWF Player URL (may be None).
	1053
	1054	The following fields are optional. Their primary purpose is to allow
	1055	youtube-dl to serve as the backend for a video search function, such
	1056	as the one in youtube2mp3. They are only used when their respective
	1057	forced printing functions are called:
	1058
	1059	thumbnail: Full URL to a video thumbnail image.
	1060	description: One-line video description.
	1061
	1062	Subclasses of this one should re-define the _real_initialize() and
	1063	_real_extract() methods and define a _VALID_URL regexp.
	1064	Probably, they should also be added to the list of extractors.
	1065	"""
	1066
	1067	_ready = False
	1068	_downloader = None
	1069
	1070	def __init__(self, downloader=None):
	1071	"""Constructor. Receives an optional downloader."""
	1072	self._ready = False
	1073	self.set_downloader(downloader)
	1074
	1075	def suitable(self, url):
	1076	"""Receives a URL and returns True if suitable for this IE."""
	1077	return re.match(self._VALID_URL, url) is not None
	1078
	1079	def initialize(self):
	1080	"""Initializes an instance (authentication, etc)."""
	1081	if not self._ready:
	1082	self._real_initialize()
	1083	self._ready = True
	1084
	1085	def extract(self, url):
	1086	"""Extracts URL information and returns it in list of dicts."""
	1087	self.initialize()
	1088	return self._real_extract(url)
	1089
	1090	def set_downloader(self, downloader):
	1091	"""Sets the downloader for this IE."""
	1092	self._downloader = downloader
	1093
	1094	def _real_initialize(self):
	1095	"""Real initialization process. Redefine in subclasses."""
	1096	pass
	1097
	1098	def _real_extract(self, url):
	1099	"""Real extraction process. Redefine in subclasses."""
	1100	pass
	1101
	1102
	1103	class YoutubeIE(InfoExtractor):
	1104	"""Information extractor for youtube.com."""
	1105
	1106	_VALID_URL = r'^((?:https?://)?(?:youtu\.be/\|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?!view_play_list\|my_playlists\|artist\|playlist)(?:(?:(?:v\|embed\|e)/)\|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?\|#!?)(?:.+&)?v=))?)?([0-9A-Za-z_-]+)(?(1).+)?$'
	1107	_LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
	1108	_LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en'
	1109	_AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
	1110	_NETRC_MACHINE = 'youtube'
	1111	# Listed in order of quality
	1112	_available_formats = ['38', '37', '22', '45', '35', '44', '34', '18', '43', '6', '5', '17', '13']
	1113	_video_extensions = {
	1114	'13': '3gp',
	1115	'17': 'mp4',
	1116	'18': 'mp4',
	1117	'22': 'mp4',
	1118	'37': 'mp4',
	1119	'38': 'video', # You actually don't know if this will be MOV, AVI or whatever
	1120	'43': 'webm',
	1121	'44': 'webm',
	1122	'45': 'webm',
	1123	}
	1124	_video_dimensions = {
	1125	'5': '240x400',
	1126	'6': '???',
	1127	'13': '???',
	1128	'17': '144x176',
	1129	'18': '360x640',
	1130	'22': '720x1280',
	1131	'34': '360x640',
	1132	'35': '480x854',
	1133	'37': '1080x1920',
	1134	'38': '3072x4096',
	1135	'43': '360x640',
	1136	'44': '480x854',
	1137	'45': '720x1280',
	1138	}
	1139	IE_NAME = u'youtube'
	1140
	1141	def report_lang(self):
	1142	"""Report attempt to set language."""
	1143	self._downloader.to_screen(u'[youtube] Setting language')
	1144
	1145	def report_login(self):
	1146	"""Report attempt to log in."""
	1147	self._downloader.to_screen(u'[youtube] Logging in')
	1148
	1149	def report_age_confirmation(self):
	1150	"""Report attempt to confirm age."""
	1151	self._downloader.to_screen(u'[youtube] Confirming age')
	1152
	1153	def report_video_webpage_download(self, video_id):
	1154	"""Report attempt to download video webpage."""
	1155	self._downloader.to_screen(u'[youtube] %s: Downloading video webpage' % video_id)
	1156
	1157	def report_video_info_webpage_download(self, video_id):
	1158	"""Report attempt to download video info webpage."""
	1159	self._downloader.to_screen(u'[youtube] %s: Downloading video info webpage' % video_id)
	1160
	1161	def report_information_extraction(self, video_id):
	1162	"""Report attempt to extract video information."""
	1163	self._downloader.to_screen(u'[youtube] %s: Extracting video information' % video_id)
	1164
	1165	def report_unavailable_format(self, video_id, format):
	1166	"""Report extracted video URL."""
	1167	self._downloader.to_screen(u'[youtube] %s: Format %s not available' % (video_id, format))
	1168
	1169	def report_rtmp_download(self):
	1170	"""Indicate the download will use the RTMP protocol."""
	1171	self._downloader.to_screen(u'[youtube] RTMP download detected')
	1172
	1173	def _print_formats(self, formats):
	1174	print 'Available formats:'
	1175	for x in formats:
	1176	print '%s\t:\t%s\t[%s]' %(x, self._video_extensions.get(x, 'flv'), self._video_dimensions.get(x, '???'))
	1177
	1178	def _real_initialize(self):
	1179	if self._downloader is None:
	1180	return
	1181
	1182	username = None
	1183	password = None
	1184	downloader_params = self._downloader.params
	1185
	1186	# Attempt to use provided username and password or .netrc data
	1187	if downloader_params.get('username', None) is not None:
	1188	username = downloader_params['username']
	1189	password = downloader_params['password']
	1190	elif downloader_params.get('usenetrc', False):
	1191	try:
	1192	info = netrc.netrc().authenticators(self._NETRC_MACHINE)
	1193	if info is not None:
	1194	username = info[0]
	1195	password = info[2]
	1196	else:
	1197	raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
	1198	except (IOError, netrc.NetrcParseError), err:
	1199	self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
	1200	return
	1201
	1202	# Set language
	1203	request = urllib2.Request(self._LANG_URL)
	1204	try:
	1205	self.report_lang()
	1206	urllib2.urlopen(request).read()
	1207	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	1208	self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err))
	1209	return
	1210
	1211	# No authentication to be performed
	1212	if username is None:
	1213	return
	1214
	1215	# Log in
	1216	login_form = {
	1217	'current_form': 'loginForm',
	1218	'next': '/',
	1219	'action_login': 'Log In',
	1220	'username': username,
	1221	'password': password,
	1222	}
	1223	request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form))
	1224	try:
	1225	self.report_login()
	1226	login_results = urllib2.urlopen(request).read()
	1227	if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
	1228	self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
	1229	return
	1230	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	1231	self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
	1232	return
	1233
	1234	# Confirm age
	1235	age_form = {
	1236	'next_url': '/',
	1237	'action_confirm': 'Confirm',
	1238	}
	1239	request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form))
	1240	try:
	1241	self.report_age_confirmation()
	1242	age_results = urllib2.urlopen(request).read()
	1243	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	1244	self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
	1245	return
	1246
	1247	def _real_extract(self, url):
	1248	# Extract video id from URL
	1249	mobj = re.match(self._VALID_URL, url)
	1250	if mobj is None:
	1251	self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
	1252	return
	1253	video_id = mobj.group(2)
	1254
	1255	# Get video webpage
	1256	self.report_video_webpage_download(video_id)
	1257	request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id)
	1258	try:
	1259	video_webpage = urllib2.urlopen(request).read()
	1260	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	1261	self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
	1262	return
	1263
	1264	# Attempt to extract SWF player URL
	1265	mobj = re.search(r'swfConfig.?"(http:\\/\\/.?watch.?-.?\.swf)"', video_webpage)
	1266	if mobj is not None:
	1267	player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
	1268	else:
	1269	player_url = None
	1270
	1271	# Get video info
	1272	self.report_video_info_webpage_download(video_id)
	1273	for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
	1274	video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
	1275	% (video_id, el_type))
	1276	request = urllib2.Request(video_info_url)
	1277	try:
	1278	video_info_webpage = urllib2.urlopen(request).read()
	1279	video_info = parse_qs(video_info_webpage)
	1280	if 'token' in video_info:
	1281	break
	1282	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	1283	self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
	1284	return
	1285	if 'token' not in video_info:
	1286	if 'reason' in video_info:
	1287	self._downloader.trouble(u'ERROR: YouTube said: %s' % video_info['reason'][0].decode('utf-8'))
	1288	else:
	1289	self._downloader.trouble(u'ERROR: "token" parameter not in video info for unknown reason')
	1290	return
	1291
	1292	# Start extracting information
	1293	self.report_information_extraction(video_id)
	1294
	1295	# uploader
	1296	if 'author' not in video_info:
	1297	self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
	1298	return
	1299	video_uploader = urllib.unquote_plus(video_info['author'][0])
	1300
	1301	# title
	1302	if 'title' not in video_info:
	1303	self._downloader.trouble(u'ERROR: unable to extract video title')
	1304	return
	1305	video_title = urllib.unquote_plus(video_info['title'][0])
	1306	video_title = video_title.decode('utf-8')
	1307	video_title = sanitize_title(video_title)
	1308
	1309	# simplified title
	1310	simple_title = _simplify_title(video_title)
	1311
	1312	# thumbnail image
	1313	if 'thumbnail_url' not in video_info:
	1314	self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
	1315	video_thumbnail = ''
	1316	else: # don't panic if we can't find it
	1317	video_thumbnail = urllib.unquote_plus(video_info['thumbnail_url'][0])
	1318
	1319	# upload date
	1320	upload_date = u'NA'
	1321	mobj = re.search(r'id="eow-date.?>(.?)</span>', video_webpage, re.DOTALL)
	1322	if mobj is not None:
	1323	upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
	1324	format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y']
	1325	for expression in format_expressions:
	1326	try:
	1327	upload_date = datetime.datetime.strptime(upload_date, expression).strftime('%Y%m%d')
	1328	except:
	1329	pass
	1330
	1331	# description
	1332	try:
	1333	lxml.etree
	1334	except NameError:
	1335	video_description = u'No description available.'
	1336	if self._downloader.params.get('forcedescription', False) or self._downloader.params.get('writedescription', False):
	1337	mobj = re.search(r'<meta name="description" content="(.)"(?:\s/)?>', video_webpage)
	1338	if mobj is not None:
	1339	video_description = mobj.group(1).decode('utf-8')
	1340	else:
	1341	html_parser = lxml.etree.HTMLParser(encoding='utf-8')
	1342	vwebpage_doc = lxml.etree.parse(StringIO.StringIO(video_webpage), html_parser)
	1343	video_description = u''.join(vwebpage_doc.xpath('id("eow-description")//text()'))
	1344	# TODO use another parser
	1345
	1346	# token
	1347	video_token = urllib.unquote_plus(video_info['token'][0])
	1348
	1349	# Decide which formats to download
	1350	req_format = self._downloader.params.get('format', None)
	1351
	1352	if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
	1353	self.report_rtmp_download()
	1354	video_url_list = [(None, video_info['conn'][0])]
	1355	elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1:
	1356	url_data_strs = video_info['url_encoded_fmt_stream_map'][0].split(',')
	1357	url_data = [parse_qs(uds) for uds in url_data_strs]
	1358	url_data = filter(lambda ud: 'itag' in ud and 'url' in ud, url_data)
	1359	url_map = dict((ud['itag'][0], ud['url'][0]) for ud in url_data)
	1360
	1361	format_limit = self._downloader.params.get('format_limit', None)
	1362	if format_limit is not None and format_limit in self._available_formats:
	1363	format_list = self._available_formats[self._available_formats.index(format_limit):]
	1364	else:
	1365	format_list = self._available_formats
	1366	existing_formats = [x for x in format_list if x in url_map]
	1367	if len(existing_formats) == 0:
	1368	self._downloader.trouble(u'ERROR: no known formats available for video')
	1369	return
	1370	if self._downloader.params.get('listformats', None):
	1371	self._print_formats(existing_formats)
	1372	return
	1373	if req_format is None or req_format == 'best':
	1374	video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
	1375	elif req_format == 'worst':
	1376	video_url_list = [(existing_formats[len(existing_formats)-1], url_map[existing_formats[len(existing_formats)-1]])] # worst quality
	1377	elif req_format in ('-1', 'all'):
	1378	video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
	1379	else:
	1380	# Specific formats. We pick the first in a slash-delimeted sequence.
	1381	# For example, if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
	1382	req_formats = req_format.split('/')
	1383	video_url_list = None
	1384	for rf in req_formats:
	1385	if rf in url_map:
	1386	video_url_list = [(rf, url_map[rf])]
	1387	break
	1388	if video_url_list is None:
	1389	self._downloader.trouble(u'ERROR: requested format not available')
	1390	return
	1391	else:
	1392	self._downloader.trouble(u'ERROR: no conn or url_encoded_fmt_stream_map information found in video info')
	1393	return
	1394
	1395	for format_param, video_real_url in video_url_list:
	1396	# At this point we have a new video
	1397	self._downloader.increment_downloads()
	1398
	1399	# Extension
	1400	video_extension = self._video_extensions.get(format_param, 'flv')
	1401
	1402	try:
	1403	# Process video information
	1404	self._downloader.process_info({
	1405	'id': video_id.decode('utf-8'),
	1406	'url': video_real_url.decode('utf-8'),
	1407	'uploader': video_uploader.decode('utf-8'),
	1408	'upload_date': upload_date,
	1409	'title': video_title,
	1410	'stitle': simple_title,
	1411	'ext': video_extension.decode('utf-8'),
	1412	'format': (format_param is None and u'NA' or format_param.decode('utf-8')),
	1413	'thumbnail': video_thumbnail.decode('utf-8'),
	1414	'description': video_description,
	1415	'player_url': player_url,
	1416	})
	1417	except UnavailableVideoError, err:
	1418	self._downloader.trouble(u'\nERROR: unable to download video')
	1419
	1420
	1421	class MetacafeIE(InfoExtractor):
	1422	"""Information Extractor for metacafe.com."""
	1423
	1424	_VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
	1425	_DISCLAIMER = 'http://www.metacafe.com/family_filter/'
	1426	_FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
	1427	_youtube_ie = None
	1428	IE_NAME = u'metacafe'
	1429
	1430	def __init__(self, youtube_ie, downloader=None):
	1431	InfoExtractor.__init__(self, downloader)
	1432	self._youtube_ie = youtube_ie
	1433
	1434	def report_disclaimer(self):
	1435	"""Report disclaimer retrieval."""
	1436	self._downloader.to_screen(u'[metacafe] Retrieving disclaimer')
	1437
	1438	def report_age_confirmation(self):
	1439	"""Report attempt to confirm age."""
	1440	self._downloader.to_screen(u'[metacafe] Confirming age')
	1441
	1442	def report_download_webpage(self, video_id):
	1443	"""Report webpage download."""
	1444	self._downloader.to_screen(u'[metacafe] %s: Downloading webpage' % video_id)
	1445
	1446	def report_extraction(self, video_id):
	1447	"""Report information extraction."""
	1448	self._downloader.to_screen(u'[metacafe] %s: Extracting information' % video_id)
	1449
	1450	def _real_initialize(self):
	1451	# Retrieve disclaimer
	1452	request = urllib2.Request(self._DISCLAIMER)
	1453	try:
	1454	self.report_disclaimer()
	1455	disclaimer = urllib2.urlopen(request).read()
	1456	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	1457	self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % str(err))
	1458	return
	1459
	1460	# Confirm age
	1461	disclaimer_form = {
	1462	'filters': '0',
	1463	'submit': "Continue - I'm over 18",
	1464	}
	1465	request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form))
	1466	try:
	1467	self.report_age_confirmation()
	1468	disclaimer = urllib2.urlopen(request).read()
	1469	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	1470	self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
	1471	return
	1472
	1473	def _real_extract(self, url):
	1474	# Extract id and simplified title from URL
	1475	mobj = re.match(self._VALID_URL, url)
	1476	if mobj is None:
	1477	self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
	1478	return
	1479
	1480	video_id = mobj.group(1)
	1481
	1482	# Check if video comes from YouTube
	1483	mobj2 = re.match(r'^yt-(.*)$', video_id)
	1484	if mobj2 is not None:
	1485	self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % mobj2.group(1))
	1486	return
	1487
	1488	# At this point we have a new video
	1489	self._downloader.increment_downloads()
	1490
	1491	simple_title = mobj.group(2).decode('utf-8')
	1492
	1493	# Retrieve video webpage to extract further information
	1494	request = urllib2.Request('http://www.metacafe.com/watch/%s/' % video_id)
	1495	try:
	1496	self.report_download_webpage(video_id)
	1497	webpage = urllib2.urlopen(request).read()
	1498	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	1499	self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
	1500	return
	1501
	1502	# Extract URL, uploader and title from webpage
	1503	self.report_extraction(video_id)
	1504	mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
	1505	if mobj is not None:
	1506	mediaURL = urllib.unquote(mobj.group(1))
	1507	video_extension = mediaURL[-3:]
	1508
	1509	# Extract gdaKey if available
	1510	mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage)
	1511	if mobj is None:
	1512	video_url = mediaURL
	1513	else:
	1514	gdaKey = mobj.group(1)
	1515	video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)
	1516	else:
	1517	mobj = re.search(r' name="flashvars" value="(.*?)"', webpage)
	1518	if mobj is None:
	1519	self._downloader.trouble(u'ERROR: unable to extract media URL')
	1520	return
	1521	vardict = parse_qs(mobj.group(1))
	1522	if 'mediaData' not in vardict:
	1523	self._downloader.trouble(u'ERROR: unable to extract media URL')
	1524	return
	1525	mobj = re.search(r'"mediaURL":"(http.?)","key":"(.?)"', vardict['mediaData'][0])
	1526	if mobj is None:
	1527	self._downloader.trouble(u'ERROR: unable to extract media URL')
	1528	return
	1529	mediaURL = mobj.group(1).replace('\\/', '/')
	1530	video_extension = mediaURL[-3:]
	1531	video_url = '%s?__gda__=%s' % (mediaURL, mobj.group(2))
	1532
	1533	mobj = re.search(r'(?im)<title>(.*) - Video</title>', webpage)
	1534	if mobj is None:
	1535	self._downloader.trouble(u'ERROR: unable to extract title')
	1536	return
	1537	video_title = mobj.group(1).decode('utf-8')
	1538	video_title = sanitize_title(video_title)
	1539
	1540	mobj = re.search(r'(?ms)By:\s<a .?>(.+?)<', webpage)
	1541	if mobj is None:
	1542	self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
	1543	return
	1544	video_uploader = mobj.group(1)
	1545
	1546	try:
	1547	# Process video information
	1548	self._downloader.process_info({
	1549	'id': video_id.decode('utf-8'),
	1550	'url': video_url.decode('utf-8'),
	1551	'uploader': video_uploader.decode('utf-8'),
	1552	'upload_date': u'NA',
	1553	'title': video_title,
	1554	'stitle': simple_title,
	1555	'ext': video_extension.decode('utf-8'),
	1556	'format': u'NA',
	1557	'player_url': None,
	1558	})
	1559	except UnavailableVideoError:
	1560	self._downloader.trouble(u'\nERROR: unable to download video')
	1561
	1562
	1563	class DailymotionIE(InfoExtractor):
	1564	"""Information Extractor for Dailymotion"""
	1565
	1566	_VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^_/]+)_([^/]+)'
	1567	IE_NAME = u'dailymotion'
	1568
	1569	def __init__(self, downloader=None):
	1570	InfoExtractor.__init__(self, downloader)
	1571
	1572	def report_download_webpage(self, video_id):
	1573	"""Report webpage download."""
	1574	self._downloader.to_screen(u'[dailymotion] %s: Downloading webpage' % video_id)
	1575
	1576	def report_extraction(self, video_id):
	1577	"""Report information extraction."""
	1578	self._downloader.to_screen(u'[dailymotion] %s: Extracting information' % video_id)
	1579
	1580	def _real_extract(self, url):
	1581	# Extract id and simplified title from URL
	1582	mobj = re.match(self._VALID_URL, url)
	1583	if mobj is None:
	1584	self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
	1585	return
	1586
	1587	# At this point we have a new video
	1588	self._downloader.increment_downloads()
	1589	video_id = mobj.group(1)
	1590
	1591	simple_title = mobj.group(2).decode('utf-8')
	1592	video_extension = 'flv'
	1593
	1594	# Retrieve video webpage to extract further information
	1595	request = urllib2.Request(url)
	1596	request.add_header('Cookie', 'family_filter=off')
	1597	try:
	1598	self.report_download_webpage(video_id)
	1599	webpage = urllib2.urlopen(request).read()
	1600	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	1601	self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
	1602	return
	1603
	1604	# Extract URL, uploader and title from webpage
	1605	self.report_extraction(video_id)
	1606	mobj = re.search(r'(?i)addVariable$\"sequence\"\s,\s\"([^\"]+?)\"$', webpage)
	1607	if mobj is None:
	1608	self._downloader.trouble(u'ERROR: unable to extract media URL')
	1609	return
	1610	sequence = urllib.unquote(mobj.group(1))
	1611	mobj = re.search(r',\"sdURL\"\:\"([^\"]+?)\",', sequence)
	1612	if mobj is None:
	1613	self._downloader.trouble(u'ERROR: unable to extract media URL')
	1614	return
	1615	mediaURL = urllib.unquote(mobj.group(1)).replace('\\', '')
	1616
	1617	# if needed add http://www.dailymotion.com/ if relative URL
	1618
	1619	video_url = mediaURL
	1620
	1621	mobj = re.search(r'(?im)<title>\s(.+)\s-\s*Video\s+Dailymotion</title>', webpage)
	1622	if mobj is None:
	1623	self._downloader.trouble(u'ERROR: unable to extract title')
	1624	return
	1625	video_title = mobj.group(1).decode('utf-8')
	1626	video_title = sanitize_title(video_title)
	1627
	1628	mobj = re.search(r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a></span>', webpage)
	1629	if mobj is None:
	1630	self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
	1631	return
	1632	video_uploader = mobj.group(1)
	1633
	1634	try:
	1635	# Process video information
	1636	self._downloader.process_info({
	1637	'id': video_id.decode('utf-8'),
	1638	'url': video_url.decode('utf-8'),
	1639	'uploader': video_uploader.decode('utf-8'),
	1640	'upload_date': u'NA',
	1641	'title': video_title,
	1642	'stitle': simple_title,
	1643	'ext': video_extension.decode('utf-8'),
	1644	'format': u'NA',
	1645	'player_url': None,
	1646	})
	1647	except UnavailableVideoError:
	1648	self._downloader.trouble(u'\nERROR: unable to download video')
	1649
	1650
	1651	class GoogleIE(InfoExtractor):
	1652	"""Information extractor for video.google.com."""
	1653
	1654	_VALID_URL = r'(?:http://)?video\.google\.(?:com(?:\.au)?\|co\.(?:uk\|jp\|kr\|cr)\|ca\|de\|es\|fr\|it\|nl\|pl)/videoplay\?docid=([^\&]+).*'
	1655	IE_NAME = u'video.google'
	1656
	1657	def __init__(self, downloader=None):
	1658	InfoExtractor.__init__(self, downloader)
	1659
	1660	def report_download_webpage(self, video_id):
	1661	"""Report webpage download."""
	1662	self._downloader.to_screen(u'[video.google] %s: Downloading webpage' % video_id)
	1663
	1664	def report_extraction(self, video_id):
	1665	"""Report information extraction."""
	1666	self._downloader.to_screen(u'[video.google] %s: Extracting information' % video_id)
	1667
	1668	def _real_extract(self, url):
	1669	# Extract id from URL
	1670	mobj = re.match(self._VALID_URL, url)
	1671	if mobj is None:
	1672	self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
	1673	return
	1674
	1675	# At this point we have a new video
	1676	self._downloader.increment_downloads()
	1677	video_id = mobj.group(1)
	1678
	1679	video_extension = 'mp4'
	1680
	1681	# Retrieve video webpage to extract further information
	1682	request = urllib2.Request('http://video.google.com/videoplay?docid=%s&hl=en&oe=utf-8' % video_id)
	1683	try:
	1684	self.report_download_webpage(video_id)
	1685	webpage = urllib2.urlopen(request).read()
	1686	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	1687	self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
	1688	return
	1689
	1690	# Extract URL, uploader, and title from webpage
	1691	self.report_extraction(video_id)
	1692	mobj = re.search(r"download_url:'([^']+)'", webpage)
	1693	if mobj is None:
	1694	video_extension = 'flv'
	1695	mobj = re.search(r"(?i)videoUrl\\x3d(.+?)\\x26", webpage)
	1696	if mobj is None:
	1697	self._downloader.trouble(u'ERROR: unable to extract media URL')
	1698	return
	1699	mediaURL = urllib.unquote(mobj.group(1))
	1700	mediaURL = mediaURL.replace('\\x3d', '\x3d')
	1701	mediaURL = mediaURL.replace('\\x26', '\x26')
	1702
	1703	video_url = mediaURL
	1704
	1705	mobj = re.search(r'<title>(.*)</title>', webpage)
	1706	if mobj is None:
	1707	self._downloader.trouble(u'ERROR: unable to extract title')
	1708	return
	1709	video_title = mobj.group(1).decode('utf-8')
	1710	video_title = sanitize_title(video_title)
	1711	simple_title = _simplify_title(video_title)
	1712
	1713	# Extract video description
	1714	mobj = re.search(r'<span id=short-desc-content>([^<]*)</span>', webpage)
	1715	if mobj is None:
	1716	self._downloader.trouble(u'ERROR: unable to extract video description')
	1717	return
	1718	video_description = mobj.group(1).decode('utf-8')
	1719	if not video_description:
	1720	video_description = 'No description available.'
	1721
	1722	# Extract video thumbnail
	1723	if self._downloader.params.get('forcethumbnail', False):
	1724	request = urllib2.Request('http://video.google.com/videosearch?q=%s+site:video.google.com&hl=en' % abs(int(video_id)))
	1725	try:
	1726	webpage = urllib2.urlopen(request).read()
	1727	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	1728	self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
	1729	return
	1730	mobj = re.search(r'<img class=thumbnail-img (?:.* )?src=(http.*)>', webpage)
	1731	if mobj is None:
	1732	self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
	1733	return
	1734	video_thumbnail = mobj.group(1)
	1735	else: # we need something to pass to process_info
	1736	video_thumbnail = ''
	1737
	1738	try:
	1739	# Process video information
	1740	self._downloader.process_info({
	1741	'id': video_id.decode('utf-8'),
	1742	'url': video_url.decode('utf-8'),
	1743	'uploader': u'NA',
	1744	'upload_date': u'NA',
	1745	'title': video_title,
	1746	'stitle': simple_title,
	1747	'ext': video_extension.decode('utf-8'),
	1748	'format': u'NA',
	1749	'player_url': None,
	1750	})
	1751	except UnavailableVideoError:
	1752	self._downloader.trouble(u'\nERROR: unable to download video')
	1753
	1754
	1755	class PhotobucketIE(InfoExtractor):
	1756	"""Information extractor for photobucket.com."""
	1757
	1758	_VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.[\?\&]current=(.\.flv)'
	1759	IE_NAME = u'photobucket'
	1760
	1761	def __init__(self, downloader=None):
	1762	InfoExtractor.__init__(self, downloader)
	1763
	1764	def report_download_webpage(self, video_id):
	1765	"""Report webpage download."""
	1766	self._downloader.to_screen(u'[photobucket] %s: Downloading webpage' % video_id)
	1767
	1768	def report_extraction(self, video_id):
	1769	"""Report information extraction."""
	1770	self._downloader.to_screen(u'[photobucket] %s: Extracting information' % video_id)
	1771
	1772	def _real_extract(self, url):
	1773	# Extract id from URL
	1774	mobj = re.match(self._VALID_URL, url)
	1775	if mobj is None:
	1776	self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
	1777	return
	1778
	1779	# At this point we have a new video
	1780	self._downloader.increment_downloads()
	1781	video_id = mobj.group(1)
	1782
	1783	video_extension = 'flv'
	1784
	1785	# Retrieve video webpage to extract further information
	1786	request = urllib2.Request(url)
	1787	try:
	1788	self.report_download_webpage(video_id)
	1789	webpage = urllib2.urlopen(request).read()
	1790	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	1791	self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
	1792	return
	1793
	1794	# Extract URL, uploader, and title from webpage
	1795	self.report_extraction(video_id)
	1796	mobj = re.search(r'<link rel="video_src" href=".*\?file=([^"]+)" />', webpage)
	1797	if mobj is None:
	1798	self._downloader.trouble(u'ERROR: unable to extract media URL')
	1799	return
	1800	mediaURL = urllib.unquote(mobj.group(1))
	1801
	1802	video_url = mediaURL
	1803
	1804	mobj = re.search(r'<title>(.) video by (.) - Photobucket</title>', webpage)
	1805	if mobj is None:
	1806	self._downloader.trouble(u'ERROR: unable to extract title')
	1807	return
	1808	video_title = mobj.group(1).decode('utf-8')
	1809	video_title = sanitize_title(video_title)
	1810	simple_title = _simplify_title(vide_title)
	1811
	1812	video_uploader = mobj.group(2).decode('utf-8')
	1813
	1814	try:
	1815	# Process video information
	1816	self._downloader.process_info({
	1817	'id': video_id.decode('utf-8'),
	1818	'url': video_url.decode('utf-8'),
	1819	'uploader': video_uploader,
	1820	'upload_date': u'NA',
	1821	'title': video_title,
	1822	'stitle': simple_title,
	1823	'ext': video_extension.decode('utf-8'),
	1824	'format': u'NA',
	1825	'player_url': None,
	1826	})
	1827	except UnavailableVideoError:
	1828	self._downloader.trouble(u'\nERROR: unable to download video')
	1829
	1830
	1831	class YahooIE(InfoExtractor):
	1832	"""Information extractor for video.yahoo.com."""
	1833
	1834	# _VALID_URL matches all Yahoo! Video URLs
	1835	# _VPAGE_URL matches only the extractable '/watch/' URLs
	1836	_VALID_URL = r'(?:http://)?(?:[a-z]+\.)?video\.yahoo\.com/(?:watch\|network)/([0-9]+)(?:/\|\?v=)([0-9]+)(?:[#\?].*)?'
	1837	_VPAGE_URL = r'(?:http://)?video\.yahoo\.com/watch/([0-9]+)/([0-9]+)(?:[#\?].*)?'
	1838	IE_NAME = u'video.yahoo'
	1839
	1840	def __init__(self, downloader=None):
	1841	InfoExtractor.__init__(self, downloader)
	1842
	1843	def report_download_webpage(self, video_id):
	1844	"""Report webpage download."""
	1845	self._downloader.to_screen(u'[video.yahoo] %s: Downloading webpage' % video_id)
	1846
	1847	def report_extraction(self, video_id):
	1848	"""Report information extraction."""
	1849	self._downloader.to_screen(u'[video.yahoo] %s: Extracting information' % video_id)
	1850
	1851	def _real_extract(self, url, new_video=True):
	1852	# Extract ID from URL
	1853	mobj = re.match(self._VALID_URL, url)
	1854	if mobj is None:
	1855	self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
	1856	return
	1857
	1858	# At this point we have a new video
	1859	self._downloader.increment_downloads()
	1860	video_id = mobj.group(2)
	1861	video_extension = 'flv'
	1862
	1863	# Rewrite valid but non-extractable URLs as
	1864	# extractable English language /watch/ URLs
	1865	if re.match(self._VPAGE_URL, url) is None:
	1866	request = urllib2.Request(url)
	1867	try:
	1868	webpage = urllib2.urlopen(request).read()
	1869	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	1870	self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
	1871	return
	1872
	1873	mobj = re.search(r'$"id", "([0-9]+)"$;', webpage)
	1874	if mobj is None:
	1875	self._downloader.trouble(u'ERROR: Unable to extract id field')
	1876	return
	1877	yahoo_id = mobj.group(1)
	1878
	1879	mobj = re.search(r'$"vid", "([0-9]+)"$;', webpage)
	1880	if mobj is None:
	1881	self._downloader.trouble(u'ERROR: Unable to extract vid field')
	1882	return
	1883	yahoo_vid = mobj.group(1)
	1884
	1885	url = 'http://video.yahoo.com/watch/%s/%s' % (yahoo_vid, yahoo_id)
	1886	return self._real_extract(url, new_video=False)
	1887
	1888	# Retrieve video webpage to extract further information
	1889	request = urllib2.Request(url)
	1890	try:
	1891	self.report_download_webpage(video_id)
	1892	webpage = urllib2.urlopen(request).read()
	1893	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	1894	self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
	1895	return
	1896
	1897	# Extract uploader and title from webpage
	1898	self.report_extraction(video_id)
	1899	mobj = re.search(r'<meta name="title" content="(.*)" />', webpage)
	1900	if mobj is None:
	1901	self._downloader.trouble(u'ERROR: unable to extract video title')
	1902	return
	1903	video_title = mobj.group(1).decode('utf-8')
	1904	simple_title = _simplify_title(video_title)
	1905
	1906	mobj = re.search(r'<h2 class="ti-5"><a href="http://video\.yahoo\.com/(people\|profile)/[0-9]+" beacon=".">(.)</a></h2>', webpage)
	1907	if mobj is None:
	1908	self._downloader.trouble(u'ERROR: unable to extract video uploader')
	1909	return
	1910	video_uploader = mobj.group(1).decode('utf-8')
	1911
	1912	# Extract video thumbnail
	1913	mobj = re.search(r'<link rel="image_src" href="(.*)" />', webpage)
	1914	if mobj is None:
	1915	self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
	1916	return
	1917	video_thumbnail = mobj.group(1).decode('utf-8')
	1918
	1919	# Extract video description
	1920	mobj = re.search(r'<meta name="description" content="(.*)" />', webpage)
	1921	if mobj is None:
	1922	self._downloader.trouble(u'ERROR: unable to extract video description')
	1923	return
	1924	video_description = mobj.group(1).decode('utf-8')
	1925	if not video_description:
	1926	video_description = 'No description available.'
	1927
	1928	# Extract video height and width
	1929	mobj = re.search(r'<meta name="video_height" content="([0-9]+)" />', webpage)
	1930	if mobj is None:
	1931	self._downloader.trouble(u'ERROR: unable to extract video height')
	1932	return
	1933	yv_video_height = mobj.group(1)
	1934
	1935	mobj = re.search(r'<meta name="video_width" content="([0-9]+)" />', webpage)
	1936	if mobj is None:
	1937	self._downloader.trouble(u'ERROR: unable to extract video width')
	1938	return
	1939	yv_video_width = mobj.group(1)
	1940
	1941	# Retrieve video playlist to extract media URL
	1942	# I'm not completely sure what all these options are, but we
	1943	# seem to need most of them, otherwise the server sends a 401.
	1944	yv_lg = 'R0xx6idZnW2zlrKP8xxAIR' # not sure what this represents
	1945	yv_bitrate = '700' # according to Wikipedia this is hard-coded
	1946	request = urllib2.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id +
	1947	'&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height +
	1948	'&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797')
	1949	try:
	1950	self.report_download_webpage(video_id)
	1951	webpage = urllib2.urlopen(request).read()
	1952	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	1953	self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
	1954	return
	1955
	1956	# Extract media URL from playlist XML
	1957	mobj = re.search(r'<STREAM APP="(http://.)" FULLPATH="/?(/.\.flv\?[^"]*)"', webpage)
	1958	if mobj is None:
	1959	self._downloader.trouble(u'ERROR: Unable to extract media URL')
	1960	return
	1961	video_url = urllib.unquote(mobj.group(1) + mobj.group(2)).decode('utf-8')
	1962	video_url = re.sub(r'(?u)&(.+?);', htmlentity_transform, video_url)
	1963
	1964	try:
	1965	# Process video information
	1966	self._downloader.process_info({
	1967	'id': video_id.decode('utf-8'),
	1968	'url': video_url,
	1969	'uploader': video_uploader,
	1970	'upload_date': u'NA',
	1971	'title': video_title,
	1972	'stitle': simple_title,
	1973	'ext': video_extension.decode('utf-8'),
	1974	'thumbnail': video_thumbnail.decode('utf-8'),
	1975	'description': video_description,
	1976	'thumbnail': video_thumbnail,
	1977	'player_url': None,
	1978	})
	1979	except UnavailableVideoError:
	1980	self._downloader.trouble(u'\nERROR: unable to download video')
	1981
	1982
	1983	class VimeoIE(InfoExtractor):
	1984	"""Information extractor for vimeo.com."""
	1985
	1986	# _VALID_URL matches Vimeo URLs
	1987	_VALID_URL = r'(?:https?://)?(?:(?:www\|player).)?vimeo\.com/(?:groups/[^/]+/)?(?:videos?/)?([0-9]+)'
	1988	IE_NAME = u'vimeo'
	1989
	1990	def __init__(self, downloader=None):
	1991	InfoExtractor.__init__(self, downloader)
	1992
	1993	def report_download_webpage(self, video_id):
	1994	"""Report webpage download."""
	1995	self._downloader.to_screen(u'[vimeo] %s: Downloading webpage' % video_id)
	1996
	1997	def report_extraction(self, video_id):
	1998	"""Report information extraction."""
	1999	self._downloader.to_screen(u'[vimeo] %s: Extracting information' % video_id)
	2000
	2001	def _real_extract(self, url, new_video=True):
	2002	# Extract ID from URL
	2003	mobj = re.match(self._VALID_URL, url)
	2004	if mobj is None:
	2005	self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
	2006	return
	2007
	2008	# At this point we have a new video
	2009	self._downloader.increment_downloads()
	2010	video_id = mobj.group(1)
	2011
	2012	# Retrieve video webpage to extract further information
	2013	request = urllib2.Request("http://vimeo.com/moogaloop/load/clip:%s" % video_id, None, std_headers)
	2014	try:
	2015	self.report_download_webpage(video_id)
	2016	webpage = urllib2.urlopen(request).read()
	2017	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	2018	self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
	2019	return
	2020
	2021	# Now we begin extracting as much information as we can from what we
	2022	# retrieved. First we extract the information common to all extractors,
	2023	# and latter we extract those that are Vimeo specific.
	2024	self.report_extraction(video_id)
	2025
	2026	# Extract title
	2027	mobj = re.search(r'<caption>(.*?)</caption>', webpage)
	2028	if mobj is None:
	2029	self._downloader.trouble(u'ERROR: unable to extract video title')
	2030	return
	2031	video_title = mobj.group(1).decode('utf-8')
	2032	simple_title = _simplify_title(video_title)
	2033
	2034	# Extract uploader
	2035	mobj = re.search(r'<uploader_url>http://vimeo.com/(.*?)</uploader_url>', webpage)
	2036	if mobj is None:
	2037	self._downloader.trouble(u'ERROR: unable to extract video uploader')
	2038	return
	2039	video_uploader = mobj.group(1).decode('utf-8')
	2040
	2041	# Extract video thumbnail
	2042	mobj = re.search(r'<thumbnail>(.*?)</thumbnail>', webpage)
	2043	if mobj is None:
	2044	self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
	2045	return
	2046	video_thumbnail = mobj.group(1).decode('utf-8')
	2047
	2048	# # Extract video description
	2049	# mobj = re.search(r'<meta property="og:description" content="(.*)" />', webpage)
	2050	# if mobj is None:
	2051	# self._downloader.trouble(u'ERROR: unable to extract video description')
	2052	# return
	2053	# video_description = mobj.group(1).decode('utf-8')
	2054	# if not video_description: video_description = 'No description available.'
	2055	video_description = 'Foo.'
	2056
	2057	# Vimeo specific: extract request signature
	2058	mobj = re.search(r'<request_signature>(.*?)</request_signature>', webpage)
	2059	if mobj is None:
	2060	self._downloader.trouble(u'ERROR: unable to extract request signature')
	2061	return
	2062	sig = mobj.group(1).decode('utf-8')
	2063
	2064	# Vimeo specific: extract video quality information
	2065	mobj = re.search(r'<isHD>(\d+)</isHD>', webpage)
	2066	if mobj is None:
	2067	self._downloader.trouble(u'ERROR: unable to extract video quality information')
	2068	return
	2069	quality = mobj.group(1).decode('utf-8')
	2070
	2071	if int(quality) == 1:
	2072	quality = 'hd'
	2073	else:
	2074	quality = 'sd'
	2075
	2076	# Vimeo specific: Extract request signature expiration
	2077	mobj = re.search(r'<request_signature_expires>(.*?)</request_signature_expires>', webpage)
	2078	if mobj is None:
	2079	self._downloader.trouble(u'ERROR: unable to extract request signature expiration')
	2080	return
	2081	sig_exp = mobj.group(1).decode('utf-8')
	2082
	2083	video_url = "http://vimeo.com/moogaloop/play/clip:%s/%s/%s/?q=%s" % (video_id, sig, sig_exp, quality)
	2084
	2085	try:
	2086	# Process video information
	2087	self._downloader.process_info({
	2088	'id': video_id.decode('utf-8'),
	2089	'url': video_url,
	2090	'uploader': video_uploader,
	2091	'upload_date': u'NA',
	2092	'title': video_title,
	2093	'stitle': simple_title,
	2094	'ext': u'mp4',
	2095	'thumbnail': video_thumbnail.decode('utf-8'),
	2096	'description': video_description,
	2097	'thumbnail': video_thumbnail,
	2098	'description': video_description,
	2099	'player_url': None,
	2100	})
	2101	except UnavailableVideoError:
	2102	self._downloader.trouble(u'ERROR: unable to download video')
	2103
	2104
	2105	class GenericIE(InfoExtractor):
	2106	"""Generic last-resort information extractor."""
	2107
	2108	_VALID_URL = r'.*'
	2109	IE_NAME = u'generic'
	2110
	2111	def __init__(self, downloader=None):
	2112	InfoExtractor.__init__(self, downloader)
	2113
	2114	def report_download_webpage(self, video_id):
	2115	"""Report webpage download."""
	2116	self._downloader.to_screen(u'WARNING: Falling back on generic information extractor.')
	2117	self._downloader.to_screen(u'[generic] %s: Downloading webpage' % video_id)
	2118
	2119	def report_extraction(self, video_id):
	2120	"""Report information extraction."""
	2121	self._downloader.to_screen(u'[generic] %s: Extracting information' % video_id)
	2122
	2123	def _real_extract(self, url):
	2124	# At this point we have a new video
	2125	self._downloader.increment_downloads()
	2126
	2127	video_id = url.split('/')[-1]
	2128	request = urllib2.Request(url)
	2129	try:
	2130	self.report_download_webpage(video_id)
	2131	webpage = urllib2.urlopen(request).read()
	2132	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	2133	self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
	2134	return
	2135	except ValueError, err:
	2136	# since this is the last-resort InfoExtractor, if
	2137	# this error is thrown, it'll be thrown here
	2138	self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
	2139	return
	2140
	2141	self.report_extraction(video_id)
	2142	# Start with something easy: JW Player in SWFObject
	2143	mobj = re.search(r'flashvars: [\'"](?:.&)?file=(http[^\'"&])', webpage)
	2144	if mobj is None:
	2145	# Broaden the search a little bit
	2146	mobj = re.search(r'[^A-Za-z0-9]?(?:file\|source)=(http[^\'"&]*)', webpage)
	2147	if mobj is None:
	2148	self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
	2149	return
	2150
	2151	# It's possible that one of the regexes
	2152	# matched, but returned an empty group:
	2153	if mobj.group(1) is None:
	2154	self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
	2155	return
	2156
	2157	video_url = urllib.unquote(mobj.group(1))
	2158	video_id = os.path.basename(video_url)
	2159
	2160	# here's a fun little line of code for you:
	2161	video_extension = os.path.splitext(video_id)[1][1:]
	2162	video_id = os.path.splitext(video_id)[0]
	2163
	2164	# it's tempting to parse this further, but you would
	2165	# have to take into account all the variations like
	2166	# Video Title - Site Name
	2167	# Site Name \| Video Title
	2168	# Video Title - Tagline \| Site Name
	2169	# and so on and so forth; it's just not practical
	2170	mobj = re.search(r'<title>(.*)</title>', webpage)
	2171	if mobj is None:
	2172	self._downloader.trouble(u'ERROR: unable to extract title')
	2173	return
	2174	video_title = mobj.group(1).decode('utf-8')
	2175	video_title = sanitize_title(video_title)
	2176	simple_title = _simplify_title(video_title)
	2177
	2178	# video uploader is domain name
	2179	mobj = re.match(r'(?:https?://)?([^/])/.', url)
	2180	if mobj is None:
	2181	self._downloader.trouble(u'ERROR: unable to extract title')
	2182	return
	2183	video_uploader = mobj.group(1).decode('utf-8')
	2184
	2185	try:
	2186	# Process video information
	2187	self._downloader.process_info({
	2188	'id': video_id.decode('utf-8'),
	2189	'url': video_url.decode('utf-8'),
	2190	'uploader': video_uploader,
	2191	'upload_date': u'NA',
	2192	'title': video_title,
	2193	'stitle': simple_title,
	2194	'ext': video_extension.decode('utf-8'),
	2195	'format': u'NA',
	2196	'player_url': None,
	2197	})
	2198	except UnavailableVideoError, err:
	2199	self._downloader.trouble(u'\nERROR: unable to download video')
	2200
	2201
	2202	class YoutubeSearchIE(InfoExtractor):
	2203	"""Information Extractor for YouTube search queries."""
	2204	_VALID_URL = r'ytsearch(\d+\|all)?:[\s\S]+'
	2205	_TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en'
	2206	_VIDEO_INDICATOR = r'href="/watch\?v=.+?"'
	2207	_MORE_PAGES_INDICATOR = r'(?m)>\sNext\s</a>'
	2208	_youtube_ie = None
	2209	_max_youtube_results = 1000
	2210	IE_NAME = u'youtube:search'
	2211
	2212	def __init__(self, youtube_ie, downloader=None):
	2213	InfoExtractor.__init__(self, downloader)
	2214	self._youtube_ie = youtube_ie
	2215
	2216	def report_download_page(self, query, pagenum):
	2217	"""Report attempt to download playlist page with given number."""
	2218	query = query.decode(preferredencoding())
	2219	self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
	2220
	2221	def _real_initialize(self):
	2222	self._youtube_ie.initialize()
	2223
	2224	def _real_extract(self, query):
	2225	mobj = re.match(self._VALID_URL, query)
	2226	if mobj is None:
	2227	self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
	2228	return
	2229
	2230	prefix, query = query.split(':')
	2231	prefix = prefix[8:]
	2232	query = query.encode('utf-8')
	2233	if prefix == '':
	2234	self._download_n_results(query, 1)
	2235	return
	2236	elif prefix == 'all':
	2237	self._download_n_results(query, self._max_youtube_results)
	2238	return
	2239	else:
	2240	try:
	2241	n = long(prefix)
	2242	if n <= 0:
	2243	self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
	2244	return
	2245	elif n > self._max_youtube_results:
	2246	self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)' % (self._max_youtube_results, n))
	2247	n = self._max_youtube_results
	2248	self._download_n_results(query, n)
	2249	return
	2250	except ValueError: # parsing prefix as integer fails
	2251	self._download_n_results(query, 1)
	2252	return
	2253
	2254	def _download_n_results(self, query, n):
	2255	"""Downloads a specified number of results for a query"""
	2256
	2257	video_ids = []
	2258	already_seen = set()
	2259	pagenum = 1
	2260
	2261	while True:
	2262	self.report_download_page(query, pagenum)
	2263	result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
	2264	request = urllib2.Request(result_url)
	2265	try:
	2266	page = urllib2.urlopen(request).read()
	2267	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	2268	self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
	2269	return
	2270
	2271	# Extract video identifiers
	2272	for mobj in re.finditer(self._VIDEO_INDICATOR, page):
	2273	video_id = page[mobj.span()[0]:mobj.span()[1]].split('=')[2][:-1]
	2274	if video_id not in already_seen:
	2275	video_ids.append(video_id)
	2276	already_seen.add(video_id)
	2277	if len(video_ids) == n:
	2278	# Specified n videos reached
	2279	for id in video_ids:
	2280	self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
	2281	return
	2282
	2283	if re.search(self._MORE_PAGES_INDICATOR, page) is None:
	2284	for id in video_ids:
	2285	self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
	2286	return
	2287
	2288	pagenum = pagenum + 1
	2289
	2290
	2291	class GoogleSearchIE(InfoExtractor):
	2292	"""Information Extractor for Google Video search queries."""
	2293	_VALID_URL = r'gvsearch(\d+\|all)?:[\s\S]+'
	2294	_TEMPLATE_URL = 'http://video.google.com/videosearch?q=%s+site:video.google.com&start=%s&hl=en'
	2295	_VIDEO_INDICATOR = r'videoplay\?docid=([^\&>]+)\&'
	2296	_MORE_PAGES_INDICATOR = r'<span>Next</span>'
	2297	_google_ie = None
	2298	_max_google_results = 1000
	2299	IE_NAME = u'video.google:search'
	2300
	2301	def __init__(self, google_ie, downloader=None):
	2302	InfoExtractor.__init__(self, downloader)
	2303	self._google_ie = google_ie
	2304
	2305	def report_download_page(self, query, pagenum):
	2306	"""Report attempt to download playlist page with given number."""
	2307	query = query.decode(preferredencoding())
	2308	self._downloader.to_screen(u'[video.google] query "%s": Downloading page %s' % (query, pagenum))
	2309
	2310	def _real_initialize(self):
	2311	self._google_ie.initialize()
	2312
	2313	def _real_extract(self, query):
	2314	mobj = re.match(self._VALID_URL, query)
	2315	if mobj is None:
	2316	self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
	2317	return
	2318
	2319	prefix, query = query.split(':')
	2320	prefix = prefix[8:]
	2321	query = query.encode('utf-8')
	2322	if prefix == '':
	2323	self._download_n_results(query, 1)
	2324	return
	2325	elif prefix == 'all':
	2326	self._download_n_results(query, self._max_google_results)
	2327	return
	2328	else:
	2329	try:
	2330	n = long(prefix)
	2331	if n <= 0:
	2332	self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
	2333	return
	2334	elif n > self._max_google_results:
	2335	self._downloader.to_stderr(u'WARNING: gvsearch returns max %i results (you requested %i)' % (self._max_google_results, n))
	2336	n = self._max_google_results
	2337	self._download_n_results(query, n)
	2338	return
	2339	except ValueError: # parsing prefix as integer fails
	2340	self._download_n_results(query, 1)
	2341	return
	2342
	2343	def _download_n_results(self, query, n):
	2344	"""Downloads a specified number of results for a query"""
	2345
	2346	video_ids = []
	2347	already_seen = set()
	2348	pagenum = 1
	2349
	2350	while True:
	2351	self.report_download_page(query, pagenum)
	2352	result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
	2353	request = urllib2.Request(result_url)
	2354	try:
	2355	page = urllib2.urlopen(request).read()
	2356	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	2357	self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
	2358	return
	2359
	2360	# Extract video identifiers
	2361	for mobj in re.finditer(self._VIDEO_INDICATOR, page):
	2362	video_id = mobj.group(1)
	2363	if video_id not in already_seen:
	2364	video_ids.append(video_id)
	2365	already_seen.add(video_id)
	2366	if len(video_ids) == n:
	2367	# Specified n videos reached
	2368	for id in video_ids:
	2369	self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
	2370	return
	2371
	2372	if re.search(self._MORE_PAGES_INDICATOR, page) is None:
	2373	for id in video_ids:
	2374	self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
	2375	return
	2376
	2377	pagenum = pagenum + 1
	2378
	2379
	2380	class YahooSearchIE(InfoExtractor):
	2381	"""Information Extractor for Yahoo! Video search queries."""
	2382	_VALID_URL = r'yvsearch(\d+\|all)?:[\s\S]+'
	2383	_TEMPLATE_URL = 'http://video.yahoo.com/search/?p=%s&o=%s'
	2384	_VIDEO_INDICATOR = r'href="http://video\.yahoo\.com/watch/([0-9]+/[0-9]+)"'
	2385	_MORE_PAGES_INDICATOR = r'\s*Next'
	2386	_yahoo_ie = None
	2387	_max_yahoo_results = 1000
	2388	IE_NAME = u'video.yahoo:search'
	2389
	2390	def __init__(self, yahoo_ie, downloader=None):
	2391	InfoExtractor.__init__(self, downloader)
	2392	self._yahoo_ie = yahoo_ie
	2393
	2394	def report_download_page(self, query, pagenum):
	2395	"""Report attempt to download playlist page with given number."""
	2396	query = query.decode(preferredencoding())
	2397	self._downloader.to_screen(u'[video.yahoo] query "%s": Downloading page %s' % (query, pagenum))
	2398
	2399	def _real_initialize(self):
	2400	self._yahoo_ie.initialize()
	2401
	2402	def _real_extract(self, query):
	2403	mobj = re.match(self._VALID_URL, query)
	2404	if mobj is None:
	2405	self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
	2406	return
	2407
	2408	prefix, query = query.split(':')
	2409	prefix = prefix[8:]
	2410	query = query.encode('utf-8')
	2411	if prefix == '':
	2412	self._download_n_results(query, 1)
	2413	return
	2414	elif prefix == 'all':
	2415	self._download_n_results(query, self._max_yahoo_results)
	2416	return
	2417	else:
	2418	try:
	2419	n = long(prefix)
	2420	if n <= 0:
	2421	self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
	2422	return
	2423	elif n > self._max_yahoo_results:
	2424	self._downloader.to_stderr(u'WARNING: yvsearch returns max %i results (you requested %i)' % (self._max_yahoo_results, n))
	2425	n = self._max_yahoo_results
	2426	self._download_n_results(query, n)
	2427	return
	2428	except ValueError: # parsing prefix as integer fails
	2429	self._download_n_results(query, 1)
	2430	return
	2431
	2432	def _download_n_results(self, query, n):
	2433	"""Downloads a specified number of results for a query"""
	2434
	2435	video_ids = []
	2436	already_seen = set()
	2437	pagenum = 1
	2438
	2439	while True:
	2440	self.report_download_page(query, pagenum)
	2441	result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
	2442	request = urllib2.Request(result_url)
	2443	try:
	2444	page = urllib2.urlopen(request).read()
	2445	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	2446	self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
	2447	return
	2448
	2449	# Extract video identifiers
	2450	for mobj in re.finditer(self._VIDEO_INDICATOR, page):
	2451	video_id = mobj.group(1)
	2452	if video_id not in already_seen:
	2453	video_ids.append(video_id)
	2454	already_seen.add(video_id)
	2455	if len(video_ids) == n:
	2456	# Specified n videos reached
	2457	for id in video_ids:
	2458	self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
	2459	return
	2460
	2461	if re.search(self._MORE_PAGES_INDICATOR, page) is None:
	2462	for id in video_ids:
	2463	self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
	2464	return
	2465
	2466	pagenum = pagenum + 1
	2467
	2468
	2469	class YoutubePlaylistIE(InfoExtractor):
	2470	"""Information Extractor for YouTube playlists."""
	2471
	2472	_VALID_URL = r'(?:https?://)?(?:\w+\.)?youtube\.com/(?:(?:course\|view_play_list\|my_playlists\|artist\|playlist)\?.?(p\|a\|list)=\|user/.?/user/\|p/\|user/.?#[pg]/c/)(?:PL)?([0-9A-Za-z-_]+)(?:/.?/([0-9A-Za-z_-]+))?.*'
	2473	_TEMPLATE_URL = 'http://www.youtube.com/%s?%s=%s&page=%s&gl=US&hl=en'
	2474	_VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
	2475	_MORE_PAGES_INDICATOR = r'(?m)>\sNext\s</a>'
	2476	_youtube_ie = None
	2477	IE_NAME = u'youtube:playlist'
	2478
	2479	def __init__(self, youtube_ie, downloader=None):
	2480	InfoExtractor.__init__(self, downloader)
	2481	self._youtube_ie = youtube_ie
	2482
	2483	def report_download_page(self, playlist_id, pagenum):
	2484	"""Report attempt to download playlist page with given number."""
	2485	self._downloader.to_screen(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum))
	2486
	2487	def _real_initialize(self):
	2488	self._youtube_ie.initialize()
	2489
	2490	def _real_extract(self, url):
	2491	# Extract playlist id
	2492	mobj = re.match(self._VALID_URL, url)
	2493	if mobj is None:
	2494	self._downloader.trouble(u'ERROR: invalid url: %s' % url)
	2495	return
	2496
	2497	# Single video case
	2498	if mobj.group(3) is not None:
	2499	self._youtube_ie.extract(mobj.group(3))
	2500	return
	2501
	2502	# Download playlist pages
	2503	# prefix is 'p' as default for playlists but there are other types that need extra care
	2504	playlist_prefix = mobj.group(1)
	2505	if playlist_prefix == 'a':
	2506	playlist_access = 'artist'
	2507	else:
	2508	playlist_prefix = 'p'
	2509	playlist_access = 'view_play_list'
	2510	playlist_id = mobj.group(2)
	2511	video_ids = []
	2512	pagenum = 1
	2513
	2514	while True:
	2515	self.report_download_page(playlist_id, pagenum)
	2516	url = self._TEMPLATE_URL % (playlist_access, playlist_prefix, playlist_id, pagenum)
	2517	request = urllib2.Request(url)
	2518	try:
	2519	page = urllib2.urlopen(request).read()
	2520	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	2521	self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
	2522	return
	2523
	2524	# Extract video identifiers
	2525	ids_in_page = []
	2526	for mobj in re.finditer(self._VIDEO_INDICATOR, page):
	2527	if mobj.group(1) not in ids_in_page:
	2528	ids_in_page.append(mobj.group(1))
	2529	video_ids.extend(ids_in_page)
	2530
	2531	if re.search(self._MORE_PAGES_INDICATOR, page) is None:
	2532	break
	2533	pagenum = pagenum + 1
	2534
	2535	playliststart = self._downloader.params.get('playliststart', 1) - 1
	2536	playlistend = self._downloader.params.get('playlistend', -1)
	2537	video_ids = video_ids[playliststart:playlistend]
	2538
	2539	for id in video_ids:
	2540	self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
	2541	return
	2542
	2543
	2544	class YoutubeUserIE(InfoExtractor):
	2545	"""Information Extractor for YouTube users."""
	2546
	2547	_VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/user/)\|ytuser:)([A-Za-z0-9_-]+)'
	2548	_TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
	2549	_GDATA_PAGE_SIZE = 50
	2550	_GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d'
	2551	_VIDEO_INDICATOR = r'/watch\?v=(.+?)[\<&]'
	2552	_youtube_ie = None
	2553	IE_NAME = u'youtube:user'
	2554
	2555	def __init__(self, youtube_ie, downloader=None):
	2556	InfoExtractor.__init__(self, downloader)
	2557	self._youtube_ie = youtube_ie
	2558
	2559	def report_download_page(self, username, start_index):
	2560	"""Report attempt to download user page."""
	2561	self._downloader.to_screen(u'[youtube] user %s: Downloading video ids from %d to %d' %
	2562	(username, start_index, start_index + self._GDATA_PAGE_SIZE))
	2563
	2564	def _real_initialize(self):
	2565	self._youtube_ie.initialize()
	2566
	2567	def _real_extract(self, url):
	2568	# Extract username
	2569	mobj = re.match(self._VALID_URL, url)
	2570	if mobj is None:
	2571	self._downloader.trouble(u'ERROR: invalid url: %s' % url)
	2572	return
	2573
	2574	username = mobj.group(1)
	2575
	2576	# Download video ids using YouTube Data API. Result size per
	2577	# query is limited (currently to 50 videos) so we need to query
	2578	# page by page until there are no video ids - it means we got
	2579	# all of them.
	2580
	2581	video_ids = []
	2582	pagenum = 0
	2583
	2584	while True:
	2585	start_index = pagenum * self._GDATA_PAGE_SIZE + 1
	2586	self.report_download_page(username, start_index)
	2587
	2588	request = urllib2.Request(self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index))
	2589
	2590	try:
	2591	page = urllib2.urlopen(request).read()
	2592	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	2593	self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
	2594	return
	2595
	2596	# Extract video identifiers
	2597	ids_in_page = []
	2598
	2599	for mobj in re.finditer(self._VIDEO_INDICATOR, page):
	2600	if mobj.group(1) not in ids_in_page:
	2601	ids_in_page.append(mobj.group(1))
	2602
	2603	video_ids.extend(ids_in_page)
	2604
	2605	# A little optimization - if current page is not
	2606	# "full", ie. does not contain PAGE_SIZE video ids then
	2607	# we can assume that this page is the last one - there
	2608	# are no more ids on further pages - no need to query
	2609	# again.
	2610
	2611	if len(ids_in_page) < self._GDATA_PAGE_SIZE:
	2612	break
	2613
	2614	pagenum += 1
	2615
	2616	all_ids_count = len(video_ids)
	2617	playliststart = self._downloader.params.get('playliststart', 1) - 1
	2618	playlistend = self._downloader.params.get('playlistend', -1)
	2619
	2620	if playlistend == -1:
	2621	video_ids = video_ids[playliststart:]
	2622	else:
	2623	video_ids = video_ids[playliststart:playlistend]
	2624
	2625	self._downloader.to_screen("[youtube] user %s: Collected %d video ids (downloading %d of them)" %
	2626	(username, all_ids_count, len(video_ids)))
	2627
	2628	for video_id in video_ids:
	2629	self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % video_id)
	2630
	2631
	2632	class DepositFilesIE(InfoExtractor):
	2633	"""Information extractor for depositfiles.com"""
	2634
	2635	_VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles\.com/(?:../(?#locale))?files/(.+)'
	2636	IE_NAME = u'DepositFiles'
	2637
	2638	def __init__(self, downloader=None):
	2639	InfoExtractor.__init__(self, downloader)
	2640
	2641	def report_download_webpage(self, file_id):
	2642	"""Report webpage download."""
	2643	self._downloader.to_screen(u'[DepositFiles] %s: Downloading webpage' % file_id)
	2644
	2645	def report_extraction(self, file_id):
	2646	"""Report information extraction."""
	2647	self._downloader.to_screen(u'[DepositFiles] %s: Extracting information' % file_id)
	2648
	2649	def _real_extract(self, url):
	2650	# At this point we have a new file
	2651	self._downloader.increment_downloads()
	2652
	2653	file_id = url.split('/')[-1]
	2654	# Rebuild url in english locale
	2655	url = 'http://depositfiles.com/en/files/' + file_id
	2656
	2657	# Retrieve file webpage with 'Free download' button pressed
	2658	free_download_indication = { 'gateway_result' : '1' }
	2659	request = urllib2.Request(url, urllib.urlencode(free_download_indication))
	2660	try:
	2661	self.report_download_webpage(file_id)
	2662	webpage = urllib2.urlopen(request).read()
	2663	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	2664	self._downloader.trouble(u'ERROR: Unable to retrieve file webpage: %s' % str(err))
	2665	return
	2666
	2667	# Search for the real file URL
	2668	mobj = re.search(r'<form action="(http://fileshare.+?)"', webpage)
	2669	if (mobj is None) or (mobj.group(1) is None):
	2670	# Try to figure out reason of the error.
	2671	mobj = re.search(r'<strong>(Attention.*?)</strong>', webpage, re.DOTALL)
	2672	if (mobj is not None) and (mobj.group(1) is not None):
	2673	restriction_message = re.sub('\s+', ' ', mobj.group(1)).strip()
	2674	self._downloader.trouble(u'ERROR: %s' % restriction_message)
	2675	else:
	2676	self._downloader.trouble(u'ERROR: unable to extract download URL from: %s' % url)
	2677	return
	2678
	2679	file_url = mobj.group(1)
	2680	file_extension = os.path.splitext(file_url)[1][1:]
	2681
	2682	# Search for file title
	2683	mobj = re.search(r'<b title="(.*?)">', webpage)
	2684	if mobj is None:
	2685	self._downloader.trouble(u'ERROR: unable to extract title')
	2686	return
	2687	file_title = mobj.group(1).decode('utf-8')
	2688
	2689	try:
	2690	# Process file information
	2691	self._downloader.process_info({
	2692	'id': file_id.decode('utf-8'),
	2693	'url': file_url.decode('utf-8'),
	2694	'uploader': u'NA',
	2695	'upload_date': u'NA',
	2696	'title': file_title,
	2697	'stitle': file_title,
	2698	'ext': file_extension.decode('utf-8'),
	2699	'format': u'NA',
	2700	'player_url': None,
	2701	})
	2702	except UnavailableVideoError, err:
	2703	self._downloader.trouble(u'ERROR: unable to download file')
	2704
	2705
	2706	class FacebookIE(InfoExtractor):
	2707	"""Information Extractor for Facebook"""
	2708
	2709	_VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook\.com/(?:video/video\|photo)\.php\?(?:.?)v=(?P<ID>\d+)(?:.)'
	2710	_LOGIN_URL = 'https://login.facebook.com/login.php?m&next=http%3A%2F%2Fm.facebook.com%2Fhome.php&'
	2711	_NETRC_MACHINE = 'facebook'
	2712	_available_formats = ['video', 'highqual', 'lowqual']
	2713	_video_extensions = {
	2714	'video': 'mp4',
	2715	'highqual': 'mp4',
	2716	'lowqual': 'mp4',
	2717	}
	2718	IE_NAME = u'facebook'
	2719
	2720	def __init__(self, downloader=None):
	2721	InfoExtractor.__init__(self, downloader)
	2722
	2723	def _reporter(self, message):
	2724	"""Add header and report message."""
	2725	self._downloader.to_screen(u'[facebook] %s' % message)
	2726
	2727	def report_login(self):
	2728	"""Report attempt to log in."""
	2729	self._reporter(u'Logging in')
	2730
	2731	def report_video_webpage_download(self, video_id):
	2732	"""Report attempt to download video webpage."""
	2733	self._reporter(u'%s: Downloading video webpage' % video_id)
	2734
	2735	def report_information_extraction(self, video_id):
	2736	"""Report attempt to extract video information."""
	2737	self._reporter(u'%s: Extracting video information' % video_id)
	2738
	2739	def _parse_page(self, video_webpage):
	2740	"""Extract video information from page"""
	2741	# General data
	2742	data = {'title': r'$"video_title", "(.*?)"$',
	2743	'description': r'<div class="datawrap">(.*?)</div>',
	2744	'owner': r'$"video_owner_name", "(.*?)"$',
	2745	'thumbnail': r'$"thumb_url", "(?P<THUMB>.*?)"$',
	2746	}
	2747	video_info = {}
	2748	for piece in data.keys():
	2749	mobj = re.search(data[piece], video_webpage)
	2750	if mobj is not None:
	2751	video_info[piece] = urllib.unquote_plus(mobj.group(1).decode("unicode_escape"))
	2752
	2753	# Video urls
	2754	video_urls = {}
	2755	for fmt in self._available_formats:
	2756	mobj = re.search(r'$"%s_src\", "(.+?)"$' % fmt, video_webpage)
	2757	if mobj is not None:
	2758	# URL is in a Javascript segment inside an escaped Unicode format within
	2759	# the generally utf-8 page
	2760	video_urls[fmt] = urllib.unquote_plus(mobj.group(1).decode("unicode_escape"))
	2761	video_info['video_urls'] = video_urls
	2762
	2763	return video_info
	2764
	2765	def _real_initialize(self):
	2766	if self._downloader is None:
	2767	return
	2768
	2769	useremail = None
	2770	password = None
	2771	downloader_params = self._downloader.params
	2772
	2773	# Attempt to use provided username and password or .netrc data
	2774	if downloader_params.get('username', None) is not None:
	2775	useremail = downloader_params['username']
	2776	password = downloader_params['password']
	2777	elif downloader_params.get('usenetrc', False):
	2778	try:
	2779	info = netrc.netrc().authenticators(self._NETRC_MACHINE)
	2780	if info is not None:
	2781	useremail = info[0]
	2782	password = info[2]
	2783	else:
	2784	raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
	2785	except (IOError, netrc.NetrcParseError), err:
	2786	self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
	2787	return
	2788
	2789	if useremail is None:
	2790	return
	2791
	2792	# Log in
	2793	login_form = {
	2794	'email': useremail,
	2795	'pass': password,
	2796	'login': 'Log+In'
	2797	}
	2798	request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form))
	2799	try:
	2800	self.report_login()
	2801	login_results = urllib2.urlopen(request).read()
	2802	if re.search(r'<form(.)name="login"(.)</form>', login_results) is not None:
	2803	self._downloader.to_stderr(u'WARNING: unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
	2804	return
	2805	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	2806	self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
	2807	return
	2808
	2809	def _real_extract(self, url):
	2810	mobj = re.match(self._VALID_URL, url)
	2811	if mobj is None:
	2812	self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
	2813	return
	2814	video_id = mobj.group('ID')
	2815
	2816	# Get video webpage
	2817	self.report_video_webpage_download(video_id)
	2818	request = urllib2.Request('https://www.facebook.com/video/video.php?v=%s' % video_id)
	2819	try:
	2820	page = urllib2.urlopen(request)
	2821	video_webpage = page.read()
	2822	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	2823	self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
	2824	return
	2825
	2826	# Start extracting information
	2827	self.report_information_extraction(video_id)
	2828
	2829	# Extract information
	2830	video_info = self._parse_page(video_webpage)
	2831
	2832	# uploader
	2833	if 'owner' not in video_info:
	2834	self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
	2835	return
	2836	video_uploader = video_info['owner']
	2837
	2838	# title
	2839	if 'title' not in video_info:
	2840	self._downloader.trouble(u'ERROR: unable to extract video title')
	2841	return
	2842	video_title = video_info['title']
	2843	video_title = video_title.decode('utf-8')
	2844	video_title = sanitize_title(video_title)
	2845
	2846	simple_title = _simplify_title(video_title)
	2847
	2848	# thumbnail image
	2849	if 'thumbnail' not in video_info:
	2850	self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
	2851	video_thumbnail = ''
	2852	else:
	2853	video_thumbnail = video_info['thumbnail']
	2854
	2855	# upload date
	2856	upload_date = u'NA'
	2857	if 'upload_date' in video_info:
	2858	upload_time = video_info['upload_date']
	2859	timetuple = email.utils.parsedate_tz(upload_time)
	2860	if timetuple is not None:
	2861	try:
	2862	upload_date = time.strftime('%Y%m%d', timetuple[0:9])
	2863	except:
	2864	pass
	2865
	2866	# description
	2867	video_description = video_info.get('description', 'No description available.')
	2868
	2869	url_map = video_info['video_urls']
	2870	if len(url_map.keys()) > 0:
	2871	# Decide which formats to download
	2872	req_format = self._downloader.params.get('format', None)
	2873	format_limit = self._downloader.params.get('format_limit', None)
	2874
	2875	if format_limit is not None and format_limit in self._available_formats:
	2876	format_list = self._available_formats[self._available_formats.index(format_limit):]
	2877	else:
	2878	format_list = self._available_formats
	2879	existing_formats = [x for x in format_list if x in url_map]
	2880	if len(existing_formats) == 0:
	2881	self._downloader.trouble(u'ERROR: no known formats available for video')
	2882	return
	2883	if req_format is None:
	2884	video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
	2885	elif req_format == 'worst':
	2886	video_url_list = [(existing_formats[len(existing_formats)-1], url_map[existing_formats[len(existing_formats)-1]])] # worst quality
	2887	elif req_format == '-1':
	2888	video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
	2889	else:
	2890	# Specific format
	2891	if req_format not in url_map:
	2892	self._downloader.trouble(u'ERROR: requested format not available')
	2893	return
	2894	video_url_list = [(req_format, url_map[req_format])] # Specific format
	2895
	2896	for format_param, video_real_url in video_url_list:
	2897
	2898	# At this point we have a new video
	2899	self._downloader.increment_downloads()
	2900
	2901	# Extension
	2902	video_extension = self._video_extensions.get(format_param, 'mp4')
	2903
	2904	try:
	2905	# Process video information
	2906	self._downloader.process_info({
	2907	'id': video_id.decode('utf-8'),
	2908	'url': video_real_url.decode('utf-8'),
	2909	'uploader': video_uploader.decode('utf-8'),
	2910	'upload_date': upload_date,
	2911	'title': video_title,
	2912	'stitle': simple_title,
	2913	'ext': video_extension.decode('utf-8'),
	2914	'format': (format_param is None and u'NA' or format_param.decode('utf-8')),
	2915	'thumbnail': video_thumbnail.decode('utf-8'),
	2916	'description': video_description.decode('utf-8'),
	2917	'player_url': None,
	2918	})
	2919	except UnavailableVideoError, err:
	2920	self._downloader.trouble(u'\nERROR: unable to download video')
	2921
	2922	class BlipTVIE(InfoExtractor):
	2923	"""Information extractor for blip.tv"""
	2924
	2925	_VALID_URL = r'^(?:https?://)?(?:\w+\.)?blip\.tv(/.+)$'
	2926	_URL_EXT = r'^.*\.([a-z0-9]+)$'
	2927	IE_NAME = u'blip.tv'
	2928
	2929	def report_extraction(self, file_id):
	2930	"""Report information extraction."""
	2931	self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, file_id))
	2932
	2933	def report_direct_download(self, title):
	2934	"""Report information extraction."""
	2935	self._downloader.to_screen(u'[%s] %s: Direct download detected' % (self.IE_NAME, title))
	2936
	2937	def _real_extract(self, url):
	2938	mobj = re.match(self._VALID_URL, url)
	2939	if mobj is None:
	2940	self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
	2941	return
	2942
	2943	if '?' in url:
	2944	cchar = '&'
	2945	else:
	2946	cchar = '?'
	2947	json_url = url + cchar + 'skin=json&version=2&no_wrap=1'
	2948	request = urllib2.Request(json_url)
	2949	self.report_extraction(mobj.group(1))
	2950	info = None
	2951	try:
	2952	urlh = urllib2.urlopen(request)
	2953	if urlh.headers.get('Content-Type', '').startswith('video/'): # Direct download
	2954	basename = url.split('/')[-1]
	2955	title,ext = os.path.splitext(basename)
	2956	title = title.decode('UTF-8')
	2957	ext = ext.replace('.', '')
	2958	self.report_direct_download(title)
	2959	info = {
	2960	'id': title,
	2961	'url': url,
	2962	'title': title,
	2963	'stitle': _simplify_title(title),
	2964	'ext': ext,
	2965	'urlhandle': urlh
	2966	}
	2967	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	2968	self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
	2969	return
	2970	if info is None: # Regular URL
	2971	try:
	2972	json_code = urlh.read()
	2973	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	2974	self._downloader.trouble(u'ERROR: unable to read video info webpage: %s' % str(err))
	2975	return
	2976
	2977	try:
	2978	json_data = json.loads(json_code)
	2979	if 'Post' in json_data:
	2980	data = json_data['Post']
	2981	else:
	2982	data = json_data
	2983
	2984	upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d')
	2985	video_url = data['media']['url']
	2986	umobj = re.match(self._URL_EXT, video_url)
	2987	if umobj is None:
	2988	raise ValueError('Can not determine filename extension')
	2989	ext = umobj.group(1)
	2990
	2991	info = {
	2992	'id': data['item_id'],
	2993	'url': video_url,
	2994	'uploader': data['display_name'],
	2995	'upload_date': upload_date,
	2996	'title': data['title'],
	2997	'stitle': _simplify_title(data['title']),
	2998	'ext': ext,
	2999	'format': data['media']['mimeType'],
	3000	'thumbnail': data['thumbnailUrl'],
	3001	'description': data['description'],
	3002	'player_url': data['embedUrl']
	3003	}
	3004	except (ValueError,KeyError), err:
	3005	self._downloader.trouble(u'ERROR: unable to parse video information: %s' % repr(err))
	3006	return
	3007
	3008	self._downloader.increment_downloads()
	3009
	3010	try:
	3011	self._downloader.process_info(info)
	3012	except UnavailableVideoError, err:
	3013	self._downloader.trouble(u'\nERROR: unable to download video')
	3014
	3015
	3016	class MyVideoIE(InfoExtractor):
	3017	"""Information Extractor for myvideo.de."""
	3018
	3019	_VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/watch/([0-9]+)/([^?/]+).*'
	3020	IE_NAME = u'myvideo'
	3021
	3022	def __init__(self, downloader=None):
	3023	InfoExtractor.__init__(self, downloader)
	3024
	3025	def report_download_webpage(self, video_id):
	3026	"""Report webpage download."""
	3027	self._downloader.to_screen(u'[myvideo] %s: Downloading webpage' % video_id)
	3028
	3029	def report_extraction(self, video_id):
	3030	"""Report information extraction."""
	3031	self._downloader.to_screen(u'[myvideo] %s: Extracting information' % video_id)
	3032
	3033	def _real_extract(self,url):
	3034	mobj = re.match(self._VALID_URL, url)
	3035	if mobj is None:
	3036	self._download.trouble(u'ERROR: invalid URL: %s' % url)
	3037	return
	3038
	3039	video_id = mobj.group(1)
	3040
	3041	# Get video webpage
	3042	request = urllib2.Request('http://www.myvideo.de/watch/%s' % video_id)
	3043	try:
	3044	self.report_download_webpage(video_id)
	3045	webpage = urllib2.urlopen(request).read()
	3046	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	3047	self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
	3048	return
	3049
	3050	self.report_extraction(video_id)
	3051	mobj = re.search(r'<link rel=\'image_src\' href=\'(http://is[0-9].myvideo\.de/de/movie[0-9]+/[a-f0-9]+)/thumbs/[^.]+\.jpg\' />',
	3052	webpage)
	3053	if mobj is None:
	3054	self._downloader.trouble(u'ERROR: unable to extract media URL')
	3055	return
	3056	video_url = mobj.group(1) + ('/%s.flv' % video_id)
	3057
	3058	mobj = re.search('<title>([^<]+)</title>', webpage)
	3059	if mobj is None:
	3060	self._downloader.trouble(u'ERROR: unable to extract title')
	3061	return
	3062
	3063	video_title = mobj.group(1)
	3064	video_title = sanitize_title(video_title)
	3065
	3066	simple_title = _simplify_title(video_title)
	3067
	3068	try:
	3069	self._downloader.process_info({
	3070	'id': video_id,
	3071	'url': video_url,
	3072	'uploader': u'NA',
	3073	'upload_date': u'NA',
	3074	'title': video_title,
	3075	'stitle': simple_title,
	3076	'ext': u'flv',
	3077	'format': u'NA',
	3078	'player_url': None,
	3079	})
	3080	except UnavailableVideoError:
	3081	self._downloader.trouble(u'\nERROR: Unable to download video')
	3082
	3083	class ComedyCentralIE(InfoExtractor):
	3084	"""Information extractor for The Daily Show and Colbert Report """
	3085
	3086	_VALID_URL = r'^(:(?P<shortname>tds\|thedailyshow\|cr\|colbert\|colbertnation\|colbertreport))\|(https?://)?(www\.)?(?P<showname>thedailyshow\|colbertnation)\.com/full-episodes/(?P<episode>.*)$'
	3087	IE_NAME = u'comedycentral'
	3088
	3089	def report_extraction(self, episode_id):
	3090	self._downloader.to_screen(u'[comedycentral] %s: Extracting information' % episode_id)
	3091
	3092	def report_config_download(self, episode_id):
	3093	self._downloader.to_screen(u'[comedycentral] %s: Downloading configuration' % episode_id)
	3094
	3095	def report_index_download(self, episode_id):
	3096	self._downloader.to_screen(u'[comedycentral] %s: Downloading show index' % episode_id)
	3097
	3098	def report_player_url(self, episode_id):
	3099	self._downloader.to_screen(u'[comedycentral] %s: Determining player URL' % episode_id)
	3100
	3101	def _real_extract(self, url):
	3102	mobj = re.match(self._VALID_URL, url)
	3103	if mobj is None:
	3104	self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
	3105	return
	3106
	3107	if mobj.group('shortname'):
	3108	if mobj.group('shortname') in ('tds', 'thedailyshow'):
	3109	url = u'http://www.thedailyshow.com/full-episodes/'
	3110	else:
	3111	url = u'http://www.colbertnation.com/full-episodes/'
	3112	mobj = re.match(self._VALID_URL, url)
	3113	assert mobj is not None
	3114
	3115	dlNewest = not mobj.group('episode')
	3116	if dlNewest:
	3117	epTitle = mobj.group('showname')
	3118	else:
	3119	epTitle = mobj.group('episode')
	3120
	3121	req = urllib2.Request(url)
	3122	self.report_extraction(epTitle)
	3123	try:
	3124	htmlHandle = urllib2.urlopen(req)
	3125	html = htmlHandle.read()
	3126	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	3127	self._downloader.trouble(u'ERROR: unable to download webpage: %s' % unicode(err))
	3128	return
	3129	if dlNewest:
	3130	url = htmlHandle.geturl()
	3131	mobj = re.match(self._VALID_URL, url)
	3132	if mobj is None:
	3133	self._downloader.trouble(u'ERROR: Invalid redirected URL: ' + url)
	3134	return
	3135	if mobj.group('episode') == '':
	3136	self._downloader.trouble(u'ERROR: Redirected URL is still not specific: ' + url)
	3137	return
	3138	epTitle = mobj.group('episode')
	3139
	3140	mMovieParams = re.findall('<param name="movie" value="(http://media.mtvnservices.com/([^"]episode.?:.*?))"/>', html)
	3141	if len(mMovieParams) == 0:
	3142	self._downloader.trouble(u'ERROR: unable to find Flash URL in webpage ' + url)
	3143	return
	3144
	3145	playerUrl_raw = mMovieParams[0][0]
	3146	self.report_player_url(epTitle)
	3147	try:
	3148	urlHandle = urllib2.urlopen(playerUrl_raw)
	3149	playerUrl = urlHandle.geturl()
	3150	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	3151	self._downloader.trouble(u'ERROR: unable to find out player URL: ' + unicode(err))
	3152	return
	3153
	3154	uri = mMovieParams[0][1]
	3155	indexUrl = 'http://shadow.comedycentral.com/feeds/video_player/mrss/?' + urllib.urlencode({'uri': uri})
	3156	self.report_index_download(epTitle)
	3157	try:
	3158	indexXml = urllib2.urlopen(indexUrl).read()
	3159	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	3160	self._downloader.trouble(u'ERROR: unable to download episode index: ' + unicode(err))
	3161	return
	3162
	3163	idoc = xml.etree.ElementTree.fromstring(indexXml)
	3164	itemEls = idoc.findall('.//item')
	3165	for itemEl in itemEls:
	3166	mediaId = itemEl.findall('./guid')[0].text
	3167	shortMediaId = mediaId.split(':')[-1]
	3168	showId = mediaId.split(':')[-2].replace('.com', '')
	3169	officialTitle = itemEl.findall('./title')[0].text
	3170	officialDate = itemEl.findall('./pubDate')[0].text
	3171
	3172	configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' +
	3173	urllib.urlencode({'uri': mediaId}))
	3174	configReq = urllib2.Request(configUrl)
	3175	self.report_config_download(epTitle)
	3176	try:
	3177	configXml = urllib2.urlopen(configReq).read()
	3178	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	3179	self._downloader.trouble(u'ERROR: unable to download webpage: %s' % unicode(err))
	3180	return
	3181
	3182	cdoc = xml.etree.ElementTree.fromstring(configXml)
	3183	turls = []
	3184	for rendition in cdoc.findall('.//rendition'):
	3185	finfo = (rendition.attrib['bitrate'], rendition.findall('./src')[0].text)
	3186	turls.append(finfo)
	3187
	3188	if len(turls) == 0:
	3189	self._downloader.trouble(u'\nERROR: unable to download ' + mediaId + ': No videos found')
	3190	continue
	3191
	3192	# For now, just pick the highest bitrate
	3193	format,video_url = turls[-1]
	3194
	3195	self._downloader.increment_downloads()
	3196
	3197	effTitle = showId + u'-' + epTitle
	3198	info = {
	3199	'id': shortMediaId,
	3200	'url': video_url,
	3201	'uploader': showId,
	3202	'upload_date': officialDate,
	3203	'title': effTitle,
	3204	'stitle': _simplify_title(effTitle),
	3205	'ext': 'mp4',
	3206	'format': format,
	3207	'thumbnail': None,
	3208	'description': officialTitle,
	3209	'player_url': playerUrl
	3210	}
	3211
	3212	try:
	3213	self._downloader.process_info(info)
	3214	except UnavailableVideoError, err:
	3215	self._downloader.trouble(u'\nERROR: unable to download ' + mediaId)
	3216	continue
	3217
	3218
	3219	class EscapistIE(InfoExtractor):
	3220	"""Information extractor for The Escapist """
	3221
	3222	_VALID_URL = r'^(https?://)?(www\.)?escapistmagazine\.com/videos/view/(?P<showname>[^/]+)/(?P<episode>[^/?]+)[/?]?.*$'
	3223	IE_NAME = u'escapist'
	3224
	3225	def report_extraction(self, showName):
	3226	self._downloader.to_screen(u'[escapist] %s: Extracting information' % showName)
	3227
	3228	def report_config_download(self, showName):
	3229	self._downloader.to_screen(u'[escapist] %s: Downloading configuration' % showName)
	3230
	3231	def _real_extract(self, url):
	3232	htmlParser = HTMLParser.HTMLParser()
	3233
	3234	mobj = re.match(self._VALID_URL, url)
	3235	if mobj is None:
	3236	self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
	3237	return
	3238	showName = mobj.group('showname')
	3239	videoId = mobj.group('episode')
	3240
	3241	self.report_extraction(showName)
	3242	try:
	3243	webPage = urllib2.urlopen(url).read()
	3244	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	3245	self._downloader.trouble(u'ERROR: unable to download webpage: ' + unicode(err))
	3246	return
	3247
	3248	descMatch = re.search('<meta name="description" content="([^"]*)"', webPage)
	3249	description = htmlParser.unescape(descMatch.group(1))
	3250	imgMatch = re.search('<meta property="og:image" content="([^"]*)"', webPage)
	3251	imgUrl = htmlParser.unescape(imgMatch.group(1))
	3252	playerUrlMatch = re.search('<meta property="og:video" content="([^"]*)"', webPage)
	3253	playerUrl = htmlParser.unescape(playerUrlMatch.group(1))
	3254	configUrlMatch = re.search('config=(.*)$', playerUrl)
	3255	configUrl = urllib2.unquote(configUrlMatch.group(1))
	3256
	3257	self.report_config_download(showName)
	3258	try:
	3259	configJSON = urllib2.urlopen(configUrl).read()
	3260	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	3261	self._downloader.trouble(u'ERROR: unable to download configuration: ' + unicode(err))
	3262	return
	3263
	3264	# Technically, it's JavaScript, not JSON
	3265	configJSON = configJSON.replace("'", '"')
	3266
	3267	try:
	3268	config = json.loads(configJSON)
	3269	except (ValueError,), err:
	3270	self._downloader.trouble(u'ERROR: Invalid JSON in configuration file: ' + unicode(err))
	3271	return
	3272
	3273	playlist = config['playlist']
	3274	videoUrl = playlist[1]['url']
	3275
	3276	self._downloader.increment_downloads()
	3277	info = {
	3278	'id': videoId,
	3279	'url': videoUrl,
	3280	'uploader': showName,
	3281	'upload_date': None,
	3282	'title': showName,
	3283	'stitle': _simplify_title(showName),
	3284	'ext': 'flv',
	3285	'format': 'flv',
	3286	'thumbnail': imgUrl,
	3287	'description': description,
	3288	'player_url': playerUrl,
	3289	}
	3290
	3291	try:
	3292	self._downloader.process_info(info)
	3293	except UnavailableVideoError, err:
	3294	self._downloader.trouble(u'\nERROR: unable to download ' + videoId)
	3295
	3296
	3297	class CollegeHumorIE(InfoExtractor):
	3298	"""Information extractor for collegehumor.com"""
	3299
	3300	_VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/video/(?P<videoid>[0-9]+)/(?P<shorttitle>.*)$'
	3301	IE_NAME = u'collegehumor'
	3302
	3303	def report_webpage(self, video_id):
	3304	"""Report information extraction."""
	3305	self._downloader.to_screen(u'[%s] %s: Downloading webpage' % (self.IE_NAME, video_id))
	3306
	3307	def report_extraction(self, video_id):
	3308	"""Report information extraction."""
	3309	self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id))
	3310
	3311	def _real_extract(self, url):
	3312	htmlParser = HTMLParser.HTMLParser()
	3313
	3314	mobj = re.match(self._VALID_URL, url)
	3315	if mobj is None:
	3316	self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
	3317	return
	3318	video_id = mobj.group('videoid')
	3319
	3320	self.report_webpage(video_id)
	3321	request = urllib2.Request(url)
	3322	try:
	3323	webpage = urllib2.urlopen(request).read()
	3324	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	3325	self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
	3326	return
	3327
	3328	m = re.search(r'id="video:(?P<internalvideoid>[0-9]+)"', webpage)
	3329	if m is None:
	3330	self._downloader.trouble(u'ERROR: Cannot extract internal video ID')
	3331	return
	3332	internal_video_id = m.group('internalvideoid')
	3333
	3334	info = {
	3335	'id': video_id,
	3336	'internal_id': internal_video_id,
	3337	}
	3338
	3339	self.report_extraction(video_id)
	3340	xmlUrl = 'http://www.collegehumor.com/moogaloop/video:' + internal_video_id
	3341	try:
	3342	metaXml = urllib2.urlopen(xmlUrl).read()
	3343	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	3344	self._downloader.trouble(u'ERROR: unable to download video info XML: %s' % str(err))
	3345	return
	3346
	3347	mdoc = xml.etree.ElementTree.fromstring(metaXml)
	3348	try:
	3349	videoNode = mdoc.findall('./video')[0]
	3350	info['description'] = videoNode.findall('./description')[0].text
	3351	info['title'] = videoNode.findall('./caption')[0].text
	3352	info['stitle'] = _simplify_title(info['title'])
	3353	info['url'] = videoNode.findall('./file')[0].text
	3354	info['thumbnail'] = videoNode.findall('./thumbnail')[0].text
	3355	info['ext'] = info['url'].rpartition('.')[2]
	3356	info['format'] = info['ext']
	3357	except IndexError:
	3358	self._downloader.trouble(u'\nERROR: Invalid metadata XML file')
	3359	return
	3360
	3361	self._downloader.increment_downloads()
	3362
	3363	try:
	3364	self._downloader.process_info(info)
	3365	except UnavailableVideoError, err:
	3366	self._downloader.trouble(u'\nERROR: unable to download video')
	3367
	3368
	3369	class XVideosIE(InfoExtractor):
	3370	"""Information extractor for xvideos.com"""
	3371
	3372	_VALID_URL = r'^(?:https?://)?(?:www\.)?xvideos\.com/video([0-9]+)(?:.*)'
	3373	IE_NAME = u'xvideos'
	3374
	3375	def report_webpage(self, video_id):
	3376	"""Report information extraction."""
	3377	self._downloader.to_screen(u'[%s] %s: Downloading webpage' % (self.IE_NAME, video_id))
	3378
	3379	def report_extraction(self, video_id):
	3380	"""Report information extraction."""
	3381	self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id))
	3382
	3383	def _real_extract(self, url):
	3384	htmlParser = HTMLParser.HTMLParser()
	3385
	3386	mobj = re.match(self._VALID_URL, url)
	3387	if mobj is None:
	3388	self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
	3389	return
	3390	video_id = mobj.group(1).decode('utf-8')
	3391
	3392	self.report_webpage(video_id)
	3393
	3394	request = urllib2.Request(r'http://www.xvideos.com/video' + video_id)
	3395	try:
	3396	webpage = urllib2.urlopen(request).read()
	3397	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	3398	self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
	3399	return
	3400
	3401	self.report_extraction(video_id)
	3402
	3403
	3404	# Extract video URL
	3405	mobj = re.search(r'flv_url=(.+?)&', webpage)
	3406	if mobj is None:
	3407	self._downloader.trouble(u'ERROR: unable to extract video url')
	3408	return
	3409	video_url = urllib2.unquote(mobj.group(1).decode('utf-8'))
	3410
	3411
	3412	# Extract title
	3413	mobj = re.search(r'<title>(.*?)\s+-\s+XVID', webpage)
	3414	if mobj is None:
	3415	self._downloader.trouble(u'ERROR: unable to extract video title')
	3416	return
	3417	video_title = mobj.group(1).decode('utf-8')
	3418
	3419
	3420	# Extract video thumbnail
	3421	mobj = re.search(r'http://(?:img.*?\.)xvideos.com/videos/thumbs/[a-fA-F0-9]/[a-fA-F0-9]/[a-fA-F0-9]/([a-fA-F0-9.]+jpg)', webpage)
	3422	if mobj is None:
	3423	self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
	3424	return
	3425	video_thumbnail = mobj.group(1).decode('utf-8')
	3426
	3427
	3428
	3429	self._downloader.increment_downloads()
	3430	info = {
	3431	'id': video_id,
	3432	'url': video_url,
	3433	'uploader': None,
	3434	'upload_date': None,
	3435	'title': video_title,
	3436	'stitle': _simplify_title(video_title),
	3437	'ext': 'flv',
	3438	'format': 'flv',
	3439	'thumbnail': video_thumbnail,
	3440	'description': None,
	3441	'player_url': None,
	3442	}
	3443
	3444	try:
	3445	self._downloader.process_info(info)
	3446	except UnavailableVideoError, err:
	3447	self._downloader.trouble(u'\nERROR: unable to download ' + video_id)
	3448
	3449
	3450	class SoundcloudIE(InfoExtractor):
	3451	"""Information extractor for soundcloud.com
	3452	To access the media, the uid of the song and a stream token
	3453	must be extracted from the page source and the script must make
	3454	a request to media.soundcloud.com/crossdomain.xml. Then
	3455	the media can be grabbed by requesting from an url composed
	3456	of the stream token and uid
	3457	"""
	3458
	3459	_VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)'
	3460	IE_NAME = u'soundcloud'
	3461
	3462	def __init__(self, downloader=None):
	3463	InfoExtractor.__init__(self, downloader)
	3464
	3465	def report_webpage(self, video_id):
	3466	"""Report information extraction."""
	3467	self._downloader.to_screen(u'[%s] %s: Downloading webpage' % (self.IE_NAME, video_id))
	3468
	3469	def report_extraction(self, video_id):
	3470	"""Report information extraction."""
	3471	self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id))
	3472
	3473	def _real_extract(self, url):
	3474	htmlParser = HTMLParser.HTMLParser()
	3475
	3476	mobj = re.match(self._VALID_URL, url)
	3477	if mobj is None:
	3478	self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
	3479	return
	3480
	3481	# extract uploader (which is in the url)
	3482	uploader = mobj.group(1).decode('utf-8')
	3483	# extract simple title (uploader + slug of song title)
	3484	slug_title = mobj.group(2).decode('utf-8')
	3485	simple_title = uploader + '-' + slug_title
	3486
	3487	self.report_webpage('%s/%s' % (uploader, slug_title))
	3488
	3489	request = urllib2.Request('http://soundcloud.com/%s/%s' % (uploader, slug_title))
	3490	try:
	3491	webpage = urllib2.urlopen(request).read()
	3492	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	3493	self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
	3494	return
	3495
	3496	self.report_extraction('%s/%s' % (uploader, slug_title))
	3497
	3498	# extract uid and stream token that soundcloud hands out for access
	3499	mobj = re.search('"uid":"([\w\d]+?)".*?stream_token=([\w\d]+)', webpage)
	3500	if mobj:
	3501	video_id = mobj.group(1)
	3502	stream_token = mobj.group(2)
	3503
	3504	# extract unsimplified title
	3505	mobj = re.search('"title":"(.*?)",', webpage)
	3506	if mobj:
	3507	title = mobj.group(1)
	3508
	3509	# construct media url (with uid/token)
	3510	mediaURL = "http://media.soundcloud.com/stream/%s?stream_token=%s"
	3511	mediaURL = mediaURL % (video_id, stream_token)
	3512
	3513	# description
	3514	description = u'No description available'
	3515	mobj = re.search('track-description-value"><p>(.*?)</p>', webpage)
	3516	if mobj:
	3517	description = mobj.group(1)
	3518
	3519	# upload date
	3520	upload_date = None
	3521	mobj = re.search("pretty-date'>on ([\w]+ [\d]+, [\d]+ \d+:\d+)</abbr></h2>", webpage)
	3522	if mobj:
	3523	try:
	3524	upload_date = datetime.datetime.strptime(mobj.group(1), '%B %d, %Y %H:%M').strftime('%Y%m%d')
	3525	except Exception, e:
	3526	print str(e)
	3527
	3528	# for soundcloud, a request to a cross domain is required for cookies
	3529	request = urllib2.Request('http://media.soundcloud.com/crossdomain.xml', std_headers)
	3530
	3531	try:
	3532	self._downloader.process_info({
	3533	'id': video_id.decode('utf-8'),
	3534	'url': mediaURL,
	3535	'uploader': uploader.decode('utf-8'),
	3536	'upload_date': upload_date,
	3537	'title': simple_title.decode('utf-8'),
	3538	'stitle': simple_title.decode('utf-8'),
	3539	'ext': u'mp3',
	3540	'format': u'NA',
	3541	'player_url': None,
	3542	'description': description.decode('utf-8')
	3543	})
	3544	except UnavailableVideoError:
	3545	self._downloader.trouble(u'\nERROR: unable to download video')
	3546
	3547
	3548	class InfoQIE(InfoExtractor):
	3549	"""Information extractor for infoq.com"""
	3550
	3551	_VALID_URL = r'^(?:https?://)?(?:www\.)?infoq\.com/[^/]+/[^/]+$'
	3552	IE_NAME = u'infoq'
	3553
	3554	def report_webpage(self, video_id):
	3555	"""Report information extraction."""
	3556	self._downloader.to_screen(u'[%s] %s: Downloading webpage' % (self.IE_NAME, video_id))
	3557
	3558	def report_extraction(self, video_id):
	3559	"""Report information extraction."""
	3560	self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id))
	3561
	3562	def _real_extract(self, url):
	3563	htmlParser = HTMLParser.HTMLParser()
	3564
	3565	mobj = re.match(self._VALID_URL, url)
	3566	if mobj is None:
	3567	self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
	3568	return
	3569
	3570	self.report_webpage(url)
	3571
	3572	request = urllib2.Request(url)
	3573	try:
	3574	webpage = urllib2.urlopen(request).read()
	3575	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	3576	self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
	3577	return
	3578
	3579	self.report_extraction(url)
	3580
	3581
	3582	# Extract video URL
	3583	mobj = re.search(r"jsclassref='([^']*)'", webpage)
	3584	if mobj is None:
	3585	self._downloader.trouble(u'ERROR: unable to extract video url')
	3586	return
	3587	video_url = 'rtmpe://video.infoq.com/cfx/st/' + urllib2.unquote(mobj.group(1).decode('base64'))
	3588
	3589
	3590	# Extract title
	3591	mobj = re.search(r'contentTitle = "(.*?)";', webpage)
	3592	if mobj is None:
	3593	self._downloader.trouble(u'ERROR: unable to extract video title')
	3594	return
	3595	video_title = mobj.group(1).decode('utf-8')
	3596
	3597	# Extract description
	3598	video_description = u'No description available.'
	3599	mobj = re.search(r'<meta name="description" content="(.)"(?:\s/)?>', webpage)
	3600	if mobj is not None:
	3601	video_description = mobj.group(1).decode('utf-8')
	3602
	3603	video_filename = video_url.split('/')[-1]
	3604	video_id, extension = video_filename.split('.')
	3605
	3606	self._downloader.increment_downloads()
	3607	info = {
	3608	'id': video_id,
	3609	'url': video_url,
	3610	'uploader': None,
	3611	'upload_date': None,
	3612	'title': video_title,
	3613	'stitle': _simplify_title(video_title),
	3614	'ext': extension,
	3615	'format': extension, # Extension is always(?) mp4, but seems to be flv
	3616	'thumbnail': None,
	3617	'description': video_description,
	3618	'player_url': None,
	3619	}
	3620
	3621	try:
	3622	self._downloader.process_info(info)
	3623	except UnavailableVideoError, err:
	3624	self._downloader.trouble(u'\nERROR: unable to download ' + video_url)
	3625
	3626	class MixcloudIE(InfoExtractor):
	3627	"""Information extractor for www.mixcloud.com"""
	3628	_VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([\w\d-]+)/([\w\d-]+)'
	3629	IE_NAME = u'mixcloud'
	3630
	3631	def __init__(self, downloader=None):
	3632	InfoExtractor.__init__(self, downloader)
	3633
	3634	def report_download_json(self, file_id):
	3635	"""Report JSON download."""
	3636	self._downloader.to_screen(u'[%s] Downloading json' % self.IE_NAME)
	3637
	3638	def report_extraction(self, file_id):
	3639	"""Report information extraction."""
	3640	self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, file_id))
	3641
	3642	def get_urls(self, jsonData, fmt, bitrate='best'):
	3643	"""Get urls from 'audio_formats' section in json"""
	3644	file_url = None
	3645	try:
	3646	bitrate_list = jsonData[fmt]
	3647	if bitrate is None or bitrate == 'best' or bitrate not in bitrate_list:
	3648	bitrate = max(bitrate_list) # select highest
	3649
	3650	url_list = jsonData[fmt][bitrate]
	3651	except TypeError: # we have no bitrate info.
	3652	url_list = jsonData[fmt]
	3653
	3654	return url_list
	3655
	3656	def check_urls(self, url_list):
	3657	"""Returns 1st active url from list"""
	3658	for url in url_list:
	3659	try:
	3660	urllib2.urlopen(url)
	3661	return url
	3662	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	3663	url = None
	3664
	3665	return None
	3666
	3667	def _print_formats(self, formats):
	3668	print 'Available formats:'
	3669	for fmt in formats.keys():
	3670	for b in formats[fmt]:
	3671	try:
	3672	ext = formats[fmt][b][0]
	3673	print '%s\t%s\t[%s]' % (fmt, b, ext.split('.')[-1])
	3674	except TypeError: # we have no bitrate info
	3675	ext = formats[fmt][0]
	3676	print '%s\t%s\t[%s]' % (fmt, '??', ext.split('.')[-1])
	3677	break
	3678
	3679	def _real_extract(self, url):
	3680	mobj = re.match(self._VALID_URL, url)
	3681	if mobj is None:
	3682	self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
	3683	return
	3684	# extract uploader & filename from url
	3685	uploader = mobj.group(1).decode('utf-8')
	3686	file_id = uploader + "-" + mobj.group(2).decode('utf-8')
	3687
	3688	# construct API request
	3689	file_url = 'http://www.mixcloud.com/api/1/cloudcast/' + '/'.join(url.split('/')[-3:-1]) + '.json'
	3690	# retrieve .json file with links to files
	3691	request = urllib2.Request(file_url)
	3692	try:
	3693	self.report_download_json(file_url)
	3694	jsonData = urllib2.urlopen(request).read()
	3695	except (urllib2.URLError, httplib.HTTPException, socket.error), err:
	3696	self._downloader.trouble(u'ERROR: Unable to retrieve file: %s' % str(err))
	3697	return
	3698
	3699	# parse JSON
	3700	json_data = json.loads(jsonData)
	3701	player_url = json_data['player_swf_url']
	3702	formats = dict(json_data['audio_formats'])
	3703
	3704	req_format = self._downloader.params.get('format', None)
	3705	bitrate = None
	3706
	3707	if self._downloader.params.get('listformats', None):
	3708	self._print_formats(formats)
	3709	return
	3710
	3711	if req_format is None or req_format == 'best':
	3712	for format_param in formats.keys():
	3713	url_list = self.get_urls(formats, format_param)
	3714	# check urls
	3715	file_url = self.check_urls(url_list)
	3716	if file_url is not None:
	3717	break # got it!
	3718	else:
	3719	if req_format not in formats.keys():
	3720	self._downloader.trouble(u'ERROR: format is not available')
	3721	return
	3722
	3723	url_list = self.get_urls(formats, req_format)
	3724	file_url = self.check_urls(url_list)
	3725	format_param = req_format
	3726
	3727	# We have audio
	3728	self._downloader.increment_downloads()
	3729	try:
	3730	# Process file information
	3731	self._downloader.process_info({
	3732	'id': file_id.decode('utf-8'),
	3733	'url': file_url.decode('utf-8'),
	3734	'uploader': uploader.decode('utf-8'),
	3735	'upload_date': u'NA',
	3736	'title': json_data['name'],
	3737	'stitle': _simplify_title(json_data['name']),
	3738	'ext': file_url.split('.')[-1].decode('utf-8'),
	3739	'format': (format_param is None and u'NA' or format_param.decode('utf-8')),
	3740	'thumbnail': json_data['thumbnail_url'],
	3741	'description': json_data['description'],
	3742	'player_url': player_url.decode('utf-8'),
	3743	})
	3744	except UnavailableVideoError, err:
	3745	self._downloader.trouble(u'ERROR: unable to download file')
	3746
	3747
	3748
	3749	class PostProcessor(object):
	3750	"""Post Processor class.
	3751
	3752	PostProcessor objects can be added to downloaders with their
	3753	add_post_processor() method. When the downloader has finished a
	3754	successful download, it will take its internal chain of PostProcessors
	3755	and start calling the run() method on each one of them, first with
	3756	an initial argument and then with the returned value of the previous
	3757	PostProcessor.
	3758
	3759	The chain will be stopped if one of them ever returns None or the end
	3760	of the chain is reached.
	3761
	3762	PostProcessor objects follow a "mutual registration" process similar
	3763	to InfoExtractor objects.
	3764	"""
	3765
	3766	_downloader = None
	3767
	3768	def __init__(self, downloader=None):
	3769	self._downloader = downloader
	3770
	3771	def set_downloader(self, downloader):
	3772	"""Sets the downloader for this PP."""
	3773	self._downloader = downloader
	3774
	3775	def run(self, information):
	3776	"""Run the PostProcessor.
	3777
	3778	The "information" argument is a dictionary like the ones
	3779	composed by InfoExtractors. The only difference is that this
	3780	one has an extra field called "filepath" that points to the
	3781	downloaded file.
	3782
	3783	When this method returns None, the postprocessing chain is
	3784	stopped. However, this method may return an information
	3785	dictionary that will be passed to the next postprocessing
	3786	object in the chain. It can be the one it received after
	3787	changing some fields.
	3788
	3789	In addition, this method may raise a PostProcessingError
	3790	exception that will be taken into account by the downloader
	3791	it was called from.
	3792	"""
	3793	return information # by default, do nothing
	3794
	3795
	3796	class FFmpegExtractAudioPP(PostProcessor):
	3797
	3798	def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, keepvideo=False):
	3799	PostProcessor.__init__(self, downloader)
	3800	if preferredcodec is None:
	3801	preferredcodec = 'best'
	3802	self._preferredcodec = preferredcodec
	3803	self._preferredquality = preferredquality
	3804	self._keepvideo = keepvideo
	3805
	3806	@staticmethod
	3807	def get_audio_codec(path):
	3808	try:
	3809	cmd = ['ffprobe', '-show_streams', '--', path]
	3810	handle = subprocess.Popen(cmd, stderr=file(os.path.devnull, 'w'), stdout=subprocess.PIPE)
	3811	output = handle.communicate()[0]
	3812	if handle.wait() != 0:
	3813	return None
	3814	except (IOError, OSError):
	3815	return None
	3816	audio_codec = None
	3817	for line in output.split('\n'):
	3818	if line.startswith('codec_name='):
	3819	audio_codec = line.split('=')[1].strip()
	3820	elif line.strip() == 'codec_type=audio' and audio_codec is not None:
	3821	return audio_codec
	3822	return None
	3823
	3824	@staticmethod
	3825	def run_ffmpeg(path, out_path, codec, more_opts):
	3826	try:
	3827	cmd = ['ffmpeg', '-y', '-i', path, '-vn', '-acodec', codec] + more_opts + ['--', out_path]
	3828	ret = subprocess.call(cmd, stdout=file(os.path.devnull, 'w'), stderr=subprocess.STDOUT)
	3829	return (ret == 0)
	3830	except (IOError, OSError):
	3831	return False
	3832
	3833	def run(self, information):
	3834	path = information['filepath']
	3835
	3836	filecodec = self.get_audio_codec(path)
	3837	if filecodec is None:
	3838	self._downloader.to_stderr(u'WARNING: unable to obtain file audio codec with ffprobe')
	3839	return None
	3840
	3841	more_opts = []
	3842	if self._preferredcodec == 'best' or self._preferredcodec == filecodec:
	3843	if filecodec in ['aac', 'mp3', 'vorbis']:
	3844	# Lossless if possible
	3845	acodec = 'copy'
	3846	extension = filecodec
	3847	if filecodec == 'aac':
	3848	more_opts = ['-f', 'adts']
	3849	if filecodec == 'vorbis':
	3850	extension = 'ogg'
	3851	else:
	3852	# MP3 otherwise.
	3853	acodec = 'libmp3lame'
	3854	extension = 'mp3'
	3855	more_opts = []
	3856	if self._preferredquality is not None:
	3857	more_opts += ['-ab', self._preferredquality]
	3858	else:
	3859	# We convert the audio (lossy)
	3860	acodec = {'mp3': 'libmp3lame', 'aac': 'aac', 'vorbis': 'libvorbis'}[self._preferredcodec]
	3861	extension = self._preferredcodec
	3862	more_opts = []
	3863	if self._preferredquality is not None:
	3864	more_opts += ['-ab', self._preferredquality]
	3865	if self._preferredcodec == 'aac':
	3866	more_opts += ['-f', 'adts']
	3867	if self._preferredcodec == 'vorbis':
	3868	extension = 'ogg'
	3869
	3870	(prefix, ext) = os.path.splitext(path)
	3871	new_path = prefix + '.' + extension
	3872	self._downloader.to_screen(u'[ffmpeg] Destination: %s' % new_path)
	3873	status = self.run_ffmpeg(path, new_path, acodec, more_opts)
	3874
	3875	if not status:
	3876	self._downloader.to_stderr(u'WARNING: error running ffmpeg')
	3877	return None
	3878
	3879	# Try to update the date time for extracted audio file.
	3880	if information.get('filetime') is not None:
	3881	try:
	3882	os.utime(new_path, (time.time(), information['filetime']))
	3883	except:
	3884	self._downloader.to_stderr(u'WARNING: Cannot update utime of audio file')
	3885
	3886	if not self._keepvideo:
	3887	try:
	3888	os.remove(path)
	3889	except (IOError, OSError):
	3890	self._downloader.to_stderr(u'WARNING: Unable to remove downloaded video file')
	3891	return None
	3892
	3893	information['filepath'] = new_path
	3894	return information
	3895
	3896
	3897	def updateSelf(downloader, filename):
	3898	''' Update the program file with the latest version from the repository '''
	3899	# Note: downloader only used for options
	3900	if not os.access(filename, os.W_OK):
	3901	sys.exit('ERROR: no write permissions on %s' % filename)
	3902
	3903	downloader.to_screen('Updating to latest version...')
	3904
	3905	try:
	3906	try:
	3907	urlh = urllib.urlopen(UPDATE_URL)
	3908	newcontent = urlh.read()
	3909
	3910	vmatch = re.search("__version__ = '([^']+)'", newcontent)
	3911	if vmatch is not None and vmatch.group(1) == __version__:
	3912	downloader.to_screen('youtube-dl is up-to-date (' + __version__ + ')')
	3913	return
	3914	finally:
	3915	urlh.close()
	3916	except (IOError, OSError), err:
	3917	sys.exit('ERROR: unable to download latest version')
	3918
	3919	try:
	3920	outf = open(filename, 'wb')
	3921	try:
	3922	outf.write(newcontent)
	3923	finally:
	3924	outf.close()
	3925	except (IOError, OSError), err:
	3926	sys.exit('ERROR: unable to overwrite current version')
	3927
	3928	downloader.to_screen('Updated youtube-dl. Restart youtube-dl to use the new version.')
	3929
	3930	def parseOpts():
	3931	# Deferred imports
	3932	import getpass
	3933	import optparse
	3934	import shlex
	3935
	3936	def _readOptions(filename):
	3937	try:
	3938	optionf = open(filename)
	3939	except IOError:
	3940	return [] # silently skip if file is not present
	3941	try:
	3942	res = []
	3943	for l in optionf:
	3944	res += shlex.split(l, comments=True)
	3945	finally:
	3946	optionf.close()
	3947	return res
	3948
	3949	def _format_option_string(option):
	3950	''' ('-o', '--option') -> -o, --format METAVAR'''
	3951
	3952	opts = []
	3953
	3954	if option._short_opts: opts.append(option._short_opts[0])
	3955	if option._long_opts: opts.append(option._long_opts[0])
	3956	if len(opts) > 1: opts.insert(1, ', ')
	3957
	3958	if option.takes_value(): opts.append(' %s' % option.metavar)
	3959
	3960	return "".join(opts)
	3961
	3962	def _find_term_columns():
	3963	columns = os.environ.get('COLUMNS', None)
	3964	if columns:
	3965	return int(columns)
	3966
	3967	try:
	3968	sp = subprocess.Popen(['stty', 'size'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
	3969	out,err = sp.communicate()
	3970	return int(out.split()[1])
	3971	except:
	3972	pass
	3973	return None
	3974
	3975	max_width = 80
	3976	max_help_position = 80
	3977
	3978	# No need to wrap help messages if we're on a wide console
	3979	columns = _find_term_columns()
	3980	if columns: max_width = columns
	3981
	3982	fmt = optparse.IndentedHelpFormatter(width=max_width, max_help_position=max_help_position)
	3983	fmt.format_option_strings = _format_option_string
	3984
	3985	kw = {
	3986	'version' : __version__,
	3987	'formatter' : fmt,
	3988	'usage' : '%prog [options] url [url...]',
	3989	'conflict_handler' : 'resolve',
	3990	}
	3991
	3992	parser = optparse.OptionParser(**kw)
	3993
	3994	# option groups
	3995	general = optparse.OptionGroup(parser, 'General Options')
	3996	selection = optparse.OptionGroup(parser, 'Video Selection')
	3997	authentication = optparse.OptionGroup(parser, 'Authentication Options')
	3998	video_format = optparse.OptionGroup(parser, 'Video Format Options')
	3999	postproc = optparse.OptionGroup(parser, 'Post-processing Options')
	4000	filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
	4001	verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
	4002
	4003	general.add_option('-h', '--help',
	4004	action='help', help='print this help text and exit')
	4005	general.add_option('-v', '--version',
	4006	action='version', help='print program version and exit')
	4007	general.add_option('-U', '--update',
	4008	action='store_true', dest='update_self', help='update this program to latest version')
	4009	general.add_option('-i', '--ignore-errors',
	4010	action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
	4011	general.add_option('-r', '--rate-limit',
	4012	dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)')
	4013	general.add_option('-R', '--retries',
	4014	dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10)
	4015	general.add_option('--dump-user-agent',
	4016	action='store_true', dest='dump_user_agent',
	4017	help='display the current browser identification', default=False)
	4018	general.add_option('--list-extractors',
	4019	action='store_true', dest='list_extractors',
	4020	help='List all supported extractors and the URLs they would handle', default=False)
	4021
	4022	selection.add_option('--playlist-start',
	4023	dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1)
	4024	selection.add_option('--playlist-end',
	4025	dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)
	4026	selection.add_option('--match-title', dest='matchtitle', metavar='REGEX',help='download only matching titles (regex or caseless sub-string)')
	4027	selection.add_option('--reject-title', dest='rejecttitle', metavar='REGEX',help='skip download for matching titles (regex or caseless sub-string)')
	4028	selection.add_option('--max-downloads', metavar='NUMBER', dest='max_downloads', help='Abort after downloading NUMBER files', default=None)
	4029
	4030	authentication.add_option('-u', '--username',
	4031	dest='username', metavar='USERNAME', help='account username')
	4032	authentication.add_option('-p', '--password',
	4033	dest='password', metavar='PASSWORD', help='account password')
	4034	authentication.add_option('-n', '--netrc',
	4035	action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
	4036
	4037
	4038	video_format.add_option('-f', '--format',
	4039	action='store', dest='format', metavar='FORMAT', help='video format code')
	4040	video_format.add_option('--all-formats',
	4041	action='store_const', dest='format', help='download all available video formats', const='all')
	4042	video_format.add_option('--max-quality',
	4043	action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download')
	4044	video_format.add_option('-F', '--list-formats',
	4045	action='store_true', dest='listformats', help='list all available formats (currently youtube only)')
	4046
	4047
	4048	verbosity.add_option('-q', '--quiet',
	4049	action='store_true', dest='quiet', help='activates quiet mode', default=False)
	4050	verbosity.add_option('-s', '--simulate',
	4051	action='store_true', dest='simulate', help='do not download the video and do not write anything to disk', default=False)
	4052	verbosity.add_option('--skip-download',
	4053	action='store_true', dest='skip_download', help='do not download the video', default=False)
	4054	verbosity.add_option('-g', '--get-url',
	4055	action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
	4056	verbosity.add_option('-e', '--get-title',
	4057	action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
	4058	verbosity.add_option('--get-thumbnail',
	4059	action='store_true', dest='getthumbnail',
	4060	help='simulate, quiet but print thumbnail URL', default=False)
	4061	verbosity.add_option('--get-description',
	4062	action='store_true', dest='getdescription',
	4063	help='simulate, quiet but print video description', default=False)
	4064	verbosity.add_option('--get-filename',
	4065	action='store_true', dest='getfilename',
	4066	help='simulate, quiet but print output filename', default=False)
	4067	verbosity.add_option('--get-format',
	4068	action='store_true', dest='getformat',
	4069	help='simulate, quiet but print output format', default=False)
	4070	verbosity.add_option('--no-progress',
	4071	action='store_true', dest='noprogress', help='do not print progress bar', default=False)
	4072	verbosity.add_option('--console-title',
	4073	action='store_true', dest='consoletitle',
	4074	help='display progress in console titlebar', default=False)
	4075
	4076
	4077	filesystem.add_option('-t', '--title',
	4078	action='store_true', dest='usetitle', help='use title in file name', default=False)
	4079	filesystem.add_option('-l', '--literal',
	4080	action='store_true', dest='useliteral', help='use literal title in file name', default=False)
	4081	filesystem.add_option('-A', '--auto-number',
	4082	action='store_true', dest='autonumber',
	4083	help='number downloaded files starting from 00000', default=False)
	4084	filesystem.add_option('-o', '--output',
	4085	dest='outtmpl', metavar='TEMPLATE', help='output filename template. Use %(stitle)s to get the title, %(uploader)s for the uploader name, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(upload_date)s for the upload date (YYYYMMDD), and %% for a literal percent. Use - to output to stdout.')
	4086	filesystem.add_option('-a', '--batch-file',
	4087	dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
	4088	filesystem.add_option('-w', '--no-overwrites',
	4089	action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
	4090	filesystem.add_option('-c', '--continue',
	4091	action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False)
	4092	filesystem.add_option('--no-continue',
	4093	action='store_false', dest='continue_dl',
	4094	help='do not resume partially downloaded files (restart from beginning)')
	4095	filesystem.add_option('--cookies',
	4096	dest='cookiefile', metavar='FILE', help='file to read cookies from and dump cookie jar in')
	4097	filesystem.add_option('--no-part',
	4098	action='store_true', dest='nopart', help='do not use .part files', default=False)
	4099	filesystem.add_option('--no-mtime',
	4100	action='store_false', dest='updatetime',
	4101	help='do not use the Last-modified header to set the file modification time', default=True)
	4102	filesystem.add_option('--write-description',
	4103	action='store_true', dest='writedescription',
	4104	help='write video description to a .description file', default=False)
	4105	filesystem.add_option('--write-info-json',
	4106	action='store_true', dest='writeinfojson',
	4107	help='write video metadata to a .info.json file', default=False)
	4108
	4109
	4110	postproc.add_option('--extract-audio', action='store_true', dest='extractaudio', default=False,
	4111	help='convert video files to audio-only files (requires ffmpeg and ffprobe)')
	4112	postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best',
	4113	help='"best", "aac", "vorbis" or "mp3"; best by default')
	4114	postproc.add_option('--audio-quality', metavar='QUALITY', dest='audioquality', default='128K',
	4115	help='ffmpeg audio bitrate specification, 128k by default')
	4116	postproc.add_option('-k', '--keep-video', action='store_true', dest='keepvideo', default=False,
	4117	help='keeps the video file on disk after the post-processing; the video is erased by default')
	4118
	4119
	4120	parser.add_option_group(general)
	4121	parser.add_option_group(selection)
	4122	parser.add_option_group(filesystem)
	4123	parser.add_option_group(verbosity)
	4124	parser.add_option_group(video_format)
	4125	parser.add_option_group(authentication)
	4126	parser.add_option_group(postproc)
	4127
	4128	xdg_config_home = os.environ.get('XDG_CONFIG_HOME')
	4129	if xdg_config_home:
	4130	userConf = os.path.join(xdg_config_home, 'youtube-dl.conf')
	4131	else:
	4132	userConf = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf')
	4133	argv = _readOptions('/etc/youtube-dl.conf') + _readOptions(userConf) + sys.argv[1:]
	4134	opts, args = parser.parse_args(argv)
	4135
	4136	return parser, opts, args
	4137
	4138	def gen_extractors():
	4139	""" Return a list of an instance of every supported extractor.
	4140	The order does matter; the first extractor matched is the one handling the URL.
	4141	"""
	4142	youtube_ie = YoutubeIE()
	4143	google_ie = GoogleIE()
	4144	yahoo_ie = YahooIE()
	4145	return [
	4146	YoutubePlaylistIE(youtube_ie),
	4147	YoutubeUserIE(youtube_ie),
	4148	YoutubeSearchIE(youtube_ie),
	4149	youtube_ie,
	4150	MetacafeIE(youtube_ie),
	4151	DailymotionIE(),
	4152	google_ie,
	4153	GoogleSearchIE(google_ie),
	4154	PhotobucketIE(),
	4155	yahoo_ie,
	4156	YahooSearchIE(yahoo_ie),
	4157	DepositFilesIE(),
	4158	FacebookIE(),
	4159	BlipTVIE(),
	4160	VimeoIE(),
	4161	MyVideoIE(),
	4162	ComedyCentralIE(),
	4163	EscapistIE(),
	4164	CollegeHumorIE(),
	4165	XVideosIE(),
	4166	SoundcloudIE(),
	4167	InfoQIE(),
	4168	MixcloudIE(),
	4169
	4170	GenericIE()
	4171	]
	4172
	4173	def _real_main():
	4174	parser, opts, args = parseOpts()
	4175
	4176	# Open appropriate CookieJar
	4177	if opts.cookiefile is None:
	4178	jar = cookielib.CookieJar()
	4179	else:
	4180	try:
	4181	jar = cookielib.MozillaCookieJar(opts.cookiefile)
	4182	if os.path.isfile(opts.cookiefile) and os.access(opts.cookiefile, os.R_OK):
	4183	jar.load()
	4184	except (IOError, OSError), err:
	4185	sys.exit(u'ERROR: unable to open cookie file')
	4186
	4187	# Dump user agent
	4188	if opts.dump_user_agent:
	4189	print std_headers['User-Agent']
	4190	sys.exit(0)
	4191
	4192	# Batch file verification
	4193	batchurls = []
	4194	if opts.batchfile is not None:
	4195	try:
	4196	if opts.batchfile == '-':
	4197	batchfd = sys.stdin
	4198	else:
	4199	batchfd = open(opts.batchfile, 'r')
	4200	batchurls = batchfd.readlines()
	4201	batchurls = [x.strip() for x in batchurls]
	4202	batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)]
	4203	except IOError:
	4204	sys.exit(u'ERROR: batch file could not be read')
	4205	all_urls = batchurls + args
	4206
	4207	# General configuration
	4208	cookie_processor = urllib2.HTTPCookieProcessor(jar)
	4209	opener = urllib2.build_opener(urllib2.ProxyHandler(), cookie_processor, YoutubeDLHandler())
	4210	urllib2.install_opener(opener)
	4211	socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)
	4212
	4213	extractors = gen_extractors()
	4214
	4215	if opts.list_extractors:
	4216	for ie in extractors:
	4217	print(ie.IE_NAME)
	4218	matchedUrls = filter(lambda url: ie.suitable(url), all_urls)
	4219	all_urls = filter(lambda url: url not in matchedUrls, all_urls)
	4220	for mu in matchedUrls:
	4221	print(u' ' + mu)
	4222	sys.exit(0)
	4223
	4224	# Conflicting, missing and erroneous options
	4225	if opts.usenetrc and (opts.username is not None or opts.password is not None):
	4226	parser.error(u'using .netrc conflicts with giving username/password')
	4227	if opts.password is not None and opts.username is None:
	4228	parser.error(u'account username missing')
	4229	if opts.outtmpl is not None and (opts.useliteral or opts.usetitle or opts.autonumber):
	4230	parser.error(u'using output template conflicts with using title, literal title or auto number')
	4231	if opts.usetitle and opts.useliteral:
	4232	parser.error(u'using title conflicts with using literal title')
	4233	if opts.username is not None and opts.password is None:
	4234	opts.password = getpass.getpass(u'Type account password and press return:')
	4235	if opts.ratelimit is not None:
	4236	numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
	4237	if numeric_limit is None:
	4238	parser.error(u'invalid rate limit specified')
	4239	opts.ratelimit = numeric_limit
	4240	if opts.retries is not None:
	4241	try:
	4242	opts.retries = long(opts.retries)
	4243	except (TypeError, ValueError), err:
	4244	parser.error(u'invalid retry count specified')
	4245	try:
	4246	opts.playliststart = int(opts.playliststart)
	4247	if opts.playliststart <= 0:
	4248	raise ValueError(u'Playlist start must be positive')
	4249	except (TypeError, ValueError), err:
	4250	parser.error(u'invalid playlist start number specified')
	4251	try:
	4252	opts.playlistend = int(opts.playlistend)
	4253	if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart):
	4254	raise ValueError(u'Playlist end must be greater than playlist start')
	4255	except (TypeError, ValueError), err:
	4256	parser.error(u'invalid playlist end number specified')
	4257	if opts.extractaudio:
	4258	if opts.audioformat not in ['best', 'aac', 'mp3', 'vorbis']:
	4259	parser.error(u'invalid audio format specified')
	4260
	4261	# File downloader
	4262	fd = FileDownloader({
	4263	'usenetrc': opts.usenetrc,
	4264	'username': opts.username,
	4265	'password': opts.password,
	4266	'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat),
	4267	'forceurl': opts.geturl,
	4268	'forcetitle': opts.gettitle,
	4269	'forcethumbnail': opts.getthumbnail,
	4270	'forcedescription': opts.getdescription,
	4271	'forcefilename': opts.getfilename,
	4272	'forceformat': opts.getformat,
	4273	'simulate': opts.simulate,
	4274	'skip_download': (opts.skip_download or opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat),
	4275	'format': opts.format,
	4276	'format_limit': opts.format_limit,
	4277	'listformats': opts.listformats,
	4278	'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
	4279	or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s')
	4280	or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s')
	4281	or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s')
	4282	or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(stitle)s-%(id)s.%(ext)s')
	4283	or (opts.useliteral and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s')
	4284	or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
	4285	or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
	4286	or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
	4287	or u'%(id)s.%(ext)s'),
	4288	'ignoreerrors': opts.ignoreerrors,
	4289	'ratelimit': opts.ratelimit,
	4290	'nooverwrites': opts.nooverwrites,
	4291	'retries': opts.retries,
	4292	'continuedl': opts.continue_dl,
	4293	'noprogress': opts.noprogress,
	4294	'playliststart': opts.playliststart,
	4295	'playlistend': opts.playlistend,
	4296	'logtostderr': opts.outtmpl == '-',
	4297	'consoletitle': opts.consoletitle,
	4298	'nopart': opts.nopart,
	4299	'updatetime': opts.updatetime,
	4300	'writedescription': opts.writedescription,
	4301	'writeinfojson': opts.writeinfojson,
	4302	'matchtitle': opts.matchtitle,
	4303	'rejecttitle': opts.rejecttitle,
	4304	'max_downloads': opts.max_downloads,
	4305	})
	4306	for extractor in extractors:
	4307	fd.add_info_extractor(extractor)
	4308
	4309	# PostProcessors
	4310	if opts.extractaudio:
	4311	fd.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, keepvideo=opts.keepvideo))
	4312
	4313	# Update version
	4314	if opts.update_self:
	4315	updateSelf(fd, sys.argv[0])
	4316
	4317	# Maybe do nothing
	4318	if len(all_urls) < 1:
	4319	if not opts.update_self:
	4320	parser.error(u'you must provide at least one URL')
	4321	else:
	4322	sys.exit()
	4323	retcode = fd.download(all_urls)
	4324
	4325	# Dump cookie jar if requested
	4326	if opts.cookiefile is not None:
	4327	try:
	4328	jar.save()
	4329	except (IOError, OSError), err:
	4330	sys.exit(u'ERROR: unable to save cookie jar')
	4331
	4332	sys.exit(retcode)
	4333
	4334	def main():
	4335	try:
	4336	_real_main()
	4337	except DownloadError:
	4338	sys.exit(1)
	4339	except SameFileError:
	4340	sys.exit(u'ERROR: fixed output name but more than one file to download')
	4341	except KeyboardInterrupt:
	4342	sys.exit(u'\nERROR: Interrupted by user')
	4343
	4344	if __name__ == '__main__':
	4345	main()
	4346
	4347	# vim: set ts=4 sw=4 sts=4 noet ai si filetype=python: