jfr.im git - yt-dlp.git/blame_incremental - youtube

... / ...

Commit	Line	Data
	1	from __future__ import unicode_literals
	2
	3	import json
	4	import re
	5
	6	from .common import InfoExtractor
	7	from ..utils import int_or_none
	8
	9
	10	class LiveLeakIE(InfoExtractor):
	11	_VALID_URL = r'^(?:http://)?(?:\w+\.)?liveleak\.com/view\?(?:.?)i=(?P<video_id>[\w_]+)(?:.)'
	12	_TESTS = [{
	13	'url': 'http://www.liveleak.com/view?i=757_1364311680',
	14	'md5': '0813c2430bea7a46bf13acf3406992f4',
	15	'info_dict': {
	16	'id': '757_1364311680',
	17	'ext': 'mp4',
	18	'description': 'extremely bad day for this guy..!',
	19	'uploader': 'ljfriel2',
	20	'title': 'Most unlucky car accident'
	21	}
	22	},
	23	{
	24	'url': 'http://www.liveleak.com/view?i=f93_1390833151',
	25	'md5': 'd3f1367d14cc3c15bf24fbfbe04b9abf',
	26	'info_dict': {
	27	'id': 'f93_1390833151',
	28	'ext': 'mp4',
	29	'description': 'German Television Channel NDR does an exclusive interview with Edward Snowden.\r\nUploaded on LiveLeak cause German Television thinks the rest of the world isn\'t intereseted in Edward Snowden.',
	30	'uploader': 'ARD_Stinkt',
	31	'title': 'German Television does first Edward Snowden Interview (ENGLISH)',
	32	}
	33	},
	34	{
	35	'url': 'http://www.liveleak.com/view?i=4f7_1392687779',
	36	'md5': '42c6d97d54f1db107958760788c5f48f',
	37	'info_dict': {
	38	'id': '4f7_1392687779',
	39	'ext': 'mp4',
	40	'description': "The guy with the cigarette seems amazingly nonchalant about the whole thing... I really hope my friends' reactions would be a bit stronger.\r\n\r\nAction-go to 0:55.",
	41	'uploader': 'CapObveus',
	42	'title': 'Man is Fatally Struck by Reckless Car While Packing up a Moving Truck',
	43	'age_limit': 18,
	44	}
	45	}]
	46
	47	def _real_extract(self, url):
	48	mobj = re.match(self._VALID_URL, url)
	49	video_id = mobj.group('video_id')
	50	webpage = self._download_webpage(url, video_id)
	51
	52	video_title = self._og_search_title(webpage).replace('LiveLeak.com -', '').strip()
	53	video_description = self._og_search_description(webpage)
	54	video_uploader = self._html_search_regex(
	55	r'By:.*?(\w+)</a>', webpage, 'uploader', fatal=False)
	56	age_limit = int_or_none(self._search_regex(
	57	r'you confirm that you are ([0-9]+) years and over.',
	58	webpage, 'age limit', default=None))
	59
	60	sources_raw = self._search_regex(
	61	r'(?s)sources:\s(\[.?\]),', webpage, 'video URLs', default=None)
	62	if sources_raw is None:
	63	alt_source = self._search_regex(
	64	r'(file: ".*?"),', webpage, 'video URL', default=None)
	65	if alt_source:
	66	sources_raw = '[{ %s}]' % alt_source
	67	else:
	68	# Maybe an embed?
	69	embed_url = self._search_regex(
	70	r'<iframe[^>]+src="(http://www.prochan.com/embed\?[^"]+)"',
	71	webpage, 'embed URL')
	72	return {
	73	'_type': 'url_transparent',
	74	'url': embed_url,
	75	'id': video_id,
	76	'title': video_title,
	77	'description': video_description,
	78	'uploader': video_uploader,
	79	'age_limit': age_limit,
	80	}
	81
	82	sources_json = re.sub(r'\s([a-z]+):\s', r'"\1": ', sources_raw)
	83	sources = json.loads(sources_json)
	84
	85	formats = [{
	86	'format_note': s.get('label'),
	87	'url': s['file'],
	88	} for s in sources]
	89	self._sort_formats(formats)
	90
	91	return {
	92	'id': video_id,
	93	'title': video_title,
	94	'description': video_description,
	95	'uploader': video_uploader,
	96	'formats': formats,
	97	'age_limit': age_limit,
	98	}