jfr.im git - yt-dlp.git/blame_incremental - youtube

... / ...

Commit	Line	Data
	1	from __future__ import unicode_literals
	2
	3	import re
	4	import base64
	5	import json
	6
	7	from .common import InfoExtractor
	8	from ..utils import (
	9	clean_html,
	10	ExtractorError
	11	)
	12
	13
	14	class ChilloutzoneIE(InfoExtractor):
	15	_VALID_URL = r'https?://(?:www\.)?chilloutzone\.net/video/(?P<id>[\w\|-]+)\.html'
	16	_TESTS = [{
	17	'url': 'http://www.chilloutzone.net/video/enemene-meck-alle-katzen-weg.html',
	18	'md5': 'a76f3457e813ea0037e5244f509e66d1',
	19	'info_dict': {
	20	'id': 'enemene-meck-alle-katzen-weg',
	21	'ext': 'mp4',
	22	'title': 'Enemene Meck - Alle Katzen weg',
	23	'description': 'Ist das der Umkehrschluss des Niesenden Panda-Babys?',
	24	},
	25	}, {
	26	'note': 'Video hosted at YouTube',
	27	'url': 'http://www.chilloutzone.net/video/eine-sekunde-bevor.html',
	28	'info_dict': {
	29	'id': '1YVQaAgHyRU',
	30	'ext': 'mp4',
	31	'title': '16 Photos Taken 1 Second Before Disaster',
	32	'description': 'md5:58a8fcf6a459fe0a08f54140f0ad1814',
	33	'uploader': 'BuzzFeedVideo',
	34	'uploader_id': 'BuzzFeedVideo',
	35	'upload_date': '20131105',
	36	},
	37	}, {
	38	'note': 'Video hosted at Vimeo',
	39	'url': 'http://www.chilloutzone.net/video/icon-blending.html',
	40	'md5': '2645c678b8dc4fefcc0e1b60db18dac1',
	41	'info_dict': {
	42	'id': '85523671',
	43	'ext': 'mp4',
	44	'title': 'The Sunday Times - Icons',
	45	'description': 're:(?s)^Watch the making of - makingoficons.com.{300,}',
	46	'uploader': 'Us',
	47	'uploader_id': 'usfilms',
	48	'upload_date': '20140131'
	49	},
	50	}]
	51
	52	def _real_extract(self, url):
	53	mobj = re.match(self._VALID_URL, url)
	54	video_id = mobj.group('id')
	55
	56	webpage = self._download_webpage(url, video_id)
	57
	58	base64_video_info = self._html_search_regex(
	59	r'var cozVidData = "(.+?)";', webpage, 'video data')
	60	decoded_video_info = base64.b64decode(base64_video_info.encode('utf-8')).decode('utf-8')
	61	video_info_dict = json.loads(decoded_video_info)
	62
	63	# get video information from dict
	64	video_url = video_info_dict['mediaUrl']
	65	description = clean_html(video_info_dict.get('description'))
	66	title = video_info_dict['title']
	67	native_platform = video_info_dict['nativePlatform']
	68	native_video_id = video_info_dict['nativeVideoId']
	69	source_priority = video_info_dict['sourcePriority']
	70
	71	# If nativePlatform is None a fallback mechanism is used (i.e. youtube embed)
	72	if native_platform is None:
	73	youtube_url = self._html_search_regex(
	74	r'<iframe.* src="((?:https?:)?//(?:[^.]+\.)?youtube\.com/.+?)"',
	75	webpage, 'fallback video URL', default=None)
	76	if youtube_url is not None:
	77	return self.url_result(youtube_url, ie='Youtube')
	78
	79	# Non Fallback: Decide to use native source (e.g. youtube or vimeo) or
	80	# the own CDN
	81	if source_priority == 'native':
	82	if native_platform == 'youtube':
	83	return self.url_result(native_video_id, ie='Youtube')
	84	if native_platform == 'vimeo':
	85	return self.url_result(
	86	'http://vimeo.com/' + native_video_id, ie='Vimeo')
	87
	88	if not video_url:
	89	raise ExtractorError('No video found')
	90
	91	return {
	92	'id': video_id,
	93	'url': video_url,
	94	'ext': 'mp4',
	95	'title': title,
	96	'description': description,
	97	}