jfr.im git - yt-dlp.git/blame_incremental - youtube

... / ...

Commit	Line	Data
	1	from __future__ import unicode_literals
	2
	3	from .theplatform import ThePlatformFeedIE
	4	from ..utils import (
	5	ExtractorError,
	6	int_or_none,
	7	find_xpath_attr,
	8	xpath_element,
	9	xpath_text,
	10	update_url_query,
	11	)
	12
	13
	14	class CBSBaseIE(ThePlatformFeedIE):
	15	def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
	16	subtitles = {}
	17	for k, ext in [('sMPTE-TTCCURL', 'tt'), ('ClosedCaptionURL', 'ttml'), ('webVTTCaptionURL', 'vtt')]:
	18	cc_e = find_xpath_attr(smil, self._xpath_ns('.//param', namespace), 'name', k)
	19	if cc_e is not None:
	20	cc_url = cc_e.get('value')
	21	if cc_url:
	22	subtitles.setdefault(subtitles_lang, []).append({
	23	'ext': ext,
	24	'url': cc_url,
	25	})
	26	return subtitles
	27
	28
	29	class CBSIE(CBSBaseIE):
	30	_VALID_URL = r'(?:cbs:\|https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/video\|colbertlateshow\.com/(?:video\|podcasts))/)(?P<id>[\w-]+)'
	31
	32	_TESTS = [{
	33	'url': 'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/',
	34	'info_dict': {
	35	'id': '_u7W953k6la293J7EPTd9oHkSPs6Xn6_',
	36	'ext': 'mp4',
	37	'title': 'Connect Chat feat. Garth Brooks',
	38	'description': 'Connect with country music singer Garth Brooks, as he chats with fans on Wednesday November 27, 2013. Be sure to tune in to Garth Brooks: Live from Las Vegas, Friday November 29, at 9/8c on CBS!',
	39	'duration': 1495,
	40	'timestamp': 1385585425,
	41	'upload_date': '20131127',
	42	'uploader': 'CBSI-NEW',
	43	},
	44	'params': {
	45	# m3u8 download
	46	'skip_download': True,
	47	},
	48	'_skip': 'Blocked outside the US',
	49	}, {
	50	'url': 'http://colbertlateshow.com/video/8GmB0oY0McANFvp2aEffk9jZZZ2YyXxy/the-colbeard/',
	51	'only_matching': True,
	52	}, {
	53	'url': 'http://www.colbertlateshow.com/podcasts/dYSwjqPs_X1tvbV_P2FcPWRa_qT6akTC/in-the-bad-room-with-stephen/',
	54	'only_matching': True,
	55	}]
	56
	57	def _extract_video_info(self, content_id, site='cbs', mpx_acc=2198311517):
	58	items_data = self._download_xml(
	59	'http://can.cbs.com/thunder/player/videoPlayerService.php',
	60	content_id, query={'partner': site, 'contentId': content_id})
	61	video_data = xpath_element(items_data, './/item')
	62	title = xpath_text(video_data, 'videoTitle', 'title', True)
	63	tp_path = 'dJ5BDC/media/guid/%d/%s' % (mpx_acc, content_id)
	64	tp_release_url = 'http://link.theplatform.com/s/' + tp_path
	65
	66	asset_types = []
	67	subtitles = {}
	68	formats = []
	69	last_e = None
	70	for item in items_data.findall('.//item'):
	71	asset_type = xpath_text(item, 'assetType')
	72	if not asset_type or asset_type in asset_types or 'HLS_FPS' in asset_type or 'DASH_CENC' in asset_type:
	73	continue
	74	asset_types.append(asset_type)
	75	query = {
	76	'mbr': 'true',
	77	'assetTypes': asset_type,
	78	}
	79	if asset_type.startswith('HLS') or asset_type in ('OnceURL', 'StreamPack'):
	80	query['formats'] = 'MPEG4,M3U'
	81	elif asset_type in ('RTMP', 'WIFI', '3G'):
	82	query['formats'] = 'MPEG4,FLV'
	83	try:
	84	tp_formats, tp_subtitles = self._extract_theplatform_smil(
	85	update_url_query(tp_release_url, query), content_id,
	86	'Downloading %s SMIL data' % asset_type)
	87	except ExtractorError as e:
	88	last_e = e
	89	continue
	90	formats.extend(tp_formats)
	91	subtitles = self._merge_subtitles(subtitles, tp_subtitles)
	92	if last_e and not formats:
	93	raise last_e
	94	self._sort_formats(formats)
	95
	96	info = self._extract_theplatform_metadata(tp_path, content_id)
	97	info.update({
	98	'id': content_id,
	99	'title': title,
	100	'series': xpath_text(video_data, 'seriesTitle'),
	101	'season_number': int_or_none(xpath_text(video_data, 'seasonNumber')),
	102	'episode_number': int_or_none(xpath_text(video_data, 'episodeNumber')),
	103	'duration': int_or_none(xpath_text(video_data, 'videoLength'), 1000),
	104	'thumbnail': xpath_text(video_data, 'previewImageURL'),
	105	'formats': formats,
	106	'subtitles': subtitles,
	107	})
	108	return info
	109
	110	def _real_extract(self, url):
	111	content_id = self._match_id(url)
	112	return self._extract_video_info(content_id)