jfr.im git - yt-dlp.git/blame_incremental - youtube

... / ...

Commit	Line	Data
	1	# coding: utf-8
	2	from __future__ import unicode_literals
	3
	4	import re
	5
	6	from .common import InfoExtractor
	7	from ..compat import compat_str
	8	from ..utils import (
	9	determine_ext,
	10	int_or_none,
	11	NO_DEFAULT,
	12	orderedSet,
	13	parse_codecs,
	14	qualities,
	15	try_get,
	16	unified_timestamp,
	17	update_url_query,
	18	urljoin,
	19	)
	20
	21
	22	class ZDFBaseIE(InfoExtractor):
	23	def _call_api(self, url, player, referrer, video_id, item):
	24	return self._download_json(
	25	url, video_id, 'Downloading JSON %s' % item,
	26	headers={
	27	'Referer': referrer,
	28	'Api-Auth': 'Bearer %s' % player['apiToken'],
	29	})
	30
	31	def _extract_player(self, webpage, video_id, fatal=True):
	32	return self._parse_json(
	33	self._search_regex(
	34	r'(?s)data-zdfplayer-jsb=(["\'])(?P<json>{.+?})\1', webpage,
	35	'player JSON', default='{}' if not fatal else NO_DEFAULT,
	36	group='json'),
	37	video_id)
	38
	39
	40	class ZDFIE(ZDFBaseIE):
	41	_VALID_URL = r'https?://www\.zdf\.de/(?:[^/]+/)*(?P<id>[^/?]+)\.html'
	42	_QUALITIES = ('auto', 'low', 'med', 'high', 'veryhigh')
	43
	44	_TESTS = [{
	45	'url': 'https://www.zdf.de/service-und-hilfe/die-neue-zdf-mediathek/zdfmediathek-trailer-100.html',
	46	'info_dict': {
	47	'id': 'zdfmediathek-trailer-100',
	48	'ext': 'mp4',
	49	'title': 'Die neue ZDFmediathek',
	50	'description': 'md5:3003d36487fb9a5ea2d1ff60beb55e8d',
	51	'duration': 30,
	52	'timestamp': 1477627200,
	53	'upload_date': '20161028',
	54	}
	55	}, {
	56	'url': 'https://www.zdf.de/filme/taunuskrimi/die-lebenden-und-die-toten-1---ein-taunuskrimi-100.html',
	57	'only_matching': True,
	58	}, {
	59	'url': 'https://www.zdf.de/dokumentation/planet-e/planet-e-uebersichtsseite-weitere-dokumentationen-von-planet-e-100.html',
	60	'only_matching': True,
	61	}]
	62
	63	@staticmethod
	64	def _extract_subtitles(src):
	65	subtitles = {}
	66	for caption in try_get(src, lambda x: x['captions'], list) or []:
	67	subtitle_url = caption.get('uri')
	68	if subtitle_url and isinstance(subtitle_url, compat_str):
	69	lang = caption.get('language', 'deu')
	70	subtitles.setdefault(lang, []).append({
	71	'url': subtitle_url,
	72	})
	73	return subtitles
	74
	75	def _extract_format(self, video_id, formats, format_urls, meta):
	76	format_url = meta.get('url')
	77	if not format_url or not isinstance(format_url, compat_str):
	78	return
	79	if format_url in format_urls:
	80	return
	81	format_urls.add(format_url)
	82	mime_type = meta.get('mimeType')
	83	ext = determine_ext(format_url)
	84	if mime_type == 'application/x-mpegURL' or ext == 'm3u8':
	85	formats.extend(self._extract_m3u8_formats(
	86	format_url, video_id, 'mp4', m3u8_id='hls',
	87	entry_protocol='m3u8_native', fatal=False))
	88	elif mime_type == 'application/f4m+xml' or ext == 'f4m':
	89	formats.extend(self._extract_f4m_formats(
	90	update_url_query(format_url, {'hdcore': '3.7.0'}), video_id, f4m_id='hds', fatal=False))
	91	else:
	92	f = parse_codecs(meta.get('mimeCodec'))
	93	format_id = ['http']
	94	for p in (meta.get('type'), meta.get('quality')):
	95	if p and isinstance(p, compat_str):
	96	format_id.append(p)
	97	f.update({
	98	'url': format_url,
	99	'format_id': '-'.join(format_id),
	100	'format_note': meta.get('quality'),
	101	'language': meta.get('language'),
	102	'quality': qualities(self._QUALITIES)(meta.get('quality')),
	103	'preference': -10,
	104	})
	105	formats.append(f)
	106
	107	def _extract_entry(self, url, player, content, video_id):
	108	title = content.get('title') or content['teaserHeadline']
	109
	110	t = content['mainVideoContent']['http://zdf.de/rels/target']
	111
	112	ptmd_path = t.get('http://zdf.de/rels/streams/ptmd')
	113
	114	if not ptmd_path:
	115	ptmd_path = t[
	116	'http://zdf.de/rels/streams/ptmd-template'].replace(
	117	'{playerId}', 'portal')
	118
	119	ptmd = self._call_api(
	120	urljoin(url, ptmd_path), player, url, video_id, 'metadata')
	121
	122	formats = []
	123	track_uris = set()
	124	for p in ptmd['priorityList']:
	125	formitaeten = p.get('formitaeten')
	126	if not isinstance(formitaeten, list):
	127	continue
	128	for f in formitaeten:
	129	f_qualities = f.get('qualities')
	130	if not isinstance(f_qualities, list):
	131	continue
	132	for quality in f_qualities:
	133	tracks = try_get(quality, lambda x: x['audio']['tracks'], list)
	134	if not tracks:
	135	continue
	136	for track in tracks:
	137	self._extract_format(
	138	video_id, formats, track_uris, {
	139	'url': track.get('uri'),
	140	'type': f.get('type'),
	141	'mimeType': f.get('mimeType'),
	142	'quality': quality.get('quality'),
	143	'language': track.get('language'),
	144	})
	145	self._sort_formats(formats)
	146
	147	thumbnails = []
	148	layouts = try_get(
	149	content, lambda x: x['teaserImageRef']['layouts'], dict)
	150	if layouts:
	151	for layout_key, layout_url in layouts.items():
	152	if not isinstance(layout_url, compat_str):
	153	continue
	154	thumbnail = {
	155	'url': layout_url,
	156	'format_id': layout_key,
	157	}
	158	mobj = re.search(r'(?P<width>\d+)x(?P<height>\d+)', layout_key)
	159	if mobj:
	160	thumbnail.update({
	161	'width': int(mobj.group('width')),
	162	'height': int(mobj.group('height')),
	163	})
	164	thumbnails.append(thumbnail)
	165
	166	return {
	167	'id': video_id,
	168	'title': title,
	169	'description': content.get('leadParagraph') or content.get('teasertext'),
	170	'duration': int_or_none(t.get('duration')),
	171	'timestamp': unified_timestamp(content.get('editorialDate')),
	172	'thumbnails': thumbnails,
	173	'subtitles': self._extract_subtitles(ptmd),
	174	'formats': formats,
	175	}
	176
	177	def _extract_regular(self, url, player, video_id):
	178	content = self._call_api(
	179	player['content'], player, url, video_id, 'content')
	180	return self._extract_entry(player['content'], player, content, video_id)
	181
	182	def _extract_mobile(self, video_id):
	183	document = self._download_json(
	184	'https://zdf-cdn.live.cellular.de/mediathekV2/document/%s' % video_id,
	185	video_id)['document']
	186
	187	title = document['titel']
	188
	189	formats = []
	190	format_urls = set()
	191	for f in document['formitaeten']:
	192	self._extract_format(video_id, formats, format_urls, f)
	193	self._sort_formats(formats)
	194
	195	thumbnails = []
	196	teaser_bild = document.get('teaserBild')
	197	if isinstance(teaser_bild, dict):
	198	for thumbnail_key, thumbnail in teaser_bild.items():
	199	thumbnail_url = try_get(
	200	thumbnail, lambda x: x['url'], compat_str)
	201	if thumbnail_url:
	202	thumbnails.append({
	203	'url': thumbnail_url,
	204	'id': thumbnail_key,
	205	'width': int_or_none(thumbnail.get('width')),
	206	'height': int_or_none(thumbnail.get('height')),
	207	})
	208
	209	return {
	210	'id': video_id,
	211	'title': title,
	212	'description': document.get('beschreibung'),
	213	'duration': int_or_none(document.get('length')),
	214	'timestamp': unified_timestamp(try_get(
	215	document, lambda x: x['meta']['editorialDate'], compat_str)),
	216	'thumbnails': thumbnails,
	217	'subtitles': self._extract_subtitles(document),
	218	'formats': formats,
	219	}
	220
	221	def _real_extract(self, url):
	222	video_id = self._match_id(url)
	223
	224	webpage = self._download_webpage(url, video_id, fatal=False)
	225	if webpage:
	226	player = self._extract_player(webpage, url, fatal=False)
	227	if player:
	228	return self._extract_regular(url, player, video_id)
	229
	230	return self._extract_mobile(video_id)
	231
	232
	233	class ZDFChannelIE(ZDFBaseIE):
	234	_VALID_URL = r'https?://www\.zdf\.de/(?:[^/]+/)*(?P<id>[^/?#&]+)'
	235	_TESTS = [{
	236	'url': 'https://www.zdf.de/sport/das-aktuelle-sportstudio',
	237	'info_dict': {
	238	'id': 'das-aktuelle-sportstudio',
	239	'title': 'das aktuelle sportstudio \| ZDF',
	240	},
	241	'playlist_count': 21,
	242	}, {
	243	'url': 'https://www.zdf.de/dokumentation/planet-e',
	244	'info_dict': {
	245	'id': 'planet-e',
	246	'title': 'planet e.',
	247	},
	248	'playlist_count': 4,
	249	}, {
	250	'url': 'https://www.zdf.de/filme/taunuskrimi/',
	251	'only_matching': True,
	252	}]
	253
	254	@classmethod
	255	def suitable(cls, url):
	256	return False if ZDFIE.suitable(url) else super(ZDFChannelIE, cls).suitable(url)
	257
	258	def _real_extract(self, url):
	259	channel_id = self._match_id(url)
	260
	261	webpage = self._download_webpage(url, channel_id)
	262
	263	entries = [
	264	self.url_result(item_url, ie=ZDFIE.ie_key())
	265	for item_url in orderedSet(re.findall(
	266	r'data-plusbar-url=["\'](http.+?\.html)', webpage))]
	267
	268	return self.playlist_result(
	269	entries, channel_id, self._og_search_title(webpage, fatal=False))
	270
	271	r"""
	272	player = self._extract_player(webpage, channel_id)
	273
	274	channel_id = self._search_regex(
	275	r'docId\s:\s(["\'])(?P<id>(?!\1).+?)\1', webpage,
	276	'channel id', group='id')
	277
	278	channel = self._call_api(
	279	'https://api.zdf.de/content/documents/%s.json' % channel_id,
	280	player, url, channel_id)
	281
	282	items = []
	283	for module in channel['module']:
	284	for teaser in try_get(module, lambda x: x['teaser'], list) or []:
	285	t = try_get(
	286	teaser, lambda x: x['http://zdf.de/rels/target'], dict)
	287	if not t:
	288	continue
	289	items.extend(try_get(
	290	t,
	291	lambda x: x['resultsWithVideo']['http://zdf.de/rels/search/results'],
	292	list) or [])
	293	items.extend(try_get(
	294	module,
	295	lambda x: x['filterRef']['resultsWithVideo']['http://zdf.de/rels/search/results'],
	296	list) or [])
	297
	298	entries = []
	299	entry_urls = set()
	300	for item in items:
	301	t = try_get(item, lambda x: x['http://zdf.de/rels/target'], dict)
	302	if not t:
	303	continue
	304	sharing_url = t.get('http://zdf.de/rels/sharing-url')
	305	if not sharing_url or not isinstance(sharing_url, compat_str):
	306	continue
	307	if sharing_url in entry_urls:
	308	continue
	309	entry_urls.add(sharing_url)
	310	entries.append(self.url_result(
	311	sharing_url, ie=ZDFIE.ie_key(), video_id=t.get('id')))
	312
	313	return self.playlist_result(entries, channel_id, channel.get('title'))
	314	"""