jfr.im git - yt-dlp.git/blame_incremental

... / ...

Commit	Line	Data
	1	# coding: utf-8
	2	from __future__ import unicode_literals
	3
	4	import re
	5
	6	from .common import InfoExtractor
	7	from ..compat import compat_str
	8	from ..utils import (
	9	determine_ext,
	10	int_or_none,
	11	NO_DEFAULT,
	12	orderedSet,
	13	parse_codecs,
	14	qualities,
	15	try_get,
	16	unified_timestamp,
	17	update_url_query,
	18	url_or_none,
	19	urljoin,
	20	)
	21
	22
	23	class ZDFBaseIE(InfoExtractor):
	24	def _call_api(self, url, player, referrer, video_id, item):
	25	return self._download_json(
	26	url, video_id, 'Downloading JSON %s' % item,
	27	headers={
	28	'Referer': referrer,
	29	'Api-Auth': 'Bearer %s' % player['apiToken'],
	30	})
	31
	32	def _extract_player(self, webpage, video_id, fatal=True):
	33	return self._parse_json(
	34	self._search_regex(
	35	r'(?s)data-zdfplayer-jsb=(["\'])(?P<json>{.+?})\1', webpage,
	36	'player JSON', default='{}' if not fatal else NO_DEFAULT,
	37	group='json'),
	38	video_id)
	39
	40
	41	class ZDFIE(ZDFBaseIE):
	42	IE_NAME = "ZDF-3sat"
	43	_VALID_URL = r'https?://www\.(zdf\|3sat)\.de/(?:[^/]+/)*(?P<id>[^/?]+)\.html'
	44	_QUALITIES = ('auto', 'low', 'med', 'high', 'veryhigh', 'hd')
	45	_GEO_COUNTRIES = ['DE']
	46
	47	_TESTS = [{
	48	'url': 'https://www.3sat.de/wissen/wissenschaftsdoku/luxusgut-lebensraum-100.html',
	49	'info_dict': {
	50	'id': 'luxusgut-lebensraum-100',
	51	'ext': 'mp4',
	52	'title': 'Luxusgut Lebensraum',
	53	'description': 'md5:5c09b2f45ac3bc5233d1b50fc543d061',
	54	'duration': 2601,
	55	'timestamp': 1566497700,
	56	'upload_date': '20190822',
	57	}
	58	}, {
	59	'url': 'https://www.zdf.de/dokumentation/terra-x/die-magie-der-farben-von-koenigspurpur-und-jeansblau-100.html',
	60	'info_dict': {
	61	'id': 'die-magie-der-farben-von-koenigspurpur-und-jeansblau-100',
	62	'ext': 'mp4',
	63	'title': 'Die Magie der Farben (2/2)',
	64	'description': 'md5:a89da10c928c6235401066b60a6d5c1a',
	65	'duration': 2615,
	66	'timestamp': 1465021200,
	67	'upload_date': '20160604',
	68	},
	69	}, {
	70	'url': 'https://www.zdf.de/service-und-hilfe/die-neue-zdf-mediathek/zdfmediathek-trailer-100.html',
	71	'only_matching': True,
	72	}, {
	73	'url': 'https://www.zdf.de/filme/taunuskrimi/die-lebenden-und-die-toten-1---ein-taunuskrimi-100.html',
	74	'only_matching': True,
	75	}, {
	76	'url': 'https://www.zdf.de/dokumentation/planet-e/planet-e-uebersichtsseite-weitere-dokumentationen-von-planet-e-100.html',
	77	'only_matching': True,
	78	}]
	79
	80	@staticmethod
	81	def _extract_subtitles(src):
	82	subtitles = {}
	83	for caption in try_get(src, lambda x: x['captions'], list) or []:
	84	subtitle_url = url_or_none(caption.get('uri'))
	85	if subtitle_url:
	86	lang = caption.get('language', 'deu')
	87	subtitles.setdefault(lang, []).append({
	88	'url': subtitle_url,
	89	})
	90	return subtitles
	91
	92	def _extract_format(self, video_id, formats, format_urls, meta):
	93	format_url = url_or_none(meta.get('url'))
	94	if not format_url:
	95	return
	96	if format_url in format_urls:
	97	return
	98	format_urls.add(format_url)
	99	mime_type = meta.get('mimeType')
	100	ext = determine_ext(format_url)
	101	if mime_type == 'application/x-mpegURL' or ext == 'm3u8':
	102	formats.extend(self._extract_m3u8_formats(
	103	format_url, video_id, 'mp4', m3u8_id='hls',
	104	entry_protocol='m3u8_native', fatal=False))
	105	elif mime_type == 'application/f4m+xml' or ext == 'f4m':
	106	formats.extend(self._extract_f4m_formats(
	107	update_url_query(format_url, {'hdcore': '3.7.0'}), video_id, f4m_id='hds', fatal=False))
	108	else:
	109	f = parse_codecs(meta.get('mimeCodec'))
	110	format_id = ['http']
	111	for p in (meta.get('type'), meta.get('quality')):
	112	if p and isinstance(p, compat_str):
	113	format_id.append(p)
	114	f.update({
	115	'url': format_url,
	116	'format_id': '-'.join(format_id),
	117	'format_note': meta.get('quality'),
	118	'language': meta.get('language'),
	119	'quality': qualities(self._QUALITIES)(meta.get('quality')),
	120	'preference': -10,
	121	})
	122	formats.append(f)
	123
	124	def _extract_entry(self, url, player, content, video_id):
	125	title = content.get('title') or content['teaserHeadline']
	126
	127	t = content['mainVideoContent']['http://zdf.de/rels/target']
	128
	129	ptmd_path = t.get('http://zdf.de/rels/streams/ptmd')
	130
	131	if not ptmd_path:
	132	ptmd_path = t[
	133	'http://zdf.de/rels/streams/ptmd-template'].replace(
	134	'{playerId}', 'ngplayer_2_4')
	135
	136	ptmd = self._call_api(
	137	urljoin(url, ptmd_path), player, url, video_id, 'metadata')
	138
	139	formats = []
	140	track_uris = set()
	141	for p in ptmd['priorityList']:
	142	formitaeten = p.get('formitaeten')
	143	if not isinstance(formitaeten, list):
	144	continue
	145	for f in formitaeten:
	146	f_qualities = f.get('qualities')
	147	if not isinstance(f_qualities, list):
	148	continue
	149	for quality in f_qualities:
	150	tracks = try_get(quality, lambda x: x['audio']['tracks'], list)
	151	if not tracks:
	152	continue
	153	for track in tracks:
	154	self._extract_format(
	155	video_id, formats, track_uris, {
	156	'url': track.get('uri'),
	157	'type': f.get('type'),
	158	'mimeType': f.get('mimeType'),
	159	'quality': quality.get('quality'),
	160	'language': track.get('language'),
	161	})
	162	self._sort_formats(formats)
	163
	164	thumbnails = []
	165	layouts = try_get(
	166	content, lambda x: x['teaserImageRef']['layouts'], dict)
	167	if layouts:
	168	for layout_key, layout_url in layouts.items():
	169	layout_url = url_or_none(layout_url)
	170	if not layout_url:
	171	continue
	172	thumbnail = {
	173	'url': layout_url,
	174	'format_id': layout_key,
	175	}
	176	mobj = re.search(r'(?P<width>\d+)x(?P<height>\d+)', layout_key)
	177	if mobj:
	178	thumbnail.update({
	179	'width': int(mobj.group('width')),
	180	'height': int(mobj.group('height')),
	181	})
	182	thumbnails.append(thumbnail)
	183
	184	return {
	185	'id': video_id,
	186	'title': title,
	187	'description': content.get('leadParagraph') or content.get('teasertext'),
	188	'duration': int_or_none(t.get('duration')),
	189	'timestamp': unified_timestamp(content.get('editorialDate')),
	190	'thumbnails': thumbnails,
	191	'subtitles': self._extract_subtitles(ptmd),
	192	'formats': formats,
	193	}
	194
	195	def _extract_regular(self, url, player, video_id):
	196	content = self._call_api(
	197	player['content'], player, url, video_id, 'content')
	198	return self._extract_entry(player['content'], player, content, video_id)
	199
	200	def _extract_mobile(self, video_id):
	201	document = self._download_json(
	202	'https://zdf-cdn.live.cellular.de/mediathekV2/document/%s' % video_id,
	203	video_id)['document']
	204
	205	title = document['titel']
	206
	207	formats = []
	208	format_urls = set()
	209	for f in document['formitaeten']:
	210	self._extract_format(video_id, formats, format_urls, f)
	211	self._sort_formats(formats)
	212
	213	thumbnails = []
	214	teaser_bild = document.get('teaserBild')
	215	if isinstance(teaser_bild, dict):
	216	for thumbnail_key, thumbnail in teaser_bild.items():
	217	thumbnail_url = try_get(
	218	thumbnail, lambda x: x['url'], compat_str)
	219	if thumbnail_url:
	220	thumbnails.append({
	221	'url': thumbnail_url,
	222	'id': thumbnail_key,
	223	'width': int_or_none(thumbnail.get('width')),
	224	'height': int_or_none(thumbnail.get('height')),
	225	})
	226
	227	return {
	228	'id': video_id,
	229	'title': title,
	230	'description': document.get('beschreibung'),
	231	'duration': int_or_none(document.get('length')),
	232	'timestamp': unified_timestamp(try_get(
	233	document, lambda x: x['meta']['editorialDate'], compat_str)),
	234	'thumbnails': thumbnails,
	235	'subtitles': self._extract_subtitles(document),
	236	'formats': formats,
	237	}
	238
	239	def _real_extract(self, url):
	240	video_id = self._match_id(url)
	241
	242	webpage = self._download_webpage(url, video_id, fatal=False)
	243	if webpage:
	244	player = self._extract_player(webpage, url, fatal=False)
	245	if player:
	246	return self._extract_regular(url, player, video_id)
	247
	248	return self._extract_mobile(video_id)
	249
	250
	251	class ZDFChannelIE(ZDFBaseIE):
	252	_VALID_URL = r'https?://www\.zdf\.de/(?:[^/]+/)*(?P<id>[^/?#&]+)'
	253	_TESTS = [{
	254	'url': 'https://www.zdf.de/sport/das-aktuelle-sportstudio',
	255	'info_dict': {
	256	'id': 'das-aktuelle-sportstudio',
	257	'title': 'das aktuelle sportstudio \| ZDF',
	258	},
	259	'playlist_mincount': 23,
	260	}, {
	261	'url': 'https://www.zdf.de/dokumentation/planet-e',
	262	'info_dict': {
	263	'id': 'planet-e',
	264	'title': 'planet e.',
	265	},
	266	'playlist_mincount': 50,
	267	}, {
	268	'url': 'https://www.zdf.de/filme/taunuskrimi/',
	269	'only_matching': True,
	270	}]
	271
	272	@classmethod
	273	def suitable(cls, url):
	274	return False if ZDFIE.suitable(url) else super(ZDFChannelIE, cls).suitable(url)
	275
	276	def _real_extract(self, url):
	277	channel_id = self._match_id(url)
	278
	279	webpage = self._download_webpage(url, channel_id)
	280
	281	entries = [
	282	self.url_result(item_url, ie=ZDFIE.ie_key())
	283	for item_url in orderedSet(re.findall(
	284	r'data-plusbar-url=["\'](http.+?\.html)', webpage))]
	285
	286	return self.playlist_result(
	287	entries, channel_id, self._og_search_title(webpage, fatal=False))
	288
	289	r"""
	290	player = self._extract_player(webpage, channel_id)
	291
	292	channel_id = self._search_regex(
	293	r'docId\s:\s(["\'])(?P<id>(?!\1).+?)\1', webpage,
	294	'channel id', group='id')
	295
	296	channel = self._call_api(
	297	'https://api.zdf.de/content/documents/%s.json' % channel_id,
	298	player, url, channel_id)
	299
	300	items = []
	301	for module in channel['module']:
	302	for teaser in try_get(module, lambda x: x['teaser'], list) or []:
	303	t = try_get(
	304	teaser, lambda x: x['http://zdf.de/rels/target'], dict)
	305	if not t:
	306	continue
	307	items.extend(try_get(
	308	t,
	309	lambda x: x['resultsWithVideo']['http://zdf.de/rels/search/results'],
	310	list) or [])
	311	items.extend(try_get(
	312	module,
	313	lambda x: x['filterRef']['resultsWithVideo']['http://zdf.de/rels/search/results'],
	314	list) or [])
	315
	316	entries = []
	317	entry_urls = set()
	318	for item in items:
	319	t = try_get(item, lambda x: x['http://zdf.de/rels/target'], dict)
	320	if not t:
	321	continue
	322	sharing_url = t.get('http://zdf.de/rels/sharing-url')
	323	if not sharing_url or not isinstance(sharing_url, compat_str):
	324	continue
	325	if sharing_url in entry_urls:
	326	continue
	327	entry_urls.add(sharing_url)
	328	entries.append(self.url_result(
	329	sharing_url, ie=ZDFIE.ie_key(), video_id=t.get('id')))
	330
	331	return self.playlist_result(entries, channel_id, channel.get('title'))
	332	"""