jfr.im git - yt-dlp.git/blame_incremental

... / ...

Commit	Line	Data
	1	import json
	2	import re
	3
	4	from .common import InfoExtractor
	5	from .generic import GenericIE
	6	from ..utils import (
	7	determine_ext,
	8	ExtractorError,
	9	int_or_none,
	10	parse_duration,
	11	qualities,
	12	str_or_none,
	13	try_get,
	14	unified_strdate,
	15	unified_timestamp,
	16	update_url_query,
	17	url_or_none,
	18	xpath_text,
	19	)
	20	from ..compat import compat_etree_fromstring
	21
	22
	23	class ARDMediathekBaseIE(InfoExtractor):
	24	_GEO_COUNTRIES = ['DE']
	25
	26	def _extract_media_info(self, media_info_url, webpage, video_id):
	27	media_info = self._download_json(
	28	media_info_url, video_id, 'Downloading media JSON')
	29	return self._parse_media_info(media_info, video_id, '"fsk"' in webpage)
	30
	31	def _parse_media_info(self, media_info, video_id, fsk):
	32	formats = self._extract_formats(media_info, video_id)
	33
	34	if not formats:
	35	if fsk:
	36	self.raise_no_formats(
	37	'This video is only available after 20:00', expected=True)
	38	elif media_info.get('_geoblocked'):
	39	self.raise_geo_restricted(
	40	'This video is not available due to geoblocking',
	41	countries=self._GEO_COUNTRIES, metadata_available=True)
	42
	43	self._sort_formats(formats)
	44
	45	subtitles = {}
	46	subtitle_url = media_info.get('_subtitleUrl')
	47	if subtitle_url:
	48	subtitles['de'] = [{
	49	'ext': 'ttml',
	50	'url': subtitle_url,
	51	}]
	52
	53	return {
	54	'id': video_id,
	55	'duration': int_or_none(media_info.get('_duration')),
	56	'thumbnail': media_info.get('_previewImage'),
	57	'is_live': media_info.get('_isLive') is True,
	58	'formats': formats,
	59	'subtitles': subtitles,
	60	}
	61
	62	def _ARD_extract_episode_info(self, title):
	63	"""Try to extract season/episode data from the title."""
	64	res = {}
	65	if not title:
	66	return res
	67
	68	for pattern in [
	69	# Pattern for title like "Homo sapiens (S06/E07) - Originalversion"
	70	# from: https://www.ardmediathek.de/one/sendung/doctor-who/Y3JpZDovL3dkci5kZS9vbmUvZG9jdG9yIHdobw
	71	r'.(?P<ep_info> $S(?P<season_number>\d+)/E(?P<episode_number>\d+)$).',
	72	# E.g.: title="Fritjof aus Norwegen (2) (AD)"
	73	# from: https://www.ardmediathek.de/ard/sammlung/der-krieg-und-ich/68cMkqJdllm639Skj4c7sS/
	74	r'.(?P<ep_info> $(?:Folge \|Teil )?(?P<episode_number>\d+)(?:/\d+)?$).',
	75	r'.(?P<ep_info>Folge (?P<episode_number>\d+)(?:\:\| -\|) )\"(?P<episode>.+)\".',
	76	# E.g.: title="Folge 25/42: Symmetrie"
	77	# from: https://www.ardmediathek.de/ard/video/grips-mathe/folge-25-42-symmetrie/ard-alpha/Y3JpZDovL2JyLmRlL3ZpZGVvLzMyYzI0ZjczLWQ1N2MtNDAxNC05ZmZhLTFjYzRkZDA5NDU5OQ/
	78	# E.g.: title="Folge 1063 - Vertrauen"
	79	# from: https://www.ardmediathek.de/ard/sendung/die-fallers/Y3JpZDovL3N3ci5kZS8yMzAyMDQ4/
	80	r'.(?P<ep_info>Folge (?P<episode_number>\d+)(?:/\d+)?(?:\:\| -\|) ).',
	81	]:
	82	m = re.match(pattern, title)
	83	if m:
	84	groupdict = m.groupdict()
	85	res['season_number'] = int_or_none(groupdict.get('season_number'))
	86	res['episode_number'] = int_or_none(groupdict.get('episode_number'))
	87	res['episode'] = str_or_none(groupdict.get('episode'))
	88	# Build the episode title by removing numeric episode information:
	89	if groupdict.get('ep_info') and not res['episode']:
	90	res['episode'] = str_or_none(
	91	title.replace(groupdict.get('ep_info'), ''))
	92	if res['episode']:
	93	res['episode'] = res['episode'].strip()
	94	break
	95
	96	# As a fallback use the whole title as the episode name:
	97	if not res.get('episode'):
	98	res['episode'] = title.strip()
	99	return res
	100
	101	def _extract_formats(self, media_info, video_id):
	102	type_ = media_info.get('_type')
	103	media_array = media_info.get('_mediaArray', [])
	104	formats = []
	105	for num, media in enumerate(media_array):
	106	for stream in media.get('_mediaStreamArray', []):
	107	stream_urls = stream.get('_stream')
	108	if not stream_urls:
	109	continue
	110	if not isinstance(stream_urls, list):
	111	stream_urls = [stream_urls]
	112	quality = stream.get('_quality')
	113	server = stream.get('_server')
	114	for stream_url in stream_urls:
	115	if not url_or_none(stream_url):
	116	continue
	117	ext = determine_ext(stream_url)
	118	if quality != 'auto' and ext in ('f4m', 'm3u8'):
	119	continue
	120	if ext == 'f4m':
	121	formats.extend(self._extract_f4m_formats(
	122	update_url_query(stream_url, {
	123	'hdcore': '3.1.1',
	124	'plugin': 'aasp-3.1.1.69.124'
	125	}), video_id, f4m_id='hds', fatal=False))
	126	elif ext == 'm3u8':
	127	formats.extend(self._extract_m3u8_formats(
	128	stream_url, video_id, 'mp4', 'm3u8_native',
	129	m3u8_id='hls', fatal=False))
	130	else:
	131	if server and server.startswith('rtmp'):
	132	f = {
	133	'url': server,
	134	'play_path': stream_url,
	135	'format_id': 'a%s-rtmp-%s' % (num, quality),
	136	}
	137	else:
	138	f = {
	139	'url': stream_url,
	140	'format_id': 'a%s-%s-%s' % (num, ext, quality)
	141	}
	142	m = re.search(
	143	r'_(?P<width>\d+)x(?P<height>\d+)\.mp4$',
	144	stream_url)
	145	if m:
	146	f.update({
	147	'width': int(m.group('width')),
	148	'height': int(m.group('height')),
	149	})
	150	if type_ == 'audio':
	151	f['vcodec'] = 'none'
	152	formats.append(f)
	153	return formats
	154
	155
	156	class ARDMediathekIE(ARDMediathekBaseIE):
	157	IE_NAME = 'ARD:mediathek'
	158	_VALID_URL = r'^https?://(?:(?:(?:www\|classic)\.)?ardmediathek\.de\|mediathek\.(?:daserste\|rbb-online)\.de\|one\.ard\.de)/(?:./)(?P<video_id>[0-9]+\|[^0-9][^/\?]+)[^/\?](?:\?.*)?'
	159
	160	_TESTS = [{
	161	# available till 26.07.2022
	162	'url': 'http://www.ardmediathek.de/tv/S%C3%9CDLICHT/Was-ist-die-Kunst-der-Zukunft-liebe-Ann/BR-Fernsehen/Video?bcastId=34633636&documentId=44726822',
	163	'info_dict': {
	164	'id': '44726822',
	165	'ext': 'mp4',
	166	'title': 'Was ist die Kunst der Zukunft, liebe Anna McCarthy?',
	167	'description': 'md5:4ada28b3e3b5df01647310e41f3a62f5',
	168	'duration': 1740,
	169	},
	170	'params': {
	171	# m3u8 download
	172	'skip_download': True,
	173	}
	174	}, {
	175	'url': 'https://one.ard.de/tv/Mord-mit-Aussicht/Mord-mit-Aussicht-6-39-T%C3%B6dliche-Nach/ONE/Video?bcastId=46384294&documentId=55586872',
	176	'only_matching': True,
	177	}, {
	178	# audio
	179	'url': 'http://www.ardmediathek.de/tv/WDR-H%C3%B6rspiel-Speicher/Tod-eines-Fu%C3%9Fballers/WDR-3/Audio-Podcast?documentId=28488308&bcastId=23074086',
	180	'only_matching': True,
	181	}, {
	182	'url': 'http://mediathek.daserste.de/sendungen_a-z/328454_anne-will/22429276_vertrauen-ist-gut-spionieren-ist-besser-geht',
	183	'only_matching': True,
	184	}, {
	185	# audio
	186	'url': 'http://mediathek.rbb-online.de/radio/Hörspiel/Vor-dem-Fest/kulturradio/Audio?documentId=30796318&topRessort=radio&bcastId=9839158',
	187	'only_matching': True,
	188	}, {
	189	'url': 'https://classic.ardmediathek.de/tv/Panda-Gorilla-Co/Panda-Gorilla-Co-Folge-274/Das-Erste/Video?bcastId=16355486&documentId=58234698',
	190	'only_matching': True,
	191	}]
	192
	193	@classmethod
	194	def suitable(cls, url):
	195	return False if ARDBetaMediathekIE.suitable(url) else super(ARDMediathekIE, cls).suitable(url)
	196
	197	def _real_extract(self, url):
	198	# determine video id from url
	199	m = self._match_valid_url(url)
	200
	201	document_id = None
	202
	203	numid = re.search(r'documentId=([0-9]+)', url)
	204	if numid:
	205	document_id = video_id = numid.group(1)
	206	else:
	207	video_id = m.group('video_id')
	208
	209	webpage = self._download_webpage(url, video_id)
	210
	211	ERRORS = (
	212	('>Leider liegt eine Störung vor.', 'Video %s is unavailable'),
	213	('>Der gewünschte Beitrag ist nicht mehr verfügbar.<',
	214	'Video %s is no longer available'),
	215	)
	216
	217	for pattern, message in ERRORS:
	218	if pattern in webpage:
	219	raise ExtractorError(message % video_id, expected=True)
	220
	221	if re.search(r'[\?&]rss($\|[=&])', url):
	222	doc = compat_etree_fromstring(webpage.encode('utf-8'))
	223	if doc.tag == 'rss':
	224	return GenericIE()._extract_rss(url, video_id, doc)
	225
	226	title = self._og_search_title(webpage, default=None) or self._html_search_regex(
	227	[r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',
	228	r'<meta name="dcterms\.title" content="(.*?)"/>',
	229	r'<h4 class="headline">(.*?)</h4>',
	230	r'<title[^>]>(.?)</title>'],
	231	webpage, 'title')
	232	description = self._og_search_description(webpage, default=None) or self._html_search_meta(
	233	'dcterms.abstract', webpage, 'description', default=None)
	234	if description is None:
	235	description = self._html_search_meta(
	236	'description', webpage, 'meta description', default=None)
	237	if description is None:
	238	description = self._html_search_regex(
	239	r'<p\s+class="teasertext">(.+?)</p>',
	240	webpage, 'teaser text', default=None)
	241
	242	# Thumbnail is sometimes not present.
	243	# It is in the mobile version, but that seems to use a different URL
	244	# structure altogether.
	245	thumbnail = self._og_search_thumbnail(webpage, default=None)
	246
	247	media_streams = re.findall(r'''(?x)
	248	mediaCollection\.addMediaStream\([0-9]+,\s[0-9]+,\s"[^"]",\s
	249	"([^"]+)"''', webpage)
	250
	251	if media_streams:
	252	QUALITIES = qualities(['lo', 'hi', 'hq'])
	253	formats = []
	254	for furl in set(media_streams):
	255	if furl.endswith('.f4m'):
	256	fid = 'f4m'
	257	else:
	258	fid_m = re.match(r'.*\.([^.]+)\.[^.]+$', furl)
	259	fid = fid_m.group(1) if fid_m else None
	260	formats.append({
	261	'quality': QUALITIES(fid),
	262	'format_id': fid,
	263	'url': furl,
	264	})
	265	self._sort_formats(formats)
	266	info = {
	267	'formats': formats,
	268	}
	269	else: # request JSON file
	270	if not document_id:
	271	video_id = self._search_regex(
	272	(r'/play/(?:config\|media\|sola)/(\d+)', r'contentId["\']\s:\s(\d+)'),
	273	webpage, 'media id', default=None)
	274	info = self._extract_media_info(
	275	'http://www.ardmediathek.de/play/media/%s' % video_id,
	276	webpage, video_id)
	277
	278	info.update({
	279	'id': video_id,
	280	'title': title,
	281	'description': description,
	282	'thumbnail': thumbnail,
	283	})
	284	info.update(self._ARD_extract_episode_info(info['title']))
	285
	286	return info
	287
	288
	289	class ARDIE(InfoExtractor):
	290	_VALID_URL = r'(?P<mainurl>https?://(?:www\.)?daserste\.de/(?:[^/?#&]+/)+(?P<id>[^/?#&]+))\.html'
	291	_TESTS = [{
	292	# available till 7.01.2022
	293	'url': 'https://www.daserste.de/information/talk/maischberger/videos/maischberger-die-woche-video100.html',
	294	'md5': '867d8aa39eeaf6d76407c5ad1bb0d4c1',
	295	'info_dict': {
	296	'id': 'maischberger-die-woche-video100',
	297	'display_id': 'maischberger-die-woche-video100',
	298	'ext': 'mp4',
	299	'duration': 3687.0,
	300	'title': 'maischberger. die woche vom 7. Januar 2021',
	301	'upload_date': '20210107',
	302	'thumbnail': r're:^https?://.*\.jpg$',
	303	},
	304	}, {
	305	'url': 'https://www.daserste.de/information/politik-weltgeschehen/morgenmagazin/videosextern/dominik-kahun-aus-der-nhl-direkt-zur-weltmeisterschaft-100.html',
	306	'only_matching': True,
	307	}, {
	308	'url': 'https://www.daserste.de/information/nachrichten-wetter/tagesthemen/videosextern/tagesthemen-17736.html',
	309	'only_matching': True,
	310	}, {
	311	'url': 'https://www.daserste.de/unterhaltung/serie/in-aller-freundschaft-die-jungen-aerzte/videos/diversity-tag-sanam-afrashteh100.html',
	312	'only_matching': True,
	313	}, {
	314	'url': 'http://www.daserste.de/information/reportage-dokumentation/dokus/videos/die-story-im-ersten-mission-unter-falscher-flagge-100.html',
	315	'only_matching': True,
	316	}, {
	317	'url': 'https://www.daserste.de/unterhaltung/serie/in-aller-freundschaft-die-jungen-aerzte/Drehpause-100.html',
	318	'only_matching': True,
	319	}, {
	320	'url': 'https://www.daserste.de/unterhaltung/film/filmmittwoch-im-ersten/videos/making-ofwendezeit-video-100.html',
	321	'only_matching': True,
	322	}]
	323
	324	def _real_extract(self, url):
	325	mobj = self._match_valid_url(url)
	326	display_id = mobj.group('id')
	327
	328	player_url = mobj.group('mainurl') + '~playerXml.xml'
	329	doc = self._download_xml(player_url, display_id)
	330	video_node = doc.find('./video')
	331	upload_date = unified_strdate(xpath_text(
	332	video_node, './broadcastDate'))
	333	thumbnail = xpath_text(video_node, './/teaserImage//variant/url')
	334
	335	formats = []
	336	for a in video_node.findall('.//asset'):
	337	file_name = xpath_text(a, './fileName', default=None)
	338	if not file_name:
	339	continue
	340	format_type = a.attrib.get('type')
	341	format_url = url_or_none(file_name)
	342	if format_url:
	343	ext = determine_ext(file_name)
	344	if ext == 'm3u8':
	345	formats.extend(self._extract_m3u8_formats(
	346	format_url, display_id, 'mp4', entry_protocol='m3u8_native',
	347	m3u8_id=format_type or 'hls', fatal=False))
	348	continue
	349	elif ext == 'f4m':
	350	formats.extend(self._extract_f4m_formats(
	351	update_url_query(format_url, {'hdcore': '3.7.0'}),
	352	display_id, f4m_id=format_type or 'hds', fatal=False))
	353	continue
	354	f = {
	355	'format_id': format_type,
	356	'width': int_or_none(xpath_text(a, './frameWidth')),
	357	'height': int_or_none(xpath_text(a, './frameHeight')),
	358	'vbr': int_or_none(xpath_text(a, './bitrateVideo')),
	359	'abr': int_or_none(xpath_text(a, './bitrateAudio')),
	360	'vcodec': xpath_text(a, './codecVideo'),
	361	'tbr': int_or_none(xpath_text(a, './totalBitrate')),
	362	}
	363	server_prefix = xpath_text(a, './serverPrefix', default=None)
	364	if server_prefix:
	365	f.update({
	366	'url': server_prefix,
	367	'playpath': file_name,
	368	})
	369	else:
	370	if not format_url:
	371	continue
	372	f['url'] = format_url
	373	formats.append(f)
	374	self._sort_formats(formats)
	375
	376	_SUB_FORMATS = (
	377	('./dataTimedText', 'ttml'),
	378	('./dataTimedTextNoOffset', 'ttml'),
	379	('./dataTimedTextVtt', 'vtt'),
	380	)
	381
	382	subtitles = {}
	383	for subsel, subext in _SUB_FORMATS:
	384	for node in video_node.findall(subsel):
	385	subtitles.setdefault('de', []).append({
	386	'url': node.attrib['url'],
	387	'ext': subext,
	388	})
	389
	390	return {
	391	'id': xpath_text(video_node, './videoId', default=display_id),
	392	'formats': formats,
	393	'subtitles': subtitles,
	394	'display_id': display_id,
	395	'title': video_node.find('./title').text,
	396	'duration': parse_duration(video_node.find('./duration').text),
	397	'upload_date': upload_date,
	398	'thumbnail': thumbnail,
	399	}
	400
	401
	402	class ARDBetaMediathekIE(ARDMediathekBaseIE):
	403	_VALID_URL = r'''(?x)https://
	404	(?:(?:beta\|www)\.)?ardmediathek\.de/
	405	(?:(?P<client>[^/]+)/)?
	406	(?:player\|live\|video\|(?P<playlist>sendung\|sammlung))/
	407	(?:(?P<display_id>(?(playlist)[^?#]+?\|[^?#]+))/)?
	408	(?P<id>(?(playlist)\|Y3JpZDovL)[a-zA-Z0-9]+)
	409	(?(playlist)/(?P<season>\d+)?/?(?:[?#]\|$))'''
	410
	411	_TESTS = [{
	412	'url': 'https://www.ardmediathek.de/mdr/video/die-robuste-roswita/Y3JpZDovL21kci5kZS9iZWl0cmFnL2Ntcy84MWMxN2MzZC0wMjkxLTRmMzUtODk4ZS0wYzhlOWQxODE2NGI/',
	413	'md5': 'a1dc75a39c61601b980648f7c9f9f71d',
	414	'info_dict': {
	415	'display_id': 'die-robuste-roswita',
	416	'id': '78566716',
	417	'title': 'Die robuste Roswita',
	418	'description': r're:^Der Mord.*totgeglaubte Ehefrau Roswita',
	419	'duration': 5316,
	420	'thumbnail': 'https://img.ardmediathek.de/standard/00/78/56/67/84/575672121/16x9/960?mandant=ard',
	421	'timestamp': 1596658200,
	422	'upload_date': '20200805',
	423	'ext': 'mp4',
	424	},
	425	'skip': 'Error',
	426	}, {
	427	'url': 'https://www.ardmediathek.de/video/tagesschau-oder-tagesschau-20-00-uhr/das-erste/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhZ2Vzc2NoYXUvZmM4ZDUxMjgtOTE0ZC00Y2MzLTgzNzAtNDZkNGNiZWJkOTll',
	428	'md5': 'f1837e563323b8a642a8ddeff0131f51',
	429	'info_dict': {
	430	'id': '10049223',
	431	'ext': 'mp4',
	432	'title': 'tagesschau, 20:00 Uhr',
	433	'timestamp': 1636398000,
	434	'description': 'md5:39578c7b96c9fe50afdf5674ad985e6b',
	435	'upload_date': '20211108',
	436	},
	437	}, {
	438	'url': 'https://www.ardmediathek.de/sendung/beforeigners/beforeigners/staffel-1/Y3JpZDovL2Rhc2Vyc3RlLmRlL2JlZm9yZWlnbmVycw/1',
	439	'playlist_count': 6,
	440	'info_dict': {
	441	'id': 'Y3JpZDovL2Rhc2Vyc3RlLmRlL2JlZm9yZWlnbmVycw',
	442	'title': 'beforeigners/beforeigners/staffel-1',
	443	},
	444	}, {
	445	'url': 'https://beta.ardmediathek.de/ard/video/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
	446	'only_matching': True,
	447	}, {
	448	'url': 'https://ardmediathek.de/ard/video/saartalk/saartalk-gesellschaftsgift-haltung-gegen-hass/sr-fernsehen/Y3JpZDovL3NyLW9ubGluZS5kZS9TVF84MTY4MA/',
	449	'only_matching': True,
	450	}, {
	451	'url': 'https://www.ardmediathek.de/ard/video/trailer/private-eyes-s01-e01/one/Y3JpZDovL3dkci5kZS9CZWl0cmFnLTE1MTgwYzczLWNiMTEtNGNkMS1iMjUyLTg5MGYzOWQxZmQ1YQ/',
	452	'only_matching': True,
	453	}, {
	454	'url': 'https://www.ardmediathek.de/ard/player/Y3JpZDovL3N3ci5kZS9hZXgvbzEwNzE5MTU/',
	455	'only_matching': True,
	456	}, {
	457	'url': 'https://www.ardmediathek.de/swr/live/Y3JpZDovL3N3ci5kZS8xMzQ4MTA0Mg',
	458	'only_matching': True,
	459	}, {
	460	# playlist of type 'sendung'
	461	'url': 'https://www.ardmediathek.de/ard/sendung/doctor-who/Y3JpZDovL3dkci5kZS9vbmUvZG9jdG9yIHdobw/',
	462	'only_matching': True,
	463	}, {
	464	# playlist of type 'sammlung'
	465	'url': 'https://www.ardmediathek.de/ard/sammlung/team-muenster/5JpTzLSbWUAK8184IOvEir/',
	466	'only_matching': True,
	467	}, {
	468	'url': 'https://www.ardmediathek.de/video/coronavirus-update-ndr-info/astrazeneca-kurz-lockdown-und-pims-syndrom-81/ndr/Y3JpZDovL25kci5kZS84NzE0M2FjNi0wMWEwLTQ5ODEtOTE5NS1mOGZhNzdhOTFmOTI/',
	469	'only_matching': True,
	470	}, {
	471	'url': 'https://www.ardmediathek.de/ard/player/Y3JpZDovL3dkci5kZS9CZWl0cmFnLWQ2NDJjYWEzLTMwZWYtNGI4NS1iMTI2LTU1N2UxYTcxOGIzOQ/tatort-duo-koeln-leipzig-ihr-kinderlein-kommet',
	472	'only_matching': True,
	473	}]
	474
	475	def _ARD_load_playlist_snipped(self, playlist_id, display_id, client, mode, pageNumber):
	476	""" Query the ARD server for playlist information
	477	and returns the data in "raw" format """
	478	if mode == 'sendung':
	479	graphQL = json.dumps({
	480	'query': '''{
	481	showPage(
	482	client: "%s"
	483	showId: "%s"
	484	pageNumber: %d
	485	) {
	486	pagination {
	487	pageSize
	488	totalElements
	489	}
	490	teasers { # Array
	491	mediumTitle
	492	links { target { id href title } }
	493	type
	494	}
	495	}}''' % (client, playlist_id, pageNumber),
	496	}).encode()
	497	else: # mode == 'sammlung'
	498	graphQL = json.dumps({
	499	'query': '''{
	500	morePage(
	501	client: "%s"
	502	compilationId: "%s"
	503	pageNumber: %d
	504	) {
	505	widget {
	506	pagination {
	507	pageSize
	508	totalElements
	509	}
	510	teasers { # Array
	511	mediumTitle
	512	links { target { id href title } }
	513	type
	514	}
	515	}
	516	}}''' % (client, playlist_id, pageNumber),
	517	}).encode()
	518	# Ressources for ARD graphQL debugging:
	519	# https://api-test.ardmediathek.de/public-gateway
	520	show_page = self._download_json(
	521	'https://api.ardmediathek.de/public-gateway',
	522	'[Playlist] %s' % display_id,
	523	data=graphQL,
	524	headers={'Content-Type': 'application/json'})['data']
	525	# align the structure of the returned data:
	526	if mode == 'sendung':
	527	show_page = show_page['showPage']
	528	else: # mode == 'sammlung'
	529	show_page = show_page['morePage']['widget']
	530	return show_page
	531
	532	def _ARD_extract_playlist(self, url, playlist_id, display_id, client, mode):
	533	""" Collects all playlist entries and returns them as info dict.
	534	Supports playlists of mode 'sendung' and 'sammlung', and also nested
	535	playlists. """
	536	entries = []
	537	pageNumber = 0
	538	while True: # iterate by pageNumber
	539	show_page = self._ARD_load_playlist_snipped(
	540	playlist_id, display_id, client, mode, pageNumber)
	541	for teaser in show_page['teasers']: # process playlist items
	542	if '/compilation/' in teaser['links']['target']['href']:
	543	# alternativ cond.: teaser['type'] == "compilation"
	544	# => This is an nested compilation, e.g. like:
	545	# https://www.ardmediathek.de/ard/sammlung/die-kirche-bleibt-im-dorf/5eOHzt8XB2sqeFXbIoJlg2/
	546	link_mode = 'sammlung'
	547	else:
	548	link_mode = 'video'
	549
	550	item_url = 'https://www.ardmediathek.de/%s/%s/%s/%s/%s' % (
	551	client, link_mode, display_id,
	552	# perform HTLM quoting of episode title similar to ARD:
	553	re.sub('^-\|-$', '', # remove '-' from begin/end
	554	re.sub('[^a-zA-Z0-9]+', '-', # replace special chars by -
	555	teaser['links']['target']['title'].lower()
	556	.replace('ä', 'ae').replace('ö', 'oe')
	557	.replace('ü', 'ue').replace('ß', 'ss'))),
	558	teaser['links']['target']['id'])
	559	entries.append(self.url_result(
	560	item_url,
	561	ie=ARDBetaMediathekIE.ie_key()))
	562
	563	if (show_page['pagination']['pageSize'] * (pageNumber + 1)
	564	>= show_page['pagination']['totalElements']):
	565	# we've processed enough pages to get all playlist entries
	566	break
	567	pageNumber = pageNumber + 1
	568
	569	return self.playlist_result(entries, playlist_id, playlist_title=display_id)
	570
	571	def _real_extract(self, url):
	572	video_id, display_id, playlist_type, client, season_number = self._match_valid_url(url).group(
	573	'id', 'display_id', 'playlist', 'client', 'season')
	574	display_id, client = display_id or video_id, client or 'ard'
	575
	576	if playlist_type:
	577	# TODO: Extract only specified season
	578	return self._ARD_extract_playlist(url, video_id, display_id, client, playlist_type)
	579
	580	player_page = self._download_json(
	581	'https://api.ardmediathek.de/public-gateway',
	582	display_id, data=json.dumps({
	583	'query': '''{
	584	playerPage(client:"%s", clipId: "%s") {
	585	blockedByFsk
	586	broadcastedOn
	587	maturityContentRating
	588	mediaCollection {
	589	_duration
	590	_geoblocked
	591	_isLive
	592	_mediaArray {
	593	_mediaStreamArray {
	594	_quality
	595	_server
	596	_stream
	597	}
	598	}
	599	_previewImage
	600	_subtitleUrl
	601	_type
	602	}
	603	show {
	604	title
	605	}
	606	synopsis
	607	title
	608	tracking {
	609	atiCustomVars {
	610	contentId
	611	}
	612	}
	613	}
	614	}''' % (client, video_id),
	615	}).encode(), headers={
	616	'Content-Type': 'application/json'
	617	})['data']['playerPage']
	618	title = player_page['title']
	619	content_id = str_or_none(try_get(
	620	player_page, lambda x: x['tracking']['atiCustomVars']['contentId']))
	621	media_collection = player_page.get('mediaCollection') or {}
	622	if not media_collection and content_id:
	623	media_collection = self._download_json(
	624	'https://www.ardmediathek.de/play/media/' + content_id,
	625	content_id, fatal=False) or {}
	626	info = self._parse_media_info(
	627	media_collection, content_id or video_id,
	628	player_page.get('blockedByFsk'))
	629	age_limit = None
	630	description = player_page.get('synopsis')
	631	maturity_content_rating = player_page.get('maturityContentRating')
	632	if maturity_content_rating:
	633	age_limit = int_or_none(maturity_content_rating.lstrip('FSK'))
	634	if not age_limit and description:
	635	age_limit = int_or_none(self._search_regex(
	636	r'$FSK\s(\d+)$\s$', description, 'age limit', default=None))
	637	info.update({
	638	'age_limit': age_limit,
	639	'display_id': display_id,
	640	'title': title,
	641	'description': description,
	642	'timestamp': unified_timestamp(player_page.get('broadcastedOn')),
	643	'series': try_get(player_page, lambda x: x['show']['title']),
	644	})
	645	info.update(self._ARD_extract_episode_info(info['title']))
	646	return info