jfr.im git - yt-dlp.git/blame_incremental

... / ...

Commit	Line	Data
	1	# coding: utf-8
	2	from __future__ import unicode_literals
	3
	4	import json
	5	import re
	6
	7	from .common import InfoExtractor
	8	from .generic import GenericIE
	9	from ..utils import (
	10	determine_ext,
	11	ExtractorError,
	12	int_or_none,
	13	parse_duration,
	14	qualities,
	15	str_or_none,
	16	try_get,
	17	unified_strdate,
	18	unified_timestamp,
	19	update_url_query,
	20	url_or_none,
	21	xpath_text,
	22	)
	23	from ..compat import compat_etree_fromstring
	24
	25
	26	class ARDMediathekBaseIE(InfoExtractor):
	27	_GEO_COUNTRIES = ['DE']
	28
	29	def _extract_media_info(self, media_info_url, webpage, video_id):
	30	media_info = self._download_json(
	31	media_info_url, video_id, 'Downloading media JSON')
	32	return self._parse_media_info(media_info, video_id, '"fsk"' in webpage)
	33
	34	def _parse_media_info(self, media_info, video_id, fsk):
	35	formats = self._extract_formats(media_info, video_id)
	36
	37	if not formats:
	38	if fsk:
	39	self.raise_no_formats(
	40	'This video is only available after 20:00', expected=True)
	41	elif media_info.get('_geoblocked'):
	42	self.raise_geo_restricted(
	43	'This video is not available due to geoblocking',
	44	countries=self._GEO_COUNTRIES, metadata_available=True)
	45
	46	self._sort_formats(formats)
	47
	48	subtitles = {}
	49	subtitle_url = media_info.get('_subtitleUrl')
	50	if subtitle_url:
	51	subtitles['de'] = [{
	52	'ext': 'ttml',
	53	'url': subtitle_url,
	54	}]
	55
	56	return {
	57	'id': video_id,
	58	'duration': int_or_none(media_info.get('_duration')),
	59	'thumbnail': media_info.get('_previewImage'),
	60	'is_live': media_info.get('_isLive') is True,
	61	'formats': formats,
	62	'subtitles': subtitles,
	63	}
	64
	65	def _ARD_extract_episode_info(self, title):
	66	"""Try to extract season/episode data from the title."""
	67	res = {}
	68	if not title:
	69	return res
	70
	71	for pattern in [
	72	# Pattern for title like "Homo sapiens (S06/E07) - Originalversion"
	73	# from: https://www.ardmediathek.de/one/sendung/doctor-who/Y3JpZDovL3dkci5kZS9vbmUvZG9jdG9yIHdobw
	74	r'.(?P<ep_info> $S(?P<season_number>\d+)/E(?P<episode_number>\d+)$).',
	75	# E.g.: title="Fritjof aus Norwegen (2) (AD)"
	76	# from: https://www.ardmediathek.de/ard/sammlung/der-krieg-und-ich/68cMkqJdllm639Skj4c7sS/
	77	r'.(?P<ep_info> $(?:Folge \|Teil )?(?P<episode_number>\d+)(?:/\d+)?$).',
	78	r'.(?P<ep_info>Folge (?P<episode_number>\d+)(?:\:\| -\|) )\"(?P<episode>.+)\".',
	79	# E.g.: title="Folge 25/42: Symmetrie"
	80	# from: https://www.ardmediathek.de/ard/video/grips-mathe/folge-25-42-symmetrie/ard-alpha/Y3JpZDovL2JyLmRlL3ZpZGVvLzMyYzI0ZjczLWQ1N2MtNDAxNC05ZmZhLTFjYzRkZDA5NDU5OQ/
	81	# E.g.: title="Folge 1063 - Vertrauen"
	82	# from: https://www.ardmediathek.de/ard/sendung/die-fallers/Y3JpZDovL3N3ci5kZS8yMzAyMDQ4/
	83	r'.(?P<ep_info>Folge (?P<episode_number>\d+)(?:/\d+)?(?:\:\| -\|) ).',
	84	]:
	85	m = re.match(pattern, title)
	86	if m:
	87	groupdict = m.groupdict()
	88	res['season_number'] = int_or_none(groupdict.get('season_number'))
	89	res['episode_number'] = int_or_none(groupdict.get('episode_number'))
	90	res['episode'] = str_or_none(groupdict.get('episode'))
	91	# Build the episode title by removing numeric episode information:
	92	if groupdict.get('ep_info') and not res['episode']:
	93	res['episode'] = str_or_none(
	94	title.replace(groupdict.get('ep_info'), ''))
	95	if res['episode']:
	96	res['episode'] = res['episode'].strip()
	97	break
	98
	99	# As a fallback use the whole title as the episode name:
	100	if not res.get('episode'):
	101	res['episode'] = title.strip()
	102	return res
	103
	104	def _extract_formats(self, media_info, video_id):
	105	type_ = media_info.get('_type')
	106	media_array = media_info.get('_mediaArray', [])
	107	formats = []
	108	for num, media in enumerate(media_array):
	109	for stream in media.get('_mediaStreamArray', []):
	110	stream_urls = stream.get('_stream')
	111	if not stream_urls:
	112	continue
	113	if not isinstance(stream_urls, list):
	114	stream_urls = [stream_urls]
	115	quality = stream.get('_quality')
	116	server = stream.get('_server')
	117	for stream_url in stream_urls:
	118	if not url_or_none(stream_url):
	119	continue
	120	ext = determine_ext(stream_url)
	121	if quality != 'auto' and ext in ('f4m', 'm3u8'):
	122	continue
	123	if ext == 'f4m':
	124	formats.extend(self._extract_f4m_formats(
	125	update_url_query(stream_url, {
	126	'hdcore': '3.1.1',
	127	'plugin': 'aasp-3.1.1.69.124'
	128	}), video_id, f4m_id='hds', fatal=False))
	129	elif ext == 'm3u8':
	130	formats.extend(self._extract_m3u8_formats(
	131	stream_url, video_id, 'mp4', 'm3u8_native',
	132	m3u8_id='hls', fatal=False))
	133	else:
	134	if server and server.startswith('rtmp'):
	135	f = {
	136	'url': server,
	137	'play_path': stream_url,
	138	'format_id': 'a%s-rtmp-%s' % (num, quality),
	139	}
	140	else:
	141	f = {
	142	'url': stream_url,
	143	'format_id': 'a%s-%s-%s' % (num, ext, quality)
	144	}
	145	m = re.search(
	146	r'_(?P<width>\d+)x(?P<height>\d+)\.mp4$',
	147	stream_url)
	148	if m:
	149	f.update({
	150	'width': int(m.group('width')),
	151	'height': int(m.group('height')),
	152	})
	153	if type_ == 'audio':
	154	f['vcodec'] = 'none'
	155	formats.append(f)
	156	return formats
	157
	158
	159	class ARDMediathekIE(ARDMediathekBaseIE):
	160	IE_NAME = 'ARD:mediathek'
	161	_VALID_URL = r'^https?://(?:(?:(?:www\|classic)\.)?ardmediathek\.de\|mediathek\.(?:daserste\|rbb-online)\.de\|one\.ard\.de)/(?:./)(?P<video_id>[0-9]+\|[^0-9][^/\?]+)[^/\?](?:\?.*)?'
	162
	163	_TESTS = [{
	164	# available till 26.07.2022
	165	'url': 'http://www.ardmediathek.de/tv/S%C3%9CDLICHT/Was-ist-die-Kunst-der-Zukunft-liebe-Ann/BR-Fernsehen/Video?bcastId=34633636&documentId=44726822',
	166	'info_dict': {
	167	'id': '44726822',
	168	'ext': 'mp4',
	169	'title': 'Was ist die Kunst der Zukunft, liebe Anna McCarthy?',
	170	'description': 'md5:4ada28b3e3b5df01647310e41f3a62f5',
	171	'duration': 1740,
	172	},
	173	'params': {
	174	# m3u8 download
	175	'skip_download': True,
	176	}
	177	}, {
	178	'url': 'https://one.ard.de/tv/Mord-mit-Aussicht/Mord-mit-Aussicht-6-39-T%C3%B6dliche-Nach/ONE/Video?bcastId=46384294&documentId=55586872',
	179	'only_matching': True,
	180	}, {
	181	# audio
	182	'url': 'http://www.ardmediathek.de/tv/WDR-H%C3%B6rspiel-Speicher/Tod-eines-Fu%C3%9Fballers/WDR-3/Audio-Podcast?documentId=28488308&bcastId=23074086',
	183	'only_matching': True,
	184	}, {
	185	'url': 'http://mediathek.daserste.de/sendungen_a-z/328454_anne-will/22429276_vertrauen-ist-gut-spionieren-ist-besser-geht',
	186	'only_matching': True,
	187	}, {
	188	# audio
	189	'url': 'http://mediathek.rbb-online.de/radio/Hörspiel/Vor-dem-Fest/kulturradio/Audio?documentId=30796318&topRessort=radio&bcastId=9839158',
	190	'only_matching': True,
	191	}, {
	192	'url': 'https://classic.ardmediathek.de/tv/Panda-Gorilla-Co/Panda-Gorilla-Co-Folge-274/Das-Erste/Video?bcastId=16355486&documentId=58234698',
	193	'only_matching': True,
	194	}]
	195
	196	@classmethod
	197	def suitable(cls, url):
	198	return False if ARDBetaMediathekIE.suitable(url) else super(ARDMediathekIE, cls).suitable(url)
	199
	200	def _real_extract(self, url):
	201	# determine video id from url
	202	m = self._match_valid_url(url)
	203
	204	document_id = None
	205
	206	numid = re.search(r'documentId=([0-9]+)', url)
	207	if numid:
	208	document_id = video_id = numid.group(1)
	209	else:
	210	video_id = m.group('video_id')
	211
	212	webpage = self._download_webpage(url, video_id)
	213
	214	ERRORS = (
	215	('>Leider liegt eine Störung vor.', 'Video %s is unavailable'),
	216	('>Der gewünschte Beitrag ist nicht mehr verfügbar.<',
	217	'Video %s is no longer available'),
	218	)
	219
	220	for pattern, message in ERRORS:
	221	if pattern in webpage:
	222	raise ExtractorError(message % video_id, expected=True)
	223
	224	if re.search(r'[\?&]rss($\|[=&])', url):
	225	doc = compat_etree_fromstring(webpage.encode('utf-8'))
	226	if doc.tag == 'rss':
	227	return GenericIE()._extract_rss(url, video_id, doc)
	228
	229	title = self._og_search_title(webpage, default=None) or self._html_search_regex(
	230	[r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',
	231	r'<meta name="dcterms\.title" content="(.*?)"/>',
	232	r'<h4 class="headline">(.*?)</h4>',
	233	r'<title[^>]>(.?)</title>'],
	234	webpage, 'title')
	235	description = self._og_search_description(webpage, default=None) or self._html_search_meta(
	236	'dcterms.abstract', webpage, 'description', default=None)
	237	if description is None:
	238	description = self._html_search_meta(
	239	'description', webpage, 'meta description', default=None)
	240	if description is None:
	241	description = self._html_search_regex(
	242	r'<p\s+class="teasertext">(.+?)</p>',
	243	webpage, 'teaser text', default=None)
	244
	245	# Thumbnail is sometimes not present.
	246	# It is in the mobile version, but that seems to use a different URL
	247	# structure altogether.
	248	thumbnail = self._og_search_thumbnail(webpage, default=None)
	249
	250	media_streams = re.findall(r'''(?x)
	251	mediaCollection\.addMediaStream\([0-9]+,\s[0-9]+,\s"[^"]",\s
	252	"([^"]+)"''', webpage)
	253
	254	if media_streams:
	255	QUALITIES = qualities(['lo', 'hi', 'hq'])
	256	formats = []
	257	for furl in set(media_streams):
	258	if furl.endswith('.f4m'):
	259	fid = 'f4m'
	260	else:
	261	fid_m = re.match(r'.*\.([^.]+)\.[^.]+$', furl)
	262	fid = fid_m.group(1) if fid_m else None
	263	formats.append({
	264	'quality': QUALITIES(fid),
	265	'format_id': fid,
	266	'url': furl,
	267	})
	268	self._sort_formats(formats)
	269	info = {
	270	'formats': formats,
	271	}
	272	else: # request JSON file
	273	if not document_id:
	274	video_id = self._search_regex(
	275	(r'/play/(?:config\|media\|sola)/(\d+)', r'contentId["\']\s:\s(\d+)'),
	276	webpage, 'media id', default=None)
	277	info = self._extract_media_info(
	278	'http://www.ardmediathek.de/play/media/%s' % video_id,
	279	webpage, video_id)
	280
	281	info.update({
	282	'id': video_id,
	283	'title': title,
	284	'description': description,
	285	'thumbnail': thumbnail,
	286	})
	287	info.update(self._ARD_extract_episode_info(info['title']))
	288
	289	return info
	290
	291
	292	class ARDIE(InfoExtractor):
	293	_VALID_URL = r'(?P<mainurl>https?://(?:www\.)?daserste\.de/(?:[^/?#&]+/)+(?P<id>[^/?#&]+))\.html'
	294	_TESTS = [{
	295	# available till 7.01.2022
	296	'url': 'https://www.daserste.de/information/talk/maischberger/videos/maischberger-die-woche-video100.html',
	297	'md5': '867d8aa39eeaf6d76407c5ad1bb0d4c1',
	298	'info_dict': {
	299	'id': 'maischberger-die-woche-video100',
	300	'display_id': 'maischberger-die-woche-video100',
	301	'ext': 'mp4',
	302	'duration': 3687.0,
	303	'title': 'maischberger. die woche vom 7. Januar 2021',
	304	'upload_date': '20210107',
	305	'thumbnail': r're:^https?://.*\.jpg$',
	306	},
	307	}, {
	308	'url': 'https://www.daserste.de/information/politik-weltgeschehen/morgenmagazin/videosextern/dominik-kahun-aus-der-nhl-direkt-zur-weltmeisterschaft-100.html',
	309	'only_matching': True,
	310	}, {
	311	'url': 'https://www.daserste.de/information/nachrichten-wetter/tagesthemen/videosextern/tagesthemen-17736.html',
	312	'only_matching': True,
	313	}, {
	314	'url': 'https://www.daserste.de/unterhaltung/serie/in-aller-freundschaft-die-jungen-aerzte/videos/diversity-tag-sanam-afrashteh100.html',
	315	'only_matching': True,
	316	}, {
	317	'url': 'http://www.daserste.de/information/reportage-dokumentation/dokus/videos/die-story-im-ersten-mission-unter-falscher-flagge-100.html',
	318	'only_matching': True,
	319	}, {
	320	'url': 'https://www.daserste.de/unterhaltung/serie/in-aller-freundschaft-die-jungen-aerzte/Drehpause-100.html',
	321	'only_matching': True,
	322	}, {
	323	'url': 'https://www.daserste.de/unterhaltung/film/filmmittwoch-im-ersten/videos/making-ofwendezeit-video-100.html',
	324	'only_matching': True,
	325	}]
	326
	327	def _real_extract(self, url):
	328	mobj = self._match_valid_url(url)
	329	display_id = mobj.group('id')
	330
	331	player_url = mobj.group('mainurl') + '~playerXml.xml'
	332	doc = self._download_xml(player_url, display_id)
	333	video_node = doc.find('./video')
	334	upload_date = unified_strdate(xpath_text(
	335	video_node, './broadcastDate'))
	336	thumbnail = xpath_text(video_node, './/teaserImage//variant/url')
	337
	338	formats = []
	339	for a in video_node.findall('.//asset'):
	340	file_name = xpath_text(a, './fileName', default=None)
	341	if not file_name:
	342	continue
	343	format_type = a.attrib.get('type')
	344	format_url = url_or_none(file_name)
	345	if format_url:
	346	ext = determine_ext(file_name)
	347	if ext == 'm3u8':
	348	formats.extend(self._extract_m3u8_formats(
	349	format_url, display_id, 'mp4', entry_protocol='m3u8_native',
	350	m3u8_id=format_type or 'hls', fatal=False))
	351	continue
	352	elif ext == 'f4m':
	353	formats.extend(self._extract_f4m_formats(
	354	update_url_query(format_url, {'hdcore': '3.7.0'}),
	355	display_id, f4m_id=format_type or 'hds', fatal=False))
	356	continue
	357	f = {
	358	'format_id': format_type,
	359	'width': int_or_none(xpath_text(a, './frameWidth')),
	360	'height': int_or_none(xpath_text(a, './frameHeight')),
	361	'vbr': int_or_none(xpath_text(a, './bitrateVideo')),
	362	'abr': int_or_none(xpath_text(a, './bitrateAudio')),
	363	'vcodec': xpath_text(a, './codecVideo'),
	364	'tbr': int_or_none(xpath_text(a, './totalBitrate')),
	365	}
	366	server_prefix = xpath_text(a, './serverPrefix', default=None)
	367	if server_prefix:
	368	f.update({
	369	'url': server_prefix,
	370	'playpath': file_name,
	371	})
	372	else:
	373	if not format_url:
	374	continue
	375	f['url'] = format_url
	376	formats.append(f)
	377	self._sort_formats(formats)
	378
	379	_SUB_FORMATS = (
	380	('./dataTimedText', 'ttml'),
	381	('./dataTimedTextNoOffset', 'ttml'),
	382	('./dataTimedTextVtt', 'vtt'),
	383	)
	384
	385	subtitles = {}
	386	for subsel, subext in _SUB_FORMATS:
	387	for node in video_node.findall(subsel):
	388	subtitles.setdefault('de', []).append({
	389	'url': node.attrib['url'],
	390	'ext': subext,
	391	})
	392
	393	return {
	394	'id': xpath_text(video_node, './videoId', default=display_id),
	395	'formats': formats,
	396	'subtitles': subtitles,
	397	'display_id': display_id,
	398	'title': video_node.find('./title').text,
	399	'duration': parse_duration(video_node.find('./duration').text),
	400	'upload_date': upload_date,
	401	'thumbnail': thumbnail,
	402	}
	403
	404
	405	class ARDBetaMediathekIE(ARDMediathekBaseIE):
	406	_VALID_URL = r'''(?x)https://
	407	(?:(?:beta\|www)\.)?ardmediathek\.de/
	408	(?:(?P<client>[^/]+)/)?
	409	(?:player\|live\|video\|(?P<playlist>sendung\|sammlung))/
	410	(?:(?P<display_id>[^?#]+)/)?
	411	(?P<id>(?(playlist)\|Y3JpZDovL)[a-zA-Z0-9]+)'''
	412
	413	_TESTS = [{
	414	'url': 'https://www.ardmediathek.de/mdr/video/die-robuste-roswita/Y3JpZDovL21kci5kZS9iZWl0cmFnL2Ntcy84MWMxN2MzZC0wMjkxLTRmMzUtODk4ZS0wYzhlOWQxODE2NGI/',
	415	'md5': 'a1dc75a39c61601b980648f7c9f9f71d',
	416	'info_dict': {
	417	'display_id': 'die-robuste-roswita',
	418	'id': '78566716',
	419	'title': 'Die robuste Roswita',
	420	'description': r're:^Der Mord.*totgeglaubte Ehefrau Roswita',
	421	'duration': 5316,
	422	'thumbnail': 'https://img.ardmediathek.de/standard/00/78/56/67/84/575672121/16x9/960?mandant=ard',
	423	'timestamp': 1596658200,
	424	'upload_date': '20200805',
	425	'ext': 'mp4',
	426	},
	427	'skip': 'Error',
	428	}, {
	429	'url': 'https://www.ardmediathek.de/video/tagesschau-oder-tagesschau-20-00-uhr/das-erste/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhZ2Vzc2NoYXUvZmM4ZDUxMjgtOTE0ZC00Y2MzLTgzNzAtNDZkNGNiZWJkOTll',
	430	'md5': 'f1837e563323b8a642a8ddeff0131f51',
	431	'info_dict': {
	432	'id': '10049223',
	433	'ext': 'mp4',
	434	'title': 'tagesschau, 20:00 Uhr',
	435	'timestamp': 1636398000,
	436	'description': 'md5:39578c7b96c9fe50afdf5674ad985e6b',
	437	'upload_date': '20211108',
	438	},
	439	}, {
	440	'url': 'https://beta.ardmediathek.de/ard/video/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
	441	'only_matching': True,
	442	}, {
	443	'url': 'https://ardmediathek.de/ard/video/saartalk/saartalk-gesellschaftsgift-haltung-gegen-hass/sr-fernsehen/Y3JpZDovL3NyLW9ubGluZS5kZS9TVF84MTY4MA/',
	444	'only_matching': True,
	445	}, {
	446	'url': 'https://www.ardmediathek.de/ard/video/trailer/private-eyes-s01-e01/one/Y3JpZDovL3dkci5kZS9CZWl0cmFnLTE1MTgwYzczLWNiMTEtNGNkMS1iMjUyLTg5MGYzOWQxZmQ1YQ/',
	447	'only_matching': True,
	448	}, {
	449	'url': 'https://www.ardmediathek.de/ard/player/Y3JpZDovL3N3ci5kZS9hZXgvbzEwNzE5MTU/',
	450	'only_matching': True,
	451	}, {
	452	'url': 'https://www.ardmediathek.de/swr/live/Y3JpZDovL3N3ci5kZS8xMzQ4MTA0Mg',
	453	'only_matching': True,
	454	}, {
	455	# playlist of type 'sendung'
	456	'url': 'https://www.ardmediathek.de/ard/sendung/doctor-who/Y3JpZDovL3dkci5kZS9vbmUvZG9jdG9yIHdobw/',
	457	'only_matching': True,
	458	}, {
	459	# playlist of type 'sammlung'
	460	'url': 'https://www.ardmediathek.de/ard/sammlung/team-muenster/5JpTzLSbWUAK8184IOvEir/',
	461	'only_matching': True,
	462	}, {
	463	'url': 'https://www.ardmediathek.de/video/coronavirus-update-ndr-info/astrazeneca-kurz-lockdown-und-pims-syndrom-81/ndr/Y3JpZDovL25kci5kZS84NzE0M2FjNi0wMWEwLTQ5ODEtOTE5NS1mOGZhNzdhOTFmOTI/',
	464	'only_matching': True,
	465	}, {
	466	'url': 'https://www.ardmediathek.de/ard/player/Y3JpZDovL3dkci5kZS9CZWl0cmFnLWQ2NDJjYWEzLTMwZWYtNGI4NS1iMTI2LTU1N2UxYTcxOGIzOQ/tatort-duo-koeln-leipzig-ihr-kinderlein-kommet',
	467	'only_matching': True,
	468	}]
	469
	470	def _ARD_load_playlist_snipped(self, playlist_id, display_id, client, mode, pageNumber):
	471	""" Query the ARD server for playlist information
	472	and returns the data in "raw" format """
	473	if mode == 'sendung':
	474	graphQL = json.dumps({
	475	'query': '''{
	476	showPage(
	477	client: "%s"
	478	showId: "%s"
	479	pageNumber: %d
	480	) {
	481	pagination {
	482	pageSize
	483	totalElements
	484	}
	485	teasers { # Array
	486	mediumTitle
	487	links { target { id href title } }
	488	type
	489	}
	490	}}''' % (client, playlist_id, pageNumber),
	491	}).encode()
	492	else: # mode == 'sammlung'
	493	graphQL = json.dumps({
	494	'query': '''{
	495	morePage(
	496	client: "%s"
	497	compilationId: "%s"
	498	pageNumber: %d
	499	) {
	500	widget {
	501	pagination {
	502	pageSize
	503	totalElements
	504	}
	505	teasers { # Array
	506	mediumTitle
	507	links { target { id href title } }
	508	type
	509	}
	510	}
	511	}}''' % (client, playlist_id, pageNumber),
	512	}).encode()
	513	# Ressources for ARD graphQL debugging:
	514	# https://api-test.ardmediathek.de/public-gateway
	515	show_page = self._download_json(
	516	'https://api.ardmediathek.de/public-gateway',
	517	'[Playlist] %s' % display_id,
	518	data=graphQL,
	519	headers={'Content-Type': 'application/json'})['data']
	520	# align the structure of the returned data:
	521	if mode == 'sendung':
	522	show_page = show_page['showPage']
	523	else: # mode == 'sammlung'
	524	show_page = show_page['morePage']['widget']
	525	return show_page
	526
	527	def _ARD_extract_playlist(self, url, playlist_id, display_id, client, mode):
	528	""" Collects all playlist entries and returns them as info dict.
	529	Supports playlists of mode 'sendung' and 'sammlung', and also nested
	530	playlists. """
	531	entries = []
	532	pageNumber = 0
	533	while True: # iterate by pageNumber
	534	show_page = self._ARD_load_playlist_snipped(
	535	playlist_id, display_id, client, mode, pageNumber)
	536	for teaser in show_page['teasers']: # process playlist items
	537	if '/compilation/' in teaser['links']['target']['href']:
	538	# alternativ cond.: teaser['type'] == "compilation"
	539	# => This is an nested compilation, e.g. like:
	540	# https://www.ardmediathek.de/ard/sammlung/die-kirche-bleibt-im-dorf/5eOHzt8XB2sqeFXbIoJlg2/
	541	link_mode = 'sammlung'
	542	else:
	543	link_mode = 'video'
	544
	545	item_url = 'https://www.ardmediathek.de/%s/%s/%s/%s/%s' % (
	546	client, link_mode, display_id,
	547	# perform HTLM quoting of episode title similar to ARD:
	548	re.sub('^-\|-$', '', # remove '-' from begin/end
	549	re.sub('[^a-zA-Z0-9]+', '-', # replace special chars by -
	550	teaser['links']['target']['title'].lower()
	551	.replace('ä', 'ae').replace('ö', 'oe')
	552	.replace('ü', 'ue').replace('ß', 'ss'))),
	553	teaser['links']['target']['id'])
	554	entries.append(self.url_result(
	555	item_url,
	556	ie=ARDBetaMediathekIE.ie_key()))
	557
	558	if (show_page['pagination']['pageSize'] * (pageNumber + 1)
	559	>= show_page['pagination']['totalElements']):
	560	# we've processed enough pages to get all playlist entries
	561	break
	562	pageNumber = pageNumber + 1
	563
	564	return self.playlist_result(entries, playlist_title=display_id)
	565
	566	def _real_extract(self, url):
	567	video_id, display_id, playlist_type, client = self._match_valid_url(url).group(
	568	'id', 'display_id', 'playlist', 'client')
	569	display_id, client = display_id or video_id, client or 'ard'
	570
	571	if playlist_type:
	572	return self._ARD_extract_playlist(url, video_id, display_id, client, playlist_type)
	573
	574	player_page = self._download_json(
	575	'https://api.ardmediathek.de/public-gateway',
	576	display_id, data=json.dumps({
	577	'query': '''{
	578	playerPage(client:"%s", clipId: "%s") {
	579	blockedByFsk
	580	broadcastedOn
	581	maturityContentRating
	582	mediaCollection {
	583	_duration
	584	_geoblocked
	585	_isLive
	586	_mediaArray {
	587	_mediaStreamArray {
	588	_quality
	589	_server
	590	_stream
	591	}
	592	}
	593	_previewImage
	594	_subtitleUrl
	595	_type
	596	}
	597	show {
	598	title
	599	}
	600	synopsis
	601	title
	602	tracking {
	603	atiCustomVars {
	604	contentId
	605	}
	606	}
	607	}
	608	}''' % (client, video_id),
	609	}).encode(), headers={
	610	'Content-Type': 'application/json'
	611	})['data']['playerPage']
	612	title = player_page['title']
	613	content_id = str_or_none(try_get(
	614	player_page, lambda x: x['tracking']['atiCustomVars']['contentId']))
	615	media_collection = player_page.get('mediaCollection') or {}
	616	if not media_collection and content_id:
	617	media_collection = self._download_json(
	618	'https://www.ardmediathek.de/play/media/' + content_id,
	619	content_id, fatal=False) or {}
	620	info = self._parse_media_info(
	621	media_collection, content_id or video_id,
	622	player_page.get('blockedByFsk'))
	623	age_limit = None
	624	description = player_page.get('synopsis')
	625	maturity_content_rating = player_page.get('maturityContentRating')
	626	if maturity_content_rating:
	627	age_limit = int_or_none(maturity_content_rating.lstrip('FSK'))
	628	if not age_limit and description:
	629	age_limit = int_or_none(self._search_regex(
	630	r'$FSK\s(\d+)$\s$', description, 'age limit', default=None))
	631	info.update({
	632	'age_limit': age_limit,
	633	'display_id': display_id,
	634	'title': title,
	635	'description': description,
	636	'timestamp': unified_timestamp(player_page.get('broadcastedOn')),
	637	'series': try_get(player_page, lambda x: x['show']['title']),
	638	})
	639	info.update(self._ARD_extract_episode_info(info['title']))
	640	return info