+ }, {
+ 'url': 'https://www.ardmediathek.de/video/coronavirus-update-ndr-info/astrazeneca-kurz-lockdown-und-pims-syndrom-81/ndr/Y3JpZDovL25kci5kZS84NzE0M2FjNi0wMWEwLTQ5ODEtOTE5NS1mOGZhNzdhOTFmOTI/',
+ 'only_matching': True,
+ }]
+
def _extract_episode_info(self, title):
    """Derive season/episode metadata from an item title.

    The patterns below are tried in order and the first one that matches is used.
    The last pattern matches any string, so a string title always yields a result.

    :param title: Item title; anything that is not a ``str`` is treated as missing.
    :return: Dict that may contain ``season_number``, ``episode_number`` and ``episode``.
             Always a dict (possibly empty), so it is safe to unpack with ``**``.
    """
    # Without a string title nothing can be matched; return an empty dict
    # instead of a non-dict result that would break `**` unpacking in callers
    if not isinstance(title, str):
        return {}

    patterns = [
        # Pattern for title like "Homo sapiens (S06/E07) - Originalversion"
        # from: https://www.ardmediathek.de/one/sendung/doctor-who/Y3JpZDovL3dkci5kZS9vbmUvZG9jdG9yIHdobw
        r'.*(?P<ep_info> \(S(?P<season_number>\d+)/E(?P<episode_number>\d+)\)).*',
        # E.g.: title="Fritjof aus Norwegen (2) (AD)"
        # from: https://www.ardmediathek.de/ard/sammlung/der-krieg-und-ich/68cMkqJdllm639Skj4c7sS/
        r'.*(?P<ep_info> \((?:Folge |Teil )?(?P<episode_number>\d+)(?:/\d+)?\)).*',
        # Episode number followed by the episode name in double quotes,
        # i.e. `Folge <number>` + optional `:` or ` -` + ` "<episode>"`
        r'.*(?P<ep_info>Folge (?P<episode_number>\d+)(?:\:| -|) )\"(?P<episode>.+)\".*',
        # E.g.: title="Folge 25/42: Symmetrie"
        # from: https://www.ardmediathek.de/ard/video/grips-mathe/folge-25-42-symmetrie/ard-alpha/Y3JpZDovL2JyLmRlL3ZpZGVvLzMyYzI0ZjczLWQ1N2MtNDAxNC05ZmZhLTFjYzRkZDA5NDU5OQ/
        # E.g.: title="Folge 1063 - Vertrauen"
        # from: https://www.ardmediathek.de/ard/sendung/die-fallers/Y3JpZDovL3N3ci5kZS8yMzAyMDQ4/
        r'.*(?P<ep_info>Folge (?P<episode_number>\d+)(?:/\d+)?(?:\:| -|) ).*',
        # As a fallback use the full title
        r'(?P<title>.*)',
    ]

    return traverse_obj(patterns, (..., {partial(re.match, string=title)}, {
        'season_number': ('season_number', {int_or_none}),
        'episode_number': ('episode_number', {int_or_none}),
        # Episode name candidates in order of preference: an explicitly captured name,
        # the title with the episode marker removed, or the unmodified title
        'episode': ((
            ('episode', {str_or_none}),
            ('ep_info', {lambda x: title.replace(x, '')}),
            ('title', {str}),
        ), {str.strip}),
    }), get_all=False) or {}
+
def _real_extract(self, url):
    """Extract a single item from its page-gateway JSON.

    Downloads the item data for the id found in ``url``, builds formats and subtitles
    from the player widget's embedded media collection and merges the metadata.

    :param url: URL matched by this extractor; must provide the ``id`` and
                ``display_id`` groups.
    :return: Info dict for the item.
    """
    video_id, display_id = self._match_valid_url(url).group('id', 'display_id')

    page_data = self._download_json(
        f'https://api.ardmediathek.de/page-gateway/pages/ard/item/{video_id}', video_id, query={
            'embedded': 'false',
            'mcV6': 'true',
        })

    # Fall back to an empty dict so that the lookups below do not raise
    # AttributeError when the page has no on-demand/live player widget
    player_data = traverse_obj(
        page_data, ('widgets', lambda _, v: v['type'] in ('player_ondemand', 'player_live'), {dict}),
        get_all=False) or {}
    is_live = player_data.get('type') == 'player_live'
    media_data = traverse_obj(player_data, ('mediaCollection', 'embedded', {dict}))

    if player_data.get('blockedByFsk'):
        self.raise_no_formats('This video is only available after 22:00', expected=True)

    formats = []
    subtitles = {}
    for stream in traverse_obj(media_data, ('streams', ..., {dict})):
        kind = stream.get('kind')
        # Prioritize main stream over sign language and others
        preference = 1 if kind == 'main' else None
        for media in traverse_obj(stream, ('media', lambda _, v: url_or_none(v['url']))):
            media_url = media['url']

            # The language tag is the language code of the first audio track plus its kind;
            # the 'standard' kind is dropped so that a plain track keeps the bare code
            audio_kind = traverse_obj(media, (
                'audios', 0, 'kind', {str}), default='').replace('standard', '')
            lang_code = traverse_obj(media, ('audios', 0, 'languageCode', {str})) or 'deu'
            lang = join_nonempty(lang_code, audio_kind)
            # Only plain German audio is preferred; every other tag is ranked lower
            language_preference = 10 if lang == 'deu' else -10

            if determine_ext(media_url) == 'm3u8':
                fmts, subs = self._extract_m3u8_formats_and_subtitles(
                    media_url, video_id, m3u8_id=f'hls-{kind}', preference=preference, fatal=False, live=is_live)
                for f in fmts:
                    f['language'] = lang
                    f['language_preference'] = language_preference
                formats.extend(fmts)
                self._merge_subtitles(subs, target=subtitles)
            else:
                formats.append({
                    'url': media_url,
                    'format_id': f'http-{kind}',
                    'preference': preference,
                    'language': lang,
                    'language_preference': language_preference,
                    **traverse_obj(media, {
                        'format_note': ('forcedLabel', {str}),
                        'width': ('maxHResolutionPx', {int_or_none}),
                        'height': ('maxVResolutionPx', {int_or_none}),
                        'vcodec': ('videoCodec', {str}),
                    }),
                })

    for sub in traverse_obj(media_data, ('subtitles', ..., {dict})):
        for sources in traverse_obj(sub, ('sources', lambda _, v: url_or_none(v['url']))):
            subtitles.setdefault(sub.get('languageCode') or 'deu', []).append({
                'url': sources['url'],
                # Unknown kinds map to None
                'ext': {'webvtt': 'vtt', 'ebutt': 'ttml'}.get(sources.get('kind')),
            })

    # The rating is given with an 'FSK' prefix that has to be removed before int conversion
    age_limit = traverse_obj(page_data, ('fskRating', {lambda x: remove_start(x, 'FSK')}, {int_or_none}))
    old_id = traverse_obj(page_data, ('tracking', 'atiCustomVars', 'contentId'))

    return {
        'id': video_id,
        'display_id': display_id,
        'formats': formats,
        'subtitles': subtitles,
        'is_live': is_live,
        'age_limit': age_limit,
        **traverse_obj(media_data, ('meta', {
            'title': 'title',
            'description': 'synopsis',
            'timestamp': ('broadcastedOnDateTime', {parse_iso8601}),
            'series': 'seriesTitle',
            'thumbnail': ('images', 0, 'url', {url_or_none}),
            'duration': ('durationSeconds', {int_or_none}),
            'channel': 'clipSourceName',
        })),
        # Guard the unpacking: the page title may be missing
        **(self._extract_episode_info(page_data.get('title')) or {}),
        # Only emit an old archive id if the tracking data actually provides one;
        # otherwise a bogus id would be built from None
        '_old_archive_ids': [make_archive_id(ARDBetaMediathekIE, old_id)] if old_id else None,
    }
+
+
+class ARDMediathekCollectionIE(InfoExtractor):
+ _VALID_URL = r'''(?x)https://
+ (?:(?:beta|www)\.)?ardmediathek\.de/
+ (?:[^/?#]+/)?
+ (?P<playlist>sendung|serie|sammlung)/
+ (?:(?P<display_id>[^?#]+?)/)?
+ (?P<id>[a-zA-Z0-9]+)
+ (?:/(?P<season>\d+)(?:/(?P<version>OV|AD))?)?/?(?:[?#]|$)'''
+ _GEO_COUNTRIES = ['DE']
+
+ _TESTS = [{
+ 'url': 'https://www.ardmediathek.de/serie/quiz/staffel-1-originalversion/Y3JpZDovL3dkci5kZS9vbmUvcXVpeg/1/OV',
+ 'info_dict': {
+ 'id': 'Y3JpZDovL3dkci5kZS9vbmUvcXVpeg_1_OV',
+ 'display_id': 'quiz/staffel-1-originalversion',
+ 'title': 'Staffel 1 Originalversion',
+ },
+ 'playlist_count': 3,
+ }, {
+ 'url': 'https://www.ardmediathek.de/serie/babylon-berlin/staffel-4-mit-audiodeskription/Y3JpZDovL2Rhc2Vyc3RlLmRlL2JhYnlsb24tYmVybGlu/4/AD',
+ 'info_dict': {
+ 'id': 'Y3JpZDovL2Rhc2Vyc3RlLmRlL2JhYnlsb24tYmVybGlu_4_AD',
+ 'display_id': 'babylon-berlin/staffel-4-mit-audiodeskription',
+ 'title': 'Staffel 4 mit Audiodeskription',
+ },
+ 'playlist_count': 12,
+ }, {
+ 'url': 'https://www.ardmediathek.de/serie/babylon-berlin/staffel-1/Y3JpZDovL2Rhc2Vyc3RlLmRlL2JhYnlsb24tYmVybGlu/1/',
+ 'info_dict': {
+ 'id': 'Y3JpZDovL2Rhc2Vyc3RlLmRlL2JhYnlsb24tYmVybGlu_1',
+ 'display_id': 'babylon-berlin/staffel-1',
+ 'title': 'Staffel 1',
+ },
+ 'playlist_count': 8,
+ }, {
+ 'url': 'https://www.ardmediathek.de/sendung/tatort/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydA',
+ 'info_dict': {
+ 'id': 'Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydA',
+ 'display_id': 'tatort',
+ 'title': 'Tatort',
+ },
+ 'playlist_mincount': 500,
+ }, {
+ 'url': 'https://www.ardmediathek.de/sammlung/die-kirche-bleibt-im-dorf/5eOHzt8XB2sqeFXbIoJlg2',
+ 'info_dict': {
+ 'id': '5eOHzt8XB2sqeFXbIoJlg2',
+ 'display_id': 'die-kirche-bleibt-im-dorf',
+ 'title': 'Die Kirche bleibt im Dorf',
+ 'description': 'Die Kirche bleibt im Dorf',
+ },
+ 'playlist_count': 4,