X-Git-Url: https://jfr.im/git/yt-dlp.git/blobdiff_plain/9f14daf22b4080ae1531a772ee7574959af4e2fa..add96eb9f84cfffe85682bf2fb85135746994ee8:/yt_dlp/extractor/zdf.py diff --git a/yt_dlp/extractor/zdf.py b/yt_dlp/extractor/zdf.py index fca426a50..a862e25d0 100644 --- a/yt_dlp/extractor/zdf.py +++ b/yt_dlp/extractor/zdf.py @@ -1,7 +1,6 @@ import re from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( NO_DEFAULT, ExtractorError, @@ -24,16 +23,16 @@ class ZDFBaseIE(InfoExtractor): _GEO_COUNTRIES = ['DE'] - _QUALITIES = ('auto', 'low', 'med', 'high', 'veryhigh', 'hd') + _QUALITIES = ('auto', 'low', 'med', 'high', 'veryhigh', 'hd', 'fhd', 'uhd') def _call_api(self, url, video_id, item, api_token=None, referrer=None): headers = {} if api_token: - headers['Api-Auth'] = 'Bearer %s' % api_token + headers['Api-Auth'] = f'Bearer {api_token}' if referrer: headers['Referer'] = referrer return self._download_json( - url, video_id, 'Downloading JSON %s' % item, headers=headers) + url, video_id, f'Downloading JSON {item}', headers=headers) @staticmethod def _extract_subtitles(src): @@ -61,6 +60,9 @@ def _extract_format(self, video_id, formats, format_urls, meta): elif mime_type == 'application/f4m+xml' or ext == 'f4m': new_formats = self._extract_f4m_formats( update_url_query(format_url, {'hdcore': '3.7.0'}), video_id, f4m_id='hds', fatal=False) + elif ext == 'mpd': + new_formats = self._extract_mpd_formats( + format_url, video_id, mpd_id='dash', fatal=False) else: f = parse_codecs(meta.get('mimeCodec')) if not f and meta.get('type'): @@ -70,7 +72,7 @@ def _extract_format(self, video_id, formats, format_urls, meta): f.update({ 'url': format_url, 'format_id': join_nonempty('http', meta.get('type'), meta.get('quality')), - 'tbr': int_or_none(self._search_regex(r'_(\d+)k_', format_url, 'tbr', default=None)) + 'tbr': int_or_none(self._search_regex(r'_(\d+)k_', format_url, 'tbr', default=None)), }) new_formats = [f] formats.extend(merge_dicts(f, { @@ -174,7 +176,8 @@ class ZDFIE(ZDFBaseIE): 'thumbnail': 'md5:e65f459f741be5455c952cd820eb188e', 'title': 'heute journal vom 30.12.2021', 'timestamp': 1640897100, - } + }, + 'skip': 'No longer available: "Diese Seite wurde leider nicht gefunden"', }, { 'url': 'https://www.zdf.de/dokumentation/terra-x/die-magie-der-farben-von-koenigspurpur-und-jeansblau-100.html', 'info_dict': { @@ -189,7 +192,7 @@ class ZDFIE(ZDFBaseIE): }, }, { 'url': 'https://www.zdf.de/funk/druck-11790/funk-alles-ist-verzaubert-102.html', - 'md5': '1b93bdec7d02fc0b703c5e7687461628', + 'md5': '57af4423db0455a3975d2dc4578536bc', 'info_dict': { 'ext': 'mp4', 'id': 'video_funk_1770473', @@ -198,7 +201,7 @@ class ZDFIE(ZDFBaseIE): 'title': 'Alles ist verzaubert', 'timestamp': 1635520560, 'upload_date': '20211029', - 'thumbnail': 'https://www.zdf.de/assets/teaser-funk-alles-ist-verzaubert-100~1920x1080?cb=1636466431799', + 'thumbnail': 'https://www.zdf.de/assets/teaser-funk-alles-ist-verzaubert-102~1920x1080?cb=1663848412907', }, }, { # Same as https://www.phoenix.de/sendungen/dokumentationen/gesten-der-maechtigen-i-a-89468.html?ref=suche @@ -232,7 +235,7 @@ class ZDFIE(ZDFBaseIE): 'timestamp': 1641355200, 'upload_date': '20220105', }, - 'skip': 'No longer available "Diese Seite wurde leider nicht gefunden"' + 'skip': 'No longer available "Diese Seite wurde leider nicht gefunden"', }, { 'url': 'https://www.zdf.de/serien/soko-stuttgart/das-geld-anderer-leute-100.html', 'info_dict': { @@ -241,10 +244,23 @@ class ZDFIE(ZDFBaseIE): 'title': 'Das Geld anderer Leute', 'description': 'md5:cb6f660850dc5eb7d1ab776ea094959d', 'duration': 2581.0, - 'timestamp': 1654790700, - 'upload_date': '20220609', + 'timestamp': 1675160100, + 'upload_date': '20230131', 'thumbnail': 'https://epg-image.zdf.de/fotobase-webdelivery/images/e2d7e55a-09f0-424e-ac73-6cac4dd65f35?layout=2400x1350', }, + }, { + 'url': 'https://www.zdf.de/dokumentation/terra-x/unser-gruener-planet-wuesten-doku-100.html', + 'info_dict': { + 'id': '220605_dk_gruener_planet_wuesten_tex', + 'ext': 'mp4', + 'title': 'Unser grüner Planet - Wüsten', + 'description': 'md5:4fc647b6f9c3796eea66f4a0baea2862', + 'duration': 2613.0, + 'timestamp': 1654450200, + 'upload_date': '20220605', + 'format_note': 'uhd, main', + 'thumbnail': 'https://www.zdf.de/assets/saguaro-kakteen-102~3840x2160?cb=1655910690796', + }, }] def _extract_entry(self, url, player, content, video_id): @@ -253,13 +269,13 @@ def _extract_entry(self, url, player, content, video_id): t = content['mainVideoContent']['http://zdf.de/rels/target'] ptmd_path = traverse_obj(t, ( (('streams', 'default'), None), - ('http://zdf.de/rels/streams/ptmd', 'http://zdf.de/rels/streams/ptmd-template') + ('http://zdf.de/rels/streams/ptmd', 'http://zdf.de/rels/streams/ptmd-template'), ), get_all=False) if not ptmd_path: raise ExtractorError('Could not extract ptmd_path') info = self._extract_ptmd( - urljoin(url, ptmd_path.replace('{playerId}', 'ngplayer_2_4')), video_id, player['apiToken'], url) + urljoin(url, ptmd_path.replace('{playerId}', 'android_native_5')), video_id, player['apiToken'], url) thumbnails = [] layouts = try_get( @@ -286,7 +302,7 @@ def _extract_entry(self, url, player, content, video_id): chapters = [{ 'start_time': chap.get('anchorOffset'), 'end_time': next_chap.get('anchorOffset'), - 'title': chap.get('anchorLabel') + 'title': chap.get('anchorLabel'), } for chap, next_chap in zip(chapter_marks, chapter_marks[1:])] return merge_dicts(info, { @@ -295,7 +311,7 @@ def _extract_entry(self, url, player, content, video_id): 'duration': int_or_none(t.get('duration')), 'timestamp': unified_timestamp(content.get('editorialDate')), 'thumbnails': thumbnails, - 'chapters': chapters or None + 'chapters': chapters or None, }) def _extract_regular(self, url, player, video_id): @@ -305,7 +321,7 @@ def _extract_regular(self, url, player, video_id): def _extract_mobile(self, video_id): video = self._download_json( - 'https://zdf-cdn.live.cellular.de/mediathekV2/document/%s' % video_id, + f'https://zdf-cdn.live.cellular.de/mediathekV2/document/{video_id}', video_id) formats = [] @@ -324,7 +340,7 @@ def _extract_mobile(self, video_id): if isinstance(teaser_bild, dict): for thumbnail_key, thumbnail in teaser_bild.items(): thumbnail_url = try_get( - thumbnail, lambda x: x['url'], compat_str) + thumbnail, lambda x: x['url'], str) if thumbnail_url: thumbnails.append({ 'url': thumbnail_url, @@ -339,7 +355,7 @@ def _extract_mobile(self, video_id): 'description': document.get('beschreibung'), 'duration': int_or_none(document.get('length')), 'timestamp': unified_timestamp(document.get('date')) or unified_timestamp( - try_get(video, lambda x: x['meta']['editorialDate'], compat_str)), + try_get(video, lambda x: x['meta']['editorialDate'], str)), 'thumbnails': thumbnails, 'subtitles': self._extract_subtitles(document), 'formats': formats, @@ -388,10 +404,10 @@ class ZDFChannelIE(ZDFBaseIE): @classmethod def suitable(cls, url): - return False if ZDFIE.suitable(url) else super(ZDFChannelIE, cls).suitable(url) + return False if ZDFIE.suitable(url) else super().suitable(url) def _og_search_title(self, webpage, fatal=False): - title = super(ZDFChannelIE, self)._og_search_title(webpage, fatal=fatal) + title = super()._og_search_title(webpage, fatal=fatal) return re.split(r'\s+[-|]\s+ZDF(?:mediathek)?$', title or '')[0] or None def _real_extract(self, url): @@ -400,7 +416,7 @@ def _real_extract(self, url): webpage = self._download_webpage(url, channel_id) matches = re.finditer( - r''']*?\sdata-plusbar-id\s*=\s*(["'])(?P[\w-]+)\1[^>]*?\sdata-plusbar-url=\1(?P%s)\1''' % ZDFIE._VALID_URL, + rf''']*?\sdata-plusbar-id\s*=\s*(["'])(?P[\w-]+)\1[^>]*?\sdata-plusbar-url=\1(?P{ZDFIE._VALID_URL})\1''', webpage) if self._downloader.params.get('noplaylist', False): @@ -411,11 +427,11 @@ def _real_extract(self, url): if entry: return entry else: - self.to_screen('Downloading playlist %s - add --no-playlist to download just the main video' % (channel_id, )) + self.to_screen(f'Downloading playlist {channel_id} - add --no-playlist to download just the main video') def check_video(m): v_ref = self._search_regex( - r'''(]*?\shref\s*=[^>]+?\sdata-target-id\s*=\s*(["'])%s\2[^>]*>)''' % (m.group('p_id'), ), + r'''(]*?\shref\s*=[^>]+?\sdata-target-id\s*=\s*(["']){}\2[^>]*>)'''.format(m.group('p_id')), webpage, 'check id', default='') v_ref = extract_attributes(v_ref) return v_ref.get('data-target-video-type') != 'novideo'