]> jfr.im git - yt-dlp.git/blobdiff - yt_dlp/extractor/tele5.py
[ie/tele5] Overhaul extractor (#10024)
[yt-dlp.git] / yt_dlp / extractor / tele5.py
index 0d9cf75ca9e1a460b29cac1eecc03ed1b73ef2af..a455375415347b4de06ef41e96b1d417ef7c5457 100644 (file)
-# coding: utf-8
-from __future__ import unicode_literals
+import functools
 
-import re
+from .dplay import DiscoveryPlusBaseIE
+from ..utils import join_nonempty
+from ..utils.traversal import traverse_obj
 
-from .common import InfoExtractor
-from .jwplatform import JWPlatformIE
-from .nexx import NexxIE
-from ..utils import (
-    NO_DEFAULT,
-    parse_qs,
-    smuggle_url,
-)
 
-
-class Tele5IE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?tele5\.de/(?:[^/]+/)*(?P<id>[^/?#&]+)'
-    _GEO_COUNTRIES = ['DE']
+class Tele5IE(DiscoveryPlusBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?tele5\.de/(?P<parent_slug>[\w-]+)/(?P<slug_a>[\w-]+)(?:/(?P<slug_b>[\w-]+))?'
     _TESTS = [{
-        'url': 'https://www.tele5.de/mediathek/filme-online/videos?vid=1549416',
+        # slug_a and slug_b
+        'url': 'https://tele5.de/mediathek/stargate-atlantis/quarantane',
         'info_dict': {
-            'id': '1549416',
+            'id': '6852024',
             'ext': 'mp4',
-            'upload_date': '20180814',
-            'timestamp': 1534290623,
-            'title': 'Pandorum',
-        },
-        'params': {
-            'skip_download': True,
+            'title': 'Quarantäne',
+            'description': 'md5:6af0373bd0fcc4f13e5d47701903d675',
+            'episode': 'Episode 73',
+            'episode_number': 73,
+            'season': 'Season 4',
+            'season_number': 4,
+            'series': 'Stargate Atlantis',
+            'upload_date': '20240525',
+            'timestamp': 1716643200,
+            'duration': 2503.2,
+            'thumbnail': 'https://eu1-prod-images.disco-api.com/2024/05/21/c81fcb45-8902-309b-badb-4e6d546b575d.jpeg',
+            'creators': ['Tele5'],
+            'tags': [],
         },
     }, {
-        # jwplatform, nexx unavailable
-        'url': 'https://www.tele5.de/filme/ghoul-das-geheimnis-des-friedhofmonsters/',
+        # only slug_a
+        'url': 'https://tele5.de/mediathek/inside-out',
         'info_dict': {
-            'id': 'WJuiOlUp',
+            'id': '6819502',
             'ext': 'mp4',
-            'upload_date': '20200603',
-            'timestamp': 1591214400,
-            'title': 'Ghoul - Das Geheimnis des Friedhofmonsters',
-            'description': 'md5:42002af1d887ff3d5b2b3ca1f8137d97',
+            'title': 'Inside out',
+            'description': 'md5:7e5f32ed0be5ddbd27713a34b9293bfd',
+            'series': 'Inside out',
+            'upload_date': '20240523',
+            'timestamp': 1716494400,
+            'duration': 5343.4,
+            'thumbnail': 'https://eu1-prod-images.disco-api.com/2024/05/15/181eba3c-f9f0-3faf-b14d-0097050a3aa4.jpeg',
+            'creators': ['Tele5'],
+            'tags': [],
         },
-        'params': {
-            'skip_download': True,
-        },
-        'add_ie': [JWPlatformIE.ie_key()],
-    }, {
-        'url': 'https://www.tele5.de/kalkofes-mattscheibe/video-clips/politik-und-gesellschaft?ve_id=1551191',
-        'only_matching': True,
-    }, {
-        'url': 'https://www.tele5.de/video-clip/?ve_id=1609440',
-        'only_matching': True,
-    }, {
-        'url': 'https://www.tele5.de/filme/schlefaz-dragon-crusaders/',
-        'only_matching': True,
-    }, {
-        'url': 'https://www.tele5.de/filme/making-of/avengers-endgame/',
-        'only_matching': True,
-    }, {
-        'url': 'https://www.tele5.de/star-trek/raumschiff-voyager/ganze-folge/das-vinculum/',
-        'only_matching': True,
     }, {
-        'url': 'https://www.tele5.de/anders-ist-sevda/',
-        'only_matching': True,
+        # playlist
+        'url': 'https://tele5.de/mediathek/schlefaz',
+        'info_dict': {
+            'id': 'mediathek-schlefaz',
+        },
+        'playlist_mincount': 3,
     }]
 
     def _real_extract(self, url):
-        qs = parse_qs(url)
-        video_id = (qs.get('vid') or qs.get('ve_id') or [None])[0]
-
-        NEXX_ID_RE = r'\d{6,}'
-        JWPLATFORM_ID_RE = r'[a-zA-Z0-9]{8}'
-
-        def nexx_result(nexx_id):
-            return self.url_result(
-                'https://api.nexx.cloud/v3/759/videos/byid/%s' % nexx_id,
-                ie=NexxIE.ie_key(), video_id=nexx_id)
-
-        nexx_id = jwplatform_id = None
-
-        if video_id:
-            if re.match(NEXX_ID_RE, video_id):
-                return nexx_result(video_id)
-            elif re.match(JWPLATFORM_ID_RE, video_id):
-                jwplatform_id = video_id
-
-        if not nexx_id:
-            display_id = self._match_id(url)
-            webpage = self._download_webpage(url, display_id)
-
-            def extract_id(pattern, name, default=NO_DEFAULT):
-                return self._html_search_regex(
-                    (r'id\s*=\s*["\']video-player["\'][^>]+data-id\s*=\s*["\'](%s)' % pattern,
-                     r'\s+id\s*=\s*["\']player_(%s)' % pattern,
-                     r'\bdata-id\s*=\s*["\'](%s)' % pattern), webpage, name,
-                    default=default)
+        parent_slug, slug_a, slug_b = self._match_valid_url(url).group('parent_slug', 'slug_a', 'slug_b')
+        playlist_id = join_nonempty(parent_slug, slug_a, slug_b, delim='-')
 
-            nexx_id = extract_id(NEXX_ID_RE, 'nexx id', default=None)
-            if nexx_id:
-                return nexx_result(nexx_id)
+        query = {'environment': 'tele5', 'v': '2'}
+        if not slug_b:
+            endpoint = f'page/{slug_a}'
+            query['parent_slug'] = parent_slug
+        else:
+            endpoint = f'videos/{slug_b}'
+            query['filter[show.slug]'] = slug_a
+        cms_data = self._download_json(f'https://de-api.loma-cms.com/feloma/{endpoint}/', playlist_id, query=query)
 
-            if not jwplatform_id:
-                jwplatform_id = extract_id(JWPLATFORM_ID_RE, 'jwplatform id')
+        return self.playlist_result(map(
+            functools.partial(self._get_disco_api_info, url, disco_host='eu1-prod.disco-api.com', realm='dmaxde', country='DE'),
+            traverse_obj(cms_data, ('blocks', ..., 'videoId', {str}))), playlist_id)
 
-        return self.url_result(
-            smuggle_url(
-                'jwplatform:%s' % jwplatform_id,
-                {'geo_countries': self._GEO_COUNTRIES}),
-            ie=JWPlatformIE.ie_key(), video_id=jwplatform_id)
+    def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
+        headers.update({
+            'x-disco-params': f'realm={realm}',
+            'x-disco-client': 'Alps:HyogaPlayer:0.0.0',
+            'Authorization': self._get_auth(disco_base, display_id, realm),
+        })