[ie/orf:on] Add extractor (#9113)

author HobbyistDev <redacted>

Fri, 2 Feb 2024 20:57:53 +0000 (05:57 +0900)

committer GitHub <redacted>

Fri, 2 Feb 2024 20:57:53 +0000 (20:57 +0000)
author HobbyistDev <redacted>
Fri, 2 Feb 2024 20:57:53 +0000 (05:57 +0900)
committer GitHub <redacted>
Fri, 2 Feb 2024 20:57:53 +0000 (20:57 +0000)
diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py

index 7726fe359728444a7c410df137fc6bd9477f4122..04318a716da511592d2bc5ceab88c6abaff2dacc 100644 (file)
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -1394,6 +1394,7 @@
  from .orf import (
      ORFTVthekIE,
      ORFFM4StoryIE,
+    ORFONIE,
      ORFRadioIE,
      ORFPodcastIE,
      ORFIPTVIE,
diff --git a/yt_dlp/extractor/orf.py b/yt_dlp/extractor/orf.py

index 9a48ae1b3e49475a7686bdf68a39627d030047bf..1b2a79a625a3380f3116194097b06822e0dbfbab 100644 (file)
--- a/yt_dlp/extractor/orf.py
+++ b/yt_dlp/extractor/orf.py
@@ -1,3 +1,4 @@
+import base64
  import functools
  import re
  
@@ -565,3 +566,66 @@ def _real_extract(self, url):
              })
  
          return self.playlist_result(entries)
+
+
+class ORFONIE(InfoExtractor):  # extractor for on.orf.at video pages (see _VALID_URL)
+    IE_NAME = 'orf:on'
+    _VALID_URL = r'https?://on\.orf\.at/video/(?P<id>\d{8})/(?P<slug>[\w-]+)'  # groups: 8-digit 'id' and URL 'slug'
+    _TESTS = [{
+        'url': 'https://on.orf.at/video/14210000/school-of-champions-48',
+        'info_dict': {
+            'id': '14210000',
+            'ext': 'mp4',
+            'duration': 2651.08,
+            'thumbnail': 'https://api-tvthek.orf.at/assets/segments/0167/98/thumb_16697671_segments_highlight_teaser.jpeg',
+            'title': 'School of Champions (4/8)',
+            'description': 'md5:d09ad279fc2e8502611e7648484b6afd',
+            'media_type': 'episode',
+            'timestamp': 1706472362,
+            'upload_date': '20240128',
+        }
+    }]
+
+    def _extract_video(self, video_id, display_id):  # returns a dict with id, formats, subtitles and API metadata
+        encrypted_id = base64.b64encode(f'3dSlfek03nsLKdj4Jsd{video_id}'.encode()).decode()  # plain base64 of fixed prefix + video id
+        api_json = self._download_json(
+            f'https://api-tvthek.orf.at/api/v4.3/public/episode/encrypted/{encrypted_id}', display_id)
+
+        formats, subtitles = [], {}  # accumulated across all manifests
+        for manifest_type in traverse_obj(api_json, ('sources', {dict.keys}, ...)):  # each key under api_json['sources']
+            for manifest_url in traverse_obj(api_json, ('sources', manifest_type, ..., 'src', {url_or_none})):  # each 'src' passing url_or_none
+                if manifest_type == 'hls':
+                    fmts, subs = self._extract_m3u8_formats_and_subtitles(
+                        manifest_url, display_id, fatal=False, m3u8_id='hls')  # fatal=False: presumably a bad manifest does not abort - confirm
+                elif manifest_type == 'dash':
+                    fmts, subs = self._extract_mpd_formats_and_subtitles(
+                        manifest_url, display_id, fatal=False, mpd_id='dash')  # fatal=False: same assumption as for the HLS call
+                else:
+                    continue  # manifest types other than 'hls' and 'dash' are skipped
+                formats.extend(fmts)
+                self._merge_subtitles(subs, target=subtitles)  # merge this manifest's subtitles into the accumulator
+
+        return {
+            'id': video_id,
+            'formats': formats,
+            'subtitles': subtitles,
+            **traverse_obj(api_json, {
+                'duration': ('duration_second', {float_or_none}),
+                'title': (('title', 'headline'), {str}),  # two candidate keys: 'title', 'headline'
+                'description': (('description', 'teaser_text'), {str}),  # two candidate keys: 'description', 'teaser_text'
+                'media_type': ('video_type', {str}),
+            }, get_all=False),  # get_all=False: presumably keeps the first matching candidate per field - confirm
+        }
+
+    def _real_extract(self, url):  # combines HTML meta tags, JSON-LD and API data into one info dict
+        video_id, display_id = self._match_valid_url(url).group('id', 'slug')  # the two named groups of _VALID_URL
+        webpage = self._download_webpage(url, display_id)
+
+        return {  # later entries override earlier keys: API data > JSON-LD > HTML meta
+            'id': video_id,
+            'title': self._html_search_meta(['og:title', 'twitter:title'], webpage, default=None),
+            'description': self._html_search_meta(
+                ['description', 'og:description', 'twitter:description'], webpage, default=None),
+            **self._search_json_ld(webpage, display_id, fatal=False),
+            **self._extract_video(video_id, display_id),
+        }
author	HobbyistDev <redacted>
	Fri, 2 Feb 2024 20:57:53 +0000 (05:57 +0900)
committer	GitHub <redacted>
	Fri, 2 Feb 2024 20:57:53 +0000 (20:57 +0000)
yt_dlp/extractor/_extractors.py		patch \| blob \| blame \| history
yt_dlp/extractor/orf.py		patch \| blob \| blame \| history