[ie/orf:podcast] Add extractor (#8486)

author Esokrates <redacted>

Sat, 11 Nov 2023 20:06:25 +0000 (20:06 +0000)

committer GitHub <redacted>

Sat, 11 Nov 2023 20:06:25 +0000 (20:06 +0000)
author Esokrates <redacted>
Sat, 11 Nov 2023 20:06:25 +0000 (20:06 +0000)
committer GitHub <redacted>
Sat, 11 Nov 2023 20:06:25 +0000 (20:06 +0000)
diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py

index c4bf2acdf05372b969690a95f532f7dc199f5aa2..525944c61260d54b60f33163f98c89f00bc97c45 100644 (file)
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -1420,6 +1420,7 @@
      ORFTVthekIE,
      ORFFM4StoryIE,
      ORFRadioIE,
+    ORFPodcastIE,
      ORFIPTVIE,
  )
  from .outsidetv import OutsideTVIE
diff --git a/yt_dlp/extractor/orf.py b/yt_dlp/extractor/orf.py

index cc3c003fa024f2833fd9afd0b72dbf5265540dd3..9a48ae1b3e49475a7686bdf68a39627d030047bf 100644 (file)
--- a/yt_dlp/extractor/orf.py
+++ b/yt_dlp/extractor/orf.py
@@ -4,15 +4,16 @@
  from .common import InfoExtractor
  from ..networking import HEADRequest
  from ..utils import (
+    InAdvancePagedList,
      clean_html,
      determine_ext,
      float_or_none,
-    InAdvancePagedList,
      int_or_none,
      join_nonempty,
+    make_archive_id,
+    mimetype2ext,
      orderedSet,
      remove_end,
-    make_archive_id,
      smuggle_url,
      strip_jsonp,
      try_call,
@@ -21,6 +22,7 @@
      unsmuggle_url,
      url_or_none,
  )
+from ..utils.traversal import traverse_obj
  
  
  class ORFTVthekIE(InfoExtractor):
@@ -334,6 +336,45 @@ def _real_extract(self, url):
              self._entries(data, station or station2), show_id, data.get('title'), clean_html(data.get('subtitle')))
  
  
+class ORFPodcastIE(InfoExtractor):  # extractor for sound.orf.at/podcast/<station>/<show>/<id> URLs; returns one info dict per URL
+    IE_NAME = 'orf:podcast'
+    _STATION_RE = '|'.join(map(re.escape, (  # regex alternation of the accepted station codes, each passed through re.escape
+        'bgl', 'fm4', 'ktn', 'noe', 'oe1', 'oe3',
+        'ooe', 'sbg', 'stm', 'tir', 'tv', 'vbg', 'wie')))
+    _VALID_URL = rf'https?://sound\.orf\.at/podcast/(?P<station>{_STATION_RE})/(?P<show>[\w-]+)/(?P<id>[\w-]+)'  # named groups: station, show, id
+    _TESTS = [{
+        'url': 'https://sound.orf.at/podcast/oe3/fruehstueck-bei-mir/nicolas-stockhammer-15102023',
+        'md5': '526a5700e03d271a1505386a8721ab9b',
+        'info_dict': {
+            'id': 'nicolas-stockhammer-15102023',
+            'ext': 'mp3',
+            'title': 'Nicolas Stockhammer (15.10.2023)',
+            'duration': 3396.0,
+            'series': 'Frühstück bei mir',
+        },
+        'skip': 'ORF podcasts are only available for a limited time'  # this test case is marked as skipped, with this reason string
+    }]
+
+    def _real_extract(self, url):  # builds the info dict for one matched URL from a single JSON API request
+        station, show, show_id = self._match_valid_url(url).group('station', 'show', 'id')  # NOTE: show_id holds the 'id' group (last path segment), not the show slug
+        data = self._download_json(  # API path reuses the same station/show/id segments as the page URL
+            f'https://audioapi.orf.at/radiothek/api/2.0/podcast/{station}/{show}/{show_id}', show_id)
+
+        return {
+            'id': show_id,
+            'ext': 'mp3',  # default; an 'ext' key in the mapping unpacked below takes precedence when present
+            'vcodec': 'none',  # NOTE(review): presumably marks the result as audio-only - confirm against yt-dlp format field docs
+            **traverse_obj(data, ('payload', {  # remaining fields are looked up under data['payload']
+                'url': ('enclosures', 0, 'url'),  # URL of the first enclosure
+                'ext': ('enclosures', 0, 'type', {mimetype2ext}),  # first enclosure's 'type' passed through mimetype2ext
+                'title': 'title',
+                'description': ('description', {clean_html}),  # description passed through clean_html
+                'duration': ('duration', {functools.partial(float_or_none, scale=1000)}),  # NOTE(review): scale=1000 presumably converts ms to seconds - confirm against the API
+                'series': ('podcast', 'title'),  # series name is read from the nested podcast object's title
+            })),
+        }
+
+
  class ORFIPTVIE(InfoExtractor):
      IE_NAME = 'orf:iptv'
      IE_DESC = 'iptv.ORF.at'
author	Esokrates <redacted>
	Sat, 11 Nov 2023 20:06:25 +0000 (20:06 +0000)
committer	GitHub <redacted>
	Sat, 11 Nov 2023 20:06:25 +0000 (20:06 +0000)
yt_dlp/extractor/_extractors.py		patch \| blob \| blame \| history
yt_dlp/extractor/orf.py		patch \| blob \| blame \| history