]> jfr.im git - yt-dlp.git/blobdiff - yt_dlp/extractor/radiko.py
[ie/orf:on] Improve extraction (#9677)
[yt-dlp.git] / yt_dlp / extractor / radiko.py
index c363d9ba5f7b1763be56b3c3b9b1dcae14da01c0..f0135827b665ab86897ac0b5540fe95c43d101a8 100644 (file)
@@ -1,5 +1,6 @@
 import base64
 import random
+import re
 import urllib.parse
 
 from .common import InfoExtractor
@@ -11,6 +12,7 @@
     unified_timestamp,
     update_url_query,
 )
+from ..utils.traversal import traverse_obj
 
 
 class RadikoBaseIE(InfoExtractor):
@@ -159,6 +161,10 @@ def _extract_formats(self, video_id, station, is_onair, ft, cursor, auth_token,
 
         return formats
 
+    def _extract_performers(self, prog):  # gather the text found under 'pfm' in prog as a list of names; a falsy result becomes None
+        return traverse_obj(prog, (  # NOTE(review): presumably prog is an XML element (callers in this diff use prog.find) — confirm
+            'pfm/text()', ..., {lambda x: re.split(r'[//、 ,,]', x)}, ..., {str.strip})) or None  # split each text on '/', '、', ' ' or ',' then strip every piece. NOTE(review): '/' and ',' are listed twice in the class — possibly full-width variants flattened in transcoding; verify against upstream
+
 
 class RadikoIE(RadikoBaseIE):
     _VALID_URL = r'https?://(?:www\.)?radiko\.jp/#!/ts/(?P<station>[A-Z0-9-]+)/(?P<id>\d+)'
@@ -186,10 +192,12 @@ def _real_extract(self, url):
         return {
             'id': video_id,
             'title': try_call(lambda: prog.find('title').text),
+            'cast': self._extract_performers(prog),
             'description': clean_html(try_call(lambda: prog.find('info').text)),
             'uploader': try_call(lambda: station_program.find('.//name').text),
             'uploader_id': station,
             'timestamp': vid_int,
+            'duration': try_call(lambda: unified_timestamp(radio_end, False) - unified_timestamp(radio_begin, False)),
             'is_live': True,
             'formats': self._extract_formats(
                 video_id=video_id, station=station, is_onair=False,
@@ -243,6 +251,7 @@ def _real_extract(self, url):
         return {
             'id': station,
             'title': title,
+            'cast': self._extract_performers(prog),
             'description': description,
             'uploader': station_name,
             'uploader_id': station,