X-Git-Url: https://jfr.im/git/yt-dlp.git/blobdiff_plain/5ad28e7ffd41deccba33776c1609aa7855601739..61edf57f8f13f6dfd81154174e647eb5fdd26089:/yt_dlp/extractor/rtve.py

diff --git a/yt_dlp/extractor/rtve.py b/yt_dlp/extractor/rtve.py
index 59832eeac..7e0b666ab 100644
--- a/yt_dlp/extractor/rtve.py
+++ b/yt_dlp/extractor/rtve.py
@@ -1,27 +1,18 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import base64
 import io
-import sys
+import struct
 
 from .common import InfoExtractor
-from ..compat import (
-    compat_b64decode,
-    compat_struct_unpack,
-)
 from ..utils import (
-    determine_ext,
     ExtractorError,
+    determine_ext,
     float_or_none,
     qualities,
     remove_end,
     remove_start,
-    std_headers,
+    try_get,
 )
 
-_bytes_to_chr = (lambda x: x) if sys.version_info[0] == 2 else (lambda x: map(chr, x))
-
 
 class RTVEALaCartaIE(InfoExtractor):
     IE_NAME = 'rtve.es:alacarta'
@@ -70,16 +61,16 @@ class RTVEALaCartaIE(InfoExtractor):
     }]
 
     def _real_initialize(self):
-        user_agent_b64 = base64.b64encode(std_headers['User-Agent'].encode('utf-8')).decode('utf-8')
+        user_agent_b64 = base64.b64encode(self.get_param('http_headers')['User-Agent'].encode()).decode('utf-8')
         self._manager = self._download_json(
             'http://www.rtve.es/odin/loki/' + user_agent_b64,
             None, 'Fetching manager info')['manager']
 
     @staticmethod
     def _decrypt_url(png):
-        encrypted_data = io.BytesIO(compat_b64decode(png)[8:])
+        encrypted_data = io.BytesIO(base64.b64decode(png)[8:])
         while True:
-            length = compat_struct_unpack('!I', encrypted_data.read(4))[0]
+            length = struct.unpack('!I', encrypted_data.read(4))[0]
             chunk_type = encrypted_data.read(4)
             if chunk_type == b'IEND':
                 break
@@ -90,7 +81,7 @@ def _decrypt_url(png):
                 alphabet = []
                 e = 0
                 d = 0
-                for l in _bytes_to_chr(alphabet_data):
+                for l in alphabet_data.decode('iso-8859-1'):
                     if d == 0:
                         alphabet.append(l)
                         d = e = (e + 1) % 4
@@ -100,7 +91,7 @@ def _decrypt_url(png):
                 f = 0
                 e = 3
                 b = 1
-                for letter in _bytes_to_chr(url_data):
+                for letter in url_data.decode('iso-8859-1'):
                     if f == 0:
                         l = int(letter) * 10
                         f = 1
@@ -119,7 +110,7 @@ def _decrypt_url(png):
 
     def _extract_png_formats(self, video_id):
         png = self._download_webpage(
-            'http://www.rtve.es/ztnr/movil/thumbnail/%s/videos/%s.png' % (self._manager, video_id),
+            f'http://www.rtve.es/ztnr/movil/thumbnail/{self._manager}/videos/{video_id}.png',
             video_id, 'Downloading url information', query={'q': 'v2'})
         q = qualities(['Media', 'Alta', 'HQ', 'HD_READY', 'HD_FULL'])
         formats = []
@@ -138,13 +129,12 @@ def _extract_png_formats(self, video_id):
                     'quality': q(quality),
                     'url': video_url,
                 })
-        self._sort_formats(formats)
         return formats
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
         info = self._download_json(
-            'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id,
+            f'http://www.rtve.es/api/videos/{video_id}/config/alacarta_videos.json',
             video_id)['page']['items'][0]
         if info['state'] == 'DESPU':
             raise ExtractorError('The video is no longer available', expected=True)
@@ -160,7 +150,7 @@ def _real_extract(self, url):
 
         return {
             'id': video_id,
-            'title': self._live_title(title) if is_live else title,
+            'title': title,
             'formats': formats,
             'thumbnail': info.get('image'),
             'subtitles': subtitles,
@@ -178,7 +168,92 @@ def _get_subtitles(self, video_id, sub_file):
             for s in subs)
 
 
-class RTVEInfantilIE(RTVEALaCartaIE):
+class RTVEAudioIE(RTVEALaCartaIE):  # XXX: Do not subclass from concrete IE
+    IE_NAME = 'rtve.es:audio'
+    IE_DESC = 'RTVE audio'
+    _VALID_URL = r'https?://(?:www\.)?rtve\.es/(alacarta|play)/audios/[^/]+/[^/]+/(?P<id>[0-9]+)'
+
+    _TESTS = [{
+        'url': 'https://www.rtve.es/alacarta/audios/a-hombros-de-gigantes/palabra-ingeniero-codigos-informaticos-27-04-21/5889192/',
+        'md5': 'ae06d27bff945c4e87a50f89f6ce48ce',
+        'info_dict': {
+            'id': '5889192',
+            'ext': 'mp3',
+            'title': 'CÃ³digos informÃ¡ticos',
+            'thumbnail': r're:https?://.+/1598856591583.jpg',
+            'duration': 349.440,
+            'series': 'A hombros de gigantes',
+        },
+    }, {
+        'url': 'https://www.rtve.es/play/audios/en-radio-3/ignatius-farray/5791165/',
+        'md5': '072855ab89a9450e0ba314c717fa5ebc',
+        'info_dict': {
+            'id': '5791165',
+            'ext': 'mp3',
+            'title': 'Ignatius Farray',
+            'thumbnail': r're:https?://.+/1613243011863.jpg',
+            'duration': 3559.559,
+            'series': 'En Radio 3',
+        },
+    }, {
+        'url': 'https://www.rtve.es/play/audios/frankenstein-o-el-moderno-prometeo/capitulo-26-ultimo-muerte-victor-juan-jose-plans-mary-shelley/6082623/',
+        'md5': '0eadab248cc8dd193fa5765712e84d5c',
+        'info_dict': {
+            'id': '6082623',
+            'ext': 'mp3',
+            'title': 'CapÃ­tulo 26 y Ãºltimo: La muerte de Victor',
+            'thumbnail': r're:https?://.+/1632147445707.jpg',
+            'duration': 3174.086,
+            'series': 'Frankenstein o el moderno Prometeo',
+        },
+    }]
+
+    def _extract_png_formats(self, audio_id):
+        """
+        This function retrieves media related png thumbnail which obfuscate
+        valuable information about the media. This information is decrypted
+        via base class _decrypt_url function providing media quality and
+        media url
+        """
+        png = self._download_webpage(
+            f'http://www.rtve.es/ztnr/movil/thumbnail/{self._manager}/audios/{audio_id}.png',
+            audio_id, 'Downloading url information', query={'q': 'v2'})
+        q = qualities(['Media', 'Alta', 'HQ', 'HD_READY', 'HD_FULL'])
+        formats = []
+        for quality, audio_url in self._decrypt_url(png):
+            ext = determine_ext(audio_url)
+            if ext == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    audio_url, audio_id, 'mp4', 'm3u8_native',
+                    m3u8_id='hls', fatal=False))
+            elif ext == 'mpd':
+                formats.extend(self._extract_mpd_formats(
+                    audio_url, audio_id, 'dash', fatal=False))
+            else:
+                formats.append({
+                    'format_id': quality,
+                    'quality': q(quality),
+                    'url': audio_url,
+                })
+        return formats
+
+    def _real_extract(self, url):
+        audio_id = self._match_id(url)
+        info = self._download_json(
+            f'https://www.rtve.es/api/audios/{audio_id}.json',
+            audio_id)['page']['items'][0]
+
+        return {
+            'id': audio_id,
+            'title': info['title'].strip(),
+            'thumbnail': info.get('thumbnail'),
+            'duration': float_or_none(info.get('duration'), 1000),
+            'series': try_get(info, lambda x: x['programInfo']['title']),
+            'formats': self._extract_png_formats(audio_id),
+        }
+
+
+class RTVEInfantilIE(RTVEALaCartaIE):  # XXX: Do not subclass from concrete IE
     IE_NAME = 'rtve.es:infantil'
     IE_DESC = 'RTVE infantil'
     _VALID_URL = r'https?://(?:www\.)?rtve\.es/infantil/serie/[^/]+/video/[^/]+/(?P<id>[0-9]+)/'
@@ -197,7 +272,7 @@ class RTVEInfantilIE(RTVEALaCartaIE):
     }]
 
 
-class RTVELiveIE(RTVEALaCartaIE):
+class RTVELiveIE(RTVEALaCartaIE):  # XXX: Do not subclass from concrete IE
     IE_NAME = 'rtve.es:live'
     IE_DESC = 'RTVE.es live streams'
     _VALID_URL = r'https?://(?:www\.)?rtve\.es/directo/(?P<id>[a-zA-Z0-9-]+)'
@@ -211,7 +286,7 @@ class RTVELiveIE(RTVEALaCartaIE):
         },
         'params': {
             'skip_download': 'live stream',
-        }
+        },
     }]
 
     def _real_extract(self, url):
@@ -230,7 +305,7 @@ def _real_extract(self, url):
 
         return {
             'id': video_id,
-            'title': self._live_title(title),
+            'title': title,
             'formats': self._extract_png_formats(vidplayer_id),
             'is_live': True,
         }