X-Git-Url: https://jfr.im/git/yt-dlp.git/blobdiff_plain/7a5c1cfe93924351387b44919b3c0b2f66c4b883..61edf57f8f13f6dfd81154174e647eb5fdd26089:/yt_dlp/extractor/rts.py diff --git a/yt_dlp/extractor/rts.py b/yt_dlp/extractor/rts.py index 48f17b828..dc1e2d3b4 100644 --- a/yt_dlp/extractor/rts.py +++ b/yt_dlp/extractor/rts.py @@ -1,27 +1,25 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .srgssr import SRGSSRIE -from ..compat import compat_str from ..utils import ( + determine_ext, int_or_none, parse_duration, parse_iso8601, unescapeHTML, - determine_ext, + urljoin, ) -class RTSIE(SRGSSRIE): +class RTSIE(SRGSSRIE): # XXX: Do not subclass from concrete IE + _WORKING = False IE_DESC = 'RTS.ch' _VALID_URL = r'rts:(?P\d+)|https?://(?:.+?\.)?rts\.ch/(?:[^/]+/){2,}(?P[0-9]+)-(?P.+?)\.html' _TESTS = [ { 'url': 'http://www.rts.ch/archives/tv/divers/3449373-les-enfants-terribles.html', - 'md5': 'ff7f8450a90cf58dacb64e29707b4a8e', + 'md5': '753b877968ad8afaeddccc374d4256a5', 'info_dict': { 'id': '3449373', 'display_id': 'les-enfants-terribles', @@ -35,6 +33,7 @@ class RTSIE(SRGSSRIE): 'thumbnail': r're:^https?://.*\.image', 'view_count': int, }, + 'expected_warnings': ['Unable to download f4m manifest', 'Failed to download m3u8 information'], }, { 'url': 'http://www.rts.ch/emissions/passe-moi-les-jumelles/5624067-entre-ciel-et-mer.html', @@ -63,11 +62,12 @@ class RTSIE(SRGSSRIE): # m3u8 download 'skip_download': True, }, + 'expected_warnings': ['Unable to download f4m manifest', 'Failed to download m3u8 information'], 'skip': 'Blocked outside Switzerland', }, { 'url': 'http://www.rts.ch/video/info/journal-continu/5745356-londres-cachee-par-un-epais-smog.html', - 'md5': '1bae984fe7b1f78e94abc74e802ed99f', + 'md5': '9bb06503773c07ce83d3cbd793cebb91', 'info_dict': { 'id': '5745356', 'display_id': 'londres-cachee-par-un-epais-smog', @@ -81,6 +81,7 @@ class RTSIE(SRGSSRIE): 'thumbnail': r're:^https?://.*\.image', 'view_count': int, }, + 'expected_warnings': ['Unable to download f4m manifest', 'Failed to download m3u8 information'], }, { 'url': 'http://www.rts.ch/audio/couleur3/programmes/la-belle-video-de-stephane-laurenceau/5706148-urban-hippie-de-damien-krisl-03-04-2014.html', @@ -108,17 +109,17 @@ class RTSIE(SRGSSRIE): { 'url': 'http://pages.rts.ch/emissions/passe-moi-les-jumelles/5624065-entre-ciel-et-mer.html', 'only_matching': True, - } + }, ] def _real_extract(self, url): - m = re.match(self._VALID_URL, url) + m = self._match_valid_url(url) media_id = m.group('rts_id') or m.group('id') display_id = m.group('display_id') or media_id def download_json(internal_id): return self._download_json( - 'http://www.rts.ch/a/%s.html?f=json/article' % internal_id, + f'http://www.rts.ch/a/{internal_id}.html?f=json/article', display_id) all_info = download_json(media_id) @@ -135,8 +136,8 @@ def download_json(internal_id): if not entries: page, urlh = self._download_webpage_handle(url, display_id) - if re.match(self._VALID_URL, urlh.geturl()).group('id') != media_id: - return self.url_result(urlh.geturl(), 'RTS') + if re.match(self._VALID_URL, urlh.url).group('id') != media_id: + return self.url_result(urlh.url, 'RTS') # article with videos on rhs videos = re.findall( @@ -147,7 +148,7 @@ def download_json(internal_id): r'(?s)]+class="srg-player"[^>]+src="[^"]+urn:([^"]+)"', page) if videos: - entries = [self.url_result('srgssr:%s' % video_urn, 'SRGSSR') for video_urn in videos] + entries = [self.url_result(f'srgssr:{video_urn}', 'SRGSSR') for video_urn in videos] if entries: return self.playlist_result(entries, media_id, all_info.get('title')) @@ -160,7 +161,7 @@ def download_json(internal_id): media_type = 'video' if 'video' in all_info else 'audio' # check for errors - self.get_media_data('rts', media_type, media_id) + self._get_media_data('rts', media_type, media_id) info = all_info['video']['JSONinfo'] if 'video' in all_info else all_info['audio'] @@ -194,6 +195,7 @@ def extract_bitrate(url): 'tbr': extract_bitrate(format_url), }) + download_base = 'http://rtsww{}-d.rts.ch/'.format('-a' if media_type == 'audio' else '') for media in info.get('media', []): media_url = media.get('url') if not media_url or re.match(r'https?://', media_url): @@ -205,15 +207,14 @@ def extract_bitrate(url): format_id += '-%dk' % rate formats.append({ 'format_id': format_id, - 'url': 'http://download-video.rts.ch/' + media_url, + 'url': urljoin(download_base, media_url), 'tbr': rate or extract_bitrate(media_url), }) self._check_formats(formats, media_id) - self._sort_formats(formats) duration = info.get('duration') or info.get('cutout') or info.get('cutduration') - if isinstance(duration, compat_str): + if isinstance(duration, str): duration = parse_duration(duration) return {