X-Git-Url: https://jfr.im/git/yt-dlp.git/blobdiff_plain/3783b5f1d13380f9472bcbdca192aff349c01b17..61edf57f8f13f6dfd81154174e647eb5fdd26089:/yt_dlp/extractor/itv.py diff --git a/yt_dlp/extractor/itv.py b/yt_dlp/extractor/itv.py index 6e6a3673c..89e6f189c 100644 --- a/yt_dlp/extractor/itv.py +++ b/yt_dlp/extractor/itv.py @@ -1,26 +1,21 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json -from .common import InfoExtractor from .brightcove import BrightcoveNewIE - -from ..compat import compat_str +from .common import InfoExtractor from ..utils import ( + JSON_LD_RE, + ExtractorError, base_url, clean_html, determine_ext, extract_attributes, - ExtractorError, get_element_by_class, - JSON_LD_RE, merge_dicts, parse_duration, smuggle_url, try_get, - url_or_none, url_basename, + url_or_none, urljoin, ) @@ -38,7 +33,7 @@ class ITVIE(InfoExtractor): 'series': 'Plebs', 'season_number': 1, 'episode_number': 1, - 'thumbnail': r're:https?://hubimages\.itv\.com/episode/2_1873_0002' + 'thumbnail': r're:https?://hubimages\.itv\.com/episode/2_1873_0002', }, 'params': { # m3u8 download @@ -54,7 +49,7 @@ class ITVIE(InfoExtractor): 'series': 'The Jonathan Ross Show', 'episode_number': 8, 'season_number': 17, - 'thumbnail': r're:https?://hubimages\.itv\.com/episode/2_1873_0002' + 'thumbnail': r're:https?://hubimages\.itv\.com/episode/2_1873_0002', }, 'params': { # m3u8 download @@ -87,7 +82,7 @@ def _call_api(self, video_id, playlist_url, headers, platform_tag, featureset, f 'user': { 'itvUserId': '', 'entitlements': [], - 'token': '' + 'token': '', }, 'device': { 'manufacturer': 'Safari', @@ -95,20 +90,20 @@ def _call_api(self, video_id, playlist_url, headers, platform_tag, featureset, f 'os': { 'name': 'Windows NT', 'version': '6.1', - 'type': 'desktop' - } + 'type': 'desktop', + }, }, 'client': { 'version': '4.1', - 'id': 'browser' + 'id': 'browser', }, 'variantAvailability': { 'featureset': { 'min': featureset, - 'max': featureset + 'max': featureset, }, - 'platformTag': platform_tag - } + 'platformTag': platform_tag, + }, }).encode(), headers=headers, fatal=fatal) def _get_subtitles(self, video_id, variants, ios_playlist_url, headers, *args, **kwargs): @@ -117,7 +112,7 @@ def _get_subtitles(self, video_id, variants, ios_playlist_url, headers, *args, * # See: https://github.com/yt-dlp/yt-dlp/issues/986 platform_tag_subs, featureset_subs = next( ((platform_tag, featureset) - for platform_tag, featuresets in reversed(variants.items()) for featureset in featuresets + for platform_tag, featuresets in reversed(list(variants.items())) for featureset in featuresets if try_get(featureset, lambda x: x[2]) == 'outband-webvtt'), (None, None)) @@ -140,14 +135,14 @@ def _real_extract(self, url): params = extract_attributes(self._search_regex( r'(?s)(<[^>]+id="video"[^>]*>)', webpage, 'params')) variants = self._parse_json( - try_get(params, lambda x: x['data-video-variants'], compat_str) or '{}', + try_get(params, lambda x: x['data-video-variants'], str) or '{}', video_id, fatal=False) # Prefer last matching featureset # See: https://github.com/yt-dlp/yt-dlp/issues/986 platform_tag_video, featureset_video = next( ((platform_tag, featureset) - for platform_tag, featuresets in reversed(variants.items()) for featureset in featuresets - if try_get(featureset, lambda x: x[:2]) == ['hls', 'aes']), + for platform_tag, featuresets in reversed(list(variants.items())) for featureset in featuresets + if set(try_get(featureset, lambda x: x[:2]) or []) == {'aes', 'hls'}), (None, None)) if not platform_tag_video or not featureset_video: raise ExtractorError('No downloads available', expected=True, video_id=video_id) @@ -175,7 +170,6 @@ def _real_extract(self, url): formats.append({ 'url': href, }) - self._sort_formats(formats) info = self._search_json_ld(webpage, video_id, default={}) if not info: json_ld = self._parse_json(self._search_regex( @@ -190,7 +184,7 @@ def _real_extract(self, url): break thumbnails = [] - thumbnail_url = try_get(params, lambda x: x['data-video-posterframe'], compat_str) + thumbnail_url = try_get(params, lambda x: x['data-video-posterframe'], str) if thumbnail_url: thumbnails.extend([{ 'url': thumbnail_url.format(width=1920, height=1080, quality=100, blur=0, bg='false'), @@ -198,7 +192,7 @@ def _real_extract(self, url): 'height': 1080, }, { 'url': urljoin(base_url(thumbnail_url), url_basename(thumbnail_url)), - 'preference': -2 + 'preference': -2, }]) thumbnail_url = self._html_search_meta(['og:image', 'twitter:image'], webpage, default=None) @@ -215,7 +209,7 @@ def _real_extract(self, url): 'subtitles': self.extract_subtitles(video_id, variants, ios_playlist_url, headers), 'duration': parse_duration(video_data.get('Duration')), 'description': clean_html(get_element_by_class('episode-info__synopsis', webpage)), - 'thumbnails': thumbnails + 'thumbnails': thumbnails, }, info) @@ -232,9 +226,9 @@ class ITVBTCCIE(InfoExtractor): 'url': 'https://www.itv.com/news/2021-10-27/i-have-to-protect-the-country-says-rishi-sunak-as-uk-faces-interest-rate-hike', 'info_dict': { 'id': 'i-have-to-protect-the-country-says-rishi-sunak-as-uk-faces-interest-rate-hike', - 'title': 'md5:6ef054dd9f069330db3dcc66cb772d32' + 'title': 'md5:6ef054dd9f069330db3dcc66cb772d32', }, - 'playlist_count': 4 + 'playlist_count': 4, }] BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s' @@ -243,8 +237,8 @@ def _real_extract(self, url): webpage = self._download_webpage(url, playlist_id) - json_map = try_get(self._parse_json(self._html_search_regex( - '(?s)]+id=[\'"]__NEXT_DATA__[^>]*>([^<]+)', webpage, 'json_map'), playlist_id), + json_map = try_get( + self._search_nextjs_data(webpage, playlist_id), lambda x: x['props']['pageProps']['article']['body']['content']) or [] entries = [] @@ -259,7 +253,7 @@ def _real_extract(self, url): # ITV does not like some GB IP ranges, so here are some # IP blocks it accepts 'geo_ip_blocks': [ - '193.113.0.0/16', '54.36.162.0/23', '159.65.16.0/21' + '193.113.0.0/16', '54.36.162.0/23', '159.65.16.0/21', ], 'referrer': url, }),