X-Git-Url: https://jfr.im/git/yt-dlp.git/blobdiff_plain/e897bd8292a41999cf51dba91b390db5643c72db..24f3097ea9a470a984d0454dc013cafa2325f5f8:/yt_dlp/extractor/nbc.py diff --git a/yt_dlp/extractor/nbc.py b/yt_dlp/extractor/nbc.py index e88f98abf..1da2cad3d 100644 --- a/yt_dlp/extractor/nbc.py +++ b/yt_dlp/extractor/nbc.py @@ -1,12 +1,12 @@ import base64 import json import re +import urllib.parse import xml.etree.ElementTree from .adobepass import AdobePassIE from .common import InfoExtractor from .theplatform import ThePlatformIE, default_ns -from ..compat import compat_urllib_parse_unquote from ..networking import HEADRequest from ..utils import ( ExtractorError, @@ -148,12 +148,12 @@ class NBCIE(ThePlatformIE): # XXX: Do not subclass from concrete IE # Percent escaped url 'url': 'https://www.nbc.com/up-all-night/video/day-after-valentine%27s-day/n2189', 'only_matching': True, - } + }, ] def _real_extract(self, url): permalink, video_id = self._match_valid_url(url).groups() - permalink = 'http' + compat_urllib_parse_unquote(permalink) + permalink = 'http' + urllib.parse.unquote(permalink) video_data = self._download_json( 'https://friendship.nbc.co/v2/graphql', video_id, query={ 'query': '''query bonanzaPage( @@ -201,7 +201,7 @@ def _real_extract(self, url): 'switch': 'HLSServiceSecure', } video_id = video_data['mpxGuid'] - tp_path = 'NnzsPC/media/guid/%s/%s' % (video_data.get('mpxAccountId') or '2410887629', video_id) + tp_path = 'NnzsPC/media/guid/{}/{}'.format(video_data.get('mpxAccountId') or '2410887629', video_id) tpm = self._download_theplatform_metadata(tp_path, video_id) title = tpm.get('title') or video_data.get('secondaryTitle') if video_data.get('locked'): @@ -211,7 +211,7 @@ def _real_extract(self, url): query['auth'] = self._extract_mvpd_auth( url, video_id, 'nbcentertainment', resource) theplatform_url = smuggle_url(update_url_query( - 'http://link.theplatform.com/s/NnzsPC/media/guid/%s/%s' % (video_data.get('mpxAccountId') or '2410887629', video_id), 
+ 'http://link.theplatform.com/s/NnzsPC/media/guid/{}/{}'.format(video_data.get('mpxAccountId') or '2410887629', video_id), query), {'force_smil_url': True}) # Empty string or 0 can be valid values for these. So the check must be `is None` @@ -253,7 +253,7 @@ def _real_extract(self, url): class NBCSportsVPlayerIE(InfoExtractor): _VALID_URL_BASE = r'https?://(?:vplayer\.nbcsports\.com|(?:www\.)?nbcsports\.com/vplayer)/' _VALID_URL = _VALID_URL_BASE + r'(?:[^/]+/)+(?P<id>[0-9a-zA-Z_]+)' - _EMBED_REGEX = [r'(?:iframe[^>]+|var video|div[^>]+data-(?:mpx-)?)[sS]rc\s?=\s?"(?P<url>%s[^\"]+)' % _VALID_URL_BASE] + _EMBED_REGEX = [rf'(?:iframe[^>]+|var video|div[^>]+data-(?:mpx-)?)[sS]rc\s?=\s?"(?P<url>{_VALID_URL_BASE}[^\"]+)'] _TESTS = [{ 'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_embed/select/9CsDKds0kvHI', @@ -267,8 +267,8 @@ class NBCSportsVPlayerIE(InfoExtractor): 'uploader': 'NBCU-SPORTS', 'duration': 72.818, 'chapters': [], - 'thumbnail': r're:^https?://.*\.jpg$' - } + 'thumbnail': r're:^https?://.*\.jpg$', + }, }, { 'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_embed/select/media/PEgOtlNcC_y2', 'only_matching': True, @@ -301,7 +301,7 @@ class NBCSportsIE(InfoExtractor): 'chapters': [], 'thumbnail': 'https://hdliveextra-a.akamaihd.net/HD/image_sports/NBCU_Sports_Group_-_nbcsports/253/303/izzodps.jpg', 'duration': 528.395, - } + }, }, { # data-mpx-src 'url': 'https://www.nbcsports.com/philadelphia/philadelphia-phillies/bruce-bochy-hector-neris-hes-idiot', @@ -339,7 +339,7 @@ class NBCSportsStreamIE(AdobePassIE): def _real_extract(self, url): video_id = self._match_id(url) live_source = self._download_json( - 'http://stream.nbcsports.com/data/live_sources_%s.json' % video_id, + f'http://stream.nbcsports.com/data/live_sources_{video_id}.json', video_id) video_source = live_source['videoSources'][0] title = video_source['title'] @@ -499,7 +499,7 @@ def _real_extract(self, url): continue tbr = int_or_none(va.get('bitrate'), 1000) if tbr: - format_id += '-%d' % 
tbr + format_id += f'-{tbr}' formats.append({ 'format_id': format_id, 'url': public_url, @@ -568,7 +568,7 @@ def _real_extract(self, url): except RegexNotFoundError: theplatform_url = self._search_regex( r"([\"'])embedUrl\1: *([\"'])(?P<embedUrl>.+)\2", - webpage, 'embedding URL', group="embedUrl") + webpage, 'embedding URL', group='embedUrl') return { '_type': 'url_transparent', @@ -623,7 +623,7 @@ def _real_extract(self, url): source_url = self._download_json( f'https://api-leap.nbcsports.com/feeds/assets/{pid}?application=NBCOlympics&platform=desktop&format=nbc-player&env=staging', - pid, 'Downloading leap config' + pid, 'Downloading leap config', )['videoSources'][0]['cdnSources']['primary'][0]['sourceUrl'] if event_config.get('cdnToken'):