X-Git-Url: https://jfr.im/git/yt-dlp.git/blobdiff_plain/db50f19d76c6870a5a13d0cab9287d684fd7449a..add96eb9f84cfffe85682bf2fb85135746994ee8:/yt_dlp/extractor/panopto.py diff --git a/yt_dlp/extractor/panopto.py b/yt_dlp/extractor/panopto.py index 6b2596236..91f105519 100644 --- a/yt_dlp/extractor/panopto.py +++ b/yt_dlp/extractor/panopto.py @@ -3,9 +3,9 @@ import functools import json import random +import urllib.parse from .common import InfoExtractor -from ..compat import compat_urllib_parse_urlparse, compat_urlparse from ..utils import ( ExtractorError, OnDemandPagedList, @@ -44,7 +44,7 @@ class PanoptoBaseIE(InfoExtractor): 18: 'hu-HU', 19: 'nb-NO', 20: 'sv-SE', - 21: 'it-IT' + 21: 'it-IT', } def _call_api(self, base_url, path, video_id, data=None, fatal=True, **kwargs): @@ -66,7 +66,7 @@ def _call_api(self, base_url, path, video_id, data=None, fatal=True, **kwargs): @staticmethod def _parse_fragment(url): - return {k: json.loads(v[0]) for k, v in compat_urlparse.parse_qs(compat_urllib_parse_urlparse(url).fragment).items()} + return {k: json.loads(v[0]) for k, v in urllib.parse.parse_qs(urllib.parse.urlparse(url).fragment).items()} class PanoptoIE(PanoptoBaseIE): @@ -88,7 +88,7 @@ class PanoptoIE(PanoptoBaseIE): 'average_rating': int, 'uploader_id': '2db6b718-47a0-4b0b-9e17-ab0b00f42b1e', 'channel_id': 'e4c6a2fc-1214-4ca0-8fb7-aef2e29ff63a', - 'channel': 'Showcase Videos' + 'channel': 'Showcase Videos', }, }, { @@ -131,7 +131,7 @@ class PanoptoIE(PanoptoBaseIE): 'uploader': 'Kathryn Kelly', 'channel_id': 'fb93bc3c-6750-4b80-a05b-a921013735d3', 'channel': 'Getting Started', - } + }, }, { # Does not allow normal Viewer.aspx. AUDIO livestream has no url, so should be skipped and only give one stream. @@ -174,7 +174,7 @@ class PanoptoIE(PanoptoBaseIE): 'chapters': 'count:28', 'thumbnail': r're:https://demo\.hosted\.panopto\.com/.+', }, - 'params': {'format': 'mhtml', 'skip_download': True} + 'params': {'format': 'mhtml', 'skip_download': True}, }, { 'url': 'https://na-training-1.hosted.panopto.com/Panopto/Pages/Viewer.aspx?id=8285224a-9a2b-4957-84f2-acb0000c4ea9', @@ -196,7 +196,7 @@ class PanoptoIE(PanoptoBaseIE): 'subtitles': {'en-US': [{'ext': 'srt', 'data': 'md5:a3f4d25963fdeace838f327097c13265'}], 'es-ES': [{'ext': 'srt', 'data': 'md5:57e9dad365fd0fbaf0468eac4949f189'}]}, }, - 'params': {'writesubtitles': True, 'skip_download': True} + 'params': {'writesubtitles': True, 'skip_download': True}, }, { # On Panopto there are two subs: "Default" and en-US. en-US is blank and should be skipped. 'url': 'https://na-training-1.hosted.panopto.com/Panopto/Pages/Viewer.aspx?id=940cbd41-f616-4a45-b13e-aaf1000c915b', @@ -218,15 +218,15 @@ class PanoptoIE(PanoptoBaseIE): 'upload_date': '20191129', }, - 'params': {'writesubtitles': True, 'skip_download': True} + 'params': {'writesubtitles': True, 'skip_download': True}, }, { 'url': 'https://ucc.cloud.panopto.eu/Panopto/Pages/Viewer.aspx?id=0e8484a4-4ceb-4d98-a63f-ac0200b455cb', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://brown.hosted.panopto.com/Panopto/Pages/Embed.aspx?id=0b3ff73b-36a0-46c5-8455-aadf010a3638', - 'only_matching': True + 'only_matching': True, }, ] @@ -254,7 +254,7 @@ def _mark_watched(self, base_url, video_id, delivery_info): 'StopReason': None, 'StreamID': stream_id, 'TimeStamp': timestamp_str, - 'UpdatesRejected': 0 + 'UpdatesRejected': 0, }, ]} @@ -274,7 +274,7 @@ def _extract_chapters(timestamps): chapters.append({ 'start_time': start, 'end_time': start + duration, - 'title': caption + 'title': caption, }) return chapters @@ -283,11 +283,11 @@ def _extract_mhtml_formats(base_url, timestamps): image_frags = {} for timestamp in timestamps or []: duration = timestamp.get('Duration') - obj_id, obj_sn = timestamp.get('ObjectIdentifier'), timestamp.get('ObjectSequenceNumber'), + obj_id, obj_sn = timestamp.get('ObjectIdentifier'), timestamp.get('ObjectSequenceNumber') if timestamp.get('EventTargetType') == 'PowerPoint' and obj_id is not None and obj_sn is not None: image_frags.setdefault('slides', []).append({ 'url': base_url + f'/Pages/Viewer/Image.aspx?id={obj_id}&number={obj_sn}', - 'duration': duration + 'duration': duration, }) obj_pid, session_id, abs_time = timestamp.get('ObjectPublicIdentifier'), timestamp.get('SessionID'), timestamp.get('AbsoluteTime') @@ -304,7 +304,7 @@ def _extract_mhtml_formats(base_url, timestamps): 'acodec': 'none', 'vcodec': 'none', 'url': 'about:invalid', - 'fragments': fragments + 'fragments': fragments, } @staticmethod @@ -329,8 +329,8 @@ def _get_subtitles(self, base_url, video_id, delivery): 'deliveryId': video_id, 'getCaptions': True, 'language': str(lang), - 'responseType': 'json' - } + 'responseType': 'json', + }, ) if not isinstance(response, list): continue @@ -359,12 +359,12 @@ def _extract_streams_formats_and_subtitles(self, video_id, streams, **fmt_kwargs subtitles = self._merge_subtitles(subtitles, stream_subtitles) else: stream_formats.append({ - 'url': stream_url + 'url': stream_url, }) for fmt in stream_formats: fmt.update({ 'format_note': stream.get('Tag'), - **fmt_kwargs + **fmt_kwargs, }) formats.extend(stream_formats) @@ -384,7 +384,7 @@ def _real_extract(self, url): 'isKollectiveAgentInstalled': 'false', 'isEmbed': 'false', 'responseType': 'json', - } + }, ) delivery = delivery_info['Delivery'] @@ -421,7 +421,7 @@ def _real_extract(self, url): 'channel_id': delivery.get('SessionGroupPublicID'), 'channel': traverse_obj(delivery, 'SessionGroupLongName', 'SessionGroupShortName', get_all=False), 'formats': formats, - 'subtitles': subtitles + 'subtitles': subtitles, } @@ -435,7 +435,7 @@ class PanoptoPlaylistIE(PanoptoBaseIE): 'id': 'f3b39fcf-882f-4849-93d6-a9f401236d36', 'description': '', }, - 'playlist_mincount': 36 + 'playlist_mincount': 36, }, { 'url': 'https://utsa.hosted.panopto.com/Panopto/Pages/Viewer.aspx?pid=e2900555-3ad4-4bdb-854d-ad2401686190', @@ -444,7 +444,7 @@ class PanoptoPlaylistIE(PanoptoBaseIE): 'id': 'e2900555-3ad4-4bdb-854d-ad2401686190', 'description': 'md5:f958bca50a1cbda15fdc1e20d32b3ecb', }, - 'playlist_mincount': 4 + 'playlist_mincount': 4, }, ] @@ -466,7 +466,7 @@ def _entries(self, base_url, playlist_id, session_list_id): 'description': item.get('Description'), 'duration': item.get('Duration'), 'channel': traverse_obj(item, ('Parent', 'Name')), - 'channel_id': traverse_obj(item, ('Parent', 'Id')) + 'channel_id': traverse_obj(item, ('Parent', 'Id')), } def _real_extract(self, url): @@ -475,7 +475,7 @@ def _real_extract(self, url): video_id = get_first(parse_qs(url), 'id') if video_id: if self.get_param('noplaylist'): - self.to_screen('Downloading just video %s because of --no-playlist' % video_id) + self.to_screen(f'Downloading just video {video_id} because of --no-playlist') return self.url_result(base_url + f'/Pages/Viewer.aspx?id={video_id}', ie_key=PanoptoIE.ie_key(), video_id=video_id) else: self.to_screen(f'Downloading playlist {playlist_id}; add --no-playlist to just download video {video_id}') @@ -495,28 +495,28 @@ class PanoptoListIE(PanoptoBaseIE): 'url': 'https://demo.hosted.panopto.com/Panopto/Pages/Sessions/List.aspx#folderID=%22e4c6a2fc-1214-4ca0-8fb7-aef2e29ff63a%22', 'info_dict': { 'id': 'e4c6a2fc-1214-4ca0-8fb7-aef2e29ff63a', - 'title': 'Showcase Videos' + 'title': 'Showcase Videos', }, - 'playlist_mincount': 140 + 'playlist_mincount': 140, }, { 'url': 'https://demo.hosted.panopto.com/Panopto/Pages/Sessions/List.aspx#view=2&maxResults=250', 'info_dict': { 'id': 'panopto_list', - 'title': 'panopto_list' + 'title': 'panopto_list', }, - 'playlist_mincount': 300 + 'playlist_mincount': 300, }, { # Folder that contains 8 folders and a playlist 'url': 'https://howtovideos.hosted.panopto.com/Panopto/Pages/Sessions/List.aspx?noredirect=true#folderID=%224b9de7ae-0080-4158-8496-a9ba01692c2e%22', 'info_dict': { 'id': '4b9de7ae-0080-4158-8496-a9ba01692c2e', - 'title': 'Video Tutorials' + 'title': 'Video Tutorials', }, - 'playlist_mincount': 9 - } + 'playlist_mincount': 9, + }, ] @@ -559,7 +559,7 @@ def _extract_folder_metadata(self, base_url, folder_id): base_url, '/Services/Data.svc/GetFolderInfo', folder_id, data={'folderID': folder_id}, fatal=False) return { - 'title': get_first(response, 'Name') + 'title': get_first(response, 'Name'), } def _real_extract(self, url):