4 from datetime
import datetime
5 from random
import random
7 from .common
import InfoExtractor
9 compat_urllib_parse_urlparse
,
20 srt_subtitles_timecode
,
25 class PanoptoBaseIE(InfoExtractor
):
# Scheme + host + "/Panopto" prefix of a Panopto site, captured as the named
# group `base_url`; the _VALID_URL / _EMBED_REGEX patterns in this file are built
# on top of it. The dot before the TLD is escaped so that only a literal "."
# is accepted there ("panopto.com" / "panopto.eu", not e.g. "panoptoXcom").
BASE_URL_RE = r'(?P<base_url>https?://[\w.-]+\.panopto\.(?:com|eu)/Panopto)'
54 def _call_api(self
, base_url
, path
, video_id
, data
=None, fatal
=True, **kwargs
):
55 response
= self
._download
_json
(
56 base_url
+ path
, video_id
, data
=json
.dumps(data
).encode('utf8') if data
else None,
57 fatal
=fatal
, headers
={'accept': 'application/json', 'content-type': 'application/json'}
, **kwargs
)
60 error_code
= traverse_obj(response
, 'ErrorCode')
62 self
.raise_login_required(method
='cookies')
63 elif error_code
is not None:
64 msg
= f
'Panopto said: {response.get("ErrorMessage")}'
66 raise ExtractorError(msg
, video_id
=video_id
, expected
=True)
68 self
.report_warning(msg
, video_id
=video_id
)
72 def _parse_fragment(url
):
73 return {k: json.loads(v[0]) for k, v in compat_urlparse.parse_qs(compat_urllib_parse_urlparse(url).fragment).items()}
76 class PanoptoIE(PanoptoBaseIE
):
77 _VALID_URL
= PanoptoBaseIE
.BASE_URL_RE
+ r
'/Pages/(Viewer|Embed)\.aspx.*(?:\?|&)id=(?P<id>[a-f0-9-]+)'
78 _EMBED_REGEX
= [rf
'<iframe[^>]+src=["\'](?P<url>{PanoptoBaseIE.BASE_URL_RE}/Pages/(Viewer|Embed|Sessions/List)\.aspx[^"\']+)']
81 'url': 'https://demo.hosted.panopto.com/Panopto/Pages/Viewer.aspx?id=26b3ae9e-4a48-4dcc-96ba-0befba08a0fb',
83 'id': '26b3ae9e-4a48-4dcc-96ba-0befba08a0fb',
84 'title': 'Panopto for Business - Use Cases',
85 'timestamp': 1459184200,
86 'thumbnail': r
're:https://demo\.hosted\.panopto\.com/.+',
87 'upload_date': '20160328',
91 'duration': 88.17099999999999,
92 'average_rating': int,
93 'uploader_id': '2db6b718-47a0-4b0b-9e17-ab0b00f42b1e',
94 'channel_id': 'e4c6a2fc-1214-4ca0-8fb7-aef2e29ff63a',
95 'channel': 'Showcase Videos'
99 'url': 'https://demo.hosted.panopto.com/Panopto/Pages/Viewer.aspx?id=ed01b077-c9e5-4c7b-b8ff-15fa306d7a59',
101 'id': 'ed01b077-c9e5-4c7b-b8ff-15fa306d7a59',
102 'title': 'Overcoming Top 4 Challenges of Enterprise Video',
103 'uploader': 'Panopto Support',
104 'timestamp': 1449409251,
105 'thumbnail': r
're:https://demo\.hosted\.panopto\.com/.+',
106 'upload_date': '20151206',
108 'chapters': 'count:12',
109 'cast': ['Panopto Support'],
110 'uploader_id': 'a96d1a31-b4de-489b-9eee-b4a5b414372c',
111 'average_rating': int,
112 'description': 'md5:4391837802b3fc856dadf630c4b375d1',
113 'duration': 1088.2659999999998,
114 'channel_id': '9f3c1921-43bb-4bda-8b3a-b8d2f05a8546',
115 'channel': 'Webcasts',
119 # Extra params in URL
120 'url': 'https://howtovideos.hosted.panopto.com/Panopto/Pages/Viewer.aspx?randomparam=thisisnotreal&id=5fa74e93-3d87-4694-b60e-aaa4012214ed&advance=true',
122 'id': '5fa74e93-3d87-4694-b60e-aaa4012214ed',
125 'cast': ['Kathryn Kelly'],
126 'uploader_id': '316a0a58-7fa2-4cd9-be1c-64270d284a56',
127 'timestamp': 1569845768,
128 'tags': ['Viewer', 'Enterprise'],
130 'upload_date': '20190930',
131 'thumbnail': r
're:https://howtovideos\.hosted\.panopto\.com/.+',
132 'description': 'md5:2d844aaa1b1a14ad0e2601a0993b431f',
133 'title': 'Getting Started: View a Video',
134 'average_rating': int,
135 'uploader': 'Kathryn Kelly',
136 'channel_id': 'fb93bc3c-6750-4b80-a05b-a921013735d3',
137 'channel': 'Getting Started',
141 # Does not allow normal Viewer.aspx. AUDIO livestream has no url, so should be skipped and only give one stream.
142 'url': 'https://unisa.au.panopto.com/Panopto/Pages/Embed.aspx?id=9d9a0fa3-e99a-4ebd-a281-aac2017f4da4',
144 'id': '9d9a0fa3-e99a-4ebd-a281-aac2017f4da4',
146 'cast': ['LTS CLI Script'],
149 'description': 'md5:ee5cf653919f55b72bce2dbcf829c9fa',
150 'channel_id': 'b23e673f-c287-4cb1-8344-aae9005a69f8',
151 'average_rating': int,
152 'uploader_id': '38377323-6a23-41e2-9ff6-a8e8004bf6f7',
153 'uploader': 'LTS CLI Script',
154 'timestamp': 1572458134,
155 'title': 'WW2 Vets Interview 3 Ronald Stanley George',
156 'thumbnail': r
're:https://unisa\.au\.panopto\.com/.+',
157 'channel': 'World War II Veteran Interviews',
158 'upload_date': '20191030',
163 'url': 'https://demo.hosted.panopto.com/Panopto/Pages/Viewer.aspx?id=a7f12f1d-3872-4310-84b0-f8d8ab15326b',
165 'id': 'a7f12f1d-3872-4310-84b0-f8d8ab15326b',
167 'timestamp': 1448798857,
168 'duration': 4712.681,
169 'title': 'Cache Memory - CompSci 15-213, Lecture 12',
170 'channel_id': 'e4c6a2fc-1214-4ca0-8fb7-aef2e29ff63a',
171 'uploader_id': 'a96d1a31-b4de-489b-9eee-b4a5b414372c',
172 'upload_date': '20151129',
174 'uploader': 'Panopto Support',
175 'channel': 'Showcase Videos',
176 'description': 'md5:55e51d54233ddb0e6c2ed388ca73822c',
177 'cast': ['ISR Videographer', 'Panopto Support'],
178 'chapters': 'count:28',
179 'thumbnail': r
're:https://demo\.hosted\.panopto\.com/.+',
181 'params': {'format': 'mhtml', 'skip_download': True}
184 'url': 'https://na-training-1.hosted.panopto.com/Panopto/Pages/Viewer.aspx?id=8285224a-9a2b-4957-84f2-acb0000c4ea9',
186 'id': '8285224a-9a2b-4957-84f2-acb0000c4ea9',
189 'title': 'Company Policy',
191 'timestamp': 1615058901,
192 'channel': 'Human Resources',
193 'tags': ['HumanResources'],
194 'duration': 1604.243,
195 'thumbnail': r
're:https://na-training-1\.hosted\.panopto\.com/.+',
196 'uploader_id': '8e8ba0a3-424f-40df-a4f1-ab3a01375103',
197 'uploader': 'Cait M.',
198 'upload_date': '20210306',
200 'subtitles': {'en-US': [{'ext': 'srt', 'data': 'md5:a3f4d25963fdeace838f327097c13265'}
],
201 'es-ES': [{'ext': 'srt', 'data': 'md5:57e9dad365fd0fbaf0468eac4949f189'}
]},
203 'params': {'writesubtitles': True, 'skip_download': True}
205 # On Panopto there are two subs: "Default" and en-US. en-US is blank and should be skipped.
206 'url': 'https://na-training-1.hosted.panopto.com/Panopto/Pages/Viewer.aspx?id=940cbd41-f616-4a45-b13e-aaf1000c915b',
208 'id': '940cbd41-f616-4a45-b13e-aaf1000c915b',
210 'subtitles': 'count:1',
211 'title': 'HR Benefits Review Meeting*',
212 'cast': ['Panopto Support'],
214 'timestamp': 1575024251,
215 'thumbnail': r
're:https://na-training-1\.hosted\.panopto\.com/.+',
217 'description': 'md5:04f90a9c2c68b7828144abfb170f0106',
218 'uploader': 'Panopto Support',
220 'duration': 409.34499999999997,
221 'uploader_id': 'b6ac04ad-38b8-4724-a004-a851004ea3df',
222 'upload_date': '20191129',
225 'params': {'writesubtitles': True, 'skip_download': True}
228 'url': 'https://ucc.cloud.panopto.eu/Panopto/Pages/Viewer.aspx?id=0e8484a4-4ceb-4d98-a63f-ac0200b455cb',
229 'only_matching': True
232 'url': 'https://brown.hosted.panopto.com/Panopto/Pages/Embed.aspx?id=0b3ff73b-36a0-46c5-8455-aadf010a3638',
233 'only_matching': True
def suitable(cls, url):
    """Return whether this extractor should handle *url*.

    URLs that PanoptoPlaylistIE accepts are always declined here (returns
    False); for every other URL the parent class's ``suitable`` decides.
    """
    if PanoptoPlaylistIE.suitable(url):
        return False
    return super().suitable(url)
241 def _mark_watched(self
, base_url
, video_id
, delivery_info
):
242 duration
= traverse_obj(delivery_info
, ('Delivery', 'Duration'), expected_type
=float)
243 invocation_id
= delivery_info
.get('InvocationId')
244 stream_id
= traverse_obj(delivery_info
, ('Delivery', 'Streams', ..., 'PublicID'), get_all
=False, expected_type
=str)
245 if invocation_id
and stream_id
and duration
:
246 timestamp_str
= f
'/Date({calendar.timegm(datetime.utcnow().timetuple())}000)/'
250 'ClientTimeStamp': timestamp_str
,
252 'InvocationID': invocation_id
,
254 'SecondsListened': duration
- 1,
255 'SecondsRejected': 0,
259 'StreamID': stream_id
,
260 'TimeStamp': timestamp_str
,
265 self
._download
_webpage
(
266 base_url
+ '/Services/Analytics.svc/AddStreamRequests', video_id
,
267 fatal
=False, data
=json
.dumps(data
).encode('utf8'), headers
={'content-type': 'application/json'}
,
268 note
='Marking watched', errnote
='Unable to mark watched')
271 def _extract_chapters(timestamps
):
273 for timestamp
in timestamps
or []:
274 caption
= timestamp
.get('Caption')
275 start
, duration
= int_or_none(timestamp
.get('Time')), int_or_none(timestamp
.get('Duration'))
276 if not caption
or start
is None or duration
is None:
280 'end_time': start
+ duration
,
286 def _extract_mhtml_formats(base_url
, timestamps
):
288 for timestamp
in timestamps
or []:
289 duration
= timestamp
.get('Duration')
290 obj_id
, obj_sn
= timestamp
.get('ObjectIdentifier'), timestamp
.get('ObjectSequenceNumber'),
291 if timestamp
.get('EventTargetType') == 'PowerPoint' and obj_id
is not None and obj_sn
is not None:
292 image_frags
.setdefault('slides', []).append({
293 'url': base_url
+ f
'/Pages/Viewer/Image.aspx?id={obj_id}&number={obj_sn}',
297 obj_pid
, session_id
, abs_time
= timestamp
.get('ObjectPublicIdentifier'), timestamp
.get('SessionID'), timestamp
.get('AbsoluteTime')
298 if None not in (obj_pid
, session_id
, abs_time
):
299 image_frags
.setdefault('chapter', []).append({
300 'url': base_url
+ f
'/Pages/Viewer/Thumb.aspx?eventTargetPID={obj_pid}&sessionPID={session_id}&number={obj_sn}&isPrimary=false&absoluteTime={abs_time}',
301 'duration': duration
,
303 for name
, fragments
in image_frags
.items():
310 'url': 'about:invalid',
311 'fragments': fragments
315 def _json2srt(data
, delivery
):
317 for i
, line
in enumerate(data
):
318 start_time
= line
['Time']
319 duration
= line
.get('Duration')
321 end_time
= start_time
+ duration
323 end_time
= traverse_obj(data
, (i
+ 1, 'Time')) or delivery
['Duration']
324 yield f
'{i + 1}\n{srt_subtitles_timecode(start_time)} --> {srt_subtitles_timecode(end_time)}\n{line["Caption"]}'
325 return '\n\n'.join(_gen_lines())
327 def _get_subtitles(self
, base_url
, video_id
, delivery
):
329 for lang
in delivery
.get('AvailableLanguages') or []:
330 response
= self
._call
_api
(
331 base_url
, '/Pages/Viewer/DeliveryInfo.aspx', video_id
, fatal
=False,
332 note
='Downloading captions JSON metadata', query
={
333 'deliveryId': video_id
,
335 'language': str(lang
),
336 'responseType': 'json'
339 if not isinstance(response
, list):
341 subtitles
.setdefault(self
._SUB
_LANG
_MAPPING
.get(lang
) or 'default', []).append({
343 'data': self
._json
2srt
(response
, delivery
),
347 def _extract_streams_formats_and_subtitles(self
, video_id
, streams
, **fmt_kwargs
):
350 for stream
in streams
or []:
352 http_stream_url
= stream
.get('StreamHttpUrl')
353 stream_url
= stream
.get('StreamUrl')
356 stream_formats
.append({'url': http_stream_url}
)
359 media_type
= stream
.get('ViewerMediaFileTypeName')
360 if media_type
in ('hls', ):
361 m3u8_formats
, stream_subtitles
= self
._extract
_m
3u8_formats
_and
_subtitles
(stream_url
, video_id
)
362 stream_formats
.extend(m3u8_formats
)
363 subtitles
= self
._merge
_subtitles
(subtitles
, stream_subtitles
)
365 stream_formats
.append({
368 for fmt
in stream_formats
:
370 'format_note': stream
.get('Tag'),
373 formats
.extend(stream_formats
)
375 return formats
, subtitles
377 def _real_extract(self
, url
):
378 base_url
, video_id
= self
._match
_valid
_url
(url
).group('base_url', 'id')
379 delivery_info
= self
._call
_api
(
380 base_url
, '/Pages/Viewer/DeliveryInfo.aspx', video_id
,
382 'deliveryId': video_id
,
384 'isLiveNotes': 'false',
385 'refreshAuthCookie': 'true',
386 'isActiveBroadcast': 'false',
387 'isEditing': 'false',
388 'isKollectiveAgentInstalled': 'false',
390 'responseType': 'json',
394 delivery
= delivery_info
['Delivery']
395 session_start_time
= int_or_none(delivery
.get('SessionStartTime'))
396 timestamps
= delivery
.get('Timestamps')
398 # Podcast stream is usually the combined streams. We will prefer that by default.
399 podcast_formats
, podcast_subtitles
= self
._extract
_streams
_formats
_and
_subtitles
(
400 video_id
, delivery
.get('PodcastStreams'), format_note
='PODCAST')
402 streams_formats
, streams_subtitles
= self
._extract
_streams
_formats
_and
_subtitles
(
403 video_id
, delivery
.get('Streams'), preference
=-10)
405 formats
= podcast_formats
+ streams_formats
406 formats
.extend(self
._extract
_mhtml
_formats
(base_url
, timestamps
))
407 subtitles
= self
._merge
_subtitles
(
408 podcast_subtitles
, streams_subtitles
, self
.extract_subtitles(base_url
, video_id
, delivery
))
410 self
._sort
_formats
(formats
)
411 self
.mark_watched(base_url
, video_id
, delivery_info
)
415 'title': delivery
.get('SessionName'),
416 'cast': traverse_obj(delivery
, ('Contributors', ..., 'DisplayName'), default
=[], expected_type
=lambda x
: x
or None),
417 'timestamp': session_start_time
- 11640000000 if session_start_time
else None,
418 'duration': delivery
.get('Duration'),
419 'thumbnail': base_url
+ f
'/Services/FrameGrabber.svc/FrameRedirect?objectId={video_id}&mode=Delivery&random={random()}',
420 'average_rating': delivery
.get('AverageRating'),
421 'chapters': self
._extract
_chapters
(timestamps
),
422 'uploader': delivery
.get('OwnerDisplayName') or None,
423 'uploader_id': delivery
.get('OwnerId'),
424 'description': delivery
.get('SessionAbstract'),
425 'tags': traverse_obj(delivery
, ('Tags', ..., 'Content')),
426 'channel_id': delivery
.get('SessionGroupPublicID'),
427 'channel': traverse_obj(delivery
, 'SessionGroupLongName', 'SessionGroupShortName', get_all
=False),
429 'subtitles': subtitles
433 class PanoptoPlaylistIE(PanoptoBaseIE
):
434 _VALID_URL
= PanoptoBaseIE
.BASE_URL_RE
+ r
'/Pages/(Viewer|Embed)\.aspx.*(?:\?|&)pid=(?P<id>[a-f0-9-]+)'
437 'url': 'https://howtovideos.hosted.panopto.com/Panopto/Pages/Viewer.aspx?pid=f3b39fcf-882f-4849-93d6-a9f401236d36&id=5fa74e93-3d87-4694-b60e-aaa4012214ed&advance=true',
439 'title': 'Featured Video Tutorials',
440 'id': 'f3b39fcf-882f-4849-93d6-a9f401236d36',
443 'playlist_mincount': 36
446 'url': 'https://utsa.hosted.panopto.com/Panopto/Pages/Viewer.aspx?pid=e2900555-3ad4-4bdb-854d-ad2401686190',
448 'title': 'Library Website Introduction Playlist',
449 'id': 'e2900555-3ad4-4bdb-854d-ad2401686190',
450 'description': 'md5:f958bca50a1cbda15fdc1e20d32b3ecb',
452 'playlist_mincount': 4
457 def _entries(self
, base_url
, playlist_id
, session_list_id
):
458 session_list_info
= self
._call
_api
(
459 base_url
, f
'/Api/SessionLists/{session_list_id}?collections[0].maxCount=500&collections[0].name=items', playlist_id
)
461 items
= session_list_info
['Items']
463 if item
.get('TypeName') != 'Session':
464 self
.report_warning('Got an item in the playlist that is not a Session' + bug_reports_message(), only_once
=True)
468 'id': item
.get('Id'),
469 'url': item
.get('ViewerUri'),
470 'title': item
.get('Name'),
471 'description': item
.get('Description'),
472 'duration': item
.get('Duration'),
473 'channel': traverse_obj(item
, ('Parent', 'Name')),
474 'channel_id': traverse_obj(item
, ('Parent', 'Id'))
477 def _real_extract(self
, url
):
478 base_url
, playlist_id
= self
._match
_valid
_url
(url
).group('base_url', 'id')
480 video_id
= get_first(parse_qs(url
), 'id')
482 if self
.get_param('noplaylist'):
483 self
.to_screen('Downloading just video %s because of --no-playlist' % video_id
)
484 return self
.url_result(base_url
+ f
'/Pages/Viewer.aspx?id={video_id}', ie_key
=PanoptoIE
.ie_key(), video_id
=video_id
)
486 self
.to_screen(f
'Downloading playlist {playlist_id}; add --no-playlist to just download video {video_id}')
488 playlist_info
= self
._call
_api
(base_url
, f
'/Api/Playlists/{playlist_id}', playlist_id
)
489 return self
.playlist_result(
490 self
._entries
(base_url
, playlist_id
, playlist_info
['SessionListId']),
491 playlist_id
=playlist_id
, playlist_title
=playlist_info
.get('Name'),
492 playlist_description
=playlist_info
.get('Description'))
495 class PanoptoListIE(PanoptoBaseIE
):
496 _VALID_URL
= PanoptoBaseIE
.BASE_URL_RE
+ r
'/Pages/Sessions/List\.aspx'
500 'url': 'https://demo.hosted.panopto.com/Panopto/Pages/Sessions/List.aspx#folderID=%22e4c6a2fc-1214-4ca0-8fb7-aef2e29ff63a%22',
502 'id': 'e4c6a2fc-1214-4ca0-8fb7-aef2e29ff63a',
503 'title': 'Showcase Videos'
505 'playlist_mincount': 140
509 'url': 'https://demo.hosted.panopto.com/Panopto/Pages/Sessions/List.aspx#view=2&maxResults=250',
511 'id': 'panopto_list',
512 'title': 'panopto_list'
514 'playlist_mincount': 300
517 # Folder that contains 8 folders and a playlist
518 'url': 'https://howtovideos.hosted.panopto.com/Panopto/Pages/Sessions/List.aspx?noredirect=true#folderID=%224b9de7ae-0080-4158-8496-a9ba01692c2e%22',
520 'id': '4b9de7ae-0080-4158-8496-a9ba01692c2e',
521 'title': 'Video Tutorials'
523 'playlist_mincount': 9
528 def _fetch_page(self
, base_url
, query_params
, display_id
, page
):
532 'getFolderData': True,
533 'includePlaylists': True,
536 'maxResults': self
._PAGE
_SIZE
,
539 response
= self
._call
_api
(
540 base_url
, '/Services/Data.svc/GetSessions', f
'{display_id} page {page+1}',
541 data
={'queryParameters': params}
, fatal
=False)
543 for result
in get_first(response
, 'Results', default
=[]):
544 # This could be a video, playlist (or maybe something else)
545 item_id
= result
.get('DeliveryID')
549 'title': result
.get('SessionName'),
550 'url': traverse_obj(result
, 'ViewerUrl', 'EmbedUrl', get_all
=False) or (base_url
+ f
'/Pages/Viewer.aspx?id={item_id}'),
551 'duration': result
.get('Duration'),
552 'channel': result
.get('FolderName'),
553 'channel_id': result
.get('FolderID'),
556 for folder
in get_first(response
, 'Subfolders', default
=[]):
557 folder_id
= folder
.get('ID')
558 yield self
.url_result(
559 base_url
+ f
'/Pages/Sessions/List.aspx#folderID="{folder_id}"',
560 ie_key
=PanoptoListIE
.ie_key(), video_id
=folder_id
, title
=folder
.get('Name'))
562 def _extract_folder_metadata(self
, base_url
, folder_id
):
563 response
= self
._call
_api
(
564 base_url
, '/Services/Data.svc/GetFolderInfo', folder_id
,
565 data
={'folderID': folder_id}
, fatal
=False)
567 'title': get_first(response
, 'Name', default
=[])
570 def _real_extract(self
, url
):
571 mobj
= self
._match
_valid
_url
(url
)
572 base_url
= mobj
.group('base_url')
574 query_params
= self
._parse
_fragment
(url
)
575 folder_id
, display_id
= query_params
.get('folderID'), 'panopto_list'
577 if query_params
.get('isSubscriptionsPage'):
578 display_id
= 'subscriptions'
579 if not query_params
.get('subscribableTypes'):
580 query_params
['subscribableTypes'] = [0, 1, 2]
581 elif query_params
.get('isSharedWithMe'):
582 display_id
= 'sharedwithme'
584 display_id
= folder_id
586 query
= query_params
.get('query')
588 display_id
+= f
': query "{query}"'
596 info
.update(self
._extract
_folder
_metadata
(base_url
, folder_id
))
598 info
['entries'] = OnDemandPagedList(
599 functools
.partial(self
._fetch
_page
, base_url
, query_params
, display_id
), self
._PAGE
_SIZE
)