]>
jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/rice.py
3 from .common
import InfoExtractor
4 from ..compat
import compat_parse_qs
14 class RICEIE(InfoExtractor
):
15 _VALID_URL
= r
'https?://mediahub\.rice\.edu/app/[Pp]ortal/video\.aspx\?(?P<query>.+)'
17 'url': 'https://mediahub.rice.edu/app/Portal/video.aspx?PortalID=25ffd62c-3d01-4b29-8c70-7c94270efb3e&DestinationID=66bc9434-03bd-4725-b47e-c659d8d809db&ContentID=YEWIvbhb40aqdjMD1ALSqw',
18 'md5': '9b83b4a2eead4912dc3b7fac7c449b6a',
20 'id': 'YEWIvbhb40aqdjMD1ALSqw',
22 'title': 'Active Learning in Archeology',
23 'upload_date': '20140616',
24 'timestamp': 1402926346,
27 _NS
= 'http://schemas.datacontract.org/2004/07/ensembleVideo.Data.Service.Contracts.Models.Player.Config'
29 def _real_extract(self
, url
):
30 qs
= compat_parse_qs(self
._match
_valid
_url
(url
).group('query'))
31 if not qs
.get('PortalID') or not qs
.get('DestinationID') or not qs
.get('ContentID'):
32 raise ExtractorError('Invalid URL', expected
=True)
34 portal_id
= qs
['PortalID'][0]
35 playlist_id
= qs
['DestinationID'][0]
36 content_id
= qs
['ContentID'][0]
38 content_data
= self
._download
_xml
('https://mediahub.rice.edu/api/portal/GetContentTitle', content_id
, query
={
39 'portalId': portal_id
,
40 'playlistId': playlist_id
,
41 'contentId': content_id
43 metadata
= xpath_element(content_data
, './/metaData', fatal
=True)
44 title
= xpath_text(metadata
, 'primaryTitle', fatal
=True)
45 encodings
= xpath_element(content_data
, './/encodings', fatal
=True)
46 player_data
= self
._download
_xml
('https://mediahub.rice.edu/api/player/GetPlayerConfig', content_id
, query
={
47 'temporaryLinkId': xpath_text(encodings
, 'temporaryLinkId', fatal
=True),
48 'contentId': content_id
,
52 dimensions
= xpath_text(encodings
, 'dimensions')
54 wh
= dimensions
.split('x')
57 'width': int_or_none(wh
[0]),
58 'height': int_or_none(wh
[1]),
62 rtsp_path
= xpath_text(player_data
, self
._xpath
_ns
('RtspPath', self
._NS
))
68 fmt
.update(common_fmt
)
70 for source
in player_data
.findall(self
._xpath
_ns
('.//Source', self
._NS
)):
71 video_url
= xpath_text(source
, self
._xpath
_ns
('File', self
._NS
))
74 if '.m3u8' in video_url
:
75 formats
.extend(self
._extract
_m
3u8_formats
(video_url
, content_id
, 'mp4', 'm3u8_native', m3u8_id
='hls', fatal
=False))
79 'format_id': video_url
.split(':')[0],
81 fmt
.update(common_fmt
)
82 rtmp
= re
.search(r
'^(?P<url>rtmp://[^/]+/(?P<app>.+))/(?P<playpath>mp4:.+)$', video_url
)
85 'url': rtmp
.group('url'),
86 'play_path': rtmp
.group('playpath'),
87 'app': rtmp
.group('app'),
91 self
._sort
_formats
(formats
)
94 for content_asset
in content_data
.findall('.//contentAssets'):
95 asset_type
= xpath_text(content_asset
, 'type')
96 if asset_type
== 'image':
97 image_url
= xpath_text(content_asset
, 'httpPath')
101 'id': xpath_text(content_asset
, 'ID'),
108 'description': xpath_text(metadata
, 'abstract'),
109 'duration': int_or_none(xpath_text(metadata
, 'duration')),
110 'timestamp': parse_iso8601(xpath_text(metadata
, 'dateUpdated')),
111 'thumbnails': thumbnails
,