]>
Commit | Line | Data |
---|---|---|
877f440f | 1 | import re |
2 | ||
3 | from .common import InfoExtractor | |
4 | from ..compat import compat_parse_qs | |
5 | from ..utils import ( | |
6 | xpath_text, | |
7 | xpath_element, | |
8 | int_or_none, | |
9 | parse_iso8601, | |
10 | ExtractorError, | |
11 | ) | |
12 | ||
13 | ||
class RICEIE(InfoExtractor):
    """Extractor for video pages served from mediahub.rice.edu.

    The page URL carries three identifiers in its query string
    (``PortalID``, ``DestinationID`` and ``ContentID``).  They are used to
    query two XML endpoints on the same host: one for the content metadata
    and one for the player configuration that lists the media sources.
    """

    _VALID_URL = r'https?://mediahub\.rice\.edu/app/[Pp]ortal/video\.aspx\?(?P<query>.+)'
    _TEST = {
        'url': 'https://mediahub.rice.edu/app/Portal/video.aspx?PortalID=25ffd62c-3d01-4b29-8c70-7c94270efb3e&DestinationID=66bc9434-03bd-4725-b47e-c659d8d809db&ContentID=YEWIvbhb40aqdjMD1ALSqw',
        'md5': '9b83b4a2eead4912dc3b7fac7c449b6a',
        'info_dict': {
            'id': 'YEWIvbhb40aqdjMD1ALSqw',
            'ext': 'mp4',
            'title': 'Active Learning in Archeology',
            'upload_date': '20140616',
            'timestamp': 1402926346,
        }
    }
    # XML namespace of the elements looked up in the player config response.
    _NS = 'http://schemas.datacontract.org/2004/07/ensembleVideo.Data.Service.Contracts.Models.Player.Config'

    def _real_extract(self, url):
        """Build the info dict for the video addressed by *url*.

        Raises ExtractorError (expected) when any of the three required
        query parameters is missing or empty.
        """
        required_keys = ('PortalID', 'DestinationID', 'ContentID')
        params = compat_parse_qs(self._match_valid_url(url).group('query'))
        if not all(params.get(key) for key in required_keys):
            raise ExtractorError('Invalid URL', expected=True)

        # compat_parse_qs maps each key to a list; only the first value is used.
        portal_id, playlist_id, content_id = (
            params[key][0] for key in required_keys)

        content_data = self._download_xml(
            'https://mediahub.rice.edu/api/portal/GetContentTitle',
            content_id,
            query={
                'portalId': portal_id,
                'playlistId': playlist_id,
                'contentId': content_id
            })
        metadata = xpath_element(content_data, './/metaData', fatal=True)
        title = xpath_text(metadata, 'primaryTitle', fatal=True)
        encodings = xpath_element(content_data, './/encodings', fatal=True)

        # The player config request needs the temporary link id taken from
        # the encodings element of the first response.
        temporary_link_id = xpath_text(encodings, 'temporaryLinkId', fatal=True)
        player_data = self._download_xml(
            'https://mediahub.rice.edu/api/player/GetPlayerConfig',
            content_id,
            query={
                'temporaryLinkId': temporary_link_id,
                'contentId': content_id,
            })

        # Width/height are parsed from a "<w>x<h>" string and copied onto
        # every directly built format entry (not the HLS-derived ones).
        shared_fields = {}
        size = xpath_text(encodings, 'dimensions')
        size_parts = size.split('x') if size else []
        if len(size_parts) == 2:
            width, height = size_parts
            shared_fields = {
                'width': int_or_none(width),
                'height': int_or_none(height),
            }

        formats = []

        rtsp_url = xpath_text(player_data, self._xpath_ns('RtspPath', self._NS))
        if rtsp_url:
            formats.append(dict({
                'url': rtsp_url,
                'format_id': 'rtsp',
            }, **shared_fields))

        for source in player_data.findall(self._xpath_ns('.//Source', self._NS)):
            media_url = xpath_text(source, self._xpath_ns('File', self._NS))
            if not media_url:
                continue

            if '.m3u8' in media_url:
                formats.extend(self._extract_m3u8_formats(
                    media_url, content_id, 'mp4', 'm3u8_native',
                    m3u8_id='hls', fatal=False))
                continue

            # The URL scheme (text before the first ':') doubles as format id.
            entry = dict({
                'url': media_url,
                'format_id': media_url.split(':')[0],
            }, **shared_fields)
            # RTMP URLs are split into the connection URL, app and play path.
            rtmp_match = re.search(
                r'^(?P<url>rtmp://[^/]+/(?P<app>.+))/(?P<playpath>mp4:.+)$',
                media_url)
            if rtmp_match:
                entry.update({
                    'url': rtmp_match.group('url'),
                    'play_path': rtmp_match.group('playpath'),
                    'app': rtmp_match.group('app'),
                    'ext': 'flv',
                })
            formats.append(entry)

        thumbnails = []
        for asset in content_data.findall('.//contentAssets'):
            # Only assets typed "image" that carry an HTTP path are kept.
            if xpath_text(asset, 'type') != 'image':
                continue
            image_url = xpath_text(asset, 'httpPath')
            if not image_url:
                continue
            thumbnails.append({
                'id': xpath_text(asset, 'ID'),
                'url': image_url,
            })

        return {
            'id': content_id,
            'title': title,
            'description': xpath_text(metadata, 'abstract'),
            'duration': int_or_none(xpath_text(metadata, 'duration')),
            'timestamp': parse_iso8601(xpath_text(metadata, 'dateUpdated')),
            'thumbnails': thumbnails,
            'formats': formats,
        }