]>
Commit | Line | Data |
---|---|---|
877f440f | 1 | # coding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
4 | import re | |
5 | ||
6 | from .common import InfoExtractor | |
7 | from ..compat import compat_parse_qs | |
8 | from ..utils import ( | |
9 | xpath_text, | |
10 | xpath_element, | |
11 | int_or_none, | |
12 | parse_iso8601, | |
13 | ExtractorError, | |
14 | ) | |
15 | ||
16 | ||
class RICEIE(InfoExtractor):
    """Extractor for video pages under mediahub.rice.edu/app/Portal/video.aspx."""

    # Everything after the '?' is captured as ``query`` and parsed later.
    _VALID_URL = r'https?://mediahub\.rice\.edu/app/[Pp]ortal/video\.aspx\?(?P<query>.+)'
    _TEST = {
        'url': 'https://mediahub.rice.edu/app/Portal/video.aspx?PortalID=25ffd62c-3d01-4b29-8c70-7c94270efb3e&DestinationID=66bc9434-03bd-4725-b47e-c659d8d809db&ContentID=YEWIvbhb40aqdjMD1ALSqw',
        'md5': '9b83b4a2eead4912dc3b7fac7c449b6a',
        'info_dict': {
            'id': 'YEWIvbhb40aqdjMD1ALSqw',
            'ext': 'mp4',
            'title': 'Active Learning in Archeology',
            'upload_date': '20140616',
            'timestamp': 1402926346,
        }
    }
    # XML namespace used by the elements of the player configuration document.
    _NS = 'http://schemas.datacontract.org/2004/07/ensembleVideo.Data.Service.Contracts.Models.Player.Config'

    def _real_extract(self, url):
        """Build the info dict for the video referenced by ``url``.

        The URL query string must carry non-empty ``PortalID``,
        ``DestinationID`` and ``ContentID`` values; otherwise an
        ``ExtractorError('Invalid URL')`` is raised.  Two XML documents are
        fetched (content title data, then player configuration) and used to
        assemble title, metadata, formats and thumbnails.
        """
        params = compat_parse_qs(re.match(self._VALID_URL, url).group('query'))
        required_keys = ('PortalID', 'DestinationID', 'ContentID')
        if not all(params.get(key) for key in required_keys):
            raise ExtractorError('Invalid URL', expected=True)
        # compat_parse_qs maps each key to a list of values; use the first.
        portal_id, playlist_id, content_id = [
            params[key][0] for key in required_keys]

        content_query = {
            'portalId': portal_id,
            'playlistId': playlist_id,
            'contentId': content_id
        }
        content_data = self._download_xml(
            'https://mediahub.rice.edu/api/portal/GetContentTitle',
            content_id, query=content_query)
        metadata = xpath_element(content_data, './/metaData', fatal=True)
        title = xpath_text(metadata, 'primaryTitle', fatal=True)
        encodings = xpath_element(content_data, './/encodings', fatal=True)

        # The temporary link id is mandatory: resolve it before the request.
        temporary_link_id = xpath_text(encodings, 'temporaryLinkId', fatal=True)
        player_data = self._download_xml(
            'https://mediahub.rice.edu/api/player/GetPlayerConfig',
            content_id, query={
                'temporaryLinkId': temporary_link_id,
                'contentId': content_id,
            })

        # Fields shared by every directly built format (not the HLS ones).
        shared_fields = {}
        dimensions = xpath_text(encodings, 'dimensions')
        size_parts = dimensions.split('x') if dimensions else ()
        if len(size_parts) == 2:
            shared_fields['width'] = int_or_none(size_parts[0])
            shared_fields['height'] = int_or_none(size_parts[1])

        formats = []
        rtsp_url = xpath_text(player_data, self._xpath_ns('RtspPath', self._NS))
        if rtsp_url:
            rtsp_format = {
                'url': rtsp_url,
                'format_id': 'rtsp',
            }
            rtsp_format.update(shared_fields)
            formats.append(rtsp_format)

        file_xpath = self._xpath_ns('File', self._NS)
        for source in player_data.findall(self._xpath_ns('.//Source', self._NS)):
            video_url = xpath_text(source, file_xpath)
            if not video_url:
                continue
            if '.m3u8' in video_url:
                formats.extend(self._extract_m3u8_formats(
                    video_url, content_id, 'mp4', 'm3u8_native',
                    m3u8_id='hls', fatal=False))
                continue
            # The URL scheme (text before the first ':') names the format.
            entry = {
                'url': video_url,
                'format_id': video_url.split(':')[0],
            }
            entry.update(shared_fields)
            # RTMP URLs are split into connection URL, app and play path.
            rtmp_match = re.search(
                r'^(?P<url>rtmp://[^/]+/(?P<app>.+))/(?P<playpath>mp4:.+)$',
                video_url)
            if rtmp_match:
                entry['url'] = rtmp_match.group('url')
                entry['play_path'] = rtmp_match.group('playpath')
                entry['app'] = rtmp_match.group('app')
                entry['ext'] = 'flv'
            formats.append(entry)
        self._sort_formats(formats)

        # Only content assets of type 'image' that have a URL become thumbnails.
        thumbnails = []
        for asset in content_data.findall('.//contentAssets'):
            if xpath_text(asset, 'type') != 'image':
                continue
            image_url = xpath_text(asset, 'httpPath')
            if image_url:
                thumbnails.append({
                    'id': xpath_text(asset, 'ID'),
                    'url': image_url,
                })

        return {
            'id': content_id,
            'title': title,
            'description': xpath_text(metadata, 'abstract'),
            'duration': int_or_none(xpath_text(metadata, 'duration')),
            'timestamp': parse_iso8601(xpath_text(metadata, 'dateUpdated')),
            'thumbnails': thumbnails,
            'formats': formats,
        }