]>
Commit | Line | Data |
---|---|---|
c460bdd5 PH |
1 | # coding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
4 | import itertools | |
5 | import json | |
6 | import re | |
7 | ||
8 | from .common import InfoExtractor | |
9 | from ..compat import ( | |
10 | compat_urllib_request, | |
11 | compat_urlparse, | |
12 | ) | |
13 | from ..utils import ( | |
14 | int_or_none, | |
15 | js_to_json, | |
16 | mimetype2ext, | |
17 | unified_strdate, | |
18 | ) | |
19 | ||
20 | ||
class SandiaIE(InfoExtractor):
    """Extractor for Mediasite presentations on digitalops.sandia.gov.

    The player page references a JavaScript file that assigns the
    presentation metadata to ``Mediasite.PlaybackManifest.<key>``
    properties; video formats, slides and metadata are all read from
    those assignments.
    """

    IE_DESC = 'Sandia National Laboratories'
    _VALID_URL = r'https?://digitalops\.sandia\.gov/Mediasite/Play/(?P<id>[0-9a-f]+)'
    _TEST = {
        'url': 'http://digitalops.sandia.gov/Mediasite/Play/24aace4429fc450fb5b38cdbf424a66e1d',
        'md5': '9422edc9b9a60151727e4b6d8bef393d',
        'info_dict': {
            'id': '24aace4429fc450fb5b38cdbf424a66e1d',
            'ext': 'mp4',
            'title': 'Xyce Software Training - Section 1',
            'description': 're:(?s)SAND Number: SAND 2013-7800.{200,}',
            'upload_date': '20120904',
            'duration': 7794,
        }
    }

    def _real_extract(self, url):
        """Return the info dict (formats, slides and metadata) for ``url``."""
        video_id = self._match_id(url)

        # NOTE(review): the cookie presumably advertises player plugin
        # capabilities so that the server returns the player page - confirm.
        req = compat_urllib_request.Request(url)
        req.add_header('Cookie', 'MediasitePlayerCaps=ClientPlugins=4')
        webpage = self._download_webpage(req, video_id)

        js_path = self._search_regex(
            r'<script type="text/javascript" src="(/Mediasite/FileServer/Presentation/[^"]+)"',
            webpage, 'JS code URL')
        js_url = compat_urlparse.urljoin(url, js_path)

        js_code = self._download_webpage(
            js_url, video_id, note='Downloading player')

        def extract_str(key, **kwargs):
            """Return the raw right-hand side of the manifest assignment to ``key``.

            Extra keyword arguments (e.g. ``default``, ``fatal``) are passed
            through to ``_search_regex``.
            """
            return self._search_regex(
                r'Mediasite\.PlaybackManifest\.%s\s*=\s*(.+);\s*?\n' % re.escape(key),
                js_code, key, **kwargs)

        def extract_data(key, **kwargs):
            """Return the manifest value for ``key`` parsed from JS to Python.

            Returns None when the lookup itself yielded None (non-fatal or
            defaulted lookups).
            """
            data_json = extract_str(key, **kwargs)
            if data_json is None:
                return None
            return self._parse_json(
                data_json, video_id, transform_source=js_to_json)

        # Video formats are listed as VideoUrls[0], VideoUrls[1], ... until
        # the first index that is absent from the manifest.
        formats = []
        for i in itertools.count():
            fd = extract_data('VideoUrls[%d]' % i, default=None)
            if fd is None:
                break
            mime_type = fd['MimeType']
            formats.append({
                'format_id': '%s' % i,
                'format_note': mime_type.partition('/')[2],
                'ext': mimetype2ext(mime_type),
                'url': fd['Location'],
                'protocol': 'f4m' if mime_type == 'video/x-mp4-fragmented' else None,
            })
        # NOTE(review): kept before the slides pseudo-format is appended;
        # presumably this is also what rejects an empty video format list -
        # confirm against InfoExtractor._sort_formats.
        self._sort_formats(formats)

        slide_baseurl = compat_urlparse.urljoin(
            url, extract_data('SlideBaseUrl'))
        slide_name_template = extract_data('SlideImageFileNameTemplate')
        # Convert the "{0:D4}"-style placeholder into a "%04d" printf
        # placeholder. Literal percent signs (e.g. percent-encoded URL parts)
        # are doubled first so that the "%" formatting below leaves them
        # intact. The width is matched with [0-9]+ so multi-digit widths are
        # handled as well.
        slide_template = slide_baseurl.replace('%', '%%') + re.sub(
            r'\{0:D?([0-9]+)\}', r'%0\1d',
            slide_name_template.replace('%', '%%'))

        # Slides are listed as Slides[1], Slides[2], ... until the first
        # index that is absent from the manifest.
        slides = []
        last_slide_time = 0
        for i in itertools.count(1):
            sd = extract_str('Slides[%d]' % i, default=None)
            if sd is None:
                break
            timestamp = int_or_none(self._search_regex(
                r'^Mediasite\.PlaybackManifest\.CreateSlide\("[^"]*"\s*,\s*([0-9]+),',
                sd, 'slide %s timestamp' % i, fatal=False))
            if timestamp is None:
                # The timestamp lookup is non-fatal, so keep the slide but
                # leave its duration unknown instead of failing on None
                # arithmetic; the reference time is left unchanged.
                slide_duration = None
            else:
                slide_duration = timestamp - last_slide_time
                last_slide_time = timestamp
            slides.append({
                'url': slide_template % i,
                'duration': slide_duration,
            })
        formats.append({
            'format_id': 'slides',
            'protocol': 'slideshow',
            'url': json.dumps(slides),
            'preference': -10000,  # Downloader not yet written
        })
        self._sort_formats(formats)

        title = extract_data('Title')
        description = extract_data('Description', fatal=False)
        # NOTE(review): scale=1000 presumably converts milliseconds to
        # seconds - confirm against int_or_none.
        duration = int_or_none(extract_data(
            'Duration', fatal=False), scale=1000)
        upload_date = unified_strdate(extract_data('AirDate', fatal=False))

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'formats': formats,
            'upload_date': upload_date,
            'duration': duration,
        }