]>
Commit | Line | Data |
---|---|---|
adc267ee | 1 | from __future__ import unicode_literals |
df537474 | 2 | |
3 | import re | |
4 | ||
5 | from .common import InfoExtractor | |
1db82381 | 6 | from ..utils import ( |
bea7af69 | 7 | clean_html, |
b0f7f21c RA |
8 | int_or_none, |
9 | parse_iso8601, | |
a5d783f5 | 10 | qualities, |
bea7af69 | 11 | unescapeHTML, |
1db82381 | 12 | ) |
df537474 | 13 | |
5f6a1245 | 14 | |
class Channel9IE(InfoExtractor):
    """Information extractor for Channel 9 (channel9.msdn.com / s.ch9.ms).

    A URL ending in ``/RSS`` is treated as a feed and returned as a playlist
    of the feed's item links.  Any other URL is fetched as a web page; when
    the page carries a ``data-episode`` attribute, the referenced OData
    endpoint is queried and a playlist holding the recording and, when
    present, the slides and zip file entries is returned.  Pages without
    ``data-episode`` fall back to the RSS feed of the same content path.
    """

    IE_DESC = 'Channel 9'
    IE_NAME = 'channel9'
    _VALID_URL = r'https?://(?:www\.)?(?:channel9\.msdn\.com|s\.ch9\.ms)/(?P<contentpath>.+?)(?P<rss>/RSS)?/?(?:[?#&]|$)'

    _TESTS = [{
        'url': 'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002',
        'md5': '32083d4eaf1946db6d454313f44510ca',
        'info_dict': {
            'id': '6c413323-383a-49dc-88f9-a22800cab024',
            'ext': 'wmv',
            'title': 'Developer Kick-Off Session: Stuff We Love',
            'description': 'md5:b80bf9355a503c193aff7ec6cd5a7731',
            'duration': 4576,
            'thumbnail': r're:https?://.*\.jpg',
            'timestamp': 1377717420,
            'upload_date': '20130828',
            'session_code': 'KOS002',
            'session_room': 'Arena 1A',
            'session_speakers': 'count:5',
        },
    }, {
        'url': 'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing',
        'md5': 'dcf983ee6acd2088e7188c3cf79b46bc',
        'info_dict': {
            'id': 'fe8e435f-bb93-4e01-8e97-a28c01887024',
            'ext': 'wmv',
            'title': 'Self-service BI with Power BI - nuclear testing',
            'description': 'md5:2d17fec927fc91e9e17783b3ecc88f54',
            'duration': 1540,
            'thumbnail': r're:https?://.*\.jpg',
            'timestamp': 1386381991,
            'upload_date': '20131207',
            'authors': ['Mike Wilmot'],
        },
    }, {
        # low quality mp4 is best
        'url': 'https://channel9.msdn.com/Events/CPP/CppCon-2015/Ranges-for-the-Standard-Library',
        'info_dict': {
            'id': '33ad69d2-6a4e-4172-83a1-a523013dec76',
            'ext': 'mp4',
            'title': 'Ranges for the Standard Library',
            'description': 'md5:9895e0a9fd80822d2f01c454b8f4a372',
            'duration': 5646,
            'thumbnail': r're:https?://.*\.jpg',
            'upload_date': '20150930',
            'timestamp': 1443640735,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://channel9.msdn.com/Events/DEVintersection/DEVintersection-2016/RSS',
        'info_dict': {
            'id': 'Events/DEVintersection/DEVintersection-2016',
            'title': 'DEVintersection 2016 Orlando Sessions',
        },
        'playlist_mincount': 14,
    }, {
        'url': 'https://channel9.msdn.com/Niners/Splendid22/Queue/76acff796e8f411184b008028e0d492b/RSS',
        'only_matching': True,
    }, {
        'url': 'https://channel9.msdn.com/Events/Speakers/scott-hanselman/RSS?UrlSafeName=scott-hanselman',
        'only_matching': True,
    }]

    # Template for the RSS feed of a content path (used when no feed URL is given).
    _RSS_URL = 'http://channel9.msdn.com/%s/RSS'

    @staticmethod
    def _extract_urls(webpage):
        """Return the base URLs of Channel 9 players embedded in *webpage*.

        Each match is the ``src`` of an ``<iframe>`` up to, but not
        including, the trailing ``player`` path component.
        """
        return re.findall(
            r'<iframe[^>]+src=["\'](https?://channel9\.msdn\.com/(?:[^/]+/)+)player\b',
            webpage)

    def _extract_list(self, video_id, rss_url=None):
        """Build a playlist from an RSS feed.

        :param video_id: content path; used as the playlist id and, when
            *rss_url* is not given, to build the feed URL from ``_RSS_URL``.
        :param rss_url: explicit feed URL (optional).
        :return: playlist result whose entries point at every non-empty
            ``channel/item/link`` of the feed.
        """
        if not rss_url:
            rss_url = self._RSS_URL % video_id
        rss = self._download_xml(rss_url, video_id, 'Downloading RSS')
        # Skip <link> elements without text instead of emitting entries
        # whose URL is None.
        entries = [
            self.url_result(link.text, 'Channel9')
            for link in rss.findall('./channel/item/link')
            if link.text]
        # findtext() yields None when <title> is missing, rather than
        # failing on ``None.text``.
        title_text = rss.findtext('./channel/title') or None
        return self.playlist_result(entries, video_id, title_text)

    def _real_extract(self, url):
        """Extract a playlist for *url*.

        :return: for ``/RSS`` URLs and for pages without ``data-episode``,
            the playlist built by ``_extract_list``; otherwise a playlist
            with up to three entries (slides, zip file, recording) sharing
            the same metadata.
        """
        content_path, rss = re.match(self._VALID_URL, url).groups()

        if rss:
            return self._extract_list(content_path, url)

        webpage = self._download_webpage(
            url, content_path, 'Downloading web page')

        episode_data = self._search_regex(
            r"data-episode='([^']+)'", webpage, 'episode data', default=None)
        if not episode_data:
            # No episode on this page: fall back to the RSS feed.
            return self._extract_list(content_path)

        episode_data = self._parse_json(
            unescapeHTML(episode_data), content_path)
        content_id = episode_data['contentId']
        # The API path tells sessions apart from other content; the two
        # kinds expose different metadata fields.
        is_session = '/Sessions(' in episode_data['api']
        content_url = 'https://channel9.msdn.com/odata' + episode_data['api'] + '?$select=Captions,CommentCount,MediaLengthInSeconds,PublishedDate,Rating,RatingCount,Title,VideoMP4High,VideoMP4Low,VideoMP4Medium,VideoPlayerPreviewImage,VideoWMV,VideoWMVHQ,Views,'
        if is_session:
            content_url += 'Code,Description,Room,Slides,Speakers,ZipFile&$expand=Speakers'
        else:
            content_url += 'Authors,Body&$expand=Authors'
        content_data = self._download_json(content_url, content_id)
        title = content_data['Title']

        # Format ids listed in the order handed to qualities() for ranking.
        QUALITIES = (
            'mp3',
            'wmv', 'mp4',
            'wmv-low', 'mp4-low',
            'wmv-mid', 'mp4-mid',
            'wmv-high', 'mp4-high',
        )

        quality_key = qualities(QUALITIES)

        def quality(quality_id, format_url):
            # URLs containing '_Source.' outrank every listed quality.
            return (len(QUALITIES) if '_Source.' in format_url
                    else quality_key(quality_id))

        formats = []
        urls = set()  # URLs already collected, to avoid duplicate formats

        # Labels of the page's format <select> mapped to format ids.
        SITE_QUALITIES = {
            'MP3': 'mp3',
            'MP4': 'mp4',
            'Low Quality WMV': 'wmv-low',
            'Low Quality MP4': 'mp4-low',
            'Mid Quality WMV': 'wmv-mid',
            'Mid Quality MP4': 'mp4-mid',
            'High Quality WMV': 'wmv-high',
            'High Quality MP4': 'mp4-high',
        }

        formats_select = self._search_regex(
            r'(?s)<select[^>]+name=["\']format[^>]+>(.+?)</select', webpage,
            'formats select', default=None)
        if formats_select:
            for mobj in re.finditer(
                    r'<option\b[^>]+\bvalue=(["\'])(?P<url>(?:(?!\1).)+)\1[^>]*>\s*(?P<format>[^<]+?)\s*<',
                    formats_select):
                format_url = mobj.group('url')
                if format_url in urls:
                    continue
                urls.add(format_url)
                format_id = mobj.group('format')
                # Unknown labels are used verbatim as the format id.
                quality_id = SITE_QUALITIES.get(format_id, format_id)
                formats.append({
                    'url': format_url,
                    'format_id': quality_id,
                    'quality': quality(quality_id, format_url),
                    'vcodec': 'none' if quality_id == 'mp3' else None,
                })

        # API field names mapped to format ids.
        # NOTE(review): 'wmv-hq' does not appear in QUALITIES, so its rank
        # is whatever qualities() returns for an unlisted id - confirm
        # whether it should rank alongside 'wmv-high'.
        API_QUALITIES = {
            'VideoMP4Low': 'mp4-low',
            'VideoWMV': 'wmv-mid',
            'VideoMP4Medium': 'mp4-mid',
            'VideoMP4High': 'mp4-high',
            'VideoWMVHQ': 'wmv-hq',
        }

        for format_id, q in API_QUALITIES.items():
            q_url = content_data.get(format_id)
            if not q_url or q_url in urls:
                continue
            urls.add(q_url)
            formats.append({
                'url': q_url,
                'format_id': q,
                'quality': quality(q, q_url),
            })

        slides = content_data.get('Slides')
        zip_file = content_data.get('ZipFile')

        if not formats and not slides and not zip_file:
            self.raise_no_formats(
                'None of recording, slides or zip are available for %s' % content_path)
        # Pages offering only slides and/or a zip file are valid, so the
        # sorter is only invoked when there is something to sort (the base
        # class sorter may reject an empty list).
        if formats:
            self._sort_formats(formats)

        subtitles = {}
        # ``or []`` also covers an explicit null value in the API response.
        for caption in content_data.get('Captions') or []:
            caption_url = caption.get('Url')
            if not caption_url:
                continue
            subtitles.setdefault(caption.get('Language', 'en'), []).append({
                'url': caption_url,
                'ext': 'vtt',
            })

        # Metadata shared by every entry of the resulting playlist.
        common = {
            'id': content_id,
            'title': title,
            'description': clean_html(content_data.get('Description') or content_data.get('Body')),
            'thumbnail': content_data.get('VideoPlayerPreviewImage'),
            'duration': int_or_none(content_data.get('MediaLengthInSeconds')),
            'timestamp': parse_iso8601(content_data.get('PublishedDate')),
            'avg_rating': int_or_none(content_data.get('Rating')),
            'rating_count': int_or_none(content_data.get('RatingCount')),
            'view_count': int_or_none(content_data.get('Views')),
            'comment_count': int_or_none(content_data.get('CommentCount')),
            'subtitles': subtitles,
        }
        if is_session:
            speakers = [
                speaker['FullName']
                for speaker in content_data.get('Speakers') or []
                if speaker.get('FullName')]
            common.update({
                'session_code': content_data.get('Code'),
                'session_room': content_data.get('Room'),
                'session_speakers': speakers,
            })
        else:
            common['authors'] = [
                author['DisplayName']
                for author in content_data.get('Authors') or []
                if author.get('DisplayName')]

        contents = []

        if slides:
            d = common.copy()
            d.update({'title': title + '-Slides', 'url': slides})
            contents.append(d)

        if zip_file:
            d = common.copy()
            d.update({'title': title + '-Zip', 'url': zip_file})
            contents.append(d)

        if formats:
            d = common.copy()
            d.update({'title': title, 'formats': formats})
            contents.append(d)
        return self.playlist_result(contents)