]>
Commit | Line | Data |
---|---|---|
df537474 | 1 | import re |
2 | ||
3 | from .common import InfoExtractor | |
1db82381 | 4 | from ..utils import ( |
bea7af69 | 5 | clean_html, |
b0f7f21c RA |
6 | int_or_none, |
7 | parse_iso8601, | |
a5d783f5 | 8 | qualities, |
bea7af69 | 9 | unescapeHTML, |
1db82381 | 10 | ) |
df537474 | 11 | |
5f6a1245 | 12 | |
class Channel9IE(InfoExtractor):
    """Extractor for Channel 9 content pages and RSS listings.

    URLs matching ``_VALID_URL`` are handled in one of two ways:

    * URLs ending in ``/RSS`` (and pages without embedded episode data) are
      treated as RSS feeds and returned as a playlist of per-item URLs.
    * Pages carrying a ``data-episode`` attribute are resolved through the
      site's ``/odata`` endpoint and returned as a playlist containing the
      recording and, when present, the slides and zip attachments.
    """

    IE_DESC = 'Channel 9'
    IE_NAME = 'channel9'
    _VALID_URL = r'https?://(?:www\.)?(?:channel9\.msdn\.com|s\.ch9\.ms)/(?P<contentpath>.+?)(?P<rss>/RSS)?/?(?:[?#&]|$)'
    _EMBED_REGEX = [r'<iframe[^>]+src=["\'](?P<url>https?://channel9\.msdn\.com/(?:[^/]+/)+)player\b']

    _TESTS = [{
        'url': 'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002',
        'md5': '32083d4eaf1946db6d454313f44510ca',
        'info_dict': {
            'id': '6c413323-383a-49dc-88f9-a22800cab024',
            'ext': 'wmv',
            'title': 'Developer Kick-Off Session: Stuff We Love',
            'description': 'md5:b80bf9355a503c193aff7ec6cd5a7731',
            'duration': 4576,
            'thumbnail': r're:https?://.*\.jpg',
            'timestamp': 1377717420,
            'upload_date': '20130828',
            'session_code': 'KOS002',
            'session_room': 'Arena 1A',
            'session_speakers': 'count:5',
        },
    }, {
        'url': 'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing',
        'md5': 'dcf983ee6acd2088e7188c3cf79b46bc',
        'info_dict': {
            'id': 'fe8e435f-bb93-4e01-8e97-a28c01887024',
            'ext': 'wmv',
            'title': 'Self-service BI with Power BI - nuclear testing',
            'description': 'md5:2d17fec927fc91e9e17783b3ecc88f54',
            'duration': 1540,
            'thumbnail': r're:https?://.*\.jpg',
            'timestamp': 1386381991,
            'upload_date': '20131207',
            'authors': ['Mike Wilmot'],
        },
    }, {
        # low quality mp4 is best
        'url': 'https://channel9.msdn.com/Events/CPP/CppCon-2015/Ranges-for-the-Standard-Library',
        'info_dict': {
            'id': '33ad69d2-6a4e-4172-83a1-a523013dec76',
            'ext': 'mp4',
            'title': 'Ranges for the Standard Library',
            'description': 'md5:9895e0a9fd80822d2f01c454b8f4a372',
            'duration': 5646,
            'thumbnail': r're:https?://.*\.jpg',
            'upload_date': '20150930',
            'timestamp': 1443640735,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://channel9.msdn.com/Events/DEVintersection/DEVintersection-2016/RSS',
        'info_dict': {
            'id': 'Events/DEVintersection/DEVintersection-2016',
            'title': 'DEVintersection 2016 Orlando Sessions',
        },
        'playlist_mincount': 14,
    }, {
        'url': 'https://channel9.msdn.com/Niners/Splendid22/Queue/76acff796e8f411184b008028e0d492b/RSS',
        'only_matching': True,
    }, {
        'url': 'https://channel9.msdn.com/Events/Speakers/scott-hanselman/RSS?UrlSafeName=scott-hanselman',
        'only_matching': True,
    }]

    # Template for the RSS feed of a content path.
    _RSS_URL = 'http://channel9.msdn.com/%s/RSS'

    # Format ids in the order handed to qualities(); later entries are meant
    # to be preferred over earlier ones.
    _QUALITIES = (
        'mp3',
        'wmv', 'mp4',
        'wmv-low', 'mp4-low',
        'wmv-mid', 'mp4-mid',
        'wmv-high', 'mp4-high',
    )

    # Labels of the web page's format <select> options -> format ids.
    _SITE_QUALITIES = {
        'MP3': 'mp3',
        'MP4': 'mp4',
        'Low Quality WMV': 'wmv-low',
        'Low Quality MP4': 'mp4-low',
        'Mid Quality WMV': 'wmv-mid',
        'Mid Quality MP4': 'mp4-mid',
        'High Quality WMV': 'wmv-high',
        'High Quality MP4': 'mp4-high',
    }

    # Fields of the odata response -> format ids.
    # NOTE(review): 'wmv-hq' does not appear in _QUALITIES (which lists
    # 'wmv-high'), so its rank is whatever qualities() yields for an unknown
    # id - confirm whether this should be 'wmv-high'.
    _API_QUALITIES = {
        'VideoMP4Low': 'mp4-low',
        'VideoWMV': 'wmv-mid',
        'VideoMP4Medium': 'mp4-mid',
        'VideoMP4High': 'mp4-high',
        'VideoWMVHQ': 'wmv-hq',
    }

    def _extract_list(self, video_id, rss_url=None):
        """Return a playlist built from the items of an RSS feed.

        video_id -- content path; used as the playlist id and, when rss_url
                    is not given, to build the feed URL from _RSS_URL.
        rss_url  -- explicit feed URL to download instead.
        """
        if not rss_url:
            rss_url = self._RSS_URL % video_id
        rss = self._download_xml(rss_url, video_id, 'Downloading RSS')
        # Skip <link> elements without text rather than queueing a None URL.
        entries = [
            self.url_result(link.text, 'Channel9')
            for link in rss.findall('./channel/item/link')
            if link.text]
        # findtext() tolerates a feed without <channel><title>, where
        # find(...).text would raise AttributeError.
        title_text = rss.findtext('./channel/title') or None
        return self.playlist_result(entries, video_id, title_text)

    def _extract_formats(self, webpage, content_data):
        """Collect format dicts from the page's format <select> and the API data.

        Each distinct URL is reported once; entries found in the web page
        take precedence over the same URL listed in content_data.
        """
        quality_key = qualities(self._QUALITIES)

        def quality(quality_id, format_url):
            # URLs containing '_Source.' are ranked above every named quality.
            if '_Source.' in format_url:
                return len(self._QUALITIES)
            return quality_key(quality_id)

        formats = []
        seen_urls = set()

        formats_select = self._search_regex(
            r'(?s)<select[^>]+name=["\']format[^>]+>(.+?)</select', webpage,
            'formats select', default=None)
        if formats_select:
            for mobj in re.finditer(
                    r'<option\b[^>]+\bvalue=(["\'])(?P<url>(?:(?!\1).)+)\1[^>]*>\s*(?P<format>[^<]+?)\s*<',
                    formats_select):
                format_url = mobj.group('url')
                if format_url in seen_urls:
                    continue
                seen_urls.add(format_url)
                label = mobj.group('format')
                # Unknown labels are kept verbatim as the format id.
                quality_id = self._SITE_QUALITIES.get(label, label)
                formats.append({
                    'url': format_url,
                    'format_id': quality_id,
                    'quality': quality(quality_id, format_url),
                    'vcodec': 'none' if quality_id == 'mp3' else None,
                })

        for api_field, quality_id in self._API_QUALITIES.items():
            api_url = content_data.get(api_field)
            if not api_url or api_url in seen_urls:
                continue
            seen_urls.add(api_url)
            formats.append({
                'url': api_url,
                'format_id': quality_id,
                'quality': quality(quality_id, api_url),
            })

        return formats

    def _real_extract(self, url):
        """Extract a playlist for the given Channel 9 URL.

        RSS URLs and pages without a ``data-episode`` attribute are delegated
        to _extract_list(). Otherwise the result is a playlist holding up to
        three entries that share the same metadata: slides, zip file and the
        recording itself (in that order, each only when available).
        """
        content_path, rss = self._match_valid_url(url).groups()

        if rss:
            return self._extract_list(content_path, url)

        webpage = self._download_webpage(
            url, content_path, 'Downloading web page')

        episode_data = self._search_regex(
            r"data-episode='([^']+)'", webpage, 'episode data', default=None)
        if not episode_data:
            # No embedded episode: fall back to the RSS feed of this path.
            return self._extract_list(content_path)

        episode_data = self._parse_json(
            unescapeHTML(episode_data), content_path)
        content_id = episode_data['contentId']
        is_session = '/Sessions(' in episode_data['api']
        content_url = 'https://channel9.msdn.com/odata' + episode_data['api'] + '?$select=Captions,CommentCount,MediaLengthInSeconds,PublishedDate,Rating,RatingCount,Title,VideoMP4High,VideoMP4Low,VideoMP4Medium,VideoPlayerPreviewImage,VideoWMV,VideoWMVHQ,Views,'
        # Sessions and other content are queried for different extra fields.
        if is_session:
            content_url += 'Code,Description,Room,Slides,Speakers,ZipFile&$expand=Speakers'
        else:
            content_url += 'Authors,Body&$expand=Authors'
        content_data = self._download_json(content_url, content_id)
        title = content_data['Title']

        formats = self._extract_formats(webpage, content_data)

        slides = content_data.get('Slides')
        zip_file = content_data.get('ZipFile')

        if not formats and not slides and not zip_file:
            self.raise_no_formats(
                'None of recording, slides or zip are available for %s' % content_path)
        self._sort_formats(formats)

        subtitles = {}
        # "or []" also covers a field that is present but null in the JSON.
        for caption in content_data.get('Captions') or []:
            caption_url = caption.get('Url')
            if not caption_url:
                continue
            subtitles.setdefault(caption.get('Language', 'en'), []).append({
                'url': caption_url,
                'ext': 'vtt',
            })

        common = {
            'id': content_id,
            'title': title,
            'description': clean_html(content_data.get('Description') or content_data.get('Body')),
            'thumbnail': content_data.get('VideoPlayerPreviewImage'),
            'duration': int_or_none(content_data.get('MediaLengthInSeconds')),
            'timestamp': parse_iso8601(content_data.get('PublishedDate')),
            'avg_rating': int_or_none(content_data.get('Rating')),
            'rating_count': int_or_none(content_data.get('RatingCount')),
            'view_count': int_or_none(content_data.get('Views')),
            'comment_count': int_or_none(content_data.get('CommentCount')),
            'subtitles': subtitles,
        }
        if is_session:
            common.update({
                'session_code': content_data.get('Code'),
                'session_room': content_data.get('Room'),
                'session_speakers': [
                    speaker.get('FullName')
                    for speaker in content_data.get('Speakers') or []
                    if speaker.get('FullName')],
            })
        else:
            common['authors'] = [
                author.get('DisplayName')
                for author in content_data.get('Authors') or []
                if author.get('DisplayName')]

        contents = []

        # Attachments are exposed as separate entries with a suffixed title.
        for suffix, attachment_url in (('-Slides', slides), ('-Zip', zip_file)):
            if attachment_url:
                contents.append(
                    dict(common, title=title + suffix, url=attachment_url))

        if formats:
            contents.append(dict(common, title=title, formats=formats))

        return self.playlist_result(contents)