3 from .common
import InfoExtractor
13 class Channel9IE(InfoExtractor
):
# URLs handled by this extractor: channel9.msdn.com or s.ch9.ms, capturing the
# content path and an optional trailing "/RSS" marker as named groups.
_VALID_URL = r'https?://(?:www\.)?(?:channel9\.msdn\.com|s\.ch9\.ms)/(?P<contentpath>.+?)(?P<rss>/RSS)?/?(?:[?#&]|$)'
# Pattern for <iframe> embeds whose src points at a channel9.msdn.com
# ".../player" URL; the "url" group is the address up to (not including)
# "player".
_EMBED_REGEX = [r'<iframe[^>]+src=["\'](?P<url>https?://channel9\.msdn\.com/(?:[^/]+/)+)player\b']
20 'url
': 'http
://channel9
.msdn
.com
/Events
/TechEd
/Australia
/2013/KOS002
',
21 'md5
': '32083d4eaf1946db6d454313f44510ca
',
23 'id': '6c413323
-383a
-49dc
-88f9
-a22800cab024
',
25 'title
': 'Developer Kick
-Off Session
: Stuff We Love
',
26 'description
': 'md5
:b80bf9355a503c193aff7ec6cd5a7731
',
28 'thumbnail
': r're
:https?
://.*\
.jpg
',
29 'timestamp
': 1377717420,
30 'upload_date
': '20130828',
31 'session_code
': 'KOS002
',
32 'session_room
': 'Arena
1A
',
33 'session_speakers
': 'count
:5',
36 'url
': 'http
://channel9
.msdn
.com
/posts
/Self
-service
-BI
-with-Power
-BI
-nuclear
-testing
',
37 'md5
': 'dcf983ee6acd2088e7188c3cf79b46bc
',
39 'id': 'fe8e435f
-bb93
-4e01
-8e97
-a28c01887024
',
41 'title
': 'Self
-service BI
with Power BI
- nuclear testing
',
42 'description
': 'md5
:2d17fec927fc91e9e17783b3ecc88f54
',
44 'thumbnail
': r're
:https?
://.*\
.jpg
',
45 'timestamp
': 1386381991,
46 'upload_date
': '20131207',
47 'authors
': ['Mike Wilmot
'],
50 # low quality mp4 is best
51 'url
': 'https
://channel9
.msdn
.com
/Events
/CPP
/CppCon
-2015/Ranges
-for-the
-Standard
-Library
',
53 'id': '33ad69d2
-6a4e
-4172
-83a1
-a523013dec76
',
55 'title
': 'Ranges
for the Standard Library
',
56 'description
': 'md5
:9895e0a9fd80822d2f01c454b8f4a372
',
58 'thumbnail
': r're
:https?
://.*\
.jpg
',
59 'upload_date
': '20150930',
60 'timestamp
': 1443640735,
63 'skip_download
': True,
66 'url
': 'https
://channel9
.msdn
.com
/Events
/DEVintersection
/DEVintersection
-2016/RSS
',
68 'id': 'Events
/DEVintersection
/DEVintersection
-2016',
69 'title
': 'DEVintersection
2016 Orlando Sessions
',
71 'playlist_mincount
': 14,
73 'url
': 'https
://channel9
.msdn
.com
/Niners
/Splendid22
/Queue
/76acff796e8f411184b008028e0d492b
/RSS
',
74 'only_matching
': True,
76 'url
': 'https
://channel9
.msdn
.com
/Events
/Speakers
/scott
-hanselman
/RSS?UrlSafeName
=scott
-hanselman
',
77 'only_matching
': True,
# Template for the RSS feed of a content path; "%s" receives the path.
_RSS_URL = 'http://channel9.msdn.com/%s/RSS'
def _extract_list(self, video_id, rss_url=None):
    """Build a playlist from a Channel 9 RSS feed.

    Args:
        video_id: Content path. Used as the playlist id and, when no feed
            URL is supplied, substituted into ``self._RSS_URL``.
        rss_url: Feed URL to download. When ``None`` or empty, the URL is
            derived from ``video_id``.

    Returns:
        Whatever ``self.playlist_result`` returns for one ``Channel9`` URL
        result per ``./channel/item/link`` element, titled with the text of
        ``./channel/title`` (``None`` when the feed has no title element).
    """
    # Only fall back to the template when the caller gave no feed URL;
    # overwriting it unconditionally would drop e.g. a query string that
    # the caller's URL carried.
    if not rss_url:
        rss_url = self._RSS_URL % video_id
    rss = self._download_xml(rss_url, video_id, 'Downloading RSS')
    entries = [
        self.url_result(session_url.text, 'Channel9')
        for session_url in rss.findall('./channel/item/link')
    ]
    # find() returns None for a missing element; do not crash on .text.
    title_node = rss.find('./channel/title')
    title_text = title_node.text if title_node is not None else None
    return self.playlist_result(entries, video_id, title_text)
# Extract results for a single Channel 9 URL.
#
# The URL is split into a content path and an optional RSS marker via
# _match_valid_url. The method then either hands off to _extract_list or
# downloads the web page, reads its data-episode JSON, queries the odata API
# and returns a playlist result.
# NOTE(review): the embedded line numbers jump in several places, so the
# conditions guarding each branch and the initialisers of formats, urls,
# subtitles, common, contents and d are not visible here. Confirm them before
# relying on the hedged notes below.
91 def _real_extract(self, url):
92 content_path, rss = self._match_valid_url(url).groups()
# Passes the page URL itself as the feed URL (presumably only when the rss
# group matched; the guard is not visible).
95 return self._extract_list(content_path, url)
97 webpage = self._download_webpage(
98 url, content_path, 'Downloading web page
')
# Look for a data-episode='...' attribute in the page. default=None is passed,
# presumably so that a miss yields None instead of an error.
100 episode_data = self._search_regex(
101 r"data-episode='([^
']+)'", webpage, 'episode data', default=None)
# The attribute value is HTML-unescaped and then parsed as JSON.
103 episode_data = self._parse_json(unescapeHTML(
104 episode_data), content_path)
105 content_id = episode_data['contentId']
# True when the API path contains '/Sessions('.
106 is_session = '/Sessions(' in episode_data['api']
# Build the odata query: a shared $select field list, followed by one of two
# suffixes (speaker-related fields or author-related fields). Presumably the
# choice depends on is_session; the condition is not visible.
107 content_url = 'https://channel9.msdn.com/odata' + episode_data['api'] + '?$select=Captions,CommentCount,MediaLengthInSeconds,PublishedDate,Rating,RatingCount,Title,VideoMP4High,VideoMP4Low,VideoMP4Medium,VideoPlayerPreviewImage,VideoWMV,VideoWMVHQ,Views,'
109 content_url += 'Code,Description,Room,Slides,Speakers,ZipFile&$expand=Speakers'
111 content_url += 'Authors,Body&$expand=Authors'
112 content_data = self._download_json(content_url, content_id)
113 title = content_data['Title']
# Quality identifiers; presumably the tail of the QUALITIES sequence that is
# passed to qualities() below.
118 'wmv-low', 'mp4-low',
119 'wmv-mid', 'mp4-mid',
120 'wmv-high', 'mp4-high',
123 quality_key = qualities(QUALITIES)
# Rank a format: any URL containing '_Source.' gets len(QUALITIES); every
# other URL gets whatever quality_key returns for its quality id.
125 def quality(quality_id, format_url):
126 return (len(QUALITIES) if '_Source.' in format_url
127 else quality_key(quality_id))
# Label-to-quality-id pairs; presumably entries of the SITE_QUALITIES mapping
# consulted further down.
135 'Low Quality WMV': 'wmv-low',
136 'Low Quality MP4': 'mp4-low',
137 'Mid Quality WMV': 'wmv-mid',
138 'Mid Quality MP4': 'mp4-mid',
139 'High Quality WMV': 'wmv-high',
140 'High Quality MP4': 'mp4-high',
# Capture the inner markup of the page's <select name="format..."> element;
# default=None is passed here as well.
143 formats_select = self._search_regex(
144 r'(?s)<select[^>]+name=["\']format
[^
>]+>(.+?
)</select
', webpage,
145 'formats select
', default=None)
# Each <option value="URL">label< match supplies a URL and a format label.
147 for mobj in re.finditer(
148 r'<option
\b[^
>]+\bvalue
=(["\'])(?P<url>(?:(?!\1).)+)\1[^>]*>\s*(?P<format>[^<]+?)\s*<',
150 format_url = mobj.group('url')
# urls is tested for membership; presumably it records URLs already handled.
151 if format_url in urls:
154 format_id = mobj.group('format')
# Labels without a SITE_QUALITIES entry are used as the quality id unchanged.
155 quality_id = SITE_QUALITIES.get(format_id, format_id)
158 'format_id': quality_id,
159 'quality': quality(quality_id, format_url),
# mp3 entries are flagged as having no video codec.
160 'vcodec': 'none' if quality_id == 'mp3' else None,
# API field name to quality id; presumably entries of API_QUALITIES, which is
# iterated just below.
164 'VideoMP4Low': 'mp4-low',
165 'VideoWMV': 'wmv-mid',
166 'VideoMP4Medium': 'mp4-mid',
167 'VideoMP4High': 'mp4-high',
168 'VideoWMVHQ': 'wmv-hq',
# Read each API field from content_data; empty values and URLs already in
# urls are singled out by the test below.
171 for format_id, q in API_QUALITIES.items():
172 q_url = content_data.get(format_id)
173 if not q_url or q_url in urls:
179 'quality': quality(q, q_url),
182 slides = content_data.get('Slides')
183 zip_file = content_data.get('ZipFile')
# With no formats, no slides and no zip, report that via raise_no_formats.
185 if not formats and not slides and not zip_file:
186 self.raise_no_formats(
187 'None of recording, slides or zip are available for %s' % content_path)
188 self._sort_formats(formats)
# Captions are grouped in subtitles by their Language value, 'en' when absent.
191 for caption in content_data.get('Captions', []):
192 caption_url = caption.get('Url')
195 subtitles.setdefault(caption.get('Language', 'en'), []).append({
# Metadata fields read from content_data; the description uses Description
# and falls back to Body.
203 'description': clean_html(content_data.get('Description') or content_data.get('Body')),
204 'thumbnail': content_data.get('VideoPlayerPreviewImage'),
205 'duration': int_or_none(content_data.get('MediaLengthInSeconds')),
206 'timestamp': parse_iso8601(content_data.get('PublishedDate')),
207 'avg_rating': int_or_none(content_data.get('Rating')),
208 'rating_count': int_or_none(content_data.get('RatingCount')),
209 'view_count': int_or_none(content_data.get('Views')),
210 'comment_count': int_or_none(content_data.get('CommentCount')),
211 'subtitles': subtitles,
# Collect FullName values from the Speakers entries.
215 for s in content_data.get('Speakers', []):
216 speaker_name = s.get('FullName')
219 speakers.append(speaker_name)
222 'session_code': content_data.get('Code'),
223 'session_room': content_data.get('Room'),
224 'session_speakers': speakers,
# Collect DisplayName values from the Authors entries and store them under
# common['authors'].
228 for a in content_data.get('Authors', []):
229 author_name = a.get('DisplayName')
232 authors.append(author_name)
233 common['authors'] = authors
# Three variants of d: slides titled "<title>-Slides", zip titled
# "<title>-Zip", and the recording carrying the formats list.
# NOTE(review): how d is created and added to contents is not visible.
239 d.update({'title': title + '-Slides', 'url': slides})
244 d.update({'title': title + '-Zip', 'url': zip_file})
249 d.update({'title': title, 'formats': formats})
251 return self.playlist_result(contents)
# Calls _extract_list with the content path only; presumably reached when no
# episode data was found on the page (TODO confirm).
253 return self._extract_list(content_path)