]>
Commit | Line | Data |
---|---|---|
cd7ee7aa JMF |
1 | from __future__ import unicode_literals |
2 | ||
0bc56fa6 | 3 | import re |
0bc56fa6 JMF |
4 | |
5 | from .common import InfoExtractor | |
574b2a73 | 6 | from .theplatform import ThePlatformIE |
1cc79574 | 7 | from ..utils import ( |
37e64add | 8 | find_xpath_attr, |
0fe2ff78 | 9 | lowercase_escape, |
b46b65ed | 10 | smuggle_url, |
0fe2ff78 | 11 | unescapeHTML, |
6e416b21 | 12 | update_url_query, |
37e64add | 13 | ) |
0bc56fa6 JMF |
14 | |
15 | ||
class NBCIE(InfoExtractor):
    # Handles nbc.com video pages: the page is scraped for the embedded
    # player URL, which is then handed over to the ThePlatform extractor.
    _VALID_URL = r'https?://(?:www\.)?nbc\.com/(?:[^/]+/)+(?P<id>n?\d+)'

    _TESTS = [
        {
            'url': 'http://www.nbc.com/the-tonight-show/segments/112966',
            'info_dict': {
                'id': '112966',
                'ext': 'mp4',
                'title': 'Jimmy Fallon Surprises Fans at Ben & Jerry\'s',
                'description': 'Jimmy gives out free scoops of his new "Tonight Dough" ice cream flavor by surprising customers at the Ben & Jerry\'s scoop shop.',
                'timestamp': 1424246400,
                'upload_date': '20150218',
                'uploader': 'NBCU-COM',
            },
            'params': {
                # m3u8 download
                'skip_download': True,
            },
        },
        {
            'url': 'http://www.nbc.com/the-tonight-show/episodes/176',
            'info_dict': {
                'id': '176',
                'ext': 'flv',
                'title': 'Ricky Gervais, Steven Van Zandt, ILoveMakonnen',
                'description': 'A brand new episode of The Tonight Show welcomes Ricky Gervais, Steven Van Zandt and ILoveMakonnen.',
            },
            'skip': '404 Not Found',
        },
        {
            'url': 'http://www.nbc.com/saturday-night-live/video/star-wars-teaser/2832821',
            'info_dict': {
                'id': '2832821',
                'ext': 'mp4',
                'title': 'Star Wars Teaser',
                'description': 'md5:0b40f9cbde5b671a7ff62fceccc4f442',
                'timestamp': 1417852800,
                'upload_date': '20141206',
                'uploader': 'NBCU-COM',
            },
            'params': {
                # m3u8 download
                'skip_download': True,
            },
            'skip': 'Only works from US',
        },
        {
            # The video itself has expired, but the page carries an escaped embedURL
            'url': 'http://www.nbc.com/parenthood/episode-guide/season-5/just-like-at-home/515',
            'only_matching': True,
        },
        {
            # HLS streams require the 'hdnea3' cookie
            'url': 'http://www.nbc.com/Kings/video/goliath/n1806',
            'info_dict': {
                'id': 'n1806',
                'ext': 'mp4',
                'title': 'Goliath',
                'description': 'When an unknown soldier saves the life of the King\'s son in battle, he\'s thrust into the limelight and politics of the kingdom.',
                'timestamp': 1237100400,
                'upload_date': '20090315',
                'uploader': 'NBCU-COM',
            },
            'params': {
                'skip_download': True,
            },
            'skip': 'Only works from US',
        }
    ]

    def _real_extract(self, url):
        """Locate the player URL in the page and delegate to ThePlatform."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Patterns are tried in order; the first one that matches wins.
        player_url_patterns = [
            r'(?:class="video-player video-player-full" data-mpx-url|class="player" src)="(.*?)"',
            r'<iframe[^>]+src="((?:https?:)?//player\.theplatform\.com/[^"]+)"',
            r'"embedURL"\s*:\s*"([^"]+)"'
        ]
        raw_player_url = self._html_search_regex(
            player_url_patterns, webpage, 'theplatform url')

        # Drop the "_no_endcard" marker and undo JSON-style "\/" escaping
        # before decoding the remaining escapes and HTML entities.
        cleaned_player_url = raw_player_url.replace('_no_endcard', '')
        cleaned_player_url = cleaned_player_url.replace('\\/', '/')
        theplatform_url = unescapeHTML(lowercase_escape(cleaned_player_url))

        # Protocol-relative URLs get an explicit scheme.
        if theplatform_url.startswith('//'):
            theplatform_url = 'http:' + theplatform_url

        result = {
            '_type': 'url_transparent',
            'ie_key': 'ThePlatform',
            'url': smuggle_url(theplatform_url, {'source_url': url}),
            'id': video_id,
        }
        return result
020cf5eb JMF |
105 | |
106 | ||
class NBCSportsVPlayerIE(InfoExtractor):
    # Handles vplayer.nbcsports.com player pages and forwards the
    # og:video URL found there to the ThePlatform extractor.
    _VALID_URL = r'https?://vplayer\.nbcsports\.com/(?:[^/]+/)+(?P<id>[0-9a-zA-Z_]+)'

    _TESTS = [{
        'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_share/select/9CsDKds0kvHI',
        'info_dict': {
            'id': '9CsDKds0kvHI',
            'ext': 'flv',
            'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d',
            'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson',
            'timestamp': 1426270238,
            'upload_date': '20150313',
            'uploader': 'NBCU-SPORTS',
        }
    }, {
        'url': 'http://vplayer.nbcsports.com/p/BxmELC/nbc_embedshare/select/_hqLjQ95yx8Z',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_url(webpage):
        """Return the src of the first vplayer iframe in webpage, or None."""
        match = re.search(
            r'<iframe[^>]+src="(?P<url>https?://vplayer\.nbcsports\.com/[^"]+)"',
            webpage)
        return match.group('url') if match else None

    def _real_extract(self, url):
        """Download the player page and delegate its og:video URL to ThePlatform."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        return self.url_result(
            self._og_search_video_url(webpage), 'ThePlatform')
138 | ||
139 | ||
class NBCSportsIE(InfoExtractor):
    # Handles nbcsports.com article pages that embed a vplayer iframe.
    # NOTE(review): an earlier comment here said https was left out because
    # of an invalid certificate, but the pattern below accepts both schemes.
    _VALID_URL = r'https?://(?:www\.)?nbcsports\.com//?(?:[^/]+/)+(?P<id>[0-9a-z-]+)'

    _TEST = {
        'url': 'http://www.nbcsports.com//college-basketball/ncaab/tom-izzo-michigan-st-has-so-much-respect-duke',
        'info_dict': {
            'id': 'PHJSaFWbrTY9',
            'ext': 'flv',
            'title': 'Tom Izzo, Michigan St. has \'so much respect\' for Duke',
            'description': 'md5:ecb459c9d59e0766ac9c7d5d0eda8113',
            'uploader': 'NBCU-SPORTS',
            'upload_date': '20150330',
            'timestamp': 1427726529,
        }
    }

    def _real_extract(self, url):
        """Find the embedded vplayer iframe and delegate to NBCSportsVPlayer.

        Raises ExtractorError when the page contains no vplayer iframe,
        instead of handing a None URL to the next extractor.
        """
        # Imported locally so this block does not depend on the file-level
        # import list being extended.
        from ..utils import ExtractorError

        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # _extract_url returns None when no matching iframe is present.
        vplayer_url = NBCSportsVPlayerIE._extract_url(webpage)
        if not vplayer_url:
            raise ExtractorError('Unable to find NBC Sports vplayer iframe')

        return self.url_result(vplayer_url, 'NBCSportsVPlayer')
162 | ||
163 | ||
class CSNNEIE(InfoExtractor):
    # Handles csnne.com video pages; the stream URL is read from the
    # twitter:player:stream meta tag and passed to ThePlatform.
    _VALID_URL = r'https?://(?:www\.)?csnne\.com/video/(?P<id>[0-9a-z-]+)'

    _TEST = {
        'url': 'http://www.csnne.com/video/snc-evening-update-wright-named-red-sox-no-5-starter',
        'info_dict': {
            'id': 'yvBLLUgQ8WU0',
            'ext': 'mp4',
            'title': 'SNC evening update: Wright named Red Sox\' No. 5 starter.',
            'description': 'md5:1753cfee40d9352b19b4c9b3e589b9e3',
            'timestamp': 1459369979,
            'upload_date': '20160330',
            'uploader': 'NBCU-SPORTS',
        }
    }

    def _real_extract(self, url):
        """Return a url_transparent result pointing at the page's stream URL."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        stream_url = self._html_search_meta('twitter:player:stream', webpage)
        result = {
            '_type': 'url_transparent',
            'ie_key': 'ThePlatform',
            'url': stream_url,
            'display_id': display_id,
        }
        return result
189 | ||
190 | ||
class NBCNewsIE(ThePlatformIE):
    # Handles nbcnews.com, today.com and msnbc.com pages. URLs of the form
    # /video/.../<digits> are resolved through an XML endpoint; everything
    # else is resolved through a ThePlatform feed.
    _VALID_URL = r'''(?x)https?://(?:www\.)?(?:nbcnews|today|msnbc)\.com/
        (?:video/.+?/(?P<id>\d+)|
        ([^/]+/)*(?:.*-)?(?P<mpx_id>[^/?]+))
        '''

    _TESTS = [
        {
            'url': 'http://www.nbcnews.com/video/nbc-news/52753292',
            'md5': '47abaac93c6eaf9ad37ee6c4463a5179',
            'info_dict': {
                'id': '52753292',
                'ext': 'flv',
                'title': 'Crew emerges after four-month Mars food study',
                'description': 'md5:24e632ffac72b35f8b67a12d1b6ddfc1',
            },
        },
        {
            'url': 'http://www.nbcnews.com/watch/nbcnews-com/how-twitter-reacted-to-the-snowden-interview-269389891880',
            'md5': 'af1adfa51312291a017720403826bb64',
            'info_dict': {
                'id': 'p_tweet_snow_140529',
                'ext': 'mp4',
                'title': 'How Twitter Reacted To The Snowden Interview',
                'description': 'md5:65a0bd5d76fe114f3c2727aa3a81fe64',
                'uploader': 'NBCU-NEWS',
                'timestamp': 1401363060,
                'upload_date': '20140529',
            },
        },
        {
            'url': 'http://www.nbcnews.com/feature/dateline-full-episodes/full-episode-family-business-n285156',
            'md5': 'fdbf39ab73a72df5896b6234ff98518a',
            'info_dict': {
                'id': '529953347624',
                'ext': 'mp4',
                'title': 'FULL EPISODE: Family Business',
                'description': 'md5:757988edbaae9d7be1d585eb5d55cc04',
            },
            'skip': 'This page is unavailable.',
        },
        {
            'url': 'http://www.nbcnews.com/nightly-news/video/nightly-news-with-brian-williams-full-broadcast-february-4-394064451844',
            'md5': '73135a2e0ef819107bbb55a5a9b2a802',
            'info_dict': {
                'id': 'nn_netcast_150204',
                'ext': 'mp4',
                'title': 'Nightly News with Brian Williams Full Broadcast (February 4)',
                'description': 'md5:1c10c1eccbe84a26e5debb4381e2d3c5',
                'timestamp': 1423104900,
                'uploader': 'NBCU-NEWS',
                'upload_date': '20150205',
            },
        },
        {
            'url': 'http://www.nbcnews.com/business/autos/volkswagen-11-million-vehicles-could-have-suspect-software-emissions-scandal-n431456',
            'md5': 'a49e173825e5fcd15c13fc297fced39d',
            'info_dict': {
                'id': 'x_lon_vwhorn_150922',
                'ext': 'mp4',
                'title': 'Volkswagen U.S. Chief:\xa0 We Have Totally Screwed Up',
                'description': 'md5:c8be487b2d80ff0594c005add88d8351',
                'upload_date': '20150922',
                'timestamp': 1442917800,
                'uploader': 'NBCU-NEWS',
            },
        },
        {
            'url': 'http://www.today.com/video/see-the-aurora-borealis-from-space-in-stunning-new-nasa-video-669831235788',
            'md5': '118d7ca3f0bea6534f119c68ef539f71',
            'info_dict': {
                'id': 'tdy_al_space_160420',
                'ext': 'mp4',
                'title': 'See the aurora borealis from space in stunning new NASA video',
                'description': 'md5:74752b7358afb99939c5f8bb2d1d04b1',
                'upload_date': '20160420',
                'timestamp': 1461152093,
                'uploader': 'NBCU-NEWS',
            },
        },
        {
            'url': 'http://www.msnbc.com/all-in-with-chris-hayes/watch/the-chaotic-gop-immigration-vote-314487875924',
            'md5': '6d236bf4f3dddc226633ce6e2c3f814d',
            'info_dict': {
                'id': 'n_hayes_Aimm_140801_272214',
                'ext': 'mp4',
                'title': 'The chaotic GOP immigration vote',
                'description': 'The Republican House votes on a border bill that has no chance of getting through the Senate or signed by the President and is drawing criticism from all sides.',
                'thumbnail': r're:^https?://.*\.jpg$',
                'timestamp': 1406937606,
                'upload_date': '20140802',
                'uploader': 'NBCU-NEWS',
            },
        },
        {
            'url': 'http://www.nbcnews.com/watch/dateline/full-episode--deadly-betrayal-386250819952',
            'only_matching': True,
        },
        {
            # From http://www.vulture.com/2016/06/letterman-couldnt-care-less-about-late-night.html
            'url': 'http://www.nbcnews.com/widget/video-embed/701714499682',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Resolve a news page to media info via XML or a ThePlatform feed."""
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        if video_id is not None:
            # Numeric /video/ URLs: metadata comes from an XML document.
            xml_doc = self._download_xml(
                'http://www.nbcnews.com/id/%s/displaymode/1219' % video_id,
                video_id)
            video_node = xml_doc.find('video')

            # Fields are read in the same order as they appear in the result.
            headline = video_node.find('headline').text
            flash_url = find_xpath_attr(
                video_node, 'media', 'type', 'flashVideo').text
            caption = video_node.find('caption').text
            thumbnail_url = find_xpath_attr(
                video_node, 'media', 'type', 'thumbnail').text

            return {
                'id': video_id,
                'title': headline,
                'ext': 'flv',
                'url': flash_url,
                'description': caption,
                'thumbnail': thumbnail_url,
            }

        # "feature" and "nightly-news" pages use theplatform.com
        video_id = mobj.group('mpx_id')
        webpage = self._download_webpage(url, video_id)

        filter_param = 'byId'
        bootstrap_patterns = [
            r'(?m)(?:var\s+(?:bootstrapJson|playlistData)|NEWS\.videoObj)\s*=\s*({.+});?\s*$',
            r'videoObj\s*:\s*({.+})',
            r'data-video="([^"]+)"',
            r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
        ]
        bootstrap_json = self._search_regex(
            bootstrap_patterns, webpage, 'bootstrap json', default=None)

        if bootstrap_json:
            bootstrap = self._parse_json(
                bootstrap_json, video_id, transform_source=unescapeHTML)

            # The video metadata lives under different keys depending on the
            # page; the first key present decides, else the root is used.
            info_locators = (
                ('results', lambda data: data['results'][0]['video']),
                ('video', lambda data: data['video']),
                ('msnbcVideoInfo', lambda data: data['msnbcVideoInfo']['meta']),
                ('msnbcThePlatform',
                 lambda data: data['msnbcThePlatform']['videoPlayer']['video']),
            )
            info = bootstrap
            for top_level_key, locate in info_locators:
                if top_level_key in bootstrap:
                    info = locate(bootstrap)
                    break

            # A guid takes precedence and switches the feed filter; an mpxId
            # only replaces the id and keeps the default 'byId' filter.
            if 'guid' in info:
                video_id = info['guid']
                filter_param = 'byGuid'
            elif 'mpxId' in info:
                video_id = info['mpxId']

        # http://feed.theplatform.com/f/2E2eJC/nbcnews also works
        feed_url = update_url_query(
            'http://feed.theplatform.com/f/2E2eJC/nnd_NBCNews',
            {filter_param: video_id})

        return {
            '_type': 'url_transparent',
            'id': video_id,
            'url': feed_url,
            'ie_key': 'ThePlatformFeed',
        }
be457302 YCH |
351 | |
352 | ||
class NBCOlympicsIE(InfoExtractor):
    # Handles nbcolympics.com video pages; the iframe URL from the page's
    # Drupal settings is rewritten to a player.theplatform.com URL.
    _VALID_URL = r'https?://www\.nbcolympics\.com/video/(?P<id>[a-z-]+)'

    _TEST = {
        # Geo-restricted to US
        'url': 'http://www.nbcolympics.com/video/justin-roses-son-leo-was-tears-after-his-dad-won-gold',
        'md5': '54fecf846d05429fbaa18af557ee523a',
        'info_dict': {
            'id': 'WjTBzDXx5AUq',
            'display_id': 'justin-roses-son-leo-was-tears-after-his-dad-won-gold',
            'ext': 'mp4',
            'title': 'Rose\'s son Leo was in tears after his dad won gold',
            'description': 'Olympic gold medalist Justin Rose gets emotional talking to the impact his win in men\'s golf has already had on his children.',
            'timestamp': 1471274964,
            'upload_date': '20160815',
            'uploader': 'NBCU-SPORTS',
        },
    }

    def _real_extract(self, url):
        """Read the iframe URL from Drupal settings and delegate to ThePlatform."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        settings_json = self._search_regex(
            r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
            webpage, 'drupal settings')
        drupal_settings = self._parse_json(settings_json, display_id)

        # Swap the vplayer host for the ThePlatform player host.
        theplatform_url = drupal_settings['vod']['iframe_url'].replace(
            'vplayer.nbcolympics.com', 'player.theplatform.com')

        result = {
            '_type': 'url_transparent',
            'url': theplatform_url,
            'ie_key': ThePlatformIE.ie_key(),
            'display_id': display_id,
        }
        return result