]>
Commit | Line | Data |
---|---|---|
cd7ee7aa JMF |
1 | from __future__ import unicode_literals |
2 | ||
0bc56fa6 | 3 | import re |
0bc56fa6 JMF |
4 | |
5 | from .common import InfoExtractor | |
574b2a73 | 6 | from .theplatform import ThePlatformIE |
fdf9b959 | 7 | from .adobepass import AdobePassIE |
1cc79574 | 8 | from ..utils import ( |
37e64add | 9 | find_xpath_attr, |
b46b65ed | 10 | smuggle_url, |
0fe2ff78 | 11 | unescapeHTML, |
6e416b21 | 12 | update_url_query, |
fdf9b959 | 13 | int_or_none, |
37e64add | 14 | ) |
0bc56fa6 JMF |
15 | |
16 | ||
class NBCIE(AdobePassIE):
    """Extractor for video pages on nbc.com.

    The page permalink is looked up through the ``api.nbc.com`` v3 videos
    endpoint; the GUID found there is appended to a ThePlatform link URL
    and the result is returned as a ``url_transparent`` entry handled by
    the ``ThePlatform`` extractor.
    """

    # The whole URL is captured as ``permalink`` (used as the API filter);
    # the trailing numeric part (optionally prefixed with "n") is ``id``.
    _VALID_URL = r'(?P<permalink>https?://(?:www\.)?nbc\.com/[^/]+/video/[^/]+/(?P<id>n?\d+))'

    _TESTS = [{
        'url': 'http://www.nbc.com/the-tonight-show/video/jimmy-fallon-surprises-fans-at-ben-jerrys/2848237',
        'info_dict': {
            'id': '2848237',
            'ext': 'mp4',
            'title': 'Jimmy Fallon Surprises Fans at Ben & Jerry\'s',
            'description': 'Jimmy gives out free scoops of his new "Tonight Dough" ice cream flavor by surprising customers at the Ben & Jerry\'s scoop shop.',
            'timestamp': 1424246400,
            'upload_date': '20150218',
            'uploader': 'NBCU-COM',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'http://www.nbc.com/saturday-night-live/video/star-wars-teaser/2832821',
        'info_dict': {
            'id': '2832821',
            'ext': 'mp4',
            'title': 'Star Wars Teaser',
            'description': 'md5:0b40f9cbde5b671a7ff62fceccc4f442',
            'timestamp': 1417852800,
            'upload_date': '20141206',
            'uploader': 'NBCU-COM',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'skip': 'Only works from US',
    }, {
        # HLS streams require the 'hdnea3' cookie
        'url': 'http://www.nbc.com/Kings/video/goliath/n1806',
        'info_dict': {
            'id': '101528f5a9e8127b107e98c5e6ce4638',
            'ext': 'mp4',
            'title': 'Goliath',
            'description': 'When an unknown soldier saves the life of the King\'s son in battle, he\'s thrust into the limelight and politics of the kingdom.',
            'timestamp': 1237100400,
            'upload_date': '20090315',
            'uploader': 'NBCU-COM',
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'Only works from US',
    }]

    def _real_extract(self, url):
        """Return a ``url_transparent`` result pointing at ThePlatform.

        The video's ``guid`` and ``title`` are required attributes of the
        first API result; the remaining metadata fields are optional.
        """
        url_match = re.match(self._VALID_URL, url)
        permalink, page_id = url_match.group('permalink', 'id')

        api_reply = self._download_json(
            'https://api.nbc.com/v3/videos', page_id,
            query={'filter[permalink]': permalink})
        attrs = api_reply['data'][0]['attributes']

        # From here on the GUID replaces the id taken from the URL.
        guid = attrs['guid']
        title = attrs['title']

        tp_query = {
            'mbr': 'true',
            'manifest': 'm3u',
        }
        if attrs.get('entitlement') == 'auth':
            # Entitled content: add the value produced by the MVPD auth
            # helpers of the base class to the query as ``auth``.
            mvpd_resource = self._get_mvpd_resource(
                'nbcentertainment', title, guid, attrs.get('vChipRating'))
            tp_query['auth'] = self._extract_mvpd_auth(
                url, guid, 'nbcentertainment', mvpd_resource)

        media_url = update_url_query(
            'http://link.theplatform.com/s/NnzsPC/media/guid/2410887629/' + guid,
            tp_query)

        return {
            '_type': 'url_transparent',
            'id': guid,
            'title': title,
            'url': smuggle_url(media_url, {'force_smil_url': True}),
            'description': attrs.get('description'),
            'keywords': attrs.get('keywords'),
            'season_number': int_or_none(attrs.get('seasonNumber')),
            'episode_number': int_or_none(attrs.get('episodeNumber')),
            'series': attrs.get('showName'),
            'ie_key': 'ThePlatform',
        }
020cf5eb JMF |
106 | |
107 | ||
class NBCSportsVPlayerIE(InfoExtractor):
    """Extractor for the embeddable player at vplayer.nbcsports.com."""

    _VALID_URL = r'https?://vplayer\.nbcsports\.com/(?:[^/]+/)+(?P<id>[0-9a-zA-Z_]+)'

    _TESTS = [
        {
            'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_share/select/9CsDKds0kvHI',
            'info_dict': {
                'id': '9CsDKds0kvHI',
                'ext': 'flv',
                'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d',
                'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson',
                'timestamp': 1426270238,
                'upload_date': '20150313',
                'uploader': 'NBCU-SPORTS',
            },
        },
        {
            'url': 'http://vplayer.nbcsports.com/p/BxmELC/nbc_embedshare/select/_hqLjQ95yx8Z',
            'only_matching': True,
        },
    ]

    @staticmethod
    def _extract_url(webpage):
        """Return the first vplayer iframe ``src`` found in *webpage*.

        Returns None when the page contains no such iframe.
        """
        embed = re.search(
            r'<iframe[^>]+src="(?P<url>https?://vplayer\.nbcsports\.com/[^"]+)"',
            webpage)
        return embed.group('url') if embed else None

    def _real_extract(self, url):
        """Hand the page's Open Graph video URL over to ThePlatform."""
        player_id = self._match_id(url)
        page = self._download_webpage(url, player_id)
        og_video_url = self._og_search_video_url(page)
        return self.url_result(og_video_url, 'ThePlatform')
139 | ||
140 | ||
class NBCSportsIE(InfoExtractor):
    """Extractor for article/video pages on nbcsports.com.

    The page itself only embeds a vplayer.nbcsports.com iframe; that
    iframe URL is located and delegated to ``NBCSportsVPlayer``.
    """

    # Both http and https URLs are accepted, and a doubled slash after the
    # host name (as in the test URL below) is tolerated.
    _VALID_URL = r'https?://(?:www\.)?nbcsports\.com//?(?:[^/]+/)+(?P<id>[0-9a-z-]+)'

    _TEST = {
        'url': 'http://www.nbcsports.com//college-basketball/ncaab/tom-izzo-michigan-st-has-so-much-respect-duke',
        'info_dict': {
            'id': 'PHJSaFWbrTY9',
            'ext': 'flv',
            'title': 'Tom Izzo, Michigan St. has \'so much respect\' for Duke',
            'description': 'md5:ecb459c9d59e0766ac9c7d5d0eda8113',
            'uploader': 'NBCU-SPORTS',
            'upload_date': '20150330',
            'timestamp': 1427726529,
        }
    }

    def _real_extract(self, url):
        """Return a URL result for the vplayer iframe embedded in the page.

        Raises ExtractorError when the page contains no vplayer iframe,
        instead of passing a ``None`` URL on to ``url_result``.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        vplayer_url = NBCSportsVPlayerIE._extract_url(webpage)
        if not vplayer_url:
            # Imported locally so this block does not depend on the
            # module-level import list being extended.
            from ..utils import ExtractorError
            raise ExtractorError(
                'Unable to find a vplayer.nbcsports.com iframe')

        return self.url_result(vplayer_url, 'NBCSportsVPlayer')
163 | ||
164 | ||
class CSNNEIE(InfoExtractor):
    """Extractor for video pages on csnne.com."""

    _VALID_URL = r'https?://(?:www\.)?csnne\.com/video/(?P<id>[0-9a-z-]+)'

    _TEST = {
        'url': 'http://www.csnne.com/video/snc-evening-update-wright-named-red-sox-no-5-starter',
        'info_dict': {
            'id': 'yvBLLUgQ8WU0',
            'ext': 'mp4',
            'title': 'SNC evening update: Wright named Red Sox\' No. 5 starter.',
            'description': 'md5:1753cfee40d9352b19b4c9b3e589b9e3',
            'timestamp': 1459369979,
            'upload_date': '20160330',
            'uploader': 'NBCU-SPORTS',
        },
    }

    def _real_extract(self, url):
        """Delegate to ThePlatform using the page's Twitter player stream.

        The URL slug is kept as ``display_id``; the stream URL is read from
        the ``twitter:player:stream`` meta tag of the page.
        """
        slug = self._match_id(url)
        page = self._download_webpage(url, slug)
        stream_url = self._html_search_meta('twitter:player:stream', page)
        return {
            '_type': 'url_transparent',
            'ie_key': 'ThePlatform',
            'url': stream_url,
            'display_id': slug,
        }
190 | ||
191 | ||
class NBCNewsIE(ThePlatformIE):
    """Extractor for nbcnews.com, today.com and msnbc.com video pages.

    Two URL shapes are handled:

    * ``/video/<slug>/<numeric id>`` pages, whose metadata is read from an
      XML document on nbcnews.com;
    * every other page, where an id (``guid`` or ``mpxId``) is taken from
      JSON embedded in the page when available and the extraction is
      delegated to a ThePlatform feed.
    """

    _VALID_URL = r'''(?x)https?://(?:www\.)?(?:nbcnews|today|msnbc)\.com/
        (?:video/.+?/(?P<id>\d+)|
        ([^/]+/)*(?:.*-)?(?P<mpx_id>[^/?]+))
        '''

    _TESTS = [{
        'url': 'http://www.nbcnews.com/video/nbc-news/52753292',
        'md5': '47abaac93c6eaf9ad37ee6c4463a5179',
        'info_dict': {
            'id': '52753292',
            'ext': 'flv',
            'title': 'Crew emerges after four-month Mars food study',
            'description': 'md5:24e632ffac72b35f8b67a12d1b6ddfc1',
        },
    }, {
        'url': 'http://www.nbcnews.com/watch/nbcnews-com/how-twitter-reacted-to-the-snowden-interview-269389891880',
        'md5': 'af1adfa51312291a017720403826bb64',
        'info_dict': {
            'id': 'p_tweet_snow_140529',
            'ext': 'mp4',
            'title': 'How Twitter Reacted To The Snowden Interview',
            'description': 'md5:65a0bd5d76fe114f3c2727aa3a81fe64',
            'uploader': 'NBCU-NEWS',
            'timestamp': 1401363060,
            'upload_date': '20140529',
        },
    }, {
        'url': 'http://www.nbcnews.com/feature/dateline-full-episodes/full-episode-family-business-n285156',
        'md5': 'fdbf39ab73a72df5896b6234ff98518a',
        'info_dict': {
            'id': '529953347624',
            'ext': 'mp4',
            'title': 'FULL EPISODE: Family Business',
            'description': 'md5:757988edbaae9d7be1d585eb5d55cc04',
        },
        'skip': 'This page is unavailable.',
    }, {
        'url': 'http://www.nbcnews.com/nightly-news/video/nightly-news-with-brian-williams-full-broadcast-february-4-394064451844',
        'md5': '73135a2e0ef819107bbb55a5a9b2a802',
        'info_dict': {
            'id': 'nn_netcast_150204',
            'ext': 'mp4',
            'title': 'Nightly News with Brian Williams Full Broadcast (February 4)',
            'description': 'md5:1c10c1eccbe84a26e5debb4381e2d3c5',
            'timestamp': 1423104900,
            'uploader': 'NBCU-NEWS',
            'upload_date': '20150205',
        },
    }, {
        'url': 'http://www.nbcnews.com/business/autos/volkswagen-11-million-vehicles-could-have-suspect-software-emissions-scandal-n431456',
        'md5': 'a49e173825e5fcd15c13fc297fced39d',
        'info_dict': {
            'id': 'x_lon_vwhorn_150922',
            'ext': 'mp4',
            'title': 'Volkswagen U.S. Chief:\xa0 We Have Totally Screwed Up',
            'description': 'md5:c8be487b2d80ff0594c005add88d8351',
            'upload_date': '20150922',
            'timestamp': 1442917800,
            'uploader': 'NBCU-NEWS',
        },
    }, {
        'url': 'http://www.today.com/video/see-the-aurora-borealis-from-space-in-stunning-new-nasa-video-669831235788',
        'md5': '118d7ca3f0bea6534f119c68ef539f71',
        'info_dict': {
            'id': 'tdy_al_space_160420',
            'ext': 'mp4',
            'title': 'See the aurora borealis from space in stunning new NASA video',
            'description': 'md5:74752b7358afb99939c5f8bb2d1d04b1',
            'upload_date': '20160420',
            'timestamp': 1461152093,
            'uploader': 'NBCU-NEWS',
        },
    }, {
        'url': 'http://www.msnbc.com/all-in-with-chris-hayes/watch/the-chaotic-gop-immigration-vote-314487875924',
        'md5': '6d236bf4f3dddc226633ce6e2c3f814d',
        'info_dict': {
            'id': 'n_hayes_Aimm_140801_272214',
            'ext': 'mp4',
            'title': 'The chaotic GOP immigration vote',
            'description': 'The Republican House votes on a border bill that has no chance of getting through the Senate or signed by the President and is drawing criticism from all sides.',
            'thumbnail': r're:^https?://.*\.jpg$',
            'timestamp': 1406937606,
            'upload_date': '20140802',
            'uploader': 'NBCU-NEWS',
        },
    }, {
        'url': 'http://www.nbcnews.com/watch/dateline/full-episode--deadly-betrayal-386250819952',
        'only_matching': True,
    }, {
        # From http://www.vulture.com/2016/06/letterman-couldnt-care-less-about-late-night.html
        'url': 'http://www.nbcnews.com/widget/video-embed/701714499682',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Extract a video from either of the two supported URL shapes.

        Returns a complete info dict for numeric ``/video/`` URLs, and a
        ``url_transparent`` result for the ``ThePlatformFeed`` extractor
        otherwise.
        """
        url_match = re.match(self._VALID_URL, url)
        numeric_id = url_match.group('id')

        if numeric_id is not None:
            # Numeric "/video/.../<id>" URL: metadata comes from an XML
            # document keyed by that id.
            xml_doc = self._download_xml(
                'http://www.nbcnews.com/id/%s/displaymode/1219' % numeric_id,
                numeric_id)
            video_el = xml_doc.find('video')
            return {
                'id': numeric_id,
                'title': video_el.find('headline').text,
                'ext': 'flv',
                'url': find_xpath_attr(video_el, 'media', 'type', 'flashVideo').text,
                'description': video_el.find('caption').text,
                'thumbnail': find_xpath_attr(video_el, 'media', 'type', 'thumbnail').text,
            }

        # "feature" and "nightly-news" pages use theplatform.com
        media_id = url_match.group('mpx_id')
        page = self._download_webpage(url, media_id)

        # Unless the page says otherwise, the id from the URL is used with
        # the feed's "byId" filter.
        feed_filter = 'byId'
        embedded_json = self._search_regex(
            [r'(?m)(?:var\s+(?:bootstrapJson|playlistData)|NEWS\.videoObj)\s*=\s*({.+});?\s*$',
             r'videoObj\s*:\s*({.+})', r'data-video="([^"]+)"',
             r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);'],
            page, 'bootstrap json', default=None)

        if embedded_json:
            payload = self._parse_json(
                embedded_json, media_id, transform_source=unescapeHTML)

            # The video description sits under a different key depending on
            # which page variant produced the JSON; when none of the known
            # keys is present the payload itself is used.
            if 'results' in payload:
                video_info = payload['results'][0]['video']
            elif 'video' in payload:
                video_info = payload['video']
            elif 'msnbcVideoInfo' in payload:
                video_info = payload['msnbcVideoInfo']['meta']
            elif 'msnbcThePlatform' in payload:
                video_info = payload['msnbcThePlatform']['videoPlayer']['video']
            else:
                video_info = payload

            # A GUID takes precedence over an mpxId and switches the feed
            # filter accordingly.
            if 'guid' in video_info:
                media_id = video_info['guid']
                feed_filter = 'byGuid'
            elif 'mpxId' in video_info:
                media_id = video_info['mpxId']

        # http://feed.theplatform.com/f/2E2eJC/nbcnews also works
        feed_url = update_url_query(
            'http://feed.theplatform.com/f/2E2eJC/nnd_NBCNews',
            {feed_filter: media_id})
        return {
            '_type': 'url_transparent',
            'id': media_id,
            'url': feed_url,
            'ie_key': 'ThePlatformFeed',
        }
be457302 YCH |
352 | |
353 | ||
class NBCOlympicsIE(InfoExtractor):
    """Extractor for video pages on nbcolympics.com.

    The player iframe URL is read from the Drupal settings JSON embedded
    in the page, rewritten to the player.theplatform.com host and handed
    to the ThePlatform extractor.
    """

    # Digits are part of the slug: without them the captured id stops at
    # the first digit and the display id is silently truncated.
    _VALID_URL = r'https?://www\.nbcolympics\.com/video/(?P<id>[0-9a-z-]+)'

    _TEST = {
        # Geo-restricted to US
        'url': 'http://www.nbcolympics.com/video/justin-roses-son-leo-was-tears-after-his-dad-won-gold',
        'md5': '54fecf846d05429fbaa18af557ee523a',
        'info_dict': {
            'id': 'WjTBzDXx5AUq',
            'display_id': 'justin-roses-son-leo-was-tears-after-his-dad-won-gold',
            'ext': 'mp4',
            'title': 'Rose\'s son Leo was in tears after his dad won gold',
            'description': 'Olympic gold medalist Justin Rose gets emotional talking to the impact his win in men\'s golf has already had on his children.',
            'timestamp': 1471274964,
            'upload_date': '20160815',
            'uploader': 'NBCU-SPORTS',
        },
    }

    def _real_extract(self, url):
        """Return a ``url_transparent`` result for the embedded player.

        The ``vod.iframe_url`` entry of the Drupal settings is required;
        a missing key raises KeyError.
        """
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)

        drupal_settings = self._parse_json(self._search_regex(
            r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
            webpage, 'drupal settings'), display_id)

        iframe_url = drupal_settings['vod']['iframe_url']
        # Swap the site-branded host for the ThePlatform player host so the
        # ThePlatform extractor is given a player.theplatform.com URL.
        theplatform_url = iframe_url.replace(
            'vplayer.nbcolympics.com', 'player.theplatform.com')

        return {
            '_type': 'url_transparent',
            'url': theplatform_url,
            'ie_key': ThePlatformIE.ie_key(),
            'display_id': display_id,
        }