]>
Commit | Line | Data |
---|---|---|
cd7ee7aa JMF |
1 | from __future__ import unicode_literals |
2 | ||
0bc56fa6 | 3 | import re |
0bc56fa6 JMF |
4 | |
5 | from .common import InfoExtractor | |
574b2a73 | 6 | from .theplatform import ThePlatformIE |
1cc79574 | 7 | from ..utils import ( |
37e64add | 8 | find_xpath_attr, |
0fe2ff78 | 9 | lowercase_escape, |
b46b65ed | 10 | smuggle_url, |
0fe2ff78 | 11 | unescapeHTML, |
574b2a73 | 12 | update_url_query, |
13 | int_or_none, | |
14 | HEADRequest, | |
15 | parse_iso8601, | |
37e64add | 16 | ) |
0bc56fa6 JMF |
17 | |
18 | ||
class NBCIE(InfoExtractor):
    # Matches www.nbc.com URLs whose last path component is a numeric id
    # (optionally prefixed with "n").
    _VALID_URL = r'https?://www\.nbc\.com/(?:[^/]+/)+(?P<id>n?\d+)'

    _TESTS = [{
        'url': 'http://www.nbc.com/the-tonight-show/segments/112966',
        'info_dict': {
            'id': '112966',
            'ext': 'mp4',
            'title': 'Jimmy Fallon Surprises Fans at Ben & Jerry\'s',
            'description': 'Jimmy gives out free scoops of his new "Tonight Dough" ice cream flavor by surprising customers at the Ben & Jerry\'s scoop shop.',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'http://www.nbc.com/the-tonight-show/episodes/176',
        'info_dict': {
            'id': '176',
            'ext': 'flv',
            'title': 'Ricky Gervais, Steven Van Zandt, ILoveMakonnen',
            'description': 'A brand new episode of The Tonight Show welcomes Ricky Gervais, Steven Van Zandt and ILoveMakonnen.',
        },
        'skip': '404 Not Found',
    }, {
        'url': 'http://www.nbc.com/saturday-night-live/video/star-wars-teaser/2832821',
        'info_dict': {
            'id': '2832821',
            'ext': 'mp4',
            'title': 'Star Wars Teaser',
            'description': 'md5:0b40f9cbde5b671a7ff62fceccc4f442',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'skip': 'Only works from US',
    }, {
        # This video has expired but with an escaped embedURL
        'url': 'http://www.nbc.com/parenthood/episode-guide/season-5/just-like-at-home/515',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Locate the ThePlatform player URL in the page and return it as a
        # url_transparent result that carries the originating page URL.
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Patterns that may capture the player URL from the page markup.
        player_url_patterns = [
            r'(?:class="video-player video-player-full" data-mpx-url|class="player" src)="(.*?)"',
            r'<iframe[^>]+src="((?:https?:)?//player\.theplatform\.com/[^"]+)"',
            r'"embedURL"\s*:\s*"([^"]+)"'
        ]
        raw_url = self._html_search_regex(
            player_url_patterns, webpage, 'theplatform url')
        # Drop the "_no_endcard" marker and turn "\/" into "/" before
        # the escape helpers are applied.
        raw_url = raw_url.replace('_no_endcard', '')
        raw_url = raw_url.replace('\\/', '/')
        theplatform_url = unescapeHTML(lowercase_escape(raw_url))

        # Protocol-relative URLs get an explicit http scheme.
        if theplatform_url.startswith('//'):
            theplatform_url = 'http:' + theplatform_url

        smuggled_url = smuggle_url(theplatform_url, {'source_url': url})
        return {
            '_type': 'url_transparent',
            'url': smuggled_url,
            'id': video_id,
        }
020cf5eb JMF |
84 | |
85 | ||
class NBCSportsVPlayerIE(InfoExtractor):
    # Matches vplayer.nbcsports.com URLs; the id is the last path component.
    _VALID_URL = r'https?://vplayer\.nbcsports\.com/(?:[^/]+/)+(?P<id>[0-9a-zA-Z_]+)'

    _TESTS = [
        {
            'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_share/select/9CsDKds0kvHI',
            'info_dict': {
                'id': '9CsDKds0kvHI',
                'ext': 'flv',
                'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d',
                'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson',
            }
        },
        {
            'url': 'http://vplayer.nbcsports.com/p/BxmELC/nbc_embedshare/select/_hqLjQ95yx8Z',
            'only_matching': True,
        },
    ]

    @staticmethod
    def _extract_url(webpage):
        # Return the src of the first vplayer.nbcsports.com iframe found in
        # webpage, or None when the page contains no such iframe.
        match = re.search(
            r'<iframe[^>]+src="(?P<url>https?://vplayer\.nbcsports\.com/[^"]+)"',
            webpage)
        return match.group('url') if match else None

    def _real_extract(self, url):
        # Delegate to the ThePlatform extractor using the page's og:video URL.
        video_id = self._match_id(url)
        page = self._download_webpage(url, video_id)
        return self.url_result(self._og_search_video_url(page), 'ThePlatform')
114 | ||
115 | ||
class NBCSportsIE(InfoExtractor):
    # NOTE: an older remark here said https was left out because the site's
    # certificate was invalid, but the pattern below accepts both http and
    # https. A doubled slash after the host is tolerated as well.
    _VALID_URL = r'https?://www\.nbcsports\.com//?(?:[^/]+/)+(?P<id>[0-9a-z-]+)'

    _TEST = {
        'url': 'http://www.nbcsports.com//college-basketball/ncaab/tom-izzo-michigan-st-has-so-much-respect-duke',
        'info_dict': {
            'id': 'PHJSaFWbrTY9',
            'ext': 'flv',
            'title': 'Tom Izzo, Michigan St. has \'so much respect\' for Duke',
            'description': 'md5:ecb459c9d59e0766ac9c7d5d0eda8113',
        }
    }

    def _real_extract(self, url):
        # Find the embedded vplayer iframe on the article page and delegate
        # to the NBCSportsVPlayer extractor.
        video_id = self._match_id(url)
        page = self._download_webpage(url, video_id)
        player_url = NBCSportsVPlayerIE._extract_url(page)
        return self.url_result(player_url, 'NBCSportsVPlayer')
135 | ||
136 | ||
class NBCNewsIE(ThePlatformIE):
    # Two URL shapes are accepted: "/video/<...>/<digits>" directly under
    # the host, which fills the "id" group, and any other path, whose last
    # segment fills the "display_id" group.
    _VALID_URL = r'''(?x)https?://(?:www\.)?nbcnews\.com/
        (?:video/.+?/(?P<id>\d+)|
        ([^/]+/)*(?P<display_id>[^/?]+))
        '''

    _TESTS = [
        {
            'url': 'http://www.nbcnews.com/video/nbc-news/52753292',
            'md5': '47abaac93c6eaf9ad37ee6c4463a5179',
            'info_dict': {
                'id': '52753292',
                'ext': 'flv',
                'title': 'Crew emerges after four-month Mars food study',
                'description': 'md5:24e632ffac72b35f8b67a12d1b6ddfc1',
            },
        },
        {
            'url': 'http://www.nbcnews.com/watch/nbcnews-com/how-twitter-reacted-to-the-snowden-interview-269389891880',
            'md5': 'af1adfa51312291a017720403826bb64',
            'info_dict': {
                'id': '269389891880',
                'ext': 'mp4',
                'title': 'How Twitter Reacted To The Snowden Interview',
                'description': 'md5:65a0bd5d76fe114f3c2727aa3a81fe64',
            },
        },
        {
            'url': 'http://www.nbcnews.com/feature/dateline-full-episodes/full-episode-family-business-n285156',
            'md5': 'fdbf39ab73a72df5896b6234ff98518a',
            'info_dict': {
                'id': 'Wjf9EDR3A_60',
                'ext': 'mp4',
                'title': 'FULL EPISODE: Family Business',
                'description': 'md5:757988edbaae9d7be1d585eb5d55cc04',
            },
            'skip': 'This page is unavailable.',
        },
        {
            'url': 'http://www.nbcnews.com/nightly-news/video/nightly-news-with-brian-williams-full-broadcast-february-4-394064451844',
            'md5': '73135a2e0ef819107bbb55a5a9b2a802',
            'info_dict': {
                'id': '394064451844',
                'ext': 'mp4',
                'title': 'Nightly News with Brian Williams Full Broadcast (February 4)',
                'description': 'md5:1c10c1eccbe84a26e5debb4381e2d3c5',
            },
        },
        {
            'url': 'http://www.nbcnews.com/business/autos/volkswagen-11-million-vehicles-could-have-suspect-software-emissions-scandal-n431456',
            'md5': 'a49e173825e5fcd15c13fc297fced39d',
            'info_dict': {
                'id': '529953347624',
                'ext': 'mp4',
                'title': 'Volkswagen U.S. Chief: We \'Totally Screwed Up\'',
                'description': 'md5:d22d1281a24f22ea0880741bb4dd6301',
            },
            'expected_warnings': ['http-6000 is not available']
        },
        {
            'url': 'http://www.nbcnews.com/watch/dateline/full-episode--deadly-betrayal-386250819952',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        # Returns an info dict for the video behind url. URLs that carry a
        # numeric "id" group are resolved through an XML document; all other
        # URLs are resolved from JSON embedded in the web page.
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        if video_id is not None:
            all_info = self._download_xml(
                'http://www.nbcnews.com/id/%s/displaymode/1219' % video_id,
                video_id)
            info = all_info.find('video')

            return {
                'id': video_id,
                'title': info.find('headline').text,
                'ext': 'flv',
                'url': find_xpath_attr(info, 'media', 'type', 'flashVideo').text,
                'description': info.find('caption').text,
                'thumbnail': find_xpath_attr(info, 'media', 'type', 'thumbnail').text,
            }

        # "feature" and "nightly-news" pages use theplatform.com
        display_id = mobj.group('display_id')
        webpage = self._download_webpage(url, display_id)
        bootstrap_json = self._search_regex(
            r'(?m)var\s+(?:bootstrapJson|playlistData)\s*=\s*({.+});?\s*$',
            webpage, 'bootstrap json', default=None)
        if bootstrap_json:
            bootstrap = self._parse_json(bootstrap_json, display_id)
            info = bootstrap['results'][0]['video']
        else:
            # Fall back to the "videoObj" object when no bootstrap JSON
            # assignment is present in the page.
            player_instance_json = self._search_regex(
                r'videoObj\s*:\s*({.+})', webpage, 'player instance')
            info = self._parse_json(player_instance_json, display_id)
        video_id = info['mpxId']
        title = info['title']

        # Every caption flavour that is present is listed under 'en'.
        subtitles = {}
        caption_links = info.get('captionLinks')
        if caption_links:
            for (sub_key, sub_ext) in (('smpte-tt', 'ttml'), ('web-vtt', 'vtt'), ('srt', 'srt')):
                sub_url = caption_links.get(sub_key)
                if sub_url:
                    subtitles.setdefault('en', []).append({
                        'url': sub_url,
                        'ext': sub_ext,
                    })

        formats = []
        for video_asset in info['videoAssets']:
            video_url = video_asset.get('publicUrl')
            if not video_url:
                continue
            container = video_asset.get('format')
            asset_type = video_asset.get('assetType') or ''
            if container == 'ISM' or asset_type == 'FireTV-Once':
                # These asset kinds are skipped entirely.
                continue
            elif asset_type == 'OnceURL':
                tp_formats, tp_subtitles = self._extract_theplatform_smil(
                    video_url, video_id)
                formats.extend(tp_formats)
                subtitles = self._merge_subtitles(subtitles, tp_subtitles)
            else:
                tbr = int_or_none(video_asset.get('bitRate'), 1000)
                format_id = 'http%s' % ('-%d' % tbr if tbr else '')
                video_url = update_url_query(
                    video_url, {'format': 'redirect'})
                # resolve the url so that we can check availability and detect the correct extension
                head = self._request_webpage(
                    HEADRequest(video_url), video_id,
                    'Checking %s url' % format_id,
                    '%s is not available' % format_id,
                    fatal=False)
                # Formats whose HEAD request fails are left out.
                if head:
                    video_url = head.geturl()
                    formats.append({
                        'format_id': format_id,
                        'url': video_url,
                        'width': int_or_none(video_asset.get('width')),
                        'height': int_or_none(video_asset.get('height')),
                        'tbr': tbr,
                        'container': container,
                    })
        self._sort_formats(formats)

        # 'thumbnail' used to appear twice here, the first time wrongly set
        # to the description; only the correct entry is kept.
        return {
            'id': video_id,
            'title': title,
            'description': info.get('description'),
            'thumbnail': info.get('thumbnail'),
            'duration': int_or_none(info.get('duration')),
            'timestamp': parse_iso8601(info.get('pubDate')),
            'formats': formats,
            'subtitles': subtitles,
        }
dac14bf3 YCH |
294 | |
295 | ||
class MSNBCIE(InfoExtractor):
    # https URLs redirect to corresponding http ones
    _VALID_URL = r'https?://www\.msnbc\.com/[^/]+/watch/(?P<id>[^/]+)'
    _TEST = {
        'url': 'http://www.msnbc.com/all-in-with-chris-hayes/watch/the-chaotic-gop-immigration-vote-314487875924',
        'md5': '6d236bf4f3dddc226633ce6e2c3f814d',
        'info_dict': {
            'id': 'n_hayes_Aimm_140801_272214',
            'ext': 'mp4',
            'title': 'The chaotic GOP immigration vote',
            'description': 'The Republican House votes on a border bill that has no chance of getting through the Senate or signed by the President and is drawing criticism from all sides.',
            # Raw string: "\." is not a valid escape in a normal literal and
            # triggers a warning on recent Python versions.
            'thumbnail': r're:^https?://.*\.jpg$',
            'timestamp': 1406937606,
            'upload_date': '20140802',
            'categories': ['MSNBC/Topics/Franchise/Best of last night', 'MSNBC/Topics/General/Congress'],
        },
    }

    def _real_extract(self, url):
        # Read the "embedURL" meta value from the watch page and hand it to
        # whichever extractor matches that URL.
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        embed_url = self._html_search_meta('embedURL', webpage)
        return self.url_result(embed_url)