5 from .common
import InfoExtractor
6 from .theplatform
import ThePlatformIE
, default_ns
7 from .adobepass
import AdobePassIE
8 from ..compat
import compat_urllib_parse_unquote
9 from ..networking
import HEADRequest
32 class NBCIE(ThePlatformIE
): # XXX: Do not subclass from concrete IE
33 _VALID_URL
= r
'https?(?P<permalink>://(?:www\.)?nbc\.com/(?:classic-tv/)?[^/]+/video/[^/]+/(?P<id>(?:NBCE|n)?\d+))'
37 'url': 'http://www.nbc.com/the-tonight-show/video/jimmy-fallon-surprises-fans-at-ben-jerrys/2848237',
41 'title': 'Jimmy Fallon Surprises Fans at Ben & Jerry\'s',
42 'description': 'Jimmy gives out free scoops of his new "Tonight Dough" ice cream flavor by surprising customers at the Ben & Jerry\'s scoop shop.',
43 'timestamp': 1424246400,
44 'upload_date': '20150218',
45 'uploader': 'NBCU-COM',
46 'episode': 'Jimmy Fallon Surprises Fans at Ben & Jerry\'s',
50 'series': 'Tonight Show: Jimmy Fallon',
52 'chapters': 'count:1',
54 'thumbnail': r
're:https?://.+\.jpg',
57 'skip_download': 'm3u8',
61 'url': 'http://www.nbc.com/saturday-night-live/video/star-wars-teaser/2832821',
65 'title': 'Star Wars Teaser',
66 'description': 'md5:0b40f9cbde5b671a7ff62fceccc4f442',
67 'timestamp': 1417852800,
68 'upload_date': '20141206',
69 'uploader': 'NBCU-COM',
71 'skip': 'page not found',
74 # HLS streams require the 'hdnea3' cookie
75 'url': 'http://www.nbc.com/Kings/video/goliath/n1806',
77 'id': '101528f5a9e8127b107e98c5e6ce4638',
80 'description': 'When an unknown soldier saves the life of the King\'s son in battle, he\'s thrust into the limelight and politics of the kingdom.',
81 'timestamp': 1237100400,
82 'upload_date': '20090315',
83 'uploader': 'NBCU-COM',
85 'skip': 'page not found',
88 # manifest url does not have extension
89 'url': 'https://www.nbc.com/the-golden-globe-awards/video/oprah-winfrey-receives-cecil-b-de-mille-award-at-the-2018-golden-globes/3646439',
93 'title': 'Oprah Winfrey Receives Cecil B. de Mille Award at the 2018 Golden Globes',
94 'episode': 'Oprah Winfrey Receives Cecil B. de Mille Award at the 2018 Golden Globes',
96 'season': 'Season 75',
98 'series': 'The Golden Globe Awards',
99 'description': 'Oprah Winfrey receives the Cecil B. de Mille Award at the 75th Annual Golden Globe Awards.',
100 'uploader': 'NBCU-COM',
101 'upload_date': '20180107',
102 'timestamp': 1515312000,
105 'thumbnail': r
're:https?://.+\.jpg',
106 'chapters': 'count:1',
109 'skip_download': 'm3u8',
113 # new video_id format
114 'url': 'https://www.nbc.com/quantum-leap/video/bens-first-leap-nbcs-quantum-leap/NBCE125189978',
116 'id': 'NBCE125189978',
118 'title': 'Ben\'s First Leap | NBC\'s Quantum Leap',
119 'description': 'md5:a82762449b7ec4bb83291a7b355ebf8e',
120 'uploader': 'NBCU-COM',
121 'series': 'Quantum Leap',
122 'season': 'Season 1',
124 'episode': 'Ben\'s First Leap | NBC\'s Quantum Leap',
128 'timestamp': 1663956155,
129 'upload_date': '20220923',
132 'thumbnail': r
're:https?://.+\.jpg',
135 'skip_download': 'm3u8',
139 'url': 'https://www.nbc.com/classic-tv/charles-in-charge/video/charles-in-charge-pilot/n3310',
140 'only_matching': True,
143 # Percent escaped url
144 'url': 'https://www.nbc.com/up-all-night/video/day-after-valentine%27s-day/n2189',
145 'only_matching': True,
149 def _real_extract(self
, url
):
150 permalink
, video_id
= self
._match
_valid
_url
(url
).groups()
151 permalink
= 'http' + compat_urllib_parse_unquote(permalink
)
152 video_data
= self
._download
_json
(
153 'https://friendship.nbc.co/v2/graphql', video_id
, query
={
154 'query': '''query bonanzaPage(
155 $app: NBCUBrands! = nbc
158 $platform: SupportedPlatforms! = web
159 $type: EntityPageType! = VIDEO
171 ... on VideoPageData {
187 'variables': json
.dumps({
192 })['data']['bonanzaPage']['metadata']
196 'switch': 'HLSServiceSecure',
198 video_id
= video_data
['mpxGuid']
199 tp_path
= 'NnzsPC/media/guid/%s/%s' % (video_data
.get('mpxAccountId') or '2410887629', video_id
)
200 tpm
= self
._download
_theplatform
_metadata
(tp_path
, video_id
)
201 title
= tpm
.get('title') or video_data
.get('secondaryTitle')
202 if video_data
.get('locked'):
203 resource
= self
._get
_mvpd
_resource
(
204 video_data
.get('resourceId') or 'nbcentertainment',
205 title
, video_id
, video_data
.get('rating'))
206 query
['auth'] = self
._extract
_mvpd
_auth
(
207 url
, video_id
, 'nbcentertainment', resource
)
208 theplatform_url
= smuggle_url(update_url_query(
209 'http://link.theplatform.com/s/NnzsPC/media/guid/%s/%s' % (video_data
.get('mpxAccountId') or '2410887629', video_id
),
210 query
), {'force_smil_url': True}
)
212 # Empty string or 0 can be valid values for these. So the check must be `is None`
213 description
= video_data
.get('description')
214 if description
is None:
215 description
= tpm
.get('description')
216 episode_number
= int_or_none(video_data
.get('episodeNumber'))
217 if episode_number
is None:
218 episode_number
= int_or_none(tpm
.get('nbcu$airOrder'))
219 rating
= video_data
.get('rating')
221 try_get(tpm
, lambda x
: x
['ratings'][0]['rating'])
222 season_number
= int_or_none(video_data
.get('seasonNumber'))
223 if season_number
is None:
224 season_number
= int_or_none(tpm
.get('nbcu$seasonNumber'))
225 series
= video_data
.get('seriesShortTitle')
227 series
= tpm
.get('nbcu$seriesShortTitle')
228 tags
= video_data
.get('keywords')
229 if tags
is None or len(tags
) == 0:
230 tags
= tpm
.get('keywords')
233 '_type': 'url_transparent',
234 'age_limit': parse_age_limit(rating
),
235 'description': description
,
237 'episode_number': episode_number
,
239 'ie_key': 'ThePlatform',
240 'season_number': season_number
,
244 'url': theplatform_url
,
248 class NBCSportsVPlayerIE(InfoExtractor
):
249 _VALID_URL_BASE
= r
'https?://(?:vplayer\.nbcsports\.com|(?:www\.)?nbcsports\.com/vplayer)/'
250 _VALID_URL
= _VALID_URL_BASE
+ r
'(?:[^/]+/)+(?P<id>[0-9a-zA-Z_]+)'
251 _EMBED_REGEX
= [r
'(?:iframe[^>]+|var video|div[^>]+data-(?:mpx-)?)[sS]rc\s?=\s?"(?P<url>%s[^\"]+)' % _VALID_URL_BASE
]
254 'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_embed/select/9CsDKds0kvHI',
256 'id': '9CsDKds0kvHI',
258 'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d',
259 'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson',
260 'timestamp': 1426270238,
261 'upload_date': '20150313',
262 'uploader': 'NBCU-SPORTS',
265 'thumbnail': r
're:^https?://.*\.jpg$'
268 'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_embed/select/media/PEgOtlNcC_y2',
269 'only_matching': True,
271 'url': 'https://www.nbcsports.com/vplayer/p/BxmELC/nbcsports/select/PHJSaFWbrTY9?form=html&autoPlay=true',
272 'only_matching': True,
def _real_extract(self, url):
    """Resolve a vplayer page URL to the release URL embedded in its markup.

    Downloads the page at ``url``, pulls the value of the
    ``tp:releaseUrl="..."`` attribute out of it, and returns a URL result
    that delegates that address to the extractor keyed 'ThePlatform'.
    """
    media_id = self._match_id(url)
    page = self._download_webpage(url, media_id)
    # The non-greedy group stops at the first closing double quote.
    release_url = self._html_search_regex(
        r'tp:releaseUrl="(.+?)"', page, 'url')
    return self.url_result(release_url, 'ThePlatform')
282 class NBCSportsIE(InfoExtractor
):
283 _VALID_URL
= r
'https?://(?:www\.)?nbcsports\.com//?(?!vplayer/)(?:[^/]+/)+(?P<id>[0-9a-z-]+)'
287 'url': 'http://www.nbcsports.com//college-basketball/ncaab/tom-izzo-michigan-st-has-so-much-respect-duke',
289 'id': 'PHJSaFWbrTY9',
291 'title': 'Tom Izzo, Michigan St. has \'so much respect\' for Duke',
292 'description': 'md5:ecb459c9d59e0766ac9c7d5d0eda8113',
293 'uploader': 'NBCU-SPORTS',
294 'upload_date': '20150330',
295 'timestamp': 1427726529,
297 'thumbnail': 'https://hdliveextra-a.akamaihd.net/HD/image_sports/NBCU_Sports_Group_-_nbcsports/253/303/izzodps.jpg',
302 'url': 'https://www.nbcsports.com/philadelphia/philadelphia-phillies/bruce-bochy-hector-neris-hes-idiot',
303 'only_matching': True,
306 'url': 'https://www.nbcsports.com/boston/video/report-card-pats-secondary-no-match-josh-allen',
307 'only_matching': True,
def _real_extract(self, url):
    """Find the vplayer embed on an nbcsports.com page and delegate to it.

    Downloads the page at ``url``, asks ``NBCSportsVPlayerIE._extract_url``
    for the embedded player URL, and returns a URL result that delegates
    that address to the extractor keyed 'NBCSportsVPlayer'.
    """
    page_id = self._match_id(url)
    page = self._download_webpage(url, page_id)
    # NOTE(review): presumably _extract_url applies the vplayer class's
    # _EMBED_REGEX to the page -- confirm against the base extractor.
    embed_url = NBCSportsVPlayerIE._extract_url(page)
    return self.url_result(embed_url, 'NBCSportsVPlayer')
317 class NBCSportsStreamIE(AdobePassIE
):
318 _VALID_URL
= r
'https?://stream\.nbcsports\.com/.+?\bpid=(?P<id>\d+)'
320 'url': 'http://stream.nbcsports.com/nbcsn/generic?pid=206559',
324 'title': 'Amgen Tour of California Women\'s Recap',
325 'description': 'md5:66520066b3b5281ada7698d0ea2aa894',
329 'skip_download': True,
331 'skip': 'Requires Adobe Pass Authentication',
334 def _real_extract(self
, url
):
335 video_id
= self
._match
_id
(url
)
336 live_source
= self
._download
_json
(
337 'http://stream.nbcsports.com/data/live_sources_%s.json' % video_id
,
339 video_source
= live_source
['videoSources'][0]
340 title
= video_source
['title']
342 for k
in ('source', 'msl4source', 'iossource', 'hlsv4'):
344 source_url
= video_source
.get(sk
) or video_source
.get(sk
+ 'Alt')
348 source_url
= video_source
['ottStreamUrl']
349 is_live
= video_source
.get('type') == 'live' or video_source
.get('status') == 'Live'
350 resource
= self
._get
_mvpd
_resource
('nbcsports', title
, video_id
, '')
351 token
= self
._extract
_mvpd
_auth
(url
, video_id
, 'nbcsports', resource
)
352 tokenized_url
= self
._download
_json
(
353 'https://token.playmakerservices.com/cdn',
354 video_id
, data
=json
.dumps({
355 'requestorId': 'nbcsports',
357 'application': 'NBCSports',
359 'platform': 'desktop',
361 'url': video_source
['sourceUrl'],
362 'token': base64
.b64encode(token
.encode()).decode(),
363 'resourceId': base64
.b64encode(resource
.encode()).decode(),
364 }).encode())['tokenizedUrl']
365 formats
= self
._extract
_m
3u8_formats
(tokenized_url
, video_id
, 'mp4')
369 'description': live_source
.get('description'),
375 class NBCNewsIE(ThePlatformIE
): # XXX: Do not subclass from concrete IE
376 _VALID_URL
= r
'(?x)https?://(?:www\.)?(?:nbcnews|today|msnbc)\.com/([^/]+/)*(?:.*-)?(?P<id>[^/?]+)'
377 _EMBED_REGEX
= [r
'<iframe[^>]+src=(["\'])(?P
<url
>(?
:https?
:)?
//www\
.nbcnews\
.com
/widget
/video
-embed
/[^
"\']+)\1']
381 'url': 'http://www.nbcnews.com/watch/nbcnews-com/how-twitter-reacted-to-the-snowden-interview-269389891880',
382 'md5': 'cf4bc9e6ce0130f00f545d80ecedd4bf',
384 'id': '269389891880',
386 'title': 'How Twitter Reacted To The Snowden Interview',
387 'description': 'md5:65a0bd5d76fe114f3c2727aa3a81fe64',
388 'timestamp': 1401363060,
389 'upload_date': '20140529',
393 'url': 'http://www.nbcnews.com/feature/dateline-full-episodes/full-episode-family-business-n285156',
394 'md5': 'fdbf39ab73a72df5896b6234ff98518a',
396 'id': '529953347624',
398 'title': 'FULL EPISODE: Family Business',
399 'description': 'md5:757988edbaae9d7be1d585eb5d55cc04',
401 'skip': 'This page is unavailable.',
404 'url': 'http://www.nbcnews.com/nightly-news/video/nightly-news-with-brian-williams-full-broadcast-february-4-394064451844',
405 'md5': '8eb831eca25bfa7d25ddd83e85946548',
407 'id': '394064451844',
409 'title': 'Nightly News with Brian Williams Full Broadcast (February 4)',
410 'description': 'md5:1c10c1eccbe84a26e5debb4381e2d3c5',
411 'timestamp': 1423104900,
412 'upload_date': '20150205',
416 'url': 'http://www.nbcnews.com/business/autos/volkswagen-11-million-vehicles-could-have-suspect-software-emissions-scandal-n431456',
417 'md5': '4a8c4cec9e1ded51060bdda36ff0a5c0',
421 'title': "Volkswagen U
.S
. Chief
: We
'Totally Screwed Up'",
422 'description': 'md5:d22d1281a24f22ea0880741bb4dd6301',
423 'upload_date': '20150922',
424 'timestamp': 1442917800,
428 'url': 'http://www.today.com/video/see-the-aurora-borealis-from-space-in-stunning-new-nasa-video-669831235788',
429 'md5': '118d7ca3f0bea6534f119c68ef539f71',
431 'id': '669831235788',
433 'title': 'See the aurora borealis from space in stunning new NASA video',
434 'description': 'md5:74752b7358afb99939c5f8bb2d1d04b1',
435 'upload_date': '20160420',
436 'timestamp': 1461152093,
440 'url': 'http://www.msnbc.com/all-in-with-chris-hayes/watch/the-chaotic-gop-immigration-vote-314487875924',
441 'md5': '6d236bf4f3dddc226633ce6e2c3f814d',
443 'id': '314487875924',
445 'title': 'The chaotic GOP immigration vote',
446 'description': 'The Republican House votes on a border bill that has no chance of getting through the Senate or signed by the President and is drawing criticism from all sides.',
447 'thumbnail': r're:^https?://.*\.jpg$',
448 'timestamp': 1406937606,
449 'upload_date': '20140802',
453 'url': 'http://www.nbcnews.com/watch/dateline/full-episode--deadly-betrayal-386250819952',
454 'only_matching': True,
457 # From http://www.vulture.com/2016/06/letterman-couldnt-care-less-about-late-night.html
458 'url': 'http://www.nbcnews.com/widget/video-embed/701714499682',
459 'only_matching': True,
463 def _real_extract(self, url):
464 video_id = self._match_id(url)
465 webpage = self._download_webpage(url, video_id)
467 data = self._search_nextjs_data(webpage, video_id)['props']['initialState']
468 video_data = try_get(data, lambda x: x['video']['current'], dict)
470 video_data = data['article']['content'][0]['primaryMedia']['video']
471 title = video_data['headline']['primary']
474 for va in video_data.get('videoAssets', []):
475 public_url = va.get('publicUrl')
478 if '://link.theplatform.com/' in public_url:
479 public_url = update_url_query(public_url, {'format': 'redirect'})
480 format_id = va.get('format')
481 if format_id == 'M3U':
482 formats.extend(self._extract_m3u8_formats(
483 public_url, video_id, 'mp4', 'm3u8_native',
484 m3u8_id=format_id, fatal=False))
486 tbr = int_or_none(va.get('bitrate'), 1000)
488 format_id += '-%d' % tbr
490 'format_id': format_id,
492 'width': int_or_none(va.get('width')),
493 'height': int_or_none(va.get('height')),
499 closed_captioning = video_data.get('closedCaptioning')
500 if closed_captioning:
501 for cc_url in closed_captioning.values():
504 subtitles.setdefault('en', []).append({
511 'description': try_get(video_data, lambda x: x['description']['primary']),
512 'thumbnail': try_get(video_data, lambda x: x['primaryImage']['url']['primary']),
513 'duration': parse_duration(video_data.get('duration')),
514 'timestamp': unified_timestamp(video_data.get('datePublished')),
516 'subtitles': subtitles,
520 class NBCOlympicsIE(InfoExtractor):
521 IE_NAME = 'nbcolympics'
522 _VALID_URL = r'https?://www\.nbcolympics\.com/videos?/(?P<id>[0-9a-z-]+)'
525 # Geo-restricted to US
526 'url': 'http://www.nbcolympics.com/video/justin-roses-son-leo-was-tears-after-his-dad-won-gold',
527 'md5': '54fecf846d05429fbaa18af557ee523a',
529 'id': 'WjTBzDXx5AUq',
530 'display_id': 'justin-roses-son-leo-was-tears-after-his-dad-won-gold',
532 'title': 'Rose\'s son Leo was in tears after his dad won gold',
533 'description': 'Olympic gold medalist Justin Rose gets emotional talking to the impact his win in men\'s golf has already had on his children.',
534 'timestamp': 1471274964,
535 'upload_date': '20160815',
536 'uploader': 'NBCU-SPORTS',
540 def _real_extract(self, url):
541 display_id = self._match_id(url)
543 webpage = self._download_webpage(url, display_id)
546 drupal_settings = self._parse_json(self._search_regex(
547 r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
548 webpage, 'drupal settings'), display_id)
550 iframe_url = drupal_settings['vod']['iframe_url']
551 theplatform_url = iframe_url.replace(
552 'vplayer.nbcolympics.com', 'player.theplatform.com')
553 except RegexNotFoundError:
554 theplatform_url = self._search_regex(
555 r"([\"'])embedUrl\1: *([\"'])(?P
<embedUrl
>.+)\
2",
556 webpage, 'embedding URL', group="embedUrl
")
559 '_type': 'url_transparent',
560 'url': theplatform_url,
561 'ie_key': ThePlatformIE.ie_key(),
562 'display_id': display_id,
566 class NBCOlympicsStreamIE(AdobePassIE):
567 IE_NAME = 'nbcolympics:stream'
568 _VALID_URL = r'https?://stream\.nbcolympics\.com/(?P<id>[0-9a-z-]+)'
571 'note': 'Tokenized m3u8 source URL',
572 'url': 'https://stream.nbcolympics.com/womens-soccer-group-round-11',
576 'title': r"re
:Women
's Group Stage - Netherlands vs\. Brazil [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$",
579 'skip_download
': 'm3u8
',
582 'note
': 'Plain m3u8 source URL
',
583 'url
': 'https
://stream
.nbcolympics
.com
/gymnastics
-event
-finals
-mens
-floor
-pommel
-horse
-womens
-vault
-bars
',
587 'title
': r're
:Event Finals
: M Floor
, W Vault
, M Pommel
, W Uneven Bars
[0-9]{4}
-[0-9]{2}
-[0-9]{2}
[0-9]{2}
:[0-9]{2}$
',
590 'skip_download
': 'm3u8
',
595 def _real_extract(self, url):
596 display_id = self._match_id(url)
597 webpage = self._download_webpage(url, display_id)
598 pid = self._search_regex(r'pid\s
*=\s
*(\d
+);', webpage, 'pid
')
600 event_config = self._download_json(
601 f'http
://stream
.nbcolympics
.com
/data
/event_config_{pid}
.json
',
602 pid, 'Downloading event config
')['eventConfig
']
604 title = event_config['eventTitle
']
605 is_live = {'live': True, 'replay': False}.get(event_config.get('eventStatus
'))
607 source_url = self._download_json(
608 f'https
://api
-leap
.nbcsports
.com
/feeds
/assets
/{pid}?application
=NBCOlympics
&platform
=desktop
&format
=nbc
-player
&env
=staging
',
609 pid, 'Downloading leap config
'
610 )['videoSources
'][0]['cdnSources
']['primary
'][0]['sourceUrl
']
612 if event_config.get('cdnToken
'):
613 ap_resource = self._get_mvpd_resource(
614 event_config.get('resourceId
', 'NBCOlympics
'),
615 re.sub(r'[^\w\d
]+', '', event_config['eventTitle
']), pid,
616 event_config.get('ratingId
', 'NO VALUE
'))
617 media_token = self._extract_mvpd_auth(url, pid, event_config.get('requestorId
', 'NBCOlympics
'), ap_resource)
619 source_url = self._download_json(
620 'https
://tokens
.playmakerservices
.com
/', pid, 'Retrieving tokenized URL
',
622 'application
': 'NBCSports
',
623 'authentication
-type': 'adobe
-pass',
626 'platform
': 'desktop
',
627 'requestorId
': 'NBCOlympics
',
628 'resourceId
': base64.b64encode(ap_resource.encode()).decode(),
629 'token
': base64.b64encode(media_token.encode()).decode(),
633 )['akamai
'][0]['tokenizedUrl
']
635 formats = self._extract_m3u8_formats(source_url, pid, 'mp4
', live=is_live)
637 # -http_seekable requires ffmpeg 4.3+ but it doesn't seem possible to
638 # download with ffmpeg without this option
639 f['downloader_options
'] = {'ffmpeg_args': ['-seekable', '0', '-http_seekable', '0', '-icy', '0']}
643 'display_id
': display_id,
650 class NBCStationsIE(InfoExtractor):
651 _DOMAIN_RE = '|
'.join(map(re.escape, (
652 'nbcbayarea
', 'nbcboston
', 'nbcchicago
', 'nbcconnecticut
', 'nbcdfw
', 'nbclosangeles
',
653 'nbcmiami
', 'nbcnewyork
', 'nbcphiladelphia
', 'nbcsandiego
', 'nbcwashington
',
654 'necn
', 'telemundo52
', 'telemundoarizona
', 'telemundochicago
', 'telemundonuevainglaterra
',
656 _VALID_URL = rf'https?
://(?
:www\
.)?
(?P
<site
>{_DOMAIN_RE}
)\
.com
/(?
:[^
/?
#]+/)*(?P<id>[^/?#]+)/?(?:$|[#?])'
659 'url': 'https://www.nbclosangeles.com/news/local/large-structure-fire-in-downtown-la-prompts-smoke-odor-advisory/2968618/',
663 'title': 'Large Structure Fire in Downtown LA Prompts Smoke Odor Advisory',
664 'description': 'md5:417ed3c2d91fe9d301e6db7b0942f182',
666 'timestamp': 1661135892,
667 'upload_date': '20220822',
669 'channel_id': 'KNBC',
670 'channel': 'nbclosangeles',
673 'skip_download': 'm3u8',
676 'url': 'https://www.telemundoarizona.com/responde/huracan-complica-reembolso-para-televidente-de-tucson/2247002/',
680 'title': 'Huracán complica que televidente de Tucson reciba reembolso',
681 'description': 'md5:af298dc73aab74d4fca6abfb12acb6cf',
683 'timestamp': 1660886507,
684 'upload_date': '20220819',
685 'uploader': 'Telemundo Arizona',
686 'channel_id': 'KTAZ',
687 'channel': 'telemundoarizona',
690 'skip_download': 'm3u8',
694 'url': 'https://www.nbcboston.com/weather/video-weather/highs-near-freezing-in-boston-on-wednesday/2961135/',
695 'md5': '9bf8c41dc7abbb75b1a44f1491a4cc85',
699 'title': 'Highs Near Freezing in Boston on Wednesday',
700 'description': 'md5:3ec486609a926c99f00a3512e6c0e85b',
702 'timestamp': 1675268656,
703 'upload_date': '20230201',
705 'channel_id': 'WBTS',
706 'channel': 'nbcboston',
718 def _real_extract(self
, url
):
719 channel
, video_id
= self
._match
_valid
_url
(url
).group('site', 'id')
720 webpage
= self
._download
_webpage
(url
, video_id
)
722 nbc_data
= self
._search
_json
(
723 r
'<script>\s*var\s+nbc\s*=', webpage
, 'NBC JSON data', video_id
)
724 pdk_acct
= nbc_data
.get('pdkAcct') or 'Yh1nAC'
725 fw_ssid
= traverse_obj(nbc_data
, ('video', 'fwSSID'))
727 video_data
= self
._search
_json
(
728 r
'data-videos="\[', webpage
, 'video data', video_id
, default
={}, transform_source
=unescapeHTML
)
729 video_data
.update(self
._search
_json
(
730 r
'data-meta="', webpage
, 'metadata', video_id
, default
={}, transform_source
=unescapeHTML
))
732 raise ExtractorError('No video metadata found in webpage', expected
=True)
734 info
, formats
= {}, []
735 is_live
= int_or_none(video_data
.get('mpx_is_livestream')) == 1
737 'formats': 'MPEG-DASH none,M3U none,MPEG-DASH none,MPEG4,MP3',
739 'fwsitesection': fw_ssid
,
740 'fwNetworkID': traverse_obj(nbc_data
, ('video', 'fwNetworkID'), default
='382114'),
741 'pprofile': 'ots_desktop_html',
742 'sensitive': 'false',
745 'mode': 'LIVE' if is_live
else 'on-demand',
752 player_id
= traverse_obj(video_data
, ((None, ('video', 'meta')), (
753 'mpx_m3upid', 'mpx_pid', 'pid_streaming_web_medium')), get_all
=False)
754 info
['title'] = f
'{channel} livestream'
757 player_id
= traverse_obj(video_data
, (
758 (None, ('video', 'meta')), ('pid_streaming_web_high', 'mpx_pid')), get_all
=False)
760 date_string
= traverse_obj(video_data
, 'date_string', 'date_gmt')
762 date_string
= self
._search
_regex
(
763 r
'datetime="([^"]+)"', date_string
, 'date string', fatal
=False)
765 date_string
= traverse_obj(
766 nbc_data
, ('dataLayer', 'adobe', ('prop70', 'eVar70', 'eVar59')), get_all
=False)
768 video_url
= traverse_obj(video_data
, ((None, ('video', 'meta')), 'mp4_url'), get_all
=False)
770 ext
= determine_ext(video_url
)
771 height
= self
._search
_regex
(r
'\d+-(\d+)p', url_basename(video_url
), 'height', default
=None)
775 'width': int_or_none(self
._RESOLUTIONS
.get(height
)),
776 'height': int_or_none(height
),
777 'format_id': f
'http-{ext}',
781 'title': video_data
.get('title') or traverse_obj(nbc_data
, (
782 'dataLayer', (None, 'adobe'), ('contenttitle', 'title', 'prop22')), get_all
=False),
784 traverse_obj(video_data
, 'summary', 'excerpt', 'video_hero_text')
785 or clean_html(traverse_obj(nbc_data
, ('dataLayer', 'summary'))),
786 'timestamp': unified_timestamp(date_string
),
790 if player_id
and fw_ssid
:
791 smil
= self
._download
_xml
(
792 f
'https://link.theplatform.com/s/{pdk_acct}/{player_id}', video_id
,
793 note
='Downloading SMIL data', query
=query
, fatal
=is_live
)
794 subtitles
= self
._parse
_smil
_subtitles
(smil
, default_ns
) if smil
else {}
795 for video
in smil
.findall(self
._xpath
_ns
('.//video', default_ns
)) if smil
else []:
796 info
['duration'] = float_or_none(remove_end(video
.get('dur'), 'ms'), 1000)
797 video_src_url
= video
.get('src')
798 ext
= mimetype2ext(video
.get('type'), default
=determine_ext(video_src_url
))
800 fmts
, subs
= self
._extract
_m
3u8_formats
_and
_subtitles
(
801 video_src_url
, video_id
, 'mp4', m3u8_id
='hls', fatal
=is_live
,
802 live
=is_live
, errnote
='No HLS formats found')
804 self
._merge
_subtitles
(subs
, target
=subtitles
)
807 'url': video_src_url
,
808 'format_id': f
'https-{ext}',
810 'width': int_or_none(video
.get('width')),
811 'height': int_or_none(video
.get('height')),
815 self
.raise_no_formats('No video content found in webpage', expected
=True)
818 self
._request
_webpage
(
819 HEADRequest(formats
[0]['url']), video_id
, note
='Checking live status')
820 except ExtractorError
:
821 raise UserNotLive(video_id
=channel
)
826 'channel_id': nbc_data
.get('callLetters'),
827 'uploader': nbc_data
.get('on_air_name'),
829 'subtitles': subtitles
,