5 from .common
import InfoExtractor
6 from .theplatform
import ThePlatformIE
, default_ns
7 from .adobepass
import AdobePassIE
8 from ..compat
import compat_urllib_parse_unquote
32 class NBCIE(ThePlatformIE
): # XXX: Do not subclass from concrete IE
33 _VALID_URL
= r
'https?(?P<permalink>://(?:www\.)?nbc\.com/(?:classic-tv/)?[^/]+/video/[^/]+/(?P<id>(?:NBCE|n)?\d+))'
37 'url': 'http://www.nbc.com/the-tonight-show/video/jimmy-fallon-surprises-fans-at-ben-jerrys/2848237',
41 'title': 'Jimmy Fallon Surprises Fans at Ben & Jerry\'s',
42 'description': 'Jimmy gives out free scoops of his new "Tonight Dough" ice cream flavor by surprising customers at the Ben & Jerry\'s scoop shop.',
43 'timestamp': 1424246400,
44 'upload_date': '20150218',
45 'uploader': 'NBCU-COM',
46 'episode': 'Jimmy Fallon Surprises Fans at Ben & Jerry\'s',
50 'series': 'Tonight Show: Jimmy Fallon',
52 'chapters': 'count:1',
54 'thumbnail': r
're:https?://.+\.jpg',
57 'skip_download': 'm3u8',
61 'url': 'http://www.nbc.com/saturday-night-live/video/star-wars-teaser/2832821',
65 'title': 'Star Wars Teaser',
66 'description': 'md5:0b40f9cbde5b671a7ff62fceccc4f442',
67 'timestamp': 1417852800,
68 'upload_date': '20141206',
69 'uploader': 'NBCU-COM',
71 'skip': 'page not found',
74 # HLS streams require the 'hdnea3' cookie
75 'url': 'http://www.nbc.com/Kings/video/goliath/n1806',
77 'id': '101528f5a9e8127b107e98c5e6ce4638',
80 'description': 'When an unknown soldier saves the life of the King\'s son in battle, he\'s thrust into the limelight and politics of the kingdom.',
81 'timestamp': 1237100400,
82 'upload_date': '20090315',
83 'uploader': 'NBCU-COM',
85 'skip': 'page not found',
88 # manifest url does not have extension
89 'url': 'https://www.nbc.com/the-golden-globe-awards/video/oprah-winfrey-receives-cecil-b-de-mille-award-at-the-2018-golden-globes/3646439',
93 'title': 'Oprah Winfrey Receives Cecil B. de Mille Award at the 2018 Golden Globes',
94 'episode': 'Oprah Winfrey Receives Cecil B. de Mille Award at the 2018 Golden Globes',
96 'season': 'Season 75',
98 'series': 'The Golden Globe Awards',
99 'description': 'Oprah Winfrey receives the Cecil B. de Mille Award at the 75th Annual Golden Globe Awards.',
100 'uploader': 'NBCU-COM',
101 'upload_date': '20180107',
102 'timestamp': 1515312000,
105 'thumbnail': r
're:https?://.+\.jpg',
106 'chapters': 'count:1',
109 'skip_download': 'm3u8',
113 # new video_id format
114 'url': 'https://www.nbc.com/quantum-leap/video/bens-first-leap-nbcs-quantum-leap/NBCE125189978',
116 'id': 'NBCE125189978',
118 'title': 'Ben\'s First Leap | NBC\'s Quantum Leap',
119 'description': 'md5:a82762449b7ec4bb83291a7b355ebf8e',
120 'uploader': 'NBCU-COM',
121 'series': 'Quantum Leap',
122 'season': 'Season 1',
124 'episode': 'Ben\'s First Leap | NBC\'s Quantum Leap',
128 'timestamp': 1663956155,
129 'upload_date': '20220923',
132 'thumbnail': r
're:https?://.+\.jpg',
134 'expected_warnings': ['Ignoring subtitle tracks'],
136 'skip_download': 'm3u8',
140 'url': 'https://www.nbc.com/classic-tv/charles-in-charge/video/charles-in-charge-pilot/n3310',
141 'only_matching': True,
144 # Percent escaped url
145 'url': 'https://www.nbc.com/up-all-night/video/day-after-valentine%27s-day/n2189',
146 'only_matching': True,
150 def _real_extract(self
, url
):
151 permalink
, video_id
= self
._match
_valid
_url
(url
).groups()
152 permalink
= 'http' + compat_urllib_parse_unquote(permalink
)
153 video_data
= self
._download
_json
(
154 'https://friendship.nbc.co/v2/graphql', video_id
, query
={
155 'query': '''query bonanzaPage(
156 $app: NBCUBrands! = nbc
159 $platform: SupportedPlatforms! = web
160 $type: EntityPageType! = VIDEO
172 ... on VideoPageData {
188 'variables': json
.dumps({
193 })['data']['bonanzaPage']['metadata']
197 'switch': 'HLSServiceSecure',
199 video_id
= video_data
['mpxGuid']
200 tp_path
= 'NnzsPC/media/guid/%s/%s' % (video_data
.get('mpxAccountId') or '2410887629', video_id
)
201 tpm
= self
._download
_theplatform
_metadata
(tp_path
, video_id
)
202 title
= tpm
.get('title') or video_data
.get('secondaryTitle')
203 if video_data
.get('locked'):
204 resource
= self
._get
_mvpd
_resource
(
205 video_data
.get('resourceId') or 'nbcentertainment',
206 title
, video_id
, video_data
.get('rating'))
207 query
['auth'] = self
._extract
_mvpd
_auth
(
208 url
, video_id
, 'nbcentertainment', resource
)
209 theplatform_url
= smuggle_url(update_url_query(
210 'http://link.theplatform.com/s/NnzsPC/media/guid/%s/%s' % (video_data
.get('mpxAccountId') or '2410887629', video_id
),
211 query
), {'force_smil_url': True}
)
213 # Empty string or 0 can be valid values for these. So the check must be `is None`
214 description
= video_data
.get('description')
215 if description
is None:
216 description
= tpm
.get('description')
217 episode_number
= int_or_none(video_data
.get('episodeNumber'))
218 if episode_number
is None:
219 episode_number
= int_or_none(tpm
.get('nbcu$airOrder'))
220 rating
= video_data
.get('rating')
222 try_get(tpm
, lambda x
: x
['ratings'][0]['rating'])
223 season_number
= int_or_none(video_data
.get('seasonNumber'))
224 if season_number
is None:
225 season_number
= int_or_none(tpm
.get('nbcu$seasonNumber'))
226 series
= video_data
.get('seriesShortTitle')
228 series
= tpm
.get('nbcu$seriesShortTitle')
229 tags
= video_data
.get('keywords')
230 if tags
is None or len(tags
) == 0:
231 tags
= tpm
.get('keywords')
234 '_type': 'url_transparent',
235 'age_limit': parse_age_limit(rating
),
236 'description': description
,
238 'episode_number': episode_number
,
240 'ie_key': 'ThePlatform',
241 'season_number': season_number
,
245 'url': theplatform_url
,
249 class NBCSportsVPlayerIE(InfoExtractor
):
250 _VALID_URL_BASE
= r
'https?://(?:vplayer\.nbcsports\.com|(?:www\.)?nbcsports\.com/vplayer)/'
251 _VALID_URL
= _VALID_URL_BASE
+ r
'(?:[^/]+/)+(?P<id>[0-9a-zA-Z_]+)'
252 _EMBED_REGEX
= [r
'(?:iframe[^>]+|var video|div[^>]+data-(?:mpx-)?)[sS]rc\s?=\s?"(?P<url>%s[^\"]+)' % _VALID_URL_BASE
]
255 'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_embed/select/9CsDKds0kvHI',
257 'id': '9CsDKds0kvHI',
259 'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d',
260 'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson',
261 'timestamp': 1426270238,
262 'upload_date': '20150313',
263 'uploader': 'NBCU-SPORTS',
266 'thumbnail': r
're:^https?://.*\.jpg$'
269 'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_embed/select/media/PEgOtlNcC_y2',
270 'only_matching': True,
272 'url': 'https://www.nbcsports.com/vplayer/p/BxmELC/nbcsports/select/PHJSaFWbrTY9?form=html&autoPlay=true',
273 'only_matching': True,
def _real_extract(self, url):
    """Hand an NBC Sports vplayer page off to the ThePlatform extractor.

    The player page is downloaded and the value of its ``tp:releaseUrl``
    attribute is pulled out with a regex; that value is returned as a
    URL result tagged with the ``ThePlatform`` extractor key.
    """
    player_id = self._match_id(url)
    page = self._download_webpage(url, player_id)
    # The release URL is embedded as an attribute in the player markup.
    release_url = self._html_search_regex(
        r'tp:releaseUrl="(.+?)"', page, 'url')
    return self.url_result(release_url, 'ThePlatform')
283 class NBCSportsIE(InfoExtractor
):
284 _VALID_URL
= r
'https?://(?:www\.)?nbcsports\.com//?(?!vplayer/)(?:[^/]+/)+(?P<id>[0-9a-z-]+)'
288 'url': 'http://www.nbcsports.com//college-basketball/ncaab/tom-izzo-michigan-st-has-so-much-respect-duke',
290 'id': 'PHJSaFWbrTY9',
292 'title': 'Tom Izzo, Michigan St. has \'so much respect\' for Duke',
293 'description': 'md5:ecb459c9d59e0766ac9c7d5d0eda8113',
294 'uploader': 'NBCU-SPORTS',
295 'upload_date': '20150330',
296 'timestamp': 1427726529,
298 'thumbnail': 'https://hdliveextra-a.akamaihd.net/HD/image_sports/NBCU_Sports_Group_-_nbcsports/253/303/izzodps.jpg',
303 'url': 'https://www.nbcsports.com/philadelphia/philadelphia-phillies/bruce-bochy-hector-neris-hes-idiot',
304 'only_matching': True,
307 'url': 'https://www.nbcsports.com/boston/video/report-card-pats-secondary-no-match-josh-allen',
308 'only_matching': True,
def _real_extract(self, url):
    """Delegate an NBC Sports article page to the vplayer extractor.

    The article page is downloaded and passed to
    ``NBCSportsVPlayerIE._extract_url``; whatever that returns is wrapped
    in a URL result tagged with the ``NBCSportsVPlayer`` extractor key.
    """
    page_id = self._match_id(url)
    page = self._download_webpage(url, page_id)
    # NOTE(review): presumably the first embedded vplayer URL, or None when
    # the page has no embed -- confirm against the helper's definition.
    embed_url = NBCSportsVPlayerIE._extract_url(page)
    return self.url_result(embed_url, 'NBCSportsVPlayer')
318 class NBCSportsStreamIE(AdobePassIE
):
319 _VALID_URL
= r
'https?://stream\.nbcsports\.com/.+?\bpid=(?P<id>\d+)'
321 'url': 'http://stream.nbcsports.com/nbcsn/generic?pid=206559',
325 'title': 'Amgen Tour of California Women\'s Recap',
326 'description': 'md5:66520066b3b5281ada7698d0ea2aa894',
330 'skip_download': True,
332 'skip': 'Requires Adobe Pass Authentication',
335 def _real_extract(self
, url
):
336 video_id
= self
._match
_id
(url
)
337 live_source
= self
._download
_json
(
338 'http://stream.nbcsports.com/data/live_sources_%s.json' % video_id
,
340 video_source
= live_source
['videoSources'][0]
341 title
= video_source
['title']
343 for k
in ('source', 'msl4source', 'iossource', 'hlsv4'):
345 source_url
= video_source
.get(sk
) or video_source
.get(sk
+ 'Alt')
349 source_url
= video_source
['ottStreamUrl']
350 is_live
= video_source
.get('type') == 'live' or video_source
.get('status') == 'Live'
351 resource
= self
._get
_mvpd
_resource
('nbcsports', title
, video_id
, '')
352 token
= self
._extract
_mvpd
_auth
(url
, video_id
, 'nbcsports', resource
)
353 tokenized_url
= self
._download
_json
(
354 'https://token.playmakerservices.com/cdn',
355 video_id
, data
=json
.dumps({
356 'requestorId': 'nbcsports',
358 'application': 'NBCSports',
360 'platform': 'desktop',
362 'url': video_source
['sourceUrl'],
363 'token': base64
.b64encode(token
.encode()).decode(),
364 'resourceId': base64
.b64encode(resource
.encode()).decode(),
365 }).encode())['tokenizedUrl']
366 formats
= self
._extract
_m
3u8_formats
(tokenized_url
, video_id
, 'mp4')
370 'description': live_source
.get('description'),
376 class NBCNewsIE(ThePlatformIE
): # XXX: Do not subclass from concrete IE
377 _VALID_URL
= r
'(?x)https?://(?:www\.)?(?:nbcnews|today|msnbc)\.com/([^/]+/)*(?:.*-)?(?P<id>[^/?]+)'
378 _EMBED_REGEX
= [r
'<iframe[^>]+src=(["\'])(?P
<url
>(?
:https?
:)?
//www\
.nbcnews\
.com
/widget
/video
-embed
/[^
"\']+)\1']
382 'url': 'http://www.nbcnews.com/watch/nbcnews-com/how-twitter-reacted-to-the-snowden-interview-269389891880',
383 'md5': 'cf4bc9e6ce0130f00f545d80ecedd4bf',
385 'id': '269389891880',
387 'title': 'How Twitter Reacted To The Snowden Interview',
388 'description': 'md5:65a0bd5d76fe114f3c2727aa3a81fe64',
389 'timestamp': 1401363060,
390 'upload_date': '20140529',
394 'url': 'http://www.nbcnews.com/feature/dateline-full-episodes/full-episode-family-business-n285156',
395 'md5': 'fdbf39ab73a72df5896b6234ff98518a',
397 'id': '529953347624',
399 'title': 'FULL EPISODE: Family Business',
400 'description': 'md5:757988edbaae9d7be1d585eb5d55cc04',
402 'skip': 'This page is unavailable.',
405 'url': 'http://www.nbcnews.com/nightly-news/video/nightly-news-with-brian-williams-full-broadcast-february-4-394064451844',
406 'md5': '8eb831eca25bfa7d25ddd83e85946548',
408 'id': '394064451844',
410 'title': 'Nightly News with Brian Williams Full Broadcast (February 4)',
411 'description': 'md5:1c10c1eccbe84a26e5debb4381e2d3c5',
412 'timestamp': 1423104900,
413 'upload_date': '20150205',
417 'url': 'http://www.nbcnews.com/business/autos/volkswagen-11-million-vehicles-could-have-suspect-software-emissions-scandal-n431456',
418 'md5': '4a8c4cec9e1ded51060bdda36ff0a5c0',
422 'title': "Volkswagen U
.S
. Chief
: We
'Totally Screwed Up'",
423 'description': 'md5:d22d1281a24f22ea0880741bb4dd6301',
424 'upload_date': '20150922',
425 'timestamp': 1442917800,
429 'url': 'http://www.today.com/video/see-the-aurora-borealis-from-space-in-stunning-new-nasa-video-669831235788',
430 'md5': '118d7ca3f0bea6534f119c68ef539f71',
432 'id': '669831235788',
434 'title': 'See the aurora borealis from space in stunning new NASA video',
435 'description': 'md5:74752b7358afb99939c5f8bb2d1d04b1',
436 'upload_date': '20160420',
437 'timestamp': 1461152093,
441 'url': 'http://www.msnbc.com/all-in-with-chris-hayes/watch/the-chaotic-gop-immigration-vote-314487875924',
442 'md5': '6d236bf4f3dddc226633ce6e2c3f814d',
444 'id': '314487875924',
446 'title': 'The chaotic GOP immigration vote',
447 'description': 'The Republican House votes on a border bill that has no chance of getting through the Senate or signed by the President and is drawing criticism from all sides.',
448 'thumbnail': r're:^https?://.*\.jpg$',
449 'timestamp': 1406937606,
450 'upload_date': '20140802',
454 'url': 'http://www.nbcnews.com/watch/dateline/full-episode--deadly-betrayal-386250819952',
455 'only_matching': True,
458 # From http://www.vulture.com/2016/06/letterman-couldnt-care-less-about-late-night.html
459 'url': 'http://www.nbcnews.com/widget/video-embed/701714499682',
460 'only_matching': True,
464 def _real_extract(self, url):
465 video_id = self._match_id(url)
466 webpage = self._download_webpage(url, video_id)
468 data = self._search_nextjs_data(webpage, video_id)['props']['initialState']
469 video_data = try_get(data, lambda x: x['video']['current'], dict)
471 video_data = data['article']['content'][0]['primaryMedia']['video']
472 title = video_data['headline']['primary']
475 for va in video_data.get('videoAssets', []):
476 public_url = va.get('publicUrl')
479 if '://link.theplatform.com/' in public_url:
480 public_url = update_url_query(public_url, {'format': 'redirect'})
481 format_id = va.get('format')
482 if format_id == 'M3U':
483 formats.extend(self._extract_m3u8_formats(
484 public_url, video_id, 'mp4', 'm3u8_native',
485 m3u8_id=format_id, fatal=False))
487 tbr = int_or_none(va.get('bitrate'), 1000)
489 format_id += '-%d' % tbr
491 'format_id': format_id,
493 'width': int_or_none(va.get('width')),
494 'height': int_or_none(va.get('height')),
500 closed_captioning = video_data.get('closedCaptioning')
501 if closed_captioning:
502 for cc_url in closed_captioning.values():
505 subtitles.setdefault('en', []).append({
512 'description': try_get(video_data, lambda x: x['description']['primary']),
513 'thumbnail': try_get(video_data, lambda x: x['primaryImage']['url']['primary']),
514 'duration': parse_duration(video_data.get('duration')),
515 'timestamp': unified_timestamp(video_data.get('datePublished')),
517 'subtitles': subtitles,
521 class NBCOlympicsIE(InfoExtractor):
522 IE_NAME = 'nbcolympics'
523 _VALID_URL = r'https?://www\.nbcolympics\.com/videos?/(?P<id>[0-9a-z-]+)'
526 # Geo-restricted to US
527 'url': 'http://www.nbcolympics.com/video/justin-roses-son-leo-was-tears-after-his-dad-won-gold',
528 'md5': '54fecf846d05429fbaa18af557ee523a',
530 'id': 'WjTBzDXx5AUq',
531 'display_id': 'justin-roses-son-leo-was-tears-after-his-dad-won-gold',
533 'title': 'Rose\'s son Leo was in tears after his dad won gold',
534 'description': 'Olympic gold medalist Justin Rose gets emotional talking to the impact his win in men\'s golf has already had on his children.',
535 'timestamp': 1471274964,
536 'upload_date': '20160815',
537 'uploader': 'NBCU-SPORTS',
541 def _real_extract(self, url):
542 display_id = self._match_id(url)
544 webpage = self._download_webpage(url, display_id)
547 drupal_settings = self._parse_json(self._search_regex(
548 r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
549 webpage, 'drupal settings'), display_id)
551 iframe_url = drupal_settings['vod']['iframe_url']
552 theplatform_url = iframe_url.replace(
553 'vplayer.nbcolympics.com', 'player.theplatform.com')
554 except RegexNotFoundError:
555 theplatform_url = self._search_regex(
556 r"([\"'])embedUrl\1: *([\"'])(?P
<embedUrl
>.+)\
2",
557 webpage, 'embedding URL', group="embedUrl
")
560 '_type': 'url_transparent',
561 'url': theplatform_url,
562 'ie_key': ThePlatformIE.ie_key(),
563 'display_id': display_id,
567 class NBCOlympicsStreamIE(AdobePassIE):
568 IE_NAME = 'nbcolympics:stream'
569 _VALID_URL = r'https?://stream\.nbcolympics\.com/(?P<id>[0-9a-z-]+)'
572 'note': 'Tokenized m3u8 source URL',
573 'url': 'https://stream.nbcolympics.com/womens-soccer-group-round-11',
577 'title': r"re
:Women
's Group Stage - Netherlands vs\. Brazil [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$",
580 'skip_download
': 'm3u8
',
583 'note
': 'Plain m3u8 source URL
',
584 'url
': 'https
://stream
.nbcolympics
.com
/gymnastics
-event
-finals
-mens
-floor
-pommel
-horse
-womens
-vault
-bars
',
588 'title
': r're
:Event Finals
: M Floor
, W Vault
, M Pommel
, W Uneven Bars
[0-9]{4}
-[0-9]{2}
-[0-9]{2}
[0-9]{2}
:[0-9]{2}$
',
591 'skip_download
': 'm3u8
',
596 def _real_extract(self, url):
597 display_id = self._match_id(url)
598 webpage = self._download_webpage(url, display_id)
599 pid = self._search_regex(r'pid\s
*=\s
*(\d
+);', webpage, 'pid
')
601 event_config = self._download_json(
602 f'http
://stream
.nbcolympics
.com
/data
/event_config_{pid}
.json
',
603 pid, 'Downloading event config
')['eventConfig
']
605 title = event_config['eventTitle
']
606 is_live = {'live': True, 'replay': False}.get(event_config.get('eventStatus
'))
608 source_url = self._download_json(
609 f'https
://api
-leap
.nbcsports
.com
/feeds
/assets
/{pid}?application
=NBCOlympics
&platform
=desktop
&format
=nbc
-player
&env
=staging
',
610 pid, 'Downloading leap config
'
611 )['videoSources
'][0]['cdnSources
']['primary
'][0]['sourceUrl
']
613 if event_config.get('cdnToken
'):
614 ap_resource = self._get_mvpd_resource(
615 event_config.get('resourceId
', 'NBCOlympics
'),
616 re.sub(r'[^\w\d
]+', '', event_config['eventTitle
']), pid,
617 event_config.get('ratingId
', 'NO VALUE
'))
618 media_token = self._extract_mvpd_auth(url, pid, event_config.get('requestorId
', 'NBCOlympics
'), ap_resource)
620 source_url = self._download_json(
621 'https
://tokens
.playmakerservices
.com
/', pid, 'Retrieving tokenized URL
',
623 'application
': 'NBCSports
',
624 'authentication
-type': 'adobe
-pass',
627 'platform
': 'desktop
',
628 'requestorId
': 'NBCOlympics
',
629 'resourceId
': base64.b64encode(ap_resource.encode()).decode(),
630 'token
': base64.b64encode(media_token.encode()).decode(),
634 )['akamai
'][0]['tokenizedUrl
']
636 formats = self._extract_m3u8_formats(source_url, pid, 'mp4
', live=is_live)
638 # -http_seekable requires ffmpeg 4.3+ but it doesnt seem possible to
639 # download with ffmpeg without this option
640 f['downloader_options
'] = {'ffmpeg_args': ['-seekable', '0', '-http_seekable', '0', '-icy', '0']}
644 'display_id
': display_id,
651 class NBCStationsIE(InfoExtractor):
652 _DOMAIN_RE = '|
'.join(map(re.escape, (
653 'nbcbayarea
', 'nbcboston
', 'nbcchicago
', 'nbcconnecticut
', 'nbcdfw
', 'nbclosangeles
',
654 'nbcmiami
', 'nbcnewyork
', 'nbcphiladelphia
', 'nbcsandiego
', 'nbcwashington
',
655 'necn
', 'telemundo52
', 'telemundoarizona
', 'telemundochicago
', 'telemundonuevainglaterra
',
657 _VALID_URL = rf'https?
://(?
:www\
.)?
(?P
<site
>{_DOMAIN_RE}
)\
.com
/(?
:[^
/?
#]+/)*(?P<id>[^/?#]+)/?(?:$|[#?])'
660 'url': 'https://www.nbclosangeles.com/news/local/large-structure-fire-in-downtown-la-prompts-smoke-odor-advisory/2968618/',
664 'title': 'Large Structure Fire in Downtown LA Prompts Smoke Odor Advisory',
665 'description': 'md5:417ed3c2d91fe9d301e6db7b0942f182',
667 'timestamp': 1661135892,
668 'upload_date': '20220822',
670 'channel_id': 'KNBC',
671 'channel': 'nbclosangeles',
674 'skip_download': 'm3u8',
677 'url': 'https://www.telemundoarizona.com/responde/huracan-complica-reembolso-para-televidente-de-tucson/2247002/',
681 'title': 'Huracán complica que televidente de Tucson reciba reembolso',
682 'description': 'md5:af298dc73aab74d4fca6abfb12acb6cf',
684 'timestamp': 1660886507,
685 'upload_date': '20220819',
686 'uploader': 'Telemundo Arizona',
687 'channel_id': 'KTAZ',
688 'channel': 'telemundoarizona',
691 'skip_download': 'm3u8',
695 'url': 'https://www.nbcboston.com/weather/video-weather/highs-near-freezing-in-boston-on-wednesday/2961135/',
696 'md5': '9bf8c41dc7abbb75b1a44f1491a4cc85',
700 'title': 'Highs Near Freezing in Boston on Wednesday',
701 'description': 'md5:3ec486609a926c99f00a3512e6c0e85b',
703 'timestamp': 1675268656,
704 'upload_date': '20230201',
706 'channel_id': 'WBTS',
707 'channel': 'nbcboston',
719 def _real_extract(self
, url
):
720 channel
, video_id
= self
._match
_valid
_url
(url
).group('site', 'id')
721 webpage
= self
._download
_webpage
(url
, video_id
)
723 nbc_data
= self
._search
_json
(
724 r
'<script>\s*var\s+nbc\s*=', webpage
, 'NBC JSON data', video_id
)
725 pdk_acct
= nbc_data
.get('pdkAcct') or 'Yh1nAC'
726 fw_ssid
= traverse_obj(nbc_data
, ('video', 'fwSSID'))
728 video_data
= self
._search
_json
(
729 r
'data-videos="\[', webpage
, 'video data', video_id
, default
={}, transform_source
=unescapeHTML
)
730 video_data
.update(self
._search
_json
(
731 r
'data-meta="', webpage
, 'metadata', video_id
, default
={}, transform_source
=unescapeHTML
))
733 raise ExtractorError('No video metadata found in webpage', expected
=True)
735 info
, formats
= {}, []
736 is_live
= int_or_none(video_data
.get('mpx_is_livestream')) == 1
738 'formats': 'MPEG-DASH none,M3U none,MPEG-DASH none,MPEG4,MP3',
740 'fwsitesection': fw_ssid
,
741 'fwNetworkID': traverse_obj(nbc_data
, ('video', 'fwNetworkID'), default
='382114'),
742 'pprofile': 'ots_desktop_html',
743 'sensitive': 'false',
746 'mode': 'LIVE' if is_live
else 'on-demand',
753 player_id
= traverse_obj(video_data
, ((None, ('video', 'meta')), (
754 'mpx_m3upid', 'mpx_pid', 'pid_streaming_web_medium')), get_all
=False)
755 info
['title'] = f
'{channel} livestream'
758 player_id
= traverse_obj(video_data
, (
759 (None, ('video', 'meta')), ('pid_streaming_web_high', 'mpx_pid')), get_all
=False)
761 date_string
= traverse_obj(video_data
, 'date_string', 'date_gmt')
763 date_string
= self
._search
_regex
(
764 r
'datetime="([^"]+)"', date_string
, 'date string', fatal
=False)
766 date_string
= traverse_obj(
767 nbc_data
, ('dataLayer', 'adobe', ('prop70', 'eVar70', 'eVar59')), get_all
=False)
769 video_url
= traverse_obj(video_data
, ((None, ('video', 'meta')), 'mp4_url'), get_all
=False)
771 ext
= determine_ext(video_url
)
772 height
= self
._search
_regex
(r
'\d+-(\d+)p', url_basename(video_url
), 'height', default
=None)
776 'width': int_or_none(self
._RESOLUTIONS
.get(height
)),
777 'height': int_or_none(height
),
778 'format_id': f
'http-{ext}',
782 'title': video_data
.get('title') or traverse_obj(nbc_data
, (
783 'dataLayer', (None, 'adobe'), ('contenttitle', 'title', 'prop22')), get_all
=False),
785 traverse_obj(video_data
, 'summary', 'excerpt', 'video_hero_text')
786 or clean_html(traverse_obj(nbc_data
, ('dataLayer', 'summary'))),
787 'timestamp': unified_timestamp(date_string
),
791 if player_id
and fw_ssid
:
792 smil
= self
._download
_xml
(
793 f
'https://link.theplatform.com/s/{pdk_acct}/{player_id}', video_id
,
794 note
='Downloading SMIL data', query
=query
, fatal
=is_live
)
795 subtitles
= self
._parse
_smil
_subtitles
(smil
, default_ns
) if smil
else {}
796 for video
in smil
.findall(self
._xpath
_ns
('.//video', default_ns
)) if smil
else []:
797 info
['duration'] = float_or_none(remove_end(video
.get('dur'), 'ms'), 1000)
798 video_src_url
= video
.get('src')
799 ext
= mimetype2ext(video
.get('type'), default
=determine_ext(video_src_url
))
801 fmts
, subs
= self
._extract
_m
3u8_formats
_and
_subtitles
(
802 video_src_url
, video_id
, 'mp4', m3u8_id
='hls', fatal
=is_live
,
803 live
=is_live
, errnote
='No HLS formats found')
805 self
._merge
_subtitles
(subs
, target
=subtitles
)
808 'url': video_src_url
,
809 'format_id': f
'https-{ext}',
811 'width': int_or_none(video
.get('width')),
812 'height': int_or_none(video
.get('height')),
816 self
.raise_no_formats('No video content found in webpage', expected
=True)
819 self
._request
_webpage
(
820 HEADRequest(formats
[0]['url']), video_id
, note
='Checking live status')
821 except ExtractorError
:
822 raise UserNotLive(video_id
=channel
)
827 'channel_id': nbc_data
.get('callLetters'),
828 'uploader': nbc_data
.get('on_air_name'),
830 'subtitles': subtitles
,