5 from .common
import InfoExtractor
6 from .theplatform
import ThePlatformIE
, default_ns
7 from .adobepass
import AdobePassIE
8 from ..compat
import compat_urllib_parse_unquote
29 class NBCIE(ThePlatformIE
): # XXX: Do not subclass from concrete IE
# Matches nbc.com video pages, optionally under /classic-tv/.
#   permalink - everything after the scheme, starting at "://"
#   id        - trailing numeric id, optionally prefixed with "NBCE" or "n"
_VALID_URL = r'https?(?P<permalink>://(?:www\.)?nbc\.com/(?:classic-tv/)?[^/]+/video/[^/]+/(?P<id>(?:NBCE|n)?\d+))'
34 'url': 'http://www.nbc.com/the-tonight-show/video/jimmy-fallon-surprises-fans-at-ben-jerrys/2848237',
38 'title': 'Jimmy Fallon Surprises Fans at Ben & Jerry\'s',
39 'description': 'Jimmy gives out free scoops of his new "Tonight Dough" ice cream flavor by surprising customers at the Ben & Jerry\'s scoop shop.',
40 'timestamp': 1424246400,
41 'upload_date': '20150218',
42 'uploader': 'NBCU-COM',
43 'episode': 'Jimmy Fallon Surprises Fans at Ben & Jerry\'s',
47 'series': 'Tonight Show: Jimmy Fallon',
49 'chapters': 'count:1',
51 'thumbnail': r
're:https?://.+\.jpg',
54 'skip_download': 'm3u8',
58 'url': 'http://www.nbc.com/saturday-night-live/video/star-wars-teaser/2832821',
62 'title': 'Star Wars Teaser',
63 'description': 'md5:0b40f9cbde5b671a7ff62fceccc4f442',
64 'timestamp': 1417852800,
65 'upload_date': '20141206',
66 'uploader': 'NBCU-COM',
68 'skip': 'page not found',
# HLS streams require the 'hdnea3' cookie
72 'url': 'http://www.nbc.com/Kings/video/goliath/n1806',
74 'id': '101528f5a9e8127b107e98c5e6ce4638',
77 'description': 'When an unknown soldier saves the life of the King\'s son in battle, he\'s thrust into the limelight and politics of the kingdom.',
78 'timestamp': 1237100400,
79 'upload_date': '20090315',
80 'uploader': 'NBCU-COM',
82 'skip': 'page not found',
85 # manifest url does not have extension
86 'url': 'https://www.nbc.com/the-golden-globe-awards/video/oprah-winfrey-receives-cecil-b-de-mille-award-at-the-2018-golden-globes/3646439',
90 'title': 'Oprah Winfrey Receives Cecil B. de Mille Award at the 2018 Golden Globes',
91 'episode': 'Oprah Winfrey Receives Cecil B. de Mille Award at the 2018 Golden Globes',
93 'season': 'Season 75',
95 'series': 'The Golden Globe Awards',
96 'description': 'Oprah Winfrey receives the Cecil B. de Mille Award at the 75th Annual Golden Globe Awards.',
97 'uploader': 'NBCU-COM',
98 'upload_date': '20180107',
99 'timestamp': 1515312000,
102 'thumbnail': r
're:https?://.+\.jpg',
103 'chapters': 'count:1',
106 'skip_download': 'm3u8',
110 # new video_id format
111 'url': 'https://www.nbc.com/quantum-leap/video/bens-first-leap-nbcs-quantum-leap/NBCE125189978',
113 'id': 'NBCE125189978',
115 'title': 'Ben\'s First Leap | NBC\'s Quantum Leap',
116 'description': 'md5:a82762449b7ec4bb83291a7b355ebf8e',
117 'uploader': 'NBCU-COM',
118 'series': 'Quantum Leap',
119 'season': 'Season 1',
121 'episode': 'Ben\'s First Leap | NBC\'s Quantum Leap',
125 'timestamp': 1663956155,
126 'upload_date': '20220923',
129 'thumbnail': r
're:https?://.+\.jpg',
131 'expected_warnings': ['Ignoring subtitle tracks'],
133 'skip_download': 'm3u8',
137 'url': 'https://www.nbc.com/classic-tv/charles-in-charge/video/charles-in-charge-pilot/n3310',
138 'only_matching': True,
141 # Percent escaped url
142 'url': 'https://www.nbc.com/up-all-night/video/day-after-valentine%27s-day/n2189',
143 'only_matching': True,
147 def _real_extract(self
, url
):
148 permalink
, video_id
= self
._match
_valid
_url
(url
).groups()
149 permalink
= 'http' + compat_urllib_parse_unquote(permalink
)
150 video_data
= self
._download
_json
(
151 'https://friendship.nbc.co/v2/graphql', video_id
, query
={
152 'query': '''query bonanzaPage(
153 $app: NBCUBrands! = nbc
156 $platform: SupportedPlatforms! = web
157 $type: EntityPageType! = VIDEO
169 ... on VideoPageData {
185 'variables': json
.dumps({
190 })['data']['bonanzaPage']['metadata']
194 'switch': 'HLSServiceSecure',
196 video_id
= video_data
['mpxGuid']
197 tp_path
= 'NnzsPC/media/guid/%s/%s' % (video_data
.get('mpxAccountId') or '2410887629', video_id
)
198 tpm
= self
._download
_theplatform
_metadata
(tp_path
, video_id
)
199 title
= tpm
.get('title') or video_data
.get('secondaryTitle')
200 if video_data
.get('locked'):
201 resource
= self
._get
_mvpd
_resource
(
202 video_data
.get('resourceId') or 'nbcentertainment',
203 title
, video_id
, video_data
.get('rating'))
204 query
['auth'] = self
._extract
_mvpd
_auth
(
205 url
, video_id
, 'nbcentertainment', resource
)
206 theplatform_url
= smuggle_url(update_url_query(
207 'http://link.theplatform.com/s/NnzsPC/media/guid/%s/%s' % (video_data
.get('mpxAccountId') or '2410887629', video_id
),
208 query
), {'force_smil_url': True}
)
210 # Empty string or 0 can be valid values for these. So the check must be `is None`
211 description
= video_data
.get('description')
212 if description
is None:
213 description
= tpm
.get('description')
214 episode_number
= int_or_none(video_data
.get('episodeNumber'))
215 if episode_number
is None:
216 episode_number
= int_or_none(tpm
.get('nbcu$airOrder'))
217 rating
= video_data
.get('rating')
219 try_get(tpm
, lambda x
: x
['ratings'][0]['rating'])
220 season_number
= int_or_none(video_data
.get('seasonNumber'))
221 if season_number
is None:
222 season_number
= int_or_none(tpm
.get('nbcu$seasonNumber'))
223 series
= video_data
.get('seriesShortTitle')
225 series
= tpm
.get('nbcu$seriesShortTitle')
226 tags
= video_data
.get('keywords')
227 if tags
is None or len(tags
) == 0:
228 tags
= tpm
.get('keywords')
231 '_type': 'url_transparent',
232 'age_limit': parse_age_limit(rating
),
233 'description': description
,
235 'episode_number': episode_number
,
237 'ie_key': 'ThePlatform',
238 'season_number': season_number
,
242 'url': theplatform_url
,
246 class NBCSportsVPlayerIE(InfoExtractor
):
# Common prefix of the standalone player, served either from the vplayer
# subdomain or from the /vplayer path of the main site.
_VALID_URL_BASE = r'https?://(?:vplayer\.nbcsports\.com|(?:www\.)?nbcsports\.com/vplayer)/'
# Full player URL: one or more path segments followed by the media id.
_VALID_URL = _VALID_URL_BASE + r'(?:[^/]+/)+(?P<id>[0-9a-zA-Z_]+)'
# Finds a player URL in a src/Src attribute of an iframe, a `var video`
# assignment, or a div data-src / data-mpx-src attribute.
_EMBED_REGEX = [r'(?:iframe[^>]+|var video|div[^>]+data-(?:mpx-)?)[sS]rc\s?=\s?"(?P<url>%s[^\"]+)' % _VALID_URL_BASE]
252 'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_embed/select/9CsDKds0kvHI',
254 'id': '9CsDKds0kvHI',
256 'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d',
257 'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson',
258 'timestamp': 1426270238,
259 'upload_date': '20150313',
260 'uploader': 'NBCU-SPORTS',
263 'thumbnail': r
're:^https?://.*\.jpg$'
266 'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_embed/select/media/PEgOtlNcC_y2',
267 'only_matching': True,
269 'url': 'https://www.nbcsports.com/vplayer/p/BxmELC/nbcsports/select/PHJSaFWbrTY9?form=html&autoPlay=true',
270 'only_matching': True,
def _real_extract(self, url):
    """Resolve a player page to the ThePlatform release URL embedded in it.

    Downloads the page at ``url``, reads the value of its
    ``tp:releaseUrl="..."`` attribute and returns a URL result for it that
    names the ``ThePlatform`` extractor.
    """
    video_id = self._match_id(url)
    webpage = self._download_webpage(url, video_id)
    theplatform_url = self._html_search_regex(r'tp:releaseUrl="(.+?)"', webpage, 'url')
    return self.url_result(theplatform_url, 'ThePlatform')
280 class NBCSportsIE(InfoExtractor
):
# Article/video pages on nbcsports.com. A doubled slash after the host is
# tolerated, /vplayer/ URLs are excluded by the negative lookahead, and the
# last path segment (lower-case slug) is captured as the id.
_VALID_URL = r'https?://(?:www\.)?nbcsports\.com//?(?!vplayer/)(?:[^/]+/)+(?P<id>[0-9a-z-]+)'
285 'url': 'http://www.nbcsports.com//college-basketball/ncaab/tom-izzo-michigan-st-has-so-much-respect-duke',
287 'id': 'PHJSaFWbrTY9',
289 'title': 'Tom Izzo, Michigan St. has \'so much respect\' for Duke',
290 'description': 'md5:ecb459c9d59e0766ac9c7d5d0eda8113',
291 'uploader': 'NBCU-SPORTS',
292 'upload_date': '20150330',
293 'timestamp': 1427726529,
295 'thumbnail': 'https://hdliveextra-a.akamaihd.net/HD/image_sports/NBCU_Sports_Group_-_nbcsports/253/303/izzodps.jpg',
300 'url': 'https://www.nbcsports.com/philadelphia/philadelphia-phillies/bruce-bochy-hector-neris-hes-idiot',
301 'only_matching': True,
304 'url': 'https://www.nbcsports.com/boston/video/report-card-pats-secondary-no-match-josh-allen',
305 'only_matching': True,
def _real_extract(self, url):
    """Hand an nbcsports.com page off to the embedded player it contains.

    Downloads the page at ``url``, asks ``NBCSportsVPlayerIE._extract_url``
    for the player URL found in it, and returns a URL result naming the
    ``NBCSportsVPlayer`` extractor.
    """
    video_id = self._match_id(url)
    webpage = self._download_webpage(url, video_id)
    return self.url_result(
        NBCSportsVPlayerIE._extract_url(webpage), 'NBCSportsVPlayer')
315 class NBCSportsStreamIE(AdobePassIE
):
# stream.nbcsports.com URLs; the numeric `pid` query value is the id.
_VALID_URL = r'https?://stream\.nbcsports\.com/.+?\bpid=(?P<id>\d+)'
318 'url': 'http://stream.nbcsports.com/nbcsn/generic?pid=206559',
322 'title': 'Amgen Tour of California Women\'s Recap',
323 'description': 'md5:66520066b3b5281ada7698d0ea2aa894',
327 'skip_download': True,
329 'skip': 'Requires Adobe Pass Authentication',
332 def _real_extract(self
, url
):
333 video_id
= self
._match
_id
(url
)
334 live_source
= self
._download
_json
(
335 'http://stream.nbcsports.com/data/live_sources_%s.json' % video_id
,
337 video_source
= live_source
['videoSources'][0]
338 title
= video_source
['title']
340 for k
in ('source', 'msl4source', 'iossource', 'hlsv4'):
342 source_url
= video_source
.get(sk
) or video_source
.get(sk
+ 'Alt')
346 source_url
= video_source
['ottStreamUrl']
347 is_live
= video_source
.get('type') == 'live' or video_source
.get('status') == 'Live'
348 resource
= self
._get
_mvpd
_resource
('nbcsports', title
, video_id
, '')
349 token
= self
._extract
_mvpd
_auth
(url
, video_id
, 'nbcsports', resource
)
350 tokenized_url
= self
._download
_json
(
351 'https://token.playmakerservices.com/cdn',
352 video_id
, data
=json
.dumps({
353 'requestorId': 'nbcsports',
355 'application': 'NBCSports',
357 'platform': 'desktop',
359 'url': video_source
['sourceUrl'],
360 'token': base64
.b64encode(token
.encode()).decode(),
361 'resourceId': base64
.b64encode(resource
.encode()).decode(),
362 }).encode())['tokenizedUrl']
363 formats
= self
._extract
_m
3u8_formats
(tokenized_url
, video_id
, 'mp4')
367 'description': live_source
.get('description'),
373 class NBCNewsIE(ThePlatformIE
): # XXX: Do not subclass from concrete IE
# Pages on nbcnews.com, today.com and msnbc.com. The id is whatever follows
# the last "-" of the final path segment, or the whole segment when it has
# no "-".
_VALID_URL = r'(?x)https?://(?:www\.)?(?:nbcnews|today|msnbc)\.com/([^/]+/)*(?:.*-)?(?P<id>[^/?]+)'
# Finds nbcnews.com video-embed widget iframes; group 1 is the opening quote
# character, which the trailing \1 requires to close the attribute.
_EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//www\.nbcnews\.com/widget/video-embed/[^"\']+)\1']
379 'url': 'http://www.nbcnews.com/watch/nbcnews-com/how-twitter-reacted-to-the-snowden-interview-269389891880',
380 'md5': 'cf4bc9e6ce0130f00f545d80ecedd4bf',
382 'id': '269389891880',
384 'title': 'How Twitter Reacted To The Snowden Interview',
385 'description': 'md5:65a0bd5d76fe114f3c2727aa3a81fe64',
386 'timestamp': 1401363060,
387 'upload_date': '20140529',
391 'url': 'http://www.nbcnews.com/feature/dateline-full-episodes/full-episode-family-business-n285156',
392 'md5': 'fdbf39ab73a72df5896b6234ff98518a',
394 'id': '529953347624',
396 'title': 'FULL EPISODE: Family Business',
397 'description': 'md5:757988edbaae9d7be1d585eb5d55cc04',
399 'skip': 'This page is unavailable.',
402 'url': 'http://www.nbcnews.com/nightly-news/video/nightly-news-with-brian-williams-full-broadcast-february-4-394064451844',
403 'md5': '8eb831eca25bfa7d25ddd83e85946548',
405 'id': '394064451844',
407 'title': 'Nightly News with Brian Williams Full Broadcast (February 4)',
408 'description': 'md5:1c10c1eccbe84a26e5debb4381e2d3c5',
409 'timestamp': 1423104900,
410 'upload_date': '20150205',
414 'url': 'http://www.nbcnews.com/business/autos/volkswagen-11-million-vehicles-could-have-suspect-software-emissions-scandal-n431456',
415 'md5': '4a8c4cec9e1ded51060bdda36ff0a5c0',
419 'title': "Volkswagen U
.S
. Chief
: We
'Totally Screwed Up'",
420 'description': 'md5:d22d1281a24f22ea0880741bb4dd6301',
421 'upload_date': '20150922',
422 'timestamp': 1442917800,
426 'url': 'http://www.today.com/video/see-the-aurora-borealis-from-space-in-stunning-new-nasa-video-669831235788',
427 'md5': '118d7ca3f0bea6534f119c68ef539f71',
429 'id': '669831235788',
431 'title': 'See the aurora borealis from space in stunning new NASA video',
432 'description': 'md5:74752b7358afb99939c5f8bb2d1d04b1',
433 'upload_date': '20160420',
434 'timestamp': 1461152093,
438 'url': 'http://www.msnbc.com/all-in-with-chris-hayes/watch/the-chaotic-gop-immigration-vote-314487875924',
439 'md5': '6d236bf4f3dddc226633ce6e2c3f814d',
441 'id': '314487875924',
443 'title': 'The chaotic GOP immigration vote',
444 'description': 'The Republican House votes on a border bill that has no chance of getting through the Senate or signed by the President and is drawing criticism from all sides.',
445 'thumbnail': r're:^https?://.*\.jpg$',
446 'timestamp': 1406937606,
447 'upload_date': '20140802',
451 'url': 'http://www.nbcnews.com/watch/dateline/full-episode--deadly-betrayal-386250819952',
452 'only_matching': True,
455 # From http://www.vulture.com/2016/06/letterman-couldnt-care-less-about-late-night.html
456 'url': 'http://www.nbcnews.com/widget/video-embed/701714499682',
457 'only_matching': True,
461 def _real_extract(self, url):
462 video_id = self._match_id(url)
463 webpage = self._download_webpage(url, video_id)
465 data = self._search_nextjs_data(webpage, video_id)['props']['initialState']
466 video_data = try_get(data, lambda x: x['video']['current'], dict)
468 video_data = data['article']['content'][0]['primaryMedia']['video']
469 title = video_data['headline']['primary']
472 for va in video_data.get('videoAssets', []):
473 public_url = va.get('publicUrl')
476 if '://link.theplatform.com/' in public_url:
477 public_url = update_url_query(public_url, {'format': 'redirect'})
478 format_id = va.get('format')
479 if format_id == 'M3U':
480 formats.extend(self._extract_m3u8_formats(
481 public_url, video_id, 'mp4', 'm3u8_native',
482 m3u8_id=format_id, fatal=False))
484 tbr = int_or_none(va.get('bitrate'), 1000)
486 format_id += '-%d' % tbr
488 'format_id': format_id,
490 'width': int_or_none(va.get('width')),
491 'height': int_or_none(va.get('height')),
497 closed_captioning = video_data.get('closedCaptioning')
498 if closed_captioning:
499 for cc_url in closed_captioning.values():
502 subtitles.setdefault('en', []).append({
509 'description': try_get(video_data, lambda x: x['description']['primary']),
510 'thumbnail': try_get(video_data, lambda x: x['primaryImage']['url']['primary']),
511 'duration': parse_duration(video_data.get('duration')),
512 'timestamp': unified_timestamp(video_data.get('datePublished')),
514 'subtitles': subtitles,
518 class NBCOlympicsIE(InfoExtractor):
IE_NAME = 'nbcolympics'
# /video/<slug> or /videos/<slug> on www.nbcolympics.com; the slug is the id.
_VALID_URL = r'https?://www\.nbcolympics\.com/videos?/(?P<id>[0-9a-z-]+)'
523 # Geo-restricted to US
524 'url': 'http://www.nbcolympics.com/video/justin-roses-son-leo-was-tears-after-his-dad-won-gold',
525 'md5': '54fecf846d05429fbaa18af557ee523a',
527 'id': 'WjTBzDXx5AUq',
528 'display_id': 'justin-roses-son-leo-was-tears-after-his-dad-won-gold',
530 'title': 'Rose\'s son Leo was in tears after his dad won gold',
531 'description': 'Olympic gold medalist Justin Rose gets emotional talking to the impact his win in men\'s golf has already had on his children.',
532 'timestamp': 1471274964,
533 'upload_date': '20160815',
534 'uploader': 'NBCU-SPORTS',
538 def _real_extract(self, url):
539 display_id = self._match_id(url)
541 webpage = self._download_webpage(url, display_id)
544 drupal_settings = self._parse_json(self._search_regex(
545 r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
546 webpage, 'drupal settings'), display_id)
548 iframe_url = drupal_settings['vod']['iframe_url']
549 theplatform_url = iframe_url.replace(
550 'vplayer.nbcolympics.com', 'player.theplatform.com')
551 except RegexNotFoundError:
552 theplatform_url = self._search_regex(
553 r"([\"'])embedUrl\1: *([\"'])(?P
<embedUrl
>.+)\
2",
554 webpage, 'embedding URL', group="embedUrl
")
557 '_type': 'url_transparent',
558 'url': theplatform_url,
559 'ie_key': ThePlatformIE.ie_key(),
560 'display_id': display_id,
564 class NBCOlympicsStreamIE(AdobePassIE):
IE_NAME = 'nbcolympics:stream'
# Pages on stream.nbcolympics.com; the first path segment (slug) is the id.
_VALID_URL = r'https?://stream\.nbcolympics\.com/(?P<id>[0-9a-z-]+)'
569 'note': 'Tokenized m3u8 source URL',
570 'url': 'https://stream.nbcolympics.com/womens-soccer-group-round-11',
574 'title': r"re
:Women
's Group Stage - Netherlands vs\. Brazil [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$",
577 'skip_download
': 'm3u8
',
580 'note
': 'Plain m3u8 source URL
',
581 'url
': 'https
://stream
.nbcolympics
.com
/gymnastics
-event
-finals
-mens
-floor
-pommel
-horse
-womens
-vault
-bars
',
585 'title
': r're
:Event Finals
: M Floor
, W Vault
, M Pommel
, W Uneven Bars
[0-9]{4}
-[0-9]{2}
-[0-9]{2}
[0-9]{2}
:[0-9]{2}$
',
588 'skip_download
': 'm3u8
',
593 def _real_extract(self, url):
594 display_id = self._match_id(url)
595 webpage = self._download_webpage(url, display_id)
596 pid = self._search_regex(r'pid\s
*=\s
*(\d
+);', webpage, 'pid
')
598 event_config = self._download_json(
599 f'http
://stream
.nbcolympics
.com
/data
/event_config_{pid}
.json
',
600 pid, 'Downloading event config
')['eventConfig
']
602 title = event_config['eventTitle
']
603 is_live = {'live': True, 'replay': False}.get(event_config.get('eventStatus
'))
605 source_url = self._download_json(
606 f'https
://api
-leap
.nbcsports
.com
/feeds
/assets
/{pid}?application
=NBCOlympics
&platform
=desktop
&format
=nbc
-player
&env
=staging
',
607 pid, 'Downloading leap config
'
608 )['videoSources
'][0]['cdnSources
']['primary
'][0]['sourceUrl
']
610 if event_config.get('cdnToken
'):
611 ap_resource = self._get_mvpd_resource(
612 event_config.get('resourceId
', 'NBCOlympics
'),
613 re.sub(r'[^\w\d
]+', '', event_config['eventTitle
']), pid,
614 event_config.get('ratingId
', 'NO VALUE
'))
615 media_token = self._extract_mvpd_auth(url, pid, event_config.get('requestorId
', 'NBCOlympics
'), ap_resource)
617 source_url = self._download_json(
618 'https
://tokens
.playmakerservices
.com
/', pid, 'Retrieving tokenized URL
',
620 'application
': 'NBCSports
',
621 'authentication
-type': 'adobe
-pass',
624 'platform
': 'desktop
',
625 'requestorId
': 'NBCOlympics
',
626 'resourceId
': base64.b64encode(ap_resource.encode()).decode(),
627 'token
': base64.b64encode(media_token.encode()).decode(),
631 )['akamai
'][0]['tokenizedUrl
']
633 formats = self._extract_m3u8_formats(source_url, pid, 'mp4
', live=is_live)
# -http_seekable requires ffmpeg 4.3+, but it doesn't seem possible to
# download with ffmpeg without this option
637 f['downloader_options
'] = {'ffmpeg_args': ['-seekable', '0', '-http_seekable', '0', '-icy', '0']}
641 'display_id
': display_id,
648 class NBCStationsIE(InfoExtractor):
649 _DOMAIN_RE = '|
'.join(map(re.escape, (
650 'nbcbayarea
', 'nbcboston
', 'nbcchicago
', 'nbcconnecticut
', 'nbcdfw
', 'nbclosangeles
',
651 'nbcmiami
', 'nbcnewyork
', 'nbcphiladelphia
', 'nbcsandiego
', 'nbcwashington
',
652 'necn
', 'telemundo52
', 'telemundoarizona
', 'telemundochicago
', 'telemundonuevainglaterra
',
654 _VALID_URL = rf'https?
://(?
:www\
.)?
(?P
<site
>{_DOMAIN_RE}
)\
.com
/(?
:[^
/?
#]+/)*(?P<id>[^/?#]+)/?(?:$|[#?])'
657 'url': 'https://www.nbclosangeles.com/news/local/large-structure-fire-in-downtown-la-prompts-smoke-odor-advisory/2968618/',
661 'title': 'Large Structure Fire in Downtown LA Prompts Smoke Odor Advisory',
662 'description': 'md5:417ed3c2d91fe9d301e6db7b0942f182',
663 'timestamp': 1661135892,
664 'upload_date': '20220822',
666 'channel_id': 'KNBC',
667 'channel': 'nbclosangeles',
670 'skip_download': 'm3u8',
673 'url': 'https://www.telemundoarizona.com/responde/huracan-complica-reembolso-para-televidente-de-tucson/2247002/',
677 'title': 'Huracán complica que televidente de Tucson reciba reembolso',
678 'description': 'md5:af298dc73aab74d4fca6abfb12acb6cf',
679 'timestamp': 1660886507,
680 'upload_date': '20220819',
681 'uploader': 'Telemundo Arizona',
682 'channel_id': 'KTAZ',
683 'channel': 'telemundoarizona',
686 'skip_download': 'm3u8',
698 def _real_extract(self
, url
):
699 channel
, video_id
= self
._match
_valid
_url
(url
).group('site', 'id')
700 webpage
= self
._download
_webpage
(url
, video_id
)
702 nbc_data
= self
._search
_json
(
703 r
'<script>\s*var\s+nbc\s*=', webpage
, 'NBC JSON data', video_id
)
704 pdk_acct
= nbc_data
.get('pdkAcct') or 'Yh1nAC'
705 fw_ssid
= traverse_obj(nbc_data
, ('video', 'fwSSID'))
707 video_data
= self
._search
_json
(
708 r
'data-videos="\[', webpage
, 'video data', video_id
, default
={}, transform_source
=unescapeHTML
)
709 video_data
.update(self
._search
_json
(
710 r
'data-meta="', webpage
, 'metadata', video_id
, default
={}, transform_source
=unescapeHTML
))
712 raise ExtractorError('No video metadata found in webpage', expected
=True)
714 info
, formats
, subtitles
= {}, [], {}
715 is_live
= int_or_none(video_data
.get('mpx_is_livestream')) == 1
717 'formats': 'MPEG-DASH none,M3U none,MPEG-DASH none,MPEG4,MP3',
719 'fwsitesection': fw_ssid
,
720 'fwNetworkID': traverse_obj(nbc_data
, ('video', 'fwNetworkID'), default
='382114'),
721 'pprofile': 'ots_desktop_html',
722 'sensitive': 'false',
725 'mode': 'LIVE' if is_live
else 'on-demand',
732 player_id
= traverse_obj(video_data
, ((None, ('video', 'meta')), (
733 'mpx_m3upid', 'mpx_pid', 'pid_streaming_web_medium')), get_all
=False)
734 info
['title'] = f
'{channel} livestream'
737 player_id
= traverse_obj(video_data
, (
738 (None, ('video', 'meta')), ('pid_streaming_web_high', 'mpx_pid')), get_all
=False)
740 date_string
= traverse_obj(video_data
, 'date_string', 'date_gmt')
742 date_string
= self
._search
_regex
(
743 r
'datetime="([^"]+)"', date_string
, 'date string', fatal
=False)
745 date_string
= traverse_obj(
746 nbc_data
, ('dataLayer', 'adobe', ('prop70', 'eVar70', 'eVar59')), get_all
=False)
748 video_url
= traverse_obj(video_data
, ((None, ('video', 'meta')), 'mp4_url'), get_all
=False)
750 height
= self
._search
_regex
(r
'\d+-(\d+)p', url_basename(video_url
), 'height', default
=None)
754 'width': int_or_none(self
._RESOLUTIONS
.get(height
)),
755 'height': int_or_none(height
),
756 'format_id': 'http-mp4',
760 'title': video_data
.get('title') or traverse_obj(nbc_data
, (
761 'dataLayer', (None, 'adobe'), ('contenttitle', 'title', 'prop22')), get_all
=False),
763 traverse_obj(video_data
, 'summary', 'excerpt', 'video_hero_text')
764 or clean_html(traverse_obj(nbc_data
, ('dataLayer', 'summary'))),
765 'timestamp': unified_timestamp(date_string
),
769 if player_id
and fw_ssid
:
770 smil
= self
._download
_xml
(
771 f
'https://link.theplatform.com/s/{pdk_acct}/{player_id}', video_id
,
772 note
='Downloading SMIL data', query
=query
, fatal
=is_live
)
774 manifest_url
= xpath_attr(smil
, f
'.//{{{default_ns}}}video', 'src', fatal
=is_live
)
775 subtitles
= self
._parse
_smil
_subtitles
(smil
, default_ns
)
776 fmts
, subs
= self
._extract
_m
3u8_formats
_and
_subtitles
(
777 manifest_url
, video_id
, 'mp4', m3u8_id
='hls', fatal
=is_live
,
778 live
=is_live
, errnote
='No HLS formats found')
780 self
._merge
_subtitles
(subs
, target
=subtitles
)
783 self
.raise_no_formats('No video content found in webpage', expected
=True)
786 self
._request
_webpage
(
787 HEADRequest(formats
[0]['url']), video_id
, note
='Checking live status')
788 except ExtractorError
:
789 raise UserNotLive(video_id
=channel
)
794 'channel_id': nbc_data
.get('callLetters'),
795 'uploader': nbc_data
.get('on_air_name'),
797 'subtitles': subtitles
,