5 from .common
import InfoExtractor
6 from .theplatform
import ThePlatformIE
, default_ns
7 from .adobepass
import AdobePassIE
8 from ..compat
import compat_urllib_parse_unquote
9 from ..networking
import HEADRequest
32 class NBCIE(ThePlatformIE
): # XXX: Do not subclass from concrete IE
33 _VALID_URL
= r
'https?(?P<permalink>://(?:www\.)?nbc\.com/(?:classic-tv/)?[^/]+/video/[^/]+/(?P<id>(?:NBCE|n)?\d+))'
37 'url': 'http://www.nbc.com/the-tonight-show/video/jimmy-fallon-surprises-fans-at-ben-jerrys/2848237',
41 'title': 'Jimmy Fallon Surprises Fans at Ben & Jerry\'s',
42 'description': 'Jimmy gives out free scoops of his new "Tonight Dough" ice cream flavor by surprising customers at the Ben & Jerry\'s scoop shop.',
43 'timestamp': 1424246400,
44 'upload_date': '20150218',
45 'uploader': 'NBCU-COM',
46 'episode': 'Jimmy Fallon Surprises Fans at Ben & Jerry\'s',
50 'series': 'Tonight Show: Jimmy Fallon',
52 'chapters': 'count:1',
54 'thumbnail': r
're:https?://.+\.jpg',
57 'skip_download': 'm3u8',
61 'url': 'http://www.nbc.com/saturday-night-live/video/star-wars-teaser/2832821',
65 'title': 'Star Wars Teaser',
66 'description': 'md5:0b40f9cbde5b671a7ff62fceccc4f442',
67 'timestamp': 1417852800,
68 'upload_date': '20141206',
69 'uploader': 'NBCU-COM',
71 'skip': 'page not found',
74 # HLS streams requires the 'hdnea3' cookie
75 'url': 'http://www.nbc.com/Kings/video/goliath/n1806',
77 'id': '101528f5a9e8127b107e98c5e6ce4638',
80 'description': 'When an unknown soldier saves the life of the King\'s son in battle, he\'s thrust into the limelight and politics of the kingdom.',
81 'timestamp': 1237100400,
82 'upload_date': '20090315',
83 'uploader': 'NBCU-COM',
85 'skip': 'page not found',
88 # manifest url does not have extension
89 'url': 'https://www.nbc.com/the-golden-globe-awards/video/oprah-winfrey-receives-cecil-b-de-mille-award-at-the-2018-golden-globes/3646439',
93 'title': 'Oprah Winfrey Receives Cecil B. de Mille Award at the 2018 Golden Globes',
94 'episode': 'Oprah Winfrey Receives Cecil B. de Mille Award at the 2018 Golden Globes',
96 'season': 'Season 75',
98 'series': 'The Golden Globe Awards',
99 'description': 'Oprah Winfrey receives the Cecil B. de Mille Award at the 75th Annual Golden Globe Awards.',
100 'uploader': 'NBCU-COM',
101 'upload_date': '20180107',
102 'timestamp': 1515312000,
105 'thumbnail': r
're:https?://.+\.jpg',
106 'chapters': 'count:1',
109 'skip_download': 'm3u8',
113 # new video_id format
114 'url': 'https://www.nbc.com/quantum-leap/video/bens-first-leap-nbcs-quantum-leap/NBCE125189978',
116 'id': 'NBCE125189978',
118 'title': 'Ben\'s First Leap | NBC\'s Quantum Leap',
119 'description': 'md5:a82762449b7ec4bb83291a7b355ebf8e',
120 'uploader': 'NBCU-COM',
121 'series': 'Quantum Leap',
122 'season': 'Season 1',
124 'episode': 'Ben\'s First Leap | NBC\'s Quantum Leap',
128 'timestamp': 1663956155,
129 'upload_date': '20220923',
132 'thumbnail': r
're:https?://.+\.jpg',
135 'skip_download': 'm3u8',
139 'url': 'https://www.nbc.com/classic-tv/charles-in-charge/video/charles-in-charge-pilot/n3310',
140 'only_matching': True,
143 # Percent escaped url
144 'url': 'https://www.nbc.com/up-all-night/video/day-after-valentine%27s-day/n2189',
145 'only_matching': True,
149 def _real_extract(self
, url
):
150 permalink
, video_id
= self
._match
_valid
_url
(url
).groups()
151 permalink
= 'http' + compat_urllib_parse_unquote(permalink
)
152 video_data
= self
._download
_json
(
153 'https://friendship.nbc.co/v2/graphql', video_id
, query
={
154 'query': '''query bonanzaPage(
155 $app: NBCUBrands! = nbc
158 $platform: SupportedPlatforms! = web
159 $type: EntityPageType! = VIDEO
171 ... on VideoPageData {
187 'variables': json
.dumps({
192 })['data']['bonanzaPage']['metadata']
196 'switch': 'HLSServiceSecure',
198 video_id
= video_data
['mpxGuid']
199 tp_path
= 'NnzsPC/media/guid/%s/%s' % (video_data
.get('mpxAccountId') or '2410887629', video_id
)
200 tpm
= self
._download
_theplatform
_metadata
(tp_path
, video_id
)
201 title
= tpm
.get('title') or video_data
.get('secondaryTitle')
202 if video_data
.get('locked'):
203 resource
= self
._get
_mvpd
_resource
(
204 video_data
.get('resourceId') or 'nbcentertainment',
205 title
, video_id
, video_data
.get('rating'))
206 query
['auth'] = self
._extract
_mvpd
_auth
(
207 url
, video_id
, 'nbcentertainment', resource
)
208 theplatform_url
= smuggle_url(update_url_query(
209 'http://link.theplatform.com/s/NnzsPC/media/guid/%s/%s' % (video_data
.get('mpxAccountId') or '2410887629', video_id
),
210 query
), {'force_smil_url': True}
)
212 # Empty string or 0 can be valid values for these. So the check must be `is None`
213 description
= video_data
.get('description')
214 if description
is None:
215 description
= tpm
.get('description')
216 episode_number
= int_or_none(video_data
.get('episodeNumber'))
217 if episode_number
is None:
218 episode_number
= int_or_none(tpm
.get('nbcu$airOrder'))
219 rating
= video_data
.get('rating')
221 try_get(tpm
, lambda x
: x
['ratings'][0]['rating'])
222 season_number
= int_or_none(video_data
.get('seasonNumber'))
223 if season_number
is None:
224 season_number
= int_or_none(tpm
.get('nbcu$seasonNumber'))
225 series
= video_data
.get('seriesShortTitle')
227 series
= tpm
.get('nbcu$seriesShortTitle')
228 tags
= video_data
.get('keywords')
229 if tags
is None or len(tags
) == 0:
230 tags
= tpm
.get('keywords')
233 '_type': 'url_transparent',
234 'age_limit': parse_age_limit(rating
),
235 'description': description
,
237 'episode_number': episode_number
,
239 'ie_key': 'ThePlatform',
240 'season_number': season_number
,
244 'url': theplatform_url
,
248 class NBCSportsVPlayerIE(InfoExtractor
):
249 _VALID_URL_BASE
= r
'https?://(?:vplayer\.nbcsports\.com|(?:www\.)?nbcsports\.com/vplayer)/'
250 _VALID_URL
= _VALID_URL_BASE
+ r
'(?:[^/]+/)+(?P<id>[0-9a-zA-Z_]+)'
251 _EMBED_REGEX
= [r
'(?:iframe[^>]+|var video|div[^>]+data-(?:mpx-)?)[sS]rc\s?=\s?"(?P<url>%s[^\"]+)' % _VALID_URL_BASE
]
254 'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_embed/select/9CsDKds0kvHI',
256 'id': '9CsDKds0kvHI',
258 'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d',
259 'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson',
260 'timestamp': 1426270238,
261 'upload_date': '20150313',
262 'uploader': 'NBCU-SPORTS',
265 'thumbnail': r
're:^https?://.*\.jpg$'
268 'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_embed/select/media/PEgOtlNcC_y2',
269 'only_matching': True,
271 'url': 'https://www.nbcsports.com/vplayer/p/BxmELC/nbcsports/select/PHJSaFWbrTY9?form=html&autoPlay=true',
272 'only_matching': True,
def _real_extract(self, url):
    """Resolve a vplayer page to the release URL it embeds.

    The page at ``url`` is downloaded, the value of its
    ``tp:releaseUrl="..."`` attribute is read out, and that URL is
    delegated to the 'ThePlatform' extractor through ``url_result``.
    """
    player_id = self._match_id(url)
    page = self._download_webpage(url, player_id)
    # Non-greedy capture: stop at the first closing double quote.
    release_url = self._html_search_regex(
        r'tp:releaseUrl="(.+?)"', page, 'url')
    return self.url_result(release_url, 'ThePlatform')
282 class NBCSportsIE(InfoExtractor
):
283 _VALID_URL
= r
'https?://(?:www\.)?nbcsports\.com//?(?!vplayer/)(?:[^/]+/)+(?P<id>[0-9a-z-]+)'
287 'url': 'https://www.nbcsports.com/watch/nfl/profootballtalk/pft-pm/unpacking-addisons-reckless-driving-citation',
289 'id': 'PHJSaFWbrTY9',
291 'title': 'Tom Izzo, Michigan St. has \'so much respect\' for Duke',
292 'description': 'md5:ecb459c9d59e0766ac9c7d5d0eda8113',
293 'uploader': 'NBCU-SPORTS',
294 'upload_date': '20150330',
295 'timestamp': 1427726529,
297 'thumbnail': 'https://hdliveextra-a.akamaihd.net/HD/image_sports/NBCU_Sports_Group_-_nbcsports/253/303/izzodps.jpg',
302 'url': 'https://www.nbcsports.com/philadelphia/philadelphia-phillies/bruce-bochy-hector-neris-hes-idiot',
303 'only_matching': True,
306 'url': 'https://www.nbcsports.com/boston/video/report-card-pats-secondary-no-match-josh-allen',
307 'only_matching': True,
def _real_extract(self, url):
    """Hand an nbcsports.com page off to the vplayer extractor.

    The page is downloaded and passed to
    ``NBCSportsVPlayerIE._extract_url``; whatever URL that returns is
    delegated to the 'NBCSportsVPlayer' extractor.
    """
    page_id = self._match_id(url)
    page = self._download_webpage(url, page_id)
    # NOTE(review): _extract_url presumably locates the embedded player
    # via NBCSportsVPlayerIE._EMBED_REGEX -- confirm against the base class.
    player_url = NBCSportsVPlayerIE._extract_url(page)
    return self.url_result(player_url, 'NBCSportsVPlayer')
317 class NBCSportsStreamIE(AdobePassIE
):
318 _VALID_URL
= r
'https?://stream\.nbcsports\.com/.+?\bpid=(?P<id>\d+)'
320 'url': 'http://stream.nbcsports.com/nbcsn/generic?pid=206559',
324 'title': 'Amgen Tour of California Women\'s Recap',
325 'description': 'md5:66520066b3b5281ada7698d0ea2aa894',
329 'skip_download': True,
331 'skip': 'Requires Adobe Pass Authentication',
334 def _real_extract(self
, url
):
335 video_id
= self
._match
_id
(url
)
336 live_source
= self
._download
_json
(
337 'http://stream.nbcsports.com/data/live_sources_%s.json' % video_id
,
339 video_source
= live_source
['videoSources'][0]
340 title
= video_source
['title']
342 for k
in ('source', 'msl4source', 'iossource', 'hlsv4'):
344 source_url
= video_source
.get(sk
) or video_source
.get(sk
+ 'Alt')
348 source_url
= video_source
['ottStreamUrl']
349 is_live
= video_source
.get('type') == 'live' or video_source
.get('status') == 'Live'
350 resource
= self
._get
_mvpd
_resource
('nbcsports', title
, video_id
, '')
351 token
= self
._extract
_mvpd
_auth
(url
, video_id
, 'nbcsports', resource
)
352 tokenized_url
= self
._download
_json
(
353 'https://token.playmakerservices.com/cdn',
354 video_id
, data
=json
.dumps({
355 'requestorId': 'nbcsports',
357 'application': 'NBCSports',
359 'platform': 'desktop',
361 'url': video_source
['sourceUrl'],
362 'token': base64
.b64encode(token
.encode()).decode(),
363 'resourceId': base64
.b64encode(resource
.encode()).decode(),
364 }).encode())['tokenizedUrl']
365 formats
= self
._extract
_m
3u8_formats
(tokenized_url
, video_id
, 'mp4')
369 'description': live_source
.get('description'),
375 class NBCNewsIE(ThePlatformIE
): # XXX: Do not subclass from concrete IE
376 _VALID_URL
= r
'(?x)https?://(?:www\.)?(?:nbcnews|today|msnbc)\.com/([^/]+/)*(?:.*-)?(?P<id>[^/?]+)'
377 _EMBED_REGEX
= [r
'<iframe[^>]+src=(["\'])(?P
<url
>(?
:https?
:)?
//www\
.nbcnews\
.com
/widget
/video
-embed
/[^
"\']+)\1']
381 'url': 'http://www.nbcnews.com/watch/nbcnews-com/how-twitter-reacted-to-the-snowden-interview-269389891880',
382 'md5': 'fb3dcd2d7b1dd9804305fa2fc95ab610', # md5 tends to fluctuate
384 'id': '269389891880',
386 'title': 'How Twitter Reacted To The Snowden Interview',
387 'description': 'md5:65a0bd5d76fe114f3c2727aa3a81fe64',
388 'timestamp': 1401363060,
389 'upload_date': '20140529',
391 'thumbnail': 'https://media-cldnry.s-nbcnews.com/image/upload/MSNBC/Components/Video/140529/p_tweet_snow_140529.jpg',
395 'url': 'http://www.nbcnews.com/feature/dateline-full-episodes/full-episode-family-business-n285156',
396 'md5': 'fdbf39ab73a72df5896b6234ff98518a',
398 'id': '529953347624',
400 'title': 'FULL EPISODE: Family Business',
401 'description': 'md5:757988edbaae9d7be1d585eb5d55cc04',
403 'skip': 'This page is unavailable.',
406 'url': 'http://www.nbcnews.com/nightly-news/video/nightly-news-with-brian-williams-full-broadcast-february-4-394064451844',
407 'md5': '40d0e48c68896359c80372306ece0fc3',
409 'id': '394064451844',
411 'title': 'Nightly News with Brian Williams Full Broadcast (February 4)',
412 'description': 'md5:1c10c1eccbe84a26e5debb4381e2d3c5',
413 'timestamp': 1423104900,
414 'upload_date': '20150205',
416 'thumbnail': 'https://media-cldnry.s-nbcnews.com/image/upload/MSNBC/Components/Video/__NEW/nn_netcast_150204.jpg',
420 'url': 'http://www.nbcnews.com/business/autos/volkswagen-11-million-vehicles-could-have-suspect-software-emissions-scandal-n431456',
421 'md5': 'ffb59bcf0733dc3c7f0ace907f5e3939',
425 'title': "Volkswagen U
.S
. Chief
: We
'Totally Screwed Up'",
426 'description': 'md5:d22d1281a24f22ea0880741bb4dd6301',
427 'upload_date': '20150922',
428 'timestamp': 1442917800,
430 'thumbnail': 'https://media-cldnry.s-nbcnews.com/image/upload/MSNBC/Components/Video/__NEW/x_lon_vwhorn_150922.jpg',
434 'url': 'http://www.today.com/video/see-the-aurora-borealis-from-space-in-stunning-new-nasa-video-669831235788',
435 'md5': '693d1fa21d23afcc9b04c66b227ed9ff',
437 'id': '669831235788',
439 'title': 'See the aurora borealis from space in stunning new NASA video',
440 'description': 'md5:74752b7358afb99939c5f8bb2d1d04b1',
441 'upload_date': '20160420',
442 'timestamp': 1461152093,
444 'thumbnail': 'https://media-cldnry.s-nbcnews.com/image/upload/MSNBC/Components/Video/201604/2016-04-20T11-35-09-133Z--1280x720.jpg',
448 'url': 'http://www.msnbc.com/all-in-with-chris-hayes/watch/the-chaotic-gop-immigration-vote-314487875924',
449 'md5': '6d236bf4f3dddc226633ce6e2c3f814d',
451 'id': '314487875924',
453 'title': 'The chaotic GOP immigration vote',
454 'description': 'The Republican House votes on a border bill that has no chance of getting through the Senate or signed by the President and is drawing criticism from all sides.',
455 'thumbnail': r're:^https?://.*\.jpg$',
456 'timestamp': 1406937606,
457 'upload_date': '20140802',
462 'url': 'http://www.nbcnews.com/watch/dateline/full-episode--deadly-betrayal-386250819952',
463 'only_matching': True,
466 # From http://www.vulture.com/2016/06/letterman-couldnt-care-less-about-late-night.html
467 'url': 'http://www.nbcnews.com/widget/video-embed/701714499682',
468 'only_matching': True,
472 def _real_extract(self, url):
473 video_id = self._match_id(url)
474 webpage = self._download_webpage(url, video_id)
476 data = self._search_nextjs_data(webpage, video_id)['props']['initialState']
477 video_data = try_get(data, lambda x: x['video']['current'], dict)
479 video_data = data['article']['content'][0]['primaryMedia']['video']
480 title = video_data['headline']['primary']
483 for va in video_data.get('videoAssets', []):
484 public_url = va.get('publicUrl')
487 if '://link.theplatform.com/' in public_url:
488 public_url = update_url_query(public_url, {'format': 'redirect'})
489 format_id = va.get('format')
490 if format_id == 'M3U':
491 formats.extend(self._extract_m3u8_formats(
492 public_url, video_id, 'mp4', 'm3u8_native',
493 m3u8_id=format_id, fatal=False))
495 tbr = int_or_none(va.get('bitrate'), 1000)
497 format_id += '-%d' % tbr
499 'format_id': format_id,
501 'width': int_or_none(va.get('width')),
502 'height': int_or_none(va.get('height')),
508 closed_captioning = video_data.get('closedCaptioning')
509 if closed_captioning:
510 for cc_url in closed_captioning.values():
513 subtitles.setdefault('en', []).append({
520 'description': try_get(video_data, lambda x: x['description']['primary']),
521 'thumbnail': try_get(video_data, lambda x: x['primaryImage']['url']['primary']),
522 'duration': parse_duration(video_data.get('duration')),
523 'timestamp': unified_timestamp(video_data.get('datePublished')),
525 'subtitles': subtitles,
529 class NBCOlympicsIE(InfoExtractor):
530 IE_NAME = 'nbcolympics'
531 _VALID_URL = r'https?://www\.nbcolympics\.com/videos?/(?P<id>[0-9a-z-]+)'
534 # Geo-restricted to US
535 'url': 'http://www.nbcolympics.com/video/justin-roses-son-leo-was-tears-after-his-dad-won-gold',
536 'md5': '54fecf846d05429fbaa18af557ee523a',
538 'id': 'WjTBzDXx5AUq',
539 'display_id': 'justin-roses-son-leo-was-tears-after-his-dad-won-gold',
541 'title': 'Rose\'s son Leo was in tears after his dad won gold',
542 'description': 'Olympic gold medalist Justin Rose gets emotional talking to the impact his win in men\'s golf has already had on his children.',
543 'timestamp': 1471274964,
544 'upload_date': '20160815',
545 'uploader': 'NBCU-SPORTS',
547 'skip': '404 Not Found',
def _real_extract(self, url):
    """Find the ThePlatform player URL behind an nbcolympics.com video page.

    The primary path reads the Drupal settings JSON embedded in the page
    and rewrites the host of its ``vod.iframe_url`` from
    ``vplayer.nbcolympics.com`` to ``player.theplatform.com``. When the
    settings blob is not found, the page's ``embedUrl`` value is used
    instead. The result is a ``url_transparent`` entry for ThePlatform
    that carries the page slug as ``display_id``.
    """
    display_id = self._match_id(url)
    page = self._download_webpage(url, display_id)

    try:
        settings_blob = self._search_regex(
            r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
            page, 'drupal settings')
        settings = self._parse_json(settings_blob, display_id)
        theplatform_url = settings['vod']['iframe_url'].replace(
            'vplayer.nbcolympics.com', 'player.theplatform.com')
    except RegexNotFoundError:
        # No Drupal settings on this page: fall back to the quoted
        # "embedUrl": "<url>" pair (either quote style is accepted).
        theplatform_url = self._search_regex(
            r"([\"'])embedUrl\1: *([\"'])(?P<embedUrl>.+)\2",
            page, 'embedding URL', group="embedUrl")

    return {
        '_type': 'url_transparent',
        'url': theplatform_url,
        'ie_key': ThePlatformIE.ie_key(),
        'display_id': display_id,
    }
576 class NBCOlympicsStreamIE(AdobePassIE):
577 IE_NAME = 'nbcolympics:stream'
578 _VALID_URL = r'https?://stream\.nbcolympics\.com/(?P<id>[0-9a-z-]+)'
581 'note': 'Tokenized m3u8 source URL',
582 'url': 'https://stream.nbcolympics.com/womens-soccer-group-round-11',
586 'title': r"re
:Women
's Group Stage - Netherlands vs\. Brazil [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$",
589 'skip_download
': 'm3u8
',
591 'skip
': 'Livestream
',
593 'note
': 'Plain m3u8 source URL
',
594 'url
': 'https
://stream
.nbcolympics
.com
/gymnastics
-event
-finals
-mens
-floor
-pommel
-horse
-womens
-vault
-bars
',
598 'title
': r're
:Event Finals
: M Floor
, W Vault
, M Pommel
, W Uneven Bars
[0-9]{4}
-[0-9]{2}
-[0-9]{2}
[0-9]{2}
:[0-9]{2}$
',
601 'skip_download
': 'm3u8
',
603 'skip
': 'Livestream
',
607 def _real_extract(self, url):
608 display_id = self._match_id(url)
609 webpage = self._download_webpage(url, display_id)
610 pid = self._search_regex(r'pid\s
*=\s
*(\d
+);', webpage, 'pid
')
612 event_config = self._download_json(
613 f'http
://stream
.nbcolympics
.com
/data
/event_config_{pid}
.json
',
614 pid, 'Downloading event config
')['eventConfig
']
616 title = event_config['eventTitle
']
617 is_live = {'live': True, 'replay': False}.get(event_config.get('eventStatus
'))
619 source_url = self._download_json(
620 f'https
://api
-leap
.nbcsports
.com
/feeds
/assets
/{pid}?application
=NBCOlympics
&platform
=desktop
&format
=nbc
-player
&env
=staging
',
621 pid, 'Downloading leap config
'
622 )['videoSources
'][0]['cdnSources
']['primary
'][0]['sourceUrl
']
624 if event_config.get('cdnToken
'):
625 ap_resource = self._get_mvpd_resource(
626 event_config.get('resourceId
', 'NBCOlympics
'),
627 re.sub(r'[^\w\d
]+', '', event_config['eventTitle
']), pid,
628 event_config.get('ratingId
', 'NO VALUE
'))
629 media_token = self._extract_mvpd_auth(url, pid, event_config.get('requestorId
', 'NBCOlympics
'), ap_resource)
631 source_url = self._download_json(
632 'https
://tokens
.playmakerservices
.com
/', pid, 'Retrieving tokenized URL
',
634 'application
': 'NBCSports
',
635 'authentication
-type': 'adobe
-pass',
638 'platform
': 'desktop
',
639 'requestorId
': 'NBCOlympics
',
640 'resourceId
': base64.b64encode(ap_resource.encode()).decode(),
641 'token
': base64.b64encode(media_token.encode()).decode(),
645 )['akamai
'][0]['tokenizedUrl
']
647 formats = self._extract_m3u8_formats(source_url, pid, 'mp4
', live=is_live)
649 # -http_seekable requires ffmpeg 4.3+ but it doesnt seem possible to
650 # download with ffmpeg without this option
651 f['downloader_options
'] = {'ffmpeg_args': ['-seekable', '0', '-http_seekable', '0', '-icy', '0']}
655 'display_id
': display_id,
662 class NBCStationsIE(InfoExtractor):
663 _DOMAIN_RE = '|
'.join(map(re.escape, (
664 'nbcbayarea
', 'nbcboston
', 'nbcchicago
', 'nbcconnecticut
', 'nbcdfw
', 'nbclosangeles
',
665 'nbcmiami
', 'nbcnewyork
', 'nbcphiladelphia
', 'nbcsandiego
', 'nbcwashington
',
666 'necn
', 'telemundo52
', 'telemundoarizona
', 'telemundochicago
', 'telemundonuevainglaterra
',
668 _VALID_URL = rf'https?
://(?
:www\
.)?
(?P
<site
>{_DOMAIN_RE}
)\
.com
/(?
:[^
/?
#]+/)*(?P<id>[^/?#]+)/?(?:$|[#?])'
671 'url': 'https://www.nbclosangeles.com/news/local/large-structure-fire-in-downtown-la-prompts-smoke-odor-advisory/2968618/',
675 'title': 'Large Structure Fire in Downtown LA Prompts Smoke Odor Advisory',
676 'description': 'md5:417ed3c2d91fe9d301e6db7b0942f182',
678 'timestamp': 1661135892,
679 'upload_date': '20220822',
681 'channel_id': 'KNBC',
682 'channel': 'nbclosangeles',
685 'skip_download': 'm3u8',
688 'url': 'https://www.telemundoarizona.com/responde/huracan-complica-reembolso-para-televidente-de-tucson/2247002/',
692 'title': 'Huracán complica que televidente de Tucson reciba reembolso',
693 'description': 'md5:af298dc73aab74d4fca6abfb12acb6cf',
695 'timestamp': 1660886507,
696 'upload_date': '20220819',
697 'uploader': 'Telemundo Arizona',
698 'channel_id': 'KTAZ',
699 'channel': 'telemundoarizona',
702 'skip_download': 'm3u8',
706 'url': 'https://www.nbcboston.com/weather/video-weather/highs-near-freezing-in-boston-on-wednesday/2961135/',
707 'md5': '9bf8c41dc7abbb75b1a44f1491a4cc85',
711 'title': 'Highs Near Freezing in Boston on Wednesday',
712 'description': 'md5:3ec486609a926c99f00a3512e6c0e85b',
714 'timestamp': 1675268656,
715 'upload_date': '20230201',
717 'channel_id': 'WBTS',
718 'channel': 'nbcboston',
730 def _real_extract(self
, url
):
731 channel
, video_id
= self
._match
_valid
_url
(url
).group('site', 'id')
732 webpage
= self
._download
_webpage
(url
, video_id
)
734 nbc_data
= self
._search
_json
(
735 r
'<script>\s*var\s+nbc\s*=', webpage
, 'NBC JSON data', video_id
)
736 pdk_acct
= nbc_data
.get('pdkAcct') or 'Yh1nAC'
737 fw_ssid
= traverse_obj(nbc_data
, ('video', 'fwSSID'))
739 video_data
= self
._search
_json
(
740 r
'data-videos="\[', webpage
, 'video data', video_id
, default
={}, transform_source
=unescapeHTML
)
741 video_data
.update(self
._search
_json
(
742 r
'data-meta="', webpage
, 'metadata', video_id
, default
={}, transform_source
=unescapeHTML
))
744 raise ExtractorError('No video metadata found in webpage', expected
=True)
746 info
, formats
= {}, []
747 is_live
= int_or_none(video_data
.get('mpx_is_livestream')) == 1
749 'formats': 'MPEG-DASH none,M3U none,MPEG-DASH none,MPEG4,MP3',
751 'fwsitesection': fw_ssid
,
752 'fwNetworkID': traverse_obj(nbc_data
, ('video', 'fwNetworkID'), default
='382114'),
753 'pprofile': 'ots_desktop_html',
754 'sensitive': 'false',
757 'mode': 'LIVE' if is_live
else 'on-demand',
764 player_id
= traverse_obj(video_data
, ((None, ('video', 'meta')), (
765 'mpx_m3upid', 'mpx_pid', 'pid_streaming_web_medium')), get_all
=False)
766 info
['title'] = f
'{channel} livestream'
769 player_id
= traverse_obj(video_data
, (
770 (None, ('video', 'meta')), ('pid_streaming_web_high', 'mpx_pid')), get_all
=False)
772 date_string
= traverse_obj(video_data
, 'date_string', 'date_gmt')
774 date_string
= self
._search
_regex
(
775 r
'datetime="([^"]+)"', date_string
, 'date string', fatal
=False)
777 date_string
= traverse_obj(
778 nbc_data
, ('dataLayer', 'adobe', ('prop70', 'eVar70', 'eVar59')), get_all
=False)
780 video_url
= traverse_obj(video_data
, ((None, ('video', 'meta')), 'mp4_url'), get_all
=False)
782 ext
= determine_ext(video_url
)
783 height
= self
._search
_regex
(r
'\d+-(\d+)p', url_basename(video_url
), 'height', default
=None)
787 'width': int_or_none(self
._RESOLUTIONS
.get(height
)),
788 'height': int_or_none(height
),
789 'format_id': f
'http-{ext}',
793 'title': video_data
.get('title') or traverse_obj(nbc_data
, (
794 'dataLayer', (None, 'adobe'), ('contenttitle', 'title', 'prop22')), get_all
=False),
796 traverse_obj(video_data
, 'summary', 'excerpt', 'video_hero_text')
797 or clean_html(traverse_obj(nbc_data
, ('dataLayer', 'summary'))),
798 'timestamp': unified_timestamp(date_string
),
802 if player_id
and fw_ssid
:
803 smil
= self
._download
_xml
(
804 f
'https://link.theplatform.com/s/{pdk_acct}/{player_id}', video_id
,
805 note
='Downloading SMIL data', query
=query
, fatal
=is_live
)
806 subtitles
= self
._parse
_smil
_subtitles
(smil
, default_ns
) if smil
else {}
807 for video
in smil
.findall(self
._xpath
_ns
('.//video', default_ns
)) if smil
else []:
808 info
['duration'] = float_or_none(remove_end(video
.get('dur'), 'ms'), 1000)
809 video_src_url
= video
.get('src')
810 ext
= mimetype2ext(video
.get('type'), default
=determine_ext(video_src_url
))
812 fmts
, subs
= self
._extract
_m
3u8_formats
_and
_subtitles
(
813 video_src_url
, video_id
, 'mp4', m3u8_id
='hls', fatal
=is_live
,
814 live
=is_live
, errnote
='No HLS formats found')
816 self
._merge
_subtitles
(subs
, target
=subtitles
)
819 'url': video_src_url
,
820 'format_id': f
'https-{ext}',
822 'width': int_or_none(video
.get('width')),
823 'height': int_or_none(video
.get('height')),
827 self
.raise_no_formats('No video content found in webpage', expected
=True)
830 self
._request
_webpage
(
831 HEADRequest(formats
[0]['url']), video_id
, note
='Checking live status')
832 except ExtractorError
:
833 raise UserNotLive(video_id
=channel
)
838 'channel_id': nbc_data
.get('callLetters'),
839 'uploader': nbc_data
.get('on_air_name'),
841 'subtitles': subtitles
,