]>
jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/nbc.py
5 import xml
. etree
. ElementTree
7 from . adobepass
import AdobePassIE
8 from . common
import InfoExtractor
9 from . theplatform
import ThePlatformIE
, default_ns
10 from .. networking
import HEADRequest
33 class NBCIE ( ThePlatformIE
): # XXX: Do not subclass from concrete IE
34 _VALID_URL
= r
'https?(?P<permalink>://(?:www\.)?nbc\.com/(?:classic-tv/)?[^/]+/video/[^/]+/(?P<id>(?:NBCE|n)?\d+))'
38 'url' : 'http://www.nbc.com/the-tonight-show/video/jimmy-fallon-surprises-fans-at-ben-jerrys/2848237' ,
42 'title' : 'Jimmy Fallon Surprises Fans at Ben & Jerry \' s' ,
43 'description' : 'Jimmy gives out free scoops of his new "Tonight Dough" ice cream flavor by surprising customers at the Ben & Jerry \' s scoop shop.' ,
44 'timestamp' : 1424246400 ,
45 'upload_date' : '20150218' ,
46 'uploader' : 'NBCU-COM' ,
47 'episode' : 'Jimmy Fallon Surprises Fans at Ben & Jerry \' s' ,
51 'series' : 'Tonight Show: Jimmy Fallon' ,
53 'chapters' : 'count:1' ,
55 'thumbnail' : r
're:https?://.+\.jpg' ,
56 'categories' : [ 'Series/The Tonight Show Starring Jimmy Fallon' ],
57 'media_type' : 'Full Episode' ,
60 'skip_download' : 'm3u8' ,
64 'url' : 'http://www.nbc.com/saturday-night-live/video/star-wars-teaser/2832821' ,
68 'title' : 'Star Wars Teaser' ,
69 'description' : 'md5:0b40f9cbde5b671a7ff62fceccc4f442' ,
70 'timestamp' : 1417852800 ,
71 'upload_date' : '20141206' ,
72 'uploader' : 'NBCU-COM' ,
74 'skip' : 'page not found' ,
77 # HLS streams require the 'hdnea3' cookie
78 'url' : 'http://www.nbc.com/Kings/video/goliath/n1806' ,
80 'id' : '101528f5a9e8127b107e98c5e6ce4638' ,
83 'description' : 'When an unknown soldier saves the life of the King \' s son in battle, he \' s thrust into the limelight and politics of the kingdom.' ,
84 'timestamp' : 1237100400 ,
85 'upload_date' : '20090315' ,
86 'uploader' : 'NBCU-COM' ,
88 'skip' : 'page not found' ,
91 # manifest url does not have extension
92 'url' : 'https://www.nbc.com/the-golden-globe-awards/video/oprah-winfrey-receives-cecil-b-de-mille-award-at-the-2018-golden-globes/3646439' ,
96 'title' : 'Oprah Winfrey Receives Cecil B. de Mille Award at the 2018 Golden Globes' ,
97 'episode' : 'Oprah Winfrey Receives Cecil B. de Mille Award at the 2018 Golden Globes' ,
99 'season' : 'Season 75' ,
101 'series' : 'The Golden Globe Awards' ,
102 'description' : 'Oprah Winfrey receives the Cecil B. de Mille Award at the 75th Annual Golden Globe Awards.' ,
103 'uploader' : 'NBCU-COM' ,
104 'upload_date' : '20180107' ,
105 'timestamp' : 1515312000 ,
108 'thumbnail' : r
're:https?://.+\.jpg' ,
109 'chapters' : 'count:1' ,
112 'skip_download' : 'm3u8' ,
116 # new video_id format
117 'url' : 'https://www.nbc.com/quantum-leap/video/bens-first-leap-nbcs-quantum-leap/NBCE125189978' ,
119 'id' : 'NBCE125189978' ,
121 'title' : 'Ben \' s First Leap | NBC \' s Quantum Leap' ,
122 'description' : 'md5:a82762449b7ec4bb83291a7b355ebf8e' ,
123 'uploader' : 'NBCU-COM' ,
124 'series' : 'Quantum Leap' ,
125 'season' : 'Season 1' ,
127 'episode' : 'Ben \' s First Leap | NBC \' s Quantum Leap' ,
131 'timestamp' : 1663956155 ,
132 'upload_date' : '20220923' ,
135 'thumbnail' : r
're:https?://.+\.jpg' ,
136 'categories' : [ 'Series/Quantum Leap 2022' ],
137 'media_type' : 'Highlight' ,
140 'skip_download' : 'm3u8' ,
144 'url' : 'https://www.nbc.com/classic-tv/charles-in-charge/video/charles-in-charge-pilot/n3310' ,
145 'only_matching' : True ,
148 # Percent escaped url
149 'url' : 'https://www.nbc.com/up-all-night/video/day-after-valentine%27s-day/n2189' ,
150 'only_matching' : True ,
154 def _real_extract ( self
, url
):
155 permalink
, video_id
= self
._ match
_ valid
_u rl
( url
). groups ()
156 permalink
= 'http' + urllib
. parse
. unquote ( permalink
)
157 video_data
= self
._ download
_ json
(
158 'https://friendship.nbc.co/v2/graphql' , video_id
, query
={
159 'query' : '''query bonanzaPage(
160 $app: NBCUBrands! = nbc
163 $platform: SupportedPlatforms! = web
164 $type: EntityPageType! = VIDEO
176 ... on VideoPageData {
192 'variables' : json
. dumps ({
197 })[ 'data' ][ 'bonanzaPage' ][ 'metadata' ]
201 'switch' : 'HLSServiceSecure' ,
203 video_id
= video_data
[ 'mpxGuid' ]
204 tp_path
= 'NnzsPC/media/guid/ {}/{} ' . format ( video_data
. get ( 'mpxAccountId' ) or '2410887629' , video_id
)
205 tpm
= self
._ download
_ theplatform
_ metadata
( tp_path
, video_id
)
206 title
= tpm
. get ( 'title' ) or video_data
. get ( 'secondaryTitle' )
207 if video_data
. get ( 'locked' ):
208 resource
= self
._ get
_ mvpd
_ resource
(
209 video_data
. get ( 'resourceId' ) or 'nbcentertainment' ,
210 title
, video_id
, video_data
. get ( 'rating' ))
211 query
[ 'auth' ] = self
._ extract
_ mvpd
_ auth
(
212 url
, video_id
, 'nbcentertainment' , resource
)
213 theplatform_url
= smuggle_url ( update_url_query (
214 'http://link.theplatform.com/s/NnzsPC/media/guid/ {}/{} ' . format ( video_data
. get ( 'mpxAccountId' ) or '2410887629' , video_id
),
215 query
), {'force_smil_url': True}
)
217 # Empty string or 0 can be valid values for these. So the check must be `is None`
218 description
= video_data
. get ( 'description' )
219 if description
is None :
220 description
= tpm
. get ( 'description' )
221 episode_number
= int_or_none ( video_data
. get ( 'episodeNumber' ))
222 if episode_number
is None :
223 episode_number
= int_or_none ( tpm
. get ( 'nbcu$airOrder' ))
224 rating
= video_data
. get ( 'rating' )
226 try_get ( tpm
, lambda x
: x
[ 'ratings' ][ 0 ][ 'rating' ])
227 season_number
= int_or_none ( video_data
. get ( 'seasonNumber' ))
228 if season_number
is None :
229 season_number
= int_or_none ( tpm
. get ( 'nbcu$seasonNumber' ))
230 series
= video_data
. get ( 'seriesShortTitle' )
232 series
= tpm
. get ( 'nbcu$seriesShortTitle' )
233 tags
= video_data
. get ( 'keywords' )
234 if tags
is None or len ( tags
) == 0 :
235 tags
= tpm
. get ( 'keywords' )
238 '_type' : 'url_transparent' ,
239 'age_limit' : parse_age_limit ( rating
),
240 'description' : description
,
242 'episode_number' : episode_number
,
244 'ie_key' : 'ThePlatform' ,
245 'season_number' : season_number
,
249 'url' : theplatform_url
,
253 class NBCSportsVPlayerIE ( InfoExtractor
):
254 _VALID_URL_BASE
= r
'https?://(?:vplayer\.nbcsports\.com|(?:www\.)?nbcsports\.com/vplayer)/'
255 _VALID_URL
= _VALID_URL_BASE
+ r
'(?:[^/]+/)+(?P<id>[0-9a-zA-Z_]+)'
256 _EMBED_REGEX
= [ rf
'(?:iframe[^>]+|var video|div[^>]+data-(?:mpx-)?)[sS]rc\s?=\s?"(?P<url> {_VALID_URL_BASE} [^ \" ]+)' ]
259 'url' : 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_embed/select/9CsDKds0kvHI' ,
261 'id' : '9CsDKds0kvHI' ,
263 'description' : 'md5:df390f70a9ba7c95ff1daace988f0d8d' ,
264 'title' : 'Tyler Kalinoski hits buzzer-beater to lift Davidson' ,
265 'timestamp' : 1426270238 ,
266 'upload_date' : '20150313' ,
267 'uploader' : 'NBCU-SPORTS' ,
270 'thumbnail' : r
're:^https?://.*\.jpg$' ,
273 'url' : 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_embed/select/media/PEgOtlNcC_y2' ,
274 'only_matching' : True ,
276 'url' : 'https://www.nbcsports.com/vplayer/p/BxmELC/nbcsports/select/PHJSaFWbrTY9?form=html&autoPlay=true' ,
277 'only_matching' : True ,
def _real_extract(self, url):
    """Resolve a vplayer page to the release URL embedded in its markup.

    The ID is taken from ``url`` via ``self._match_id``, the page is fetched
    with ``self._download_webpage``, and the value of the
    ``tp:releaseUrl="..."`` attribute is pulled out of the page.  That value
    is returned wrapped by ``self.url_result`` with the ``'ThePlatform'`` key.
    """
    embed_id = self._match_id(url)
    page_html = self._download_webpage(url, embed_id)
    # Non-greedy capture: stop at the first closing double quote.
    release_url = self._html_search_regex(
        r'tp:releaseUrl="(.+?)"', page_html, 'url')
    return self.url_result(release_url, 'ThePlatform')
287 class NBCSportsIE ( InfoExtractor
):
288 _VALID_URL
= r
'https?://(?:www\.)?nbcsports\.com//?(?!vplayer/)(?:[^/]+/)+(?P<id>[0-9a-z-]+)'
292 'url' : 'https://www.nbcsports.com/watch/nfl/profootballtalk/pft-pm/unpacking-addisons-reckless-driving-citation' ,
294 'id' : 'PHJSaFWbrTY9' ,
296 'title' : 'Tom Izzo, Michigan St. has \' so much respect \' for Duke' ,
297 'description' : 'md5:ecb459c9d59e0766ac9c7d5d0eda8113' ,
298 'uploader' : 'NBCU-SPORTS' ,
299 'upload_date' : '20150330' ,
300 'timestamp' : 1427726529 ,
302 'thumbnail' : 'https://hdliveextra-a.akamaihd.net/HD/image_sports/NBCU_Sports_Group_-_nbcsports/253/303/izzodps.jpg' ,
307 'url' : 'https://www.nbcsports.com/philadelphia/philadelphia-phillies/bruce-bochy-hector-neris-hes-idiot' ,
308 'only_matching' : True ,
311 'url' : 'https://www.nbcsports.com/boston/video/report-card-pats-secondary-no-match-josh-allen' ,
312 'only_matching' : True ,
def _real_extract(self, url):
    """Hand an nbcsports.com page over to the vplayer extractor.

    The page for ``url`` is fetched with ``self._download_webpage`` and the
    embedded player URL is obtained from
    ``NBCSportsVPlayerIE._extract_url``.  The result is returned through
    ``self.url_result`` with the ``'NBCSportsVPlayer'`` key.
    """
    page_id = self._match_id(url)
    page_html = self._download_webpage(url, page_id)
    player_url = NBCSportsVPlayerIE._extract_url(page_html)
    return self.url_result(player_url, 'NBCSportsVPlayer')
322 class NBCSportsStreamIE ( AdobePassIE
):
323 _VALID_URL
= r
'https?://stream\.nbcsports\.com/.+?\bpid=(?P<id>\d+)'
325 'url' : 'http://stream.nbcsports.com/nbcsn/generic?pid=206559' ,
329 'title' : 'Amgen Tour of California Women \' s Recap' ,
330 'description' : 'md5:66520066b3b5281ada7698d0ea2aa894' ,
334 'skip_download' : True ,
336 'skip' : 'Requires Adobe Pass Authentication' ,
339 def _real_extract ( self
, url
):
340 video_id
= self
._ match
_ id
( url
)
341 live_source
= self
._ download
_ json
(
342 f
'http://stream.nbcsports.com/data/live_sources_ {video_id} .json' ,
344 video_source
= live_source
[ 'videoSources' ][ 0 ]
345 title
= video_source
[ 'title' ]
347 for k
in ( 'source' , 'msl4source' , 'iossource' , 'hlsv4' ):
349 source_url
= video_source
. get ( sk
) or video_source
. get ( sk
+ 'Alt' )
353 source_url
= video_source
[ 'ottStreamUrl' ]
354 is_live
= video_source
. get ( 'type' ) == 'live' or video_source
. get ( 'status' ) == 'Live'
355 resource
= self
._ get
_ mvpd
_ resource
( 'nbcsports' , title
, video_id
, '' )
356 token
= self
._ extract
_ mvpd
_ auth
( url
, video_id
, 'nbcsports' , resource
)
357 tokenized_url
= self
._ download
_ json
(
358 'https://token.playmakerservices.com/cdn' ,
359 video_id
, data
= json
. dumps ({
360 'requestorId' : 'nbcsports' ,
362 'application' : 'NBCSports' ,
364 'platform' : 'desktop' ,
366 'url' : video_source
[ 'sourceUrl' ],
367 'token' : base64
. b64encode ( token
. encode ()). decode (),
368 'resourceId' : base64
. b64encode ( resource
. encode ()). decode (),
369 }). encode ())[ 'tokenizedUrl' ]
370 formats
= self
._ extract
_ m
3u8_ formats
( tokenized_url
, video_id
, 'mp4' )
374 'description' : live_source
. get ( 'description' ),
380 class NBCNewsIE ( ThePlatformIE
): # XXX: Do not subclass from concrete IE
381 _VALID_URL
= r
'(?x)https?://(?:www\.)?(?:nbcnews|today|msnbc)\.com/([^/]+/)*(?:.*-)?(?P<id>[^/?]+)'
382 _EMBED_REGEX
= [ r
'<iframe[^>]+src=(["\' ])( ?P
< url
>( ?
: https?
:) ?
// www\
. nbcnews\
. com
/ widget
/ video
- embed
/[ ^
" \' ]+)\1']
386 'url': 'http://www.nbcnews.com/watch/nbcnews-com/how-twitter-reacted-to-the-snowden-interview-269389891880',
387 'md5': 'fb3dcd2d7b1dd9804305fa2fc95ab610', # md5 tends to fluctuate
389 'id': '269389891880',
391 'title': 'How Twitter Reacted To The Snowden Interview',
392 'description': 'md5:65a0bd5d76fe114f3c2727aa3a81fe64',
393 'timestamp': 1401363060,
394 'upload_date': '20140529',
396 'thumbnail': 'https://media-cldnry.s-nbcnews.com/image/upload/MSNBC/Components/Video/140529/p_tweet_snow_140529.jpg',
400 'url': 'http://www.nbcnews.com/feature/dateline-full-episodes/full-episode-family-business-n285156',
401 'md5': 'fdbf39ab73a72df5896b6234ff98518a',
403 'id': '529953347624',
405 'title': 'FULL EPISODE: Family Business',
406 'description': 'md5:757988edbaae9d7be1d585eb5d55cc04',
408 'skip': 'This page is unavailable.',
411 'url': 'http://www.nbcnews.com/nightly-news/video/nightly-news-with-brian-williams-full-broadcast-february-4-394064451844',
412 'md5': '40d0e48c68896359c80372306ece0fc3',
414 'id': '394064451844',
416 'title': 'Nightly News with Brian Williams Full Broadcast (February 4)',
417 'description': 'md5:1c10c1eccbe84a26e5debb4381e2d3c5',
418 'timestamp': 1423104900,
419 'upload_date': '20150205',
421 'thumbnail': 'https://media-cldnry.s-nbcnews.com/image/upload/MSNBC/Components/Video/__NEW/nn_netcast_150204.jpg',
425 'url': 'http://www.nbcnews.com/business/autos/volkswagen-11-million-vehicles-could-have-suspect-software-emissions-scandal-n431456',
426 'md5': 'ffb59bcf0733dc3c7f0ace907f5e3939',
430 'title': " Volkswagen U
. S
. Chief
: We
'Totally Screwed Up' ",
431 'description': 'md5:d22d1281a24f22ea0880741bb4dd6301',
432 'upload_date': '20150922',
433 'timestamp': 1442917800,
435 'thumbnail': 'https://media-cldnry.s-nbcnews.com/image/upload/MSNBC/Components/Video/__NEW/x_lon_vwhorn_150922.jpg',
439 'url': 'http://www.today.com/video/see-the-aurora-borealis-from-space-in-stunning-new-nasa-video-669831235788',
440 'md5': '693d1fa21d23afcc9b04c66b227ed9ff',
442 'id': '669831235788',
444 'title': 'See the aurora borealis from space in stunning new NASA video',
445 'description': 'md5:74752b7358afb99939c5f8bb2d1d04b1',
446 'upload_date': '20160420',
447 'timestamp': 1461152093,
449 'thumbnail': 'https://media-cldnry.s-nbcnews.com/image/upload/MSNBC/Components/Video/201604/2016-04-20T11-35-09-133Z--1280x720.jpg',
453 'url': 'http://www.msnbc.com/all-in-with-chris-hayes/watch/the-chaotic-gop-immigration-vote-314487875924',
454 'md5': '6d236bf4f3dddc226633ce6e2c3f814d',
456 'id': '314487875924',
458 'title': 'The chaotic GOP immigration vote',
459 'description': 'The Republican House votes on a border bill that has no chance of getting through the Senate or signed by the President and is drawing criticism from all sides.',
460 'thumbnail': r're:^https?://.*\.jpg$',
461 'timestamp': 1406937606,
462 'upload_date': '20140802',
467 'url': 'http://www.nbcnews.com/watch/dateline/full-episode--deadly-betrayal-386250819952',
468 'only_matching': True,
471 # From http://www.vulture.com/2016/06/letterman-couldnt-care-less-about-late-night.html
472 'url': 'http://www.nbcnews.com/widget/video-embed/701714499682',
473 'only_matching': True,
477 def _real_extract(self, url):
478 video_id = self._match_id(url)
479 webpage = self._download_webpage(url, video_id)
481 data = self._search_nextjs_data(webpage, video_id)['props']['initialState']
482 video_data = try_get(data, lambda x: x['video']['current'], dict)
484 video_data = data['article']['content'][0]['primaryMedia']['video']
485 title = video_data['headline']['primary']
488 for va in video_data.get('videoAssets', []):
489 public_url = va.get('publicUrl')
492 if '://link.theplatform.com/' in public_url:
493 public_url = update_url_query(public_url, {'format': 'redirect'} )
494 format_id = va.get('format')
495 if format_id == 'M3U':
496 formats.extend(self._extract_m3u8_formats(
497 public_url, video_id, 'mp4', 'm3u8_native',
498 m3u8_id=format_id, fatal=False))
500 tbr = int_or_none(va.get('bitrate'), 1000)
502 format_id += f'- {tbr} '
504 'format_id': format_id,
506 'width': int_or_none(va.get('width')),
507 'height': int_or_none(va.get('height')),
513 closed_captioning = video_data.get('closedCaptioning')
514 if closed_captioning:
515 for cc_url in closed_captioning.values():
518 subtitles.setdefault('en', []).append({
525 'description': try_get(video_data, lambda x: x['description']['primary']),
526 'thumbnail': try_get(video_data, lambda x: x['primaryImage']['url']['primary']),
527 'duration': parse_duration(video_data.get('duration')),
528 'timestamp': unified_timestamp(video_data.get('datePublished')),
530 'subtitles': subtitles,
534 class NBCOlympicsIE(InfoExtractor):
535 IE_NAME = 'nbcolympics'
536 _VALID_URL = r'https?://www\.nbcolympics\.com/videos?/(?P<id>[0-9a-z-]+)'
539 # Geo-restricted to US
540 'url': 'http://www.nbcolympics.com/video/justin-roses-son-leo-was-tears-after-his-dad-won-gold',
541 'md5': '54fecf846d05429fbaa18af557ee523a',
543 'id': 'WjTBzDXx5AUq',
544 'display_id': 'justin-roses-son-leo-was-tears-after-his-dad-won-gold',
546 'title': 'Rose \' s son Leo was in tears after his dad won gold',
547 'description': 'Olympic gold medalist Justin Rose gets emotional talking to the impact his win in men \' s golf has already had on his children.',
548 'timestamp': 1471274964,
549 'upload_date': '20160815',
550 'uploader': 'NBCU-SPORTS',
552 'skip': '404 Not Found',
555 def _real_extract(self, url):
556 display_id = self._match_id(url)
558 webpage = self._download_webpage(url, display_id)
561 drupal_settings = self._parse_json(self._search_regex(
562 r'jQuery\.extend\(Drupal\.settings\s*,\s*( {.+?} )\);',
563 webpage, 'drupal settings'), display_id)
565 iframe_url = drupal_settings['vod']['iframe_url']
566 theplatform_url = iframe_url.replace(
567 'vplayer.nbcolympics.com', 'player.theplatform.com')
568 except RegexNotFoundError:
569 theplatform_url = self._search_regex(
570 r" ([ \" '])embedUrl\1: *([ \" ' ])( ?P
< embedUrl
>.+) \
2 ",
571 webpage, 'embedding URL', group='embedUrl')
574 '_type': 'url_transparent',
575 'url': theplatform_url,
576 'ie_key': ThePlatformIE.ie_key(),
577 'display_id': display_id,
581 class NBCOlympicsStreamIE(AdobePassIE):
582 IE_NAME = 'nbcolympics:stream'
583 _VALID_URL = r'https?://stream\.nbcolympics\.com/(?P<id>[0-9a-z-]+)'
586 'note': 'Tokenized m3u8 source URL',
587 'url': 'https://stream.nbcolympics.com/womens-soccer-group-round-11',
591 'title': r" re
: Women
's Group Stage - Netherlands vs\. Brazil [0-9] {4} -[0-9] {2} -[0-9] {2} [0-9] {2} :[0-9] {2} $",
594 ' skip_download
': ' m3u8
',
596 ' skip
': ' Livestream
',
598 ' note
': ' Plain m3u8 source URL
',
599 ' url
': ' https
:// stream
. nbcolympics
. com
/ gymnastics
- event
- finals
- mens
- floor
- pommel
- horse
- womens
- vault
- bars
',
603 ' title
': r' re
: Event Finals
: M Floor
, W Vault
, M Pommel
, W Uneven Bars
[ 0 - 9 ] {4}
-[ 0 - 9 ] {2}
-[ 0 - 9 ] {2}
[ 0 - 9 ] {2}
:[ 0 - 9 ] {2}$
',
606 ' skip_download
': ' m3u8
',
608 ' skip
': ' Livestream
',
612 def _real_extract(self, url):
613 display_id = self._match_id(url)
614 webpage = self._download_webpage(url, display_id)
615 pid = self._search_regex(r' pid\s
*= \s
*( \d
+); ', webpage, ' pid
')
617 event_config = self._download_json(
618 f' http
:// stream
. nbcolympics
. com
/ data
/ event_config_{pid}
. json
',
619 pid, ' Downloading event config
')[' eventConfig
']
621 title = event_config[' eventTitle
']
622 is_live = {'live': True, 'replay': False} .get(event_config.get(' eventStatus
'))
624 source_url = self._download_json(
625 f' https
:// api
- leap
. nbcsports
. com
/ feeds
/ assets
/ {pid}?application
= NBCOlympics
& platform
= desktop
& format
= nbc
- player
& env
= staging
',
626 pid, ' Downloading leap config
',
627 )[' videoSources
'][0][' cdnSources
'][' primary
'][0][' sourceUrl
']
629 if event_config.get(' cdnToken
'):
630 ap_resource = self._get_mvpd_resource(
631 event_config.get(' resourceId
', ' NBCOlympics
'),
632 re.sub(r' [ ^\w\d
]+ ', ' ', event_config[' eventTitle
']), pid,
633 event_config.get(' ratingId
', ' NO VALUE
'))
634 media_token = self._extract_mvpd_auth(url, pid, event_config.get(' requestorId
', ' NBCOlympics
'), ap_resource)
636 source_url = self._download_json(
637 ' https
:// tokens
. playmakerservices
. com
/ ', pid, ' Retrieving tokenized URL
',
639 ' application
': ' NBCSports
',
640 ' authentication
- type ': ' adobe
- pass ',
643 ' platform
': ' desktop
',
644 ' requestorId
': ' NBCOlympics
',
645 ' resourceId
': base64.b64encode(ap_resource.encode()).decode(),
646 ' token
': base64.b64encode(media_token.encode()).decode(),
650 )[' akamai
'][0][' tokenizedUrl
']
652 formats = self._extract_m3u8_formats(source_url, pid, ' mp4
', live=is_live)
654 # -http_seekable requires ffmpeg 4.3+ but it doesn't seem possible to
655 # download with ffmpeg without this option
656 f[' downloader_options
'] = {'ffmpeg_args': ['-seekable', '0', '-http_seekable', '0', '-icy', '0']}
660 ' display_id
': display_id,
667 class NBCStationsIE(InfoExtractor):
668 _DOMAIN_RE = ' |
'.join(map(re.escape, (
669 ' nbcbayarea
', ' nbcboston
', ' nbcchicago
', ' nbcconnecticut
', ' nbcdfw
', ' nbclosangeles
',
670 ' nbcmiami
', ' nbcnewyork
', ' nbcphiladelphia
', ' nbcsandiego
', ' nbcwashington
',
671 ' necn
', ' telemundo52
', ' telemundoarizona
', ' telemundochicago
', ' telemundonuevainglaterra
',
673 _VALID_URL = rf' https?
://( ?
: www\
.) ?
( ?P
< site
> {_DOMAIN_RE}
) \
. com
/( ?
:[ ^
/ ?
#]+/)*(?P<id>[^/?#]+)/?(?:$|[#?])'
676 'url' : 'https://www.nbclosangeles.com/news/local/large-structure-fire-in-downtown-la-prompts-smoke-odor-advisory/2968618/' ,
680 'title' : 'Large Structure Fire in Downtown LA Prompts Smoke Odor Advisory' ,
681 'description' : 'md5:417ed3c2d91fe9d301e6db7b0942f182' ,
683 'timestamp' : 1661135892 ,
684 'upload_date' : '20220822' ,
686 'channel_id' : 'KNBC' ,
687 'channel' : 'nbclosangeles' ,
690 'skip_download' : 'm3u8' ,
693 'url' : 'https://www.telemundoarizona.com/responde/huracan-complica-reembolso-para-televidente-de-tucson/2247002/' ,
697 'title' : 'Huracán complica que televidente de Tucson reciba reembolso' ,
698 'description' : 'md5:af298dc73aab74d4fca6abfb12acb6cf' ,
700 'timestamp' : 1660886507 ,
701 'upload_date' : '20220819' ,
702 'uploader' : 'Telemundo Arizona' ,
703 'channel_id' : 'KTAZ' ,
704 'channel' : 'telemundoarizona' ,
707 'skip_download' : 'm3u8' ,
711 'url' : 'https://www.nbcboston.com/weather/video-weather/highs-near-freezing-in-boston-on-wednesday/2961135/' ,
712 'md5' : '9bf8c41dc7abbb75b1a44f1491a4cc85' ,
716 'title' : 'Highs Near Freezing in Boston on Wednesday' ,
717 'description' : 'md5:3ec486609a926c99f00a3512e6c0e85b' ,
719 'timestamp' : 1675268656 ,
720 'upload_date' : '20230201' ,
722 'channel_id' : 'WBTS' ,
723 'channel' : 'nbcboston' ,
735 def _real_extract ( self
, url
):
736 channel
, video_id
= self
._ match
_ valid
_u rl
( url
). group ( 'site' , 'id' )
737 webpage
= self
._ download
_ webpage
( url
, video_id
)
739 nbc_data
= self
._ search
_ json
(
740 r
'<script>\s*var\s+nbc\s*=' , webpage
, 'NBC JSON data' , video_id
)
741 pdk_acct
= nbc_data
. get ( 'pdkAcct' ) or 'Yh1nAC'
742 fw_ssid
= traverse_obj ( nbc_data
, ( 'video' , 'fwSSID' ))
744 video_data
= self
._ search
_ json
(
745 r
'data-videos="\[' , webpage
, 'video data' , video_id
, default
={}, transform_source
= unescapeHTML
)
746 video_data
. update ( self
._ search
_ json
(
747 r
'data-meta="' , webpage
, 'metadata' , video_id
, default
={}, transform_source
= unescapeHTML
))
749 raise ExtractorError ( 'No video metadata found in webpage' , expected
= True )
751 info
, formats
= {}, []
752 is_live
= int_or_none ( video_data
. get ( 'mpx_is_livestream' )) == 1
754 'formats' : 'MPEG-DASH none,M3U none,MPEG-DASH none,MPEG4,MP3' ,
756 'fwsitesection' : fw_ssid
,
757 'fwNetworkID' : traverse_obj ( nbc_data
, ( 'video' , 'fwNetworkID' ), default
= '382114' ),
758 'pprofile' : 'ots_desktop_html' ,
759 'sensitive' : 'false' ,
762 'mode' : 'LIVE' if is_live
else 'on-demand' ,
769 player_id
= traverse_obj ( video_data
, (( None , ( 'video' , 'meta' )), (
770 'mpx_m3upid' , 'mpx_pid' , 'pid_streaming_web_medium' )), get_all
= False )
771 info
[ 'title' ] = f
' {channel} livestream'
774 player_id
= traverse_obj ( video_data
, (
775 ( None , ( 'video' , 'meta' )), ( 'pid_streaming_web_high' , 'mpx_pid' )), get_all
= False )
777 date_string
= traverse_obj ( video_data
, 'date_string' , 'date_gmt' )
779 date_string
= self
._ search
_ regex
(
780 r
'datetime="([^"]+)"' , date_string
, 'date string' , fatal
= False )
782 date_string
= traverse_obj (
783 nbc_data
, ( 'dataLayer' , 'adobe' , ( 'prop70' , 'eVar70' , 'eVar59' )), get_all
= False )
785 video_url
= traverse_obj ( video_data
, (( None , ( 'video' , 'meta' )), 'mp4_url' ), get_all
= False )
787 ext
= determine_ext ( video_url
)
788 height
= self
._ search
_ regex
( r
'\d+-(\d+)p' , url_basename ( video_url
), 'height' , default
= None )
792 'width' : int_or_none ( self
._ RESOLUTIONS
. get ( height
)),
793 'height' : int_or_none ( height
),
794 'format_id' : f
'http- {ext} ' ,
798 'title' : video_data
. get ( 'title' ) or traverse_obj ( nbc_data
, (
799 'dataLayer' , ( None , 'adobe' ), ( 'contenttitle' , 'title' , 'prop22' )), get_all
= False ),
801 traverse_obj ( video_data
, 'summary' , 'excerpt' , 'video_hero_text' )
802 or clean_html ( traverse_obj ( nbc_data
, ( 'dataLayer' , 'summary' ))),
803 'timestamp' : unified_timestamp ( date_string
),
807 if player_id
and fw_ssid
:
808 smil
= self
._ download
_ xml
(
809 f
'https://link.theplatform.com/s/ {pdk_acct} / {player_id} ' , video_id
,
810 note
= 'Downloading SMIL data' , query
= query
, fatal
= is_live
)
811 if not isinstance ( smil
, xml
. etree
. ElementTree
. Element
):
813 subtitles
= self
._ parse
_ smil
_ subtitles
( smil
, default_ns
) if smil
is not None else {}
814 for video
in smil
. findall ( self
._ xpath
_ ns
( './/video' , default_ns
)) if smil
is not None else []:
815 info
[ 'duration' ] = float_or_none ( remove_end ( video
. get ( 'dur' ), 'ms' ), 1000 )
816 video_src_url
= video
. get ( 'src' )
817 ext
= mimetype2ext ( video
. get ( 'type' ), default
= determine_ext ( video_src_url
))
819 fmts
, subs
= self
._ extract
_ m
3u8_ formats
_ and
_ subtitles
(
820 video_src_url
, video_id
, 'mp4' , m3u8_id
= 'hls' , fatal
= is_live
,
821 live
= is_live
, errnote
= 'No HLS formats found' )
823 self
._ merge
_ subtitles
( subs
, target
= subtitles
)
826 'url' : video_src_url
,
827 'format_id' : f
'https- {ext} ' ,
829 'width' : int_or_none ( video
. get ( 'width' )),
830 'height' : int_or_none ( video
. get ( 'height' )),
834 self
. raise_no_formats ( 'No video content found in webpage' , expected
= True )
837 self
._ request
_ webpage
(
838 HEADRequest ( formats
[ 0 ][ 'url' ]), video_id
, note
= 'Checking live status' )
839 except ExtractorError
:
840 raise UserNotLive ( video_id
= channel
)
845 'channel_id' : nbc_data
. get ( 'callLetters' ),
846 'uploader' : nbc_data
. get ( 'on_air_name' ),
848 'subtitles' : subtitles
,