4 import xml
.etree
.ElementTree
6 from .common
import InfoExtractor
7 from .theplatform
import ThePlatformIE
, default_ns
8 from .adobepass
import AdobePassIE
9 from ..compat
import compat_urllib_parse_unquote
10 from ..networking
import HEADRequest
33 class NBCIE(ThePlatformIE
): # XXX: Do not subclass from concrete IE
34 _VALID_URL
= r
'https?(?P<permalink>://(?:www\.)?nbc\.com/(?:classic-tv/)?[^/]+/video/[^/]+/(?P<id>(?:NBCE|n)?\d+))'
38 'url': 'http://www.nbc.com/the-tonight-show/video/jimmy-fallon-surprises-fans-at-ben-jerrys/2848237',
42 'title': 'Jimmy Fallon Surprises Fans at Ben & Jerry\'s',
43 'description': 'Jimmy gives out free scoops of his new "Tonight Dough" ice cream flavor by surprising customers at the Ben & Jerry\'s scoop shop.',
44 'timestamp': 1424246400,
45 'upload_date': '20150218',
46 'uploader': 'NBCU-COM',
47 'episode': 'Jimmy Fallon Surprises Fans at Ben & Jerry\'s',
51 'series': 'Tonight Show: Jimmy Fallon',
53 'chapters': 'count:1',
55 'thumbnail': r
're:https?://.+\.jpg',
56 'categories': ['Series/The Tonight Show Starring Jimmy Fallon'],
57 'media_type': 'Full Episode',
60 'skip_download': 'm3u8',
64 'url': 'http://www.nbc.com/saturday-night-live/video/star-wars-teaser/2832821',
68 'title': 'Star Wars Teaser',
69 'description': 'md5:0b40f9cbde5b671a7ff62fceccc4f442',
70 'timestamp': 1417852800,
71 'upload_date': '20141206',
72 'uploader': 'NBCU-COM',
74 'skip': 'page not found',
77 # HLS streams require the 'hdnea3' cookie
78 'url': 'http://www.nbc.com/Kings/video/goliath/n1806',
80 'id': '101528f5a9e8127b107e98c5e6ce4638',
83 'description': 'When an unknown soldier saves the life of the King\'s son in battle, he\'s thrust into the limelight and politics of the kingdom.',
84 'timestamp': 1237100400,
85 'upload_date': '20090315',
86 'uploader': 'NBCU-COM',
88 'skip': 'page not found',
91 # manifest url does not have extension
92 'url': 'https://www.nbc.com/the-golden-globe-awards/video/oprah-winfrey-receives-cecil-b-de-mille-award-at-the-2018-golden-globes/3646439',
96 'title': 'Oprah Winfrey Receives Cecil B. de Mille Award at the 2018 Golden Globes',
97 'episode': 'Oprah Winfrey Receives Cecil B. de Mille Award at the 2018 Golden Globes',
99 'season': 'Season 75',
101 'series': 'The Golden Globe Awards',
102 'description': 'Oprah Winfrey receives the Cecil B. de Mille Award at the 75th Annual Golden Globe Awards.',
103 'uploader': 'NBCU-COM',
104 'upload_date': '20180107',
105 'timestamp': 1515312000,
108 'thumbnail': r
're:https?://.+\.jpg',
109 'chapters': 'count:1',
112 'skip_download': 'm3u8',
116 # new video_id format
117 'url': 'https://www.nbc.com/quantum-leap/video/bens-first-leap-nbcs-quantum-leap/NBCE125189978',
119 'id': 'NBCE125189978',
121 'title': 'Ben\'s First Leap | NBC\'s Quantum Leap',
122 'description': 'md5:a82762449b7ec4bb83291a7b355ebf8e',
123 'uploader': 'NBCU-COM',
124 'series': 'Quantum Leap',
125 'season': 'Season 1',
127 'episode': 'Ben\'s First Leap | NBC\'s Quantum Leap',
131 'timestamp': 1663956155,
132 'upload_date': '20220923',
135 'thumbnail': r
're:https?://.+\.jpg',
136 'categories': ['Series/Quantum Leap 2022'],
137 'media_type': 'Highlight',
140 'skip_download': 'm3u8',
144 'url': 'https://www.nbc.com/classic-tv/charles-in-charge/video/charles-in-charge-pilot/n3310',
145 'only_matching': True,
148 # Percent escaped url
149 'url': 'https://www.nbc.com/up-all-night/video/day-after-valentine%27s-day/n2189',
150 'only_matching': True,
154 def _real_extract(self
, url
):
155 permalink
, video_id
= self
._match
_valid
_url
(url
).groups()
156 permalink
= 'http' + compat_urllib_parse_unquote(permalink
)
157 video_data
= self
._download
_json
(
158 'https://friendship.nbc.co/v2/graphql', video_id
, query
={
159 'query': '''query bonanzaPage(
160 $app: NBCUBrands! = nbc
163 $platform: SupportedPlatforms! = web
164 $type: EntityPageType! = VIDEO
176 ... on VideoPageData {
192 'variables': json
.dumps({
197 })['data']['bonanzaPage']['metadata']
201 'switch': 'HLSServiceSecure',
203 video_id
= video_data
['mpxGuid']
204 tp_path
= 'NnzsPC/media/guid/%s/%s' % (video_data
.get('mpxAccountId') or '2410887629', video_id
)
205 tpm
= self
._download
_theplatform
_metadata
(tp_path
, video_id
)
206 title
= tpm
.get('title') or video_data
.get('secondaryTitle')
207 if video_data
.get('locked'):
208 resource
= self
._get
_mvpd
_resource
(
209 video_data
.get('resourceId') or 'nbcentertainment',
210 title
, video_id
, video_data
.get('rating'))
211 query
['auth'] = self
._extract
_mvpd
_auth
(
212 url
, video_id
, 'nbcentertainment', resource
)
213 theplatform_url
= smuggle_url(update_url_query(
214 'http://link.theplatform.com/s/NnzsPC/media/guid/%s/%s' % (video_data
.get('mpxAccountId') or '2410887629', video_id
),
215 query
), {'force_smil_url': True}
)
217 # Empty string or 0 can be valid values for these. So the check must be `is None`
218 description
= video_data
.get('description')
219 if description
is None:
220 description
= tpm
.get('description')
221 episode_number
= int_or_none(video_data
.get('episodeNumber'))
222 if episode_number
is None:
223 episode_number
= int_or_none(tpm
.get('nbcu$airOrder'))
224 rating
= video_data
.get('rating')
226 try_get(tpm
, lambda x
: x
['ratings'][0]['rating'])
227 season_number
= int_or_none(video_data
.get('seasonNumber'))
228 if season_number
is None:
229 season_number
= int_or_none(tpm
.get('nbcu$seasonNumber'))
230 series
= video_data
.get('seriesShortTitle')
232 series
= tpm
.get('nbcu$seriesShortTitle')
233 tags
= video_data
.get('keywords')
234 if tags
is None or len(tags
) == 0:
235 tags
= tpm
.get('keywords')
238 '_type': 'url_transparent',
239 'age_limit': parse_age_limit(rating
),
240 'description': description
,
242 'episode_number': episode_number
,
244 'ie_key': 'ThePlatform',
245 'season_number': season_number
,
249 'url': theplatform_url
,
253 class NBCSportsVPlayerIE(InfoExtractor
):
254 _VALID_URL_BASE
= r
'https?://(?:vplayer\.nbcsports\.com|(?:www\.)?nbcsports\.com/vplayer)/'
255 _VALID_URL
= _VALID_URL_BASE
+ r
'(?:[^/]+/)+(?P<id>[0-9a-zA-Z_]+)'
256 _EMBED_REGEX
= [r
'(?:iframe[^>]+|var video|div[^>]+data-(?:mpx-)?)[sS]rc\s?=\s?"(?P<url>%s[^\"]+)' % _VALID_URL_BASE
]
259 'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_embed/select/9CsDKds0kvHI',
261 'id': '9CsDKds0kvHI',
263 'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d',
264 'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson',
265 'timestamp': 1426270238,
266 'upload_date': '20150313',
267 'uploader': 'NBCU-SPORTS',
270 'thumbnail': r
're:^https?://.*\.jpg$'
273 'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_embed/select/media/PEgOtlNcC_y2',
274 'only_matching': True,
276 'url': 'https://www.nbcsports.com/vplayer/p/BxmELC/nbcsports/select/PHJSaFWbrTY9?form=html&autoPlay=true',
277 'only_matching': True,
280 def _real_extract(self
, url
):
281 video_id
= self
._match
_id
(url
)
282 webpage
= self
._download
_webpage
(url
, video_id
)
283 theplatform_url
= self
._html
_search
_regex
(r
'tp:releaseUrl="(.+?)"', webpage
, 'url')
284 return self
.url_result(theplatform_url
, 'ThePlatform')
287 class NBCSportsIE(InfoExtractor
):
288 _VALID_URL
= r
'https?://(?:www\.)?nbcsports\.com//?(?!vplayer/)(?:[^/]+/)+(?P<id>[0-9a-z-]+)'
292 'url': 'https://www.nbcsports.com/watch/nfl/profootballtalk/pft-pm/unpacking-addisons-reckless-driving-citation',
294 'id': 'PHJSaFWbrTY9',
296 'title': 'Tom Izzo, Michigan St. has \'so much respect\' for Duke',
297 'description': 'md5:ecb459c9d59e0766ac9c7d5d0eda8113',
298 'uploader': 'NBCU-SPORTS',
299 'upload_date': '20150330',
300 'timestamp': 1427726529,
302 'thumbnail': 'https://hdliveextra-a.akamaihd.net/HD/image_sports/NBCU_Sports_Group_-_nbcsports/253/303/izzodps.jpg',
307 'url': 'https://www.nbcsports.com/philadelphia/philadelphia-phillies/bruce-bochy-hector-neris-hes-idiot',
308 'only_matching': True,
311 'url': 'https://www.nbcsports.com/boston/video/report-card-pats-secondary-no-match-josh-allen',
312 'only_matching': True,
def _real_extract(self, url):
    """Hand an nbcsports.com article page off to the vplayer extractor.

    Downloads the page at ``url`` (labelled with the id matched from the
    URL), asks ``NBCSportsVPlayerIE._extract_url`` for the embedded player
    URL found in that page, and returns it as a URL result for the
    'NBCSportsVPlayer' extractor.
    """
    page_id = self._match_id(url)
    page_html = self._download_webpage(url, page_id)
    # NOTE(review): presumably _extract_url can return None when the page
    # has no vplayer embed -- confirm against the base class.
    embed_url = NBCSportsVPlayerIE._extract_url(page_html)
    return self.url_result(embed_url, 'NBCSportsVPlayer')
322 class NBCSportsStreamIE(AdobePassIE
):
323 _VALID_URL
= r
'https?://stream\.nbcsports\.com/.+?\bpid=(?P<id>\d+)'
325 'url': 'http://stream.nbcsports.com/nbcsn/generic?pid=206559',
329 'title': 'Amgen Tour of California Women\'s Recap',
330 'description': 'md5:66520066b3b5281ada7698d0ea2aa894',
334 'skip_download': True,
336 'skip': 'Requires Adobe Pass Authentication',
339 def _real_extract(self
, url
):
340 video_id
= self
._match
_id
(url
)
341 live_source
= self
._download
_json
(
342 'http://stream.nbcsports.com/data/live_sources_%s.json' % video_id
,
344 video_source
= live_source
['videoSources'][0]
345 title
= video_source
['title']
347 for k
in ('source', 'msl4source', 'iossource', 'hlsv4'):
349 source_url
= video_source
.get(sk
) or video_source
.get(sk
+ 'Alt')
353 source_url
= video_source
['ottStreamUrl']
354 is_live
= video_source
.get('type') == 'live' or video_source
.get('status') == 'Live'
355 resource
= self
._get
_mvpd
_resource
('nbcsports', title
, video_id
, '')
356 token
= self
._extract
_mvpd
_auth
(url
, video_id
, 'nbcsports', resource
)
357 tokenized_url
= self
._download
_json
(
358 'https://token.playmakerservices.com/cdn',
359 video_id
, data
=json
.dumps({
360 'requestorId': 'nbcsports',
362 'application': 'NBCSports',
364 'platform': 'desktop',
366 'url': video_source
['sourceUrl'],
367 'token': base64
.b64encode(token
.encode()).decode(),
368 'resourceId': base64
.b64encode(resource
.encode()).decode(),
369 }).encode())['tokenizedUrl']
370 formats
= self
._extract
_m
3u8_formats
(tokenized_url
, video_id
, 'mp4')
374 'description': live_source
.get('description'),
380 class NBCNewsIE(ThePlatformIE
): # XXX: Do not subclass from concrete IE
381 _VALID_URL
= r
'(?x)https?://(?:www\.)?(?:nbcnews|today|msnbc)\.com/([^/]+/)*(?:.*-)?(?P<id>[^/?]+)'
382 _EMBED_REGEX
= [r
'<iframe[^>]+src=(["\'])(?P
<url
>(?
:https?
:)?
//www\
.nbcnews\
.com
/widget
/video
-embed
/[^
"\']+)\1']
386 'url': 'http://www.nbcnews.com/watch/nbcnews-com/how-twitter-reacted-to-the-snowden-interview-269389891880',
387 'md5': 'fb3dcd2d7b1dd9804305fa2fc95ab610', # md5 tends to fluctuate
389 'id': '269389891880',
391 'title': 'How Twitter Reacted To The Snowden Interview',
392 'description': 'md5:65a0bd5d76fe114f3c2727aa3a81fe64',
393 'timestamp': 1401363060,
394 'upload_date': '20140529',
396 'thumbnail': 'https://media-cldnry.s-nbcnews.com/image/upload/MSNBC/Components/Video/140529/p_tweet_snow_140529.jpg',
400 'url': 'http://www.nbcnews.com/feature/dateline-full-episodes/full-episode-family-business-n285156',
401 'md5': 'fdbf39ab73a72df5896b6234ff98518a',
403 'id': '529953347624',
405 'title': 'FULL EPISODE: Family Business',
406 'description': 'md5:757988edbaae9d7be1d585eb5d55cc04',
408 'skip': 'This page is unavailable.',
411 'url': 'http://www.nbcnews.com/nightly-news/video/nightly-news-with-brian-williams-full-broadcast-february-4-394064451844',
412 'md5': '40d0e48c68896359c80372306ece0fc3',
414 'id': '394064451844',
416 'title': 'Nightly News with Brian Williams Full Broadcast (February 4)',
417 'description': 'md5:1c10c1eccbe84a26e5debb4381e2d3c5',
418 'timestamp': 1423104900,
419 'upload_date': '20150205',
421 'thumbnail': 'https://media-cldnry.s-nbcnews.com/image/upload/MSNBC/Components/Video/__NEW/nn_netcast_150204.jpg',
425 'url': 'http://www.nbcnews.com/business/autos/volkswagen-11-million-vehicles-could-have-suspect-software-emissions-scandal-n431456',
426 'md5': 'ffb59bcf0733dc3c7f0ace907f5e3939',
430 'title': "Volkswagen U
.S
. Chief
: We
'Totally Screwed Up'",
431 'description': 'md5:d22d1281a24f22ea0880741bb4dd6301',
432 'upload_date': '20150922',
433 'timestamp': 1442917800,
435 'thumbnail': 'https://media-cldnry.s-nbcnews.com/image/upload/MSNBC/Components/Video/__NEW/x_lon_vwhorn_150922.jpg',
439 'url': 'http://www.today.com/video/see-the-aurora-borealis-from-space-in-stunning-new-nasa-video-669831235788',
440 'md5': '693d1fa21d23afcc9b04c66b227ed9ff',
442 'id': '669831235788',
444 'title': 'See the aurora borealis from space in stunning new NASA video',
445 'description': 'md5:74752b7358afb99939c5f8bb2d1d04b1',
446 'upload_date': '20160420',
447 'timestamp': 1461152093,
449 'thumbnail': 'https://media-cldnry.s-nbcnews.com/image/upload/MSNBC/Components/Video/201604/2016-04-20T11-35-09-133Z--1280x720.jpg',
453 'url': 'http://www.msnbc.com/all-in-with-chris-hayes/watch/the-chaotic-gop-immigration-vote-314487875924',
454 'md5': '6d236bf4f3dddc226633ce6e2c3f814d',
456 'id': '314487875924',
458 'title': 'The chaotic GOP immigration vote',
459 'description': 'The Republican House votes on a border bill that has no chance of getting through the Senate or signed by the President and is drawing criticism from all sides.',
460 'thumbnail': r're:^https?://.*\.jpg$',
461 'timestamp': 1406937606,
462 'upload_date': '20140802',
467 'url': 'http://www.nbcnews.com/watch/dateline/full-episode--deadly-betrayal-386250819952',
468 'only_matching': True,
471 # From http://www.vulture.com/2016/06/letterman-couldnt-care-less-about-late-night.html
472 'url': 'http://www.nbcnews.com/widget/video-embed/701714499682',
473 'only_matching': True,
477 def _real_extract(self, url):
478 video_id = self._match_id(url)
479 webpage = self._download_webpage(url, video_id)
481 data = self._search_nextjs_data(webpage, video_id)['props']['initialState']
482 video_data = try_get(data, lambda x: x['video']['current'], dict)
484 video_data = data['article']['content'][0]['primaryMedia']['video']
485 title = video_data['headline']['primary']
488 for va in video_data.get('videoAssets', []):
489 public_url = va.get('publicUrl')
492 if '://link.theplatform.com/' in public_url:
493 public_url = update_url_query(public_url, {'format': 'redirect'})
494 format_id = va.get('format')
495 if format_id == 'M3U':
496 formats.extend(self._extract_m3u8_formats(
497 public_url, video_id, 'mp4', 'm3u8_native',
498 m3u8_id=format_id, fatal=False))
500 tbr = int_or_none(va.get('bitrate'), 1000)
502 format_id += '-%d' % tbr
504 'format_id': format_id,
506 'width': int_or_none(va.get('width')),
507 'height': int_or_none(va.get('height')),
513 closed_captioning = video_data.get('closedCaptioning')
514 if closed_captioning:
515 for cc_url in closed_captioning.values():
518 subtitles.setdefault('en', []).append({
525 'description': try_get(video_data, lambda x: x['description']['primary']),
526 'thumbnail': try_get(video_data, lambda x: x['primaryImage']['url']['primary']),
527 'duration': parse_duration(video_data.get('duration')),
528 'timestamp': unified_timestamp(video_data.get('datePublished')),
530 'subtitles': subtitles,
534 class NBCOlympicsIE(InfoExtractor):
535 IE_NAME = 'nbcolympics'
536 _VALID_URL = r'https?://www\.nbcolympics\.com/videos?/(?P<id>[0-9a-z-]+)'
539 # Geo-restricted to US
540 'url': 'http://www.nbcolympics.com/video/justin-roses-son-leo-was-tears-after-his-dad-won-gold',
541 'md5': '54fecf846d05429fbaa18af557ee523a',
543 'id': 'WjTBzDXx5AUq',
544 'display_id': 'justin-roses-son-leo-was-tears-after-his-dad-won-gold',
546 'title': 'Rose\'s son Leo was in tears after his dad won gold',
547 'description': 'Olympic gold medalist Justin Rose gets emotional talking to the impact his win in men\'s golf has already had on his children.',
548 'timestamp': 1471274964,
549 'upload_date': '20160815',
550 'uploader': 'NBCU-SPORTS',
552 'skip': '404 Not Found',
555 def _real_extract(self, url):
556 display_id = self._match_id(url)
558 webpage = self._download_webpage(url, display_id)
561 drupal_settings = self._parse_json(self._search_regex(
562 r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
563 webpage, 'drupal settings'), display_id)
565 iframe_url = drupal_settings['vod']['iframe_url']
566 theplatform_url = iframe_url.replace(
567 'vplayer.nbcolympics.com', 'player.theplatform.com')
568 except RegexNotFoundError:
569 theplatform_url = self._search_regex(
570 r"([\"'])embedUrl\1: *([\"'])(?P
<embedUrl
>.+)\
2",
571 webpage, 'embedding URL', group="embedUrl
")
574 '_type': 'url_transparent',
575 'url': theplatform_url,
576 'ie_key': ThePlatformIE.ie_key(),
577 'display_id': display_id,
581 class NBCOlympicsStreamIE(AdobePassIE):
582 IE_NAME = 'nbcolympics:stream'
583 _VALID_URL = r'https?://stream\.nbcolympics\.com/(?P<id>[0-9a-z-]+)'
586 'note': 'Tokenized m3u8 source URL',
587 'url': 'https://stream.nbcolympics.com/womens-soccer-group-round-11',
591 'title': r"re
:Women
's Group Stage - Netherlands vs\. Brazil [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$",
594 'skip_download
': 'm3u8
',
596 'skip
': 'Livestream
',
598 'note
': 'Plain m3u8 source URL
',
599 'url
': 'https
://stream
.nbcolympics
.com
/gymnastics
-event
-finals
-mens
-floor
-pommel
-horse
-womens
-vault
-bars
',
603 'title
': r're
:Event Finals
: M Floor
, W Vault
, M Pommel
, W Uneven Bars
[0-9]{4}
-[0-9]{2}
-[0-9]{2}
[0-9]{2}
:[0-9]{2}$
',
606 'skip_download
': 'm3u8
',
608 'skip
': 'Livestream
',
612 def _real_extract(self, url):
613 display_id = self._match_id(url)
614 webpage = self._download_webpage(url, display_id)
615 pid = self._search_regex(r'pid\s
*=\s
*(\d
+);', webpage, 'pid
')
617 event_config = self._download_json(
618 f'http
://stream
.nbcolympics
.com
/data
/event_config_{pid}
.json
',
619 pid, 'Downloading event config
')['eventConfig
']
621 title = event_config['eventTitle
']
622 is_live = {'live': True, 'replay': False}.get(event_config.get('eventStatus
'))
624 source_url = self._download_json(
625 f'https
://api
-leap
.nbcsports
.com
/feeds
/assets
/{pid}?application
=NBCOlympics
&platform
=desktop
&format
=nbc
-player
&env
=staging
',
626 pid, 'Downloading leap config
'
627 )['videoSources
'][0]['cdnSources
']['primary
'][0]['sourceUrl
']
629 if event_config.get('cdnToken
'):
630 ap_resource = self._get_mvpd_resource(
631 event_config.get('resourceId
', 'NBCOlympics
'),
632 re.sub(r'[^\w\d
]+', '', event_config['eventTitle
']), pid,
633 event_config.get('ratingId
', 'NO VALUE
'))
634 media_token = self._extract_mvpd_auth(url, pid, event_config.get('requestorId
', 'NBCOlympics
'), ap_resource)
636 source_url = self._download_json(
637 'https
://tokens
.playmakerservices
.com
/', pid, 'Retrieving tokenized URL
',
639 'application
': 'NBCSports
',
640 'authentication
-type': 'adobe
-pass',
643 'platform
': 'desktop
',
644 'requestorId
': 'NBCOlympics
',
645 'resourceId
': base64.b64encode(ap_resource.encode()).decode(),
646 'token
': base64.b64encode(media_token.encode()).decode(),
650 )['akamai
'][0]['tokenizedUrl
']
652 formats = self._extract_m3u8_formats(source_url, pid, 'mp4
', live=is_live)
654 # -http_seekable requires ffmpeg 4.3+ but it doesn't seem possible to
655 # download with ffmpeg without this option
656 f['downloader_options
'] = {'ffmpeg_args': ['-seekable', '0', '-http_seekable', '0', '-icy', '0']}
660 'display_id
': display_id,
667 class NBCStationsIE(InfoExtractor):
668 _DOMAIN_RE = '|
'.join(map(re.escape, (
669 'nbcbayarea
', 'nbcboston
', 'nbcchicago
', 'nbcconnecticut
', 'nbcdfw
', 'nbclosangeles
',
670 'nbcmiami
', 'nbcnewyork
', 'nbcphiladelphia
', 'nbcsandiego
', 'nbcwashington
',
671 'necn
', 'telemundo52
', 'telemundoarizona
', 'telemundochicago
', 'telemundonuevainglaterra
',
673 _VALID_URL = rf'https?
://(?
:www\
.)?
(?P
<site
>{_DOMAIN_RE}
)\
.com
/(?
:[^
/?
#]+/)*(?P<id>[^/?#]+)/?(?:$|[#?])'
676 'url': 'https://www.nbclosangeles.com/news/local/large-structure-fire-in-downtown-la-prompts-smoke-odor-advisory/2968618/',
680 'title': 'Large Structure Fire in Downtown LA Prompts Smoke Odor Advisory',
681 'description': 'md5:417ed3c2d91fe9d301e6db7b0942f182',
683 'timestamp': 1661135892,
684 'upload_date': '20220822',
686 'channel_id': 'KNBC',
687 'channel': 'nbclosangeles',
690 'skip_download': 'm3u8',
693 'url': 'https://www.telemundoarizona.com/responde/huracan-complica-reembolso-para-televidente-de-tucson/2247002/',
697 'title': 'Huracán complica que televidente de Tucson reciba reembolso',
698 'description': 'md5:af298dc73aab74d4fca6abfb12acb6cf',
700 'timestamp': 1660886507,
701 'upload_date': '20220819',
702 'uploader': 'Telemundo Arizona',
703 'channel_id': 'KTAZ',
704 'channel': 'telemundoarizona',
707 'skip_download': 'm3u8',
711 'url': 'https://www.nbcboston.com/weather/video-weather/highs-near-freezing-in-boston-on-wednesday/2961135/',
712 'md5': '9bf8c41dc7abbb75b1a44f1491a4cc85',
716 'title': 'Highs Near Freezing in Boston on Wednesday',
717 'description': 'md5:3ec486609a926c99f00a3512e6c0e85b',
719 'timestamp': 1675268656,
720 'upload_date': '20230201',
722 'channel_id': 'WBTS',
723 'channel': 'nbcboston',
735 def _real_extract(self
, url
):
736 channel
, video_id
= self
._match
_valid
_url
(url
).group('site', 'id')
737 webpage
= self
._download
_webpage
(url
, video_id
)
739 nbc_data
= self
._search
_json
(
740 r
'<script>\s*var\s+nbc\s*=', webpage
, 'NBC JSON data', video_id
)
741 pdk_acct
= nbc_data
.get('pdkAcct') or 'Yh1nAC'
742 fw_ssid
= traverse_obj(nbc_data
, ('video', 'fwSSID'))
744 video_data
= self
._search
_json
(
745 r
'data-videos="\[', webpage
, 'video data', video_id
, default
={}, transform_source
=unescapeHTML
)
746 video_data
.update(self
._search
_json
(
747 r
'data-meta="', webpage
, 'metadata', video_id
, default
={}, transform_source
=unescapeHTML
))
749 raise ExtractorError('No video metadata found in webpage', expected
=True)
751 info
, formats
= {}, []
752 is_live
= int_or_none(video_data
.get('mpx_is_livestream')) == 1
754 'formats': 'MPEG-DASH none,M3U none,MPEG-DASH none,MPEG4,MP3',
756 'fwsitesection': fw_ssid
,
757 'fwNetworkID': traverse_obj(nbc_data
, ('video', 'fwNetworkID'), default
='382114'),
758 'pprofile': 'ots_desktop_html',
759 'sensitive': 'false',
762 'mode': 'LIVE' if is_live
else 'on-demand',
769 player_id
= traverse_obj(video_data
, ((None, ('video', 'meta')), (
770 'mpx_m3upid', 'mpx_pid', 'pid_streaming_web_medium')), get_all
=False)
771 info
['title'] = f
'{channel} livestream'
774 player_id
= traverse_obj(video_data
, (
775 (None, ('video', 'meta')), ('pid_streaming_web_high', 'mpx_pid')), get_all
=False)
777 date_string
= traverse_obj(video_data
, 'date_string', 'date_gmt')
779 date_string
= self
._search
_regex
(
780 r
'datetime="([^"]+)"', date_string
, 'date string', fatal
=False)
782 date_string
= traverse_obj(
783 nbc_data
, ('dataLayer', 'adobe', ('prop70', 'eVar70', 'eVar59')), get_all
=False)
785 video_url
= traverse_obj(video_data
, ((None, ('video', 'meta')), 'mp4_url'), get_all
=False)
787 ext
= determine_ext(video_url
)
788 height
= self
._search
_regex
(r
'\d+-(\d+)p', url_basename(video_url
), 'height', default
=None)
792 'width': int_or_none(self
._RESOLUTIONS
.get(height
)),
793 'height': int_or_none(height
),
794 'format_id': f
'http-{ext}',
798 'title': video_data
.get('title') or traverse_obj(nbc_data
, (
799 'dataLayer', (None, 'adobe'), ('contenttitle', 'title', 'prop22')), get_all
=False),
801 traverse_obj(video_data
, 'summary', 'excerpt', 'video_hero_text')
802 or clean_html(traverse_obj(nbc_data
, ('dataLayer', 'summary'))),
803 'timestamp': unified_timestamp(date_string
),
807 if player_id
and fw_ssid
:
808 smil
= self
._download
_xml
(
809 f
'https://link.theplatform.com/s/{pdk_acct}/{player_id}', video_id
,
810 note
='Downloading SMIL data', query
=query
, fatal
=is_live
)
811 if not isinstance(smil
, xml
.etree
.ElementTree
.Element
):
813 subtitles
= self
._parse
_smil
_subtitles
(smil
, default_ns
) if smil
is not None else {}
814 for video
in smil
.findall(self
._xpath
_ns
('.//video', default_ns
)) if smil
is not None else []:
815 info
['duration'] = float_or_none(remove_end(video
.get('dur'), 'ms'), 1000)
816 video_src_url
= video
.get('src')
817 ext
= mimetype2ext(video
.get('type'), default
=determine_ext(video_src_url
))
819 fmts
, subs
= self
._extract
_m
3u8_formats
_and
_subtitles
(
820 video_src_url
, video_id
, 'mp4', m3u8_id
='hls', fatal
=is_live
,
821 live
=is_live
, errnote
='No HLS formats found')
823 self
._merge
_subtitles
(subs
, target
=subtitles
)
826 'url': video_src_url
,
827 'format_id': f
'https-{ext}',
829 'width': int_or_none(video
.get('width')),
830 'height': int_or_none(video
.get('height')),
834 self
.raise_no_formats('No video content found in webpage', expected
=True)
837 self
._request
_webpage
(
838 HEADRequest(formats
[0]['url']), video_id
, note
='Checking live status')
839 except ExtractorError
:
840 raise UserNotLive(video_id
=channel
)
845 'channel_id': nbc_data
.get('callLetters'),
846 'uploader': nbc_data
.get('on_air_name'),
848 'subtitles': subtitles
,