]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/nba.py
[misc] Add `hatch`, `ruff`, `pre-commit` and improve dev docs (#7409)
[yt-dlp.git] / yt_dlp / extractor / nba.py
CommitLineData
import functools
import re

from .turner import TurnerBaseIE
from ..compat import (
    compat_str,
    compat_urllib_parse_unquote,
)
from ..utils import (
    ExtractorError,
    OnDemandPagedList,
    int_or_none,
    merge_dicts,
    parse_duration,
    parse_iso8601,
    parse_qs,
    try_get,
    update_url_query,
    urljoin,
)
5b286728
PH
20
21
class NBACVPBaseIE(TurnerBaseIE):
    """Common base for the NBA extractors that read CVP descriptors."""

    def _extract_nba_cvp_info(self, path, video_id, fatal=False):
        """Extract CVP info for ``path`` served from secure.nba.com.

        Calls ``self._extract_cvp_info`` with the descriptor URL, the video
        id, and the media source hosts used for the ``default`` and ``m3u8``
        entries. ``fatal`` is forwarded unchanged.
        """
        descriptor_url = 'http://secure.nba.com/%s' % path
        media_sources = {
            'default': {
                'media_src': 'http://nba.cdn.turner.com/nba/big',
            },
            'm3u8': {
                'media_src': 'http://nbavod-f.akamaihd.net',
            },
        }
        return self._extract_cvp_info(
            descriptor_url, video_id, media_sources, fatal=fatal)
33
34
class NBAWatchBaseIE(NBACVPBaseIE):
    """Base for the watch.nba.com extractors.

    Provides the shared URL prefix and the search-index based video lookup.
    """

    _VALID_URL_BASE = r'https?://(?:(?:www\.)?nba\.com(?:/watch)?|watch\.nba\.com)/'

    def _extract_video(self, filter_key, filter_value):
        """Look a single video up in the search index and build its info dict.

        ``filter_key`` and ``filter_value`` are joined as ``key:value`` to form
        the search query (callers in this file pass ``pid`` or ``seoName``).

        Returns an info dict with ``id``, ``title``, metadata and ``formats``.

        Raises ExtractorError when the search response contains no documents,
        rather than failing with a bare IndexError/KeyError.
        """
        search = self._download_json(
            'https://neulionscnbav2-a.akamaihd.net/solr/nbad_program/usersearch',
            filter_value, query={
                'fl': 'description,image,name,pid,releaseDate,runtime,tags,seoName',
                'q': filter_key + ':' + filter_value,
                'wt': 'json',
            })
        docs = try_get(search, lambda x: x['response']['docs'], list)
        if not docs:
            raise ExtractorError(
                'Unable to find any video matching %s:%s' % (filter_key, filter_value))
        video = docs[0]

        video_id = str(video['pid'])
        title = video['name']

        formats = []
        # The publish point is optional (fatal=False): a failed request
        # simply contributes no HLS/HTTP formats.
        m3u8_url = (self._download_json(
            'https://watch.nba.com/service/publishpoint', video_id, query={
                'type': 'video',
                'format': 'json',
                'id': video_id,
            }, headers={
                'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0_1 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A402 Safari/604.1',
            }, fatal=False) or {}).get('path')
        if m3u8_url:
            # Drop the device suffix (_pc/_iphone) from the playlist name
            # before requesting it.
            m3u8_formats = self._extract_m3u8_formats(
                re.sub(r'_(?:pc|iphone)\.', '.', m3u8_url), video_id, 'mp4',
                'm3u8_native', m3u8_id='hls', fatal=False)
            formats.extend(m3u8_formats)
            # Derive a progressive HTTP twin of every HLS format by removing
            # the '.m3u8' part of its URL.
            for f in m3u8_formats:
                http_f = f.copy()
                http_f.update({
                    'format_id': http_f['format_id'].replace('hls-', 'http-'),
                    'protocol': 'http',
                    'url': http_f['url'].replace('.m3u8', ''),
                })
                formats.append(http_f)

        info = {
            'id': video_id,
            'title': title,
            'thumbnail': urljoin('https://nbadsdmt.akamaized.net/media/nba/nba/thumbs/', video.get('image')),
            'description': video.get('description'),
            'duration': int_or_none(video.get('runtime')),
            'timestamp': parse_iso8601(video.get('releaseDate')),
            'tags': video.get('tags'),
        }

        seo_name = video.get('seoName')
        # Only names containing a YYYY/MM/DD/ path segment are looked up as
        # CVP descriptors.
        if seo_name and re.search(r'\d{4}/\d{2}/\d{2}/', seo_name):
            base_path = ''
            if seo_name.startswith('teams/'):
                # 'teams/<team>/...' descriptors live under '<team>/video/'.
                base_path += seo_name.split('/')[1] + '/'
            base_path += 'video/'
            cvp_info = self._extract_nba_cvp_info(
                base_path + seo_name + '.xml', video_id, False)
            if cvp_info:
                formats.extend(cvp_info.get('formats') or [])
                info = merge_dicts(info, cvp_info)

        info['formats'] = formats
        return info
97
98
class NBAWatchEmbedIE(NBAWatchBaseIE):
    """Extractor for watch.nba.com embed URLs that carry a numeric ``id``."""

    IE_NAME = 'nba:watch:embed'
    _VALID_URL = NBAWatchBaseIE._VALID_URL_BASE + r'embed\?.*?\bid=(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://watch.nba.com/embed?id=659395',
        'md5': 'b7e3f9946595f4ca0a13903ce5edd120',
        'info_dict': {
            'id': '659395',
            'ext': 'mp4',
            'title': 'Mix clip: More than 7 points of Joe Ingles, Luc Mbah a Moute, Blake Griffin and 6 more in Utah Jazz vs. the Clippers, 4/15/2017',
            'description': 'Mix clip: More than 7 points of Joe Ingles, Luc Mbah a Moute, Blake Griffin and 6 more in Utah Jazz vs. the Clippers, 4/15/2017',
            'timestamp': 1492228800,
            'upload_date': '20170415',
        },
    }]

    def _real_extract(self, url):
        """Search for the video by the ``pid`` matched from the URL."""
        return self._extract_video('pid', self._match_id(url))
118
119
class NBAWatchIE(NBAWatchBaseIE):
    """Extractor for nba.com / watch.nba.com video pages addressed by SEO name."""

    IE_NAME = 'nba:watch'
    _VALID_URL = NBAWatchBaseIE._VALID_URL_BASE + r'(?:nba/)?video/(?P<id>.+?(?=/index\.html)|(?:[^/]+/)*[^/?#&]+)'
    _TESTS = [{
        'url': 'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html',
        'md5': '9d902940d2a127af3f7f9d2f3dc79c96',
        'info_dict': {
            'id': '70946',
            'ext': 'mp4',
            'title': 'Thunder vs. Nets',
            'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.',
            'duration': 181,
            'timestamp': 1354597200,
            'upload_date': '20121204',
        },
    }, {
        'url': 'http://www.nba.com/video/games/hornets/2014/12/05/0021400276-nyk-cha-play5.nba/',
        'only_matching': True,
    }, {
        'url': 'http://watch.nba.com/video/channels/playoffs/2015/05/20/0041400301-cle-atl-recap.nba',
        'md5': 'b2b39b81cf28615ae0c3360a3f9668c4',
        'info_dict': {
            'id': '330865',
            'ext': 'mp4',
            'title': 'Hawks vs. Cavaliers Game 1',
            'description': 'md5:8094c3498d35a9bd6b1a8c396a071b4d',
            'duration': 228,
            'timestamp': 1432094400,
            'upload_date': '20150521',
        },
    }, {
        'url': 'http://watch.nba.com/nba/video/channels/nba_tv/2015/06/11/YT_go_big_go_home_Game4_061115',
        'only_matching': True,
    }, {
        # only CVP mp4 format available
        'url': 'https://watch.nba.com/video/teams/cavaliers/2012/10/15/sloan121015mov-2249106',
        'only_matching': True,
    }, {
        'url': 'https://watch.nba.com/video/top-100-dunks-from-the-2019-20-season?plsrc=nba&collection=2019-20-season-highlights',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the single video, or hand off to the collection extractor.

        The hand-off happens when ``self._yes_playlist`` accepts the URL's
        ``collection`` query parameter.
        """
        display_id = self._match_id(url)
        collection_id = parse_qs(url).get('collection', [None])[0]
        if not self._yes_playlist(collection_id, display_id):
            return self._extract_video('seoName', display_id)
        collection_url = 'https://www.nba.com/watch/list/collection/' + collection_id
        return self.url_result(
            collection_url, NBAWatchCollectionIE.ie_key(), collection_id)
170
171
class NBAWatchCollectionIE(NBAWatchBaseIE):
    """Playlist extractor for nba.com watch collections."""

    IE_NAME = 'nba:watch:collection'
    _VALID_URL = NBAWatchBaseIE._VALID_URL_BASE + r'list/collection/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://watch.nba.com/list/collection/season-preview-2020',
        'info_dict': {
            'id': 'season-preview-2020',
        },
        'playlist_mincount': 43,
    }]
    _PAGE_SIZE = 100

    def _fetch_page(self, collection_id, page):
        """Yield url-type entries for one page of the collection.

        ``page`` is zero-based as supplied by the paged list; the API is
        queried with ``page + 1``. Videos with neither ``seoName`` nor
        ``slug`` in their ``program`` are skipped.
        """
        page_number = page + 1
        listing = self._download_json(
            'https://content-api-prod.nba.com/public/1/endeavor/video-list/collection/' + collection_id,
            collection_id, 'Downloading page %d JSON metadata' % page_number, query={
                'count': self._PAGE_SIZE,
                'page': page_number,
            })
        for video in listing['results']['videos']:
            program = video.get('program') or {}
            seo_name = program.get('seoName') or program.get('slug')
            if seo_name:
                yield {
                    '_type': 'url',
                    'id': program.get('id'),
                    'title': program.get('title') or video.get('title'),
                    'url': 'https://www.nba.com/watch/video/' + seo_name,
                    'thumbnail': video.get('image'),
                    'description': program.get('description') or video.get('description'),
                    'duration': parse_duration(program.get('runtimeHours')),
                    'timestamp': parse_iso8601(video.get('releaseDate')),
                }

    def _real_extract(self, url):
        """Build a lazily paged playlist for the collection id in ``url``."""
        collection_id = self._match_id(url)
        page_fetcher = functools.partial(self._fetch_page, collection_id)
        return self.playlist_result(
            OnDemandPagedList(page_fetcher, self._PAGE_SIZE), collection_id)
3cfeb162 214
3cfeb162 215
class NBABaseIE(NBACVPBaseIE):
    """Base for the nba.com team-site extractors.

    Subclasses that use ``_real_extract`` from this class must define
    ``_CONTENT_ID_REGEX`` and ``_extract_url_results(team, content_id)``.
    """

    _VALID_URL_BASE = r'''(?x)
    https?://(?:www\.)?nba\.com/
        (?P<team>
            blazers|
            bucks|
            bulls|
            cavaliers|
            celtics|
            clippers|
            grizzlies|
            hawks|
            heat|
            hornets|
            jazz|
            kings|
            knicks|
            lakers|
            magic|
            mavericks|
            nets|
            nuggets|
            pacers|
            pelicans|
            pistons|
            raptors|
            rockets|
            sixers|
            spurs|
            suns|
            thunder|
            timberwolves|
            warriors|
            wizards
        )
    (?:/play\#)?/'''
    _CHANNEL_PATH_REGEX = r'video/channel|series'

    def _embed_url_result(self, team, content_id):
        """Return a url result pointing at the embed iframe for ``content_id``."""
        return self.url_result(update_url_query(
            'https://secure.nba.com/assets/amp/include/video/iframe.html', {
                'contentId': content_id,
                'team': team,
            }), NBAEmbedIE.ie_key())

    def _call_api(self, team, content_id, query, resource):
        """Query the team video API and return its ``response.result`` value.

        ``resource`` is only used in the progress note.
        """
        return self._download_json(
            'https://api.nba.net/2/%s/video,imported_video,wsc/' % team,
            content_id, 'Download %s JSON metadata' % resource,
            query=query, headers={
                'accessToken': 'internal|bb88df6b4c2244e78822812cecf1ee1b',
            })['response']['result']

    def _extract_video(self, video, team, extract_all=True):
        """Turn one API video object into an info dict.

        ``team`` is used only when the video carries no ``brand`` of its own.
        With ``extract_all`` set, formats are collected from the ``mp4``,
        ``videoSource``, ``m3u8`` and ``contentXml`` fields; otherwise the
        result is merged with a url result for the embed page so that the
        embed extractor resolves the formats later.
        """
        video_id = compat_str(video['nid'])
        # Prefer the brand reported by the API, but do not discard the
        # caller-supplied team (or raise KeyError) when it is missing.
        team = video.get('brand') or team

        info = {
            'id': video_id,
            'title': video.get('title') or video.get('headline') or video['shortHeadline'],
            'description': video.get('description'),
            'timestamp': parse_iso8601(video.get('published')),
        }

        subtitles = {}
        captions = try_get(video, lambda x: x['videoCaptions']['sidecars'], dict) or {}
        # Every sidecar caption URL is filed under 'en'.
        for caption_url in captions.values():
            subtitles.setdefault('en', []).append({'url': caption_url})

        formats = []
        mp4_url = video.get('mp4')
        if mp4_url:
            formats.append({
                'url': mp4_url,
            })

        if extract_all:
            source_url = video.get('videoSource')
            # s3:// locations are not downloadable; other URLs are probed
            # before being offered as the 'source' format.
            if source_url and not source_url.startswith('s3://') and self._is_valid_url(source_url, video_id, 'source'):
                formats.append({
                    'format_id': 'source',
                    'url': source_url,
                    'quality': 1,
                })

            m3u8_url = video.get('m3u8')
            if m3u8_url:
                if '.akamaihd.net/i/' in m3u8_url:
                    formats.extend(self._extract_akamai_formats(
                        m3u8_url, video_id, {'http': 'pmd.cdn.turner.com'}))
                else:
                    formats.extend(self._extract_m3u8_formats(
                        m3u8_url, video_id, 'mp4',
                        'm3u8_native', m3u8_id='hls', fatal=False))

            content_xml = video.get('contentXml')
            if team and content_xml:
                cvp_info = self._extract_nba_cvp_info(
                    team + content_xml, video_id, fatal=False)
                if cvp_info:
                    # Tolerate CVP results that lack either key.
                    formats.extend(cvp_info.get('formats') or [])
                    subtitles = self._merge_subtitles(
                        subtitles, cvp_info.get('subtitles') or {})
                    info = merge_dicts(info, cvp_info)

        else:
            info.update(self._embed_url_result(team, video['videoId']))

        info.update({
            'formats': formats,
            'subtitles': subtitles,
        })

        return info

    def _real_extract(self, url):
        """Resolve the content id for ``url`` and delegate to the subclass.

        For ``/play#/`` URLs the id is the URL-decoded path component; for
        other URLs it is read from the page using ``_CONTENT_ID_REGEX``.
        """
        team, display_id = self._match_valid_url(url).groups()
        if '/play#/' in url:
            display_id = compat_urllib_parse_unquote(display_id)
        else:
            webpage = self._download_webpage(url, display_id)
            display_id = self._search_regex(
                self._CONTENT_ID_REGEX + r'\s*:\s*"([^"]+)"', webpage, 'video id')
        return self._extract_url_results(team, display_id)
339
340
341class NBAEmbedIE(NBABaseIE):
bc4ab17b 342 IE_NAME = 'nba:embed'
29f7c58a 343 _VALID_URL = r'https?://secure\.nba\.com/assets/amp/include/video/(?:topI|i)frame\.html\?.*?\bcontentId=(?P<id>[^?#&]+)'
344 _TESTS = [{
345 'url': 'https://secure.nba.com/assets/amp/include/video/topIframe.html?contentId=teams/bulls/2020/12/04/3478774/1607105587854-20201204_SCHEDULE_RELEASE_FINAL_DRUPAL-3478774&team=bulls&adFree=false&profile=71&videoPlayerName=TAMPCVP&baseUrl=&videoAdsection=nba.com_mobile_web_teamsites_chicagobulls&ampEnv=',
346 'only_matching': True,
347 }, {
348 'url': 'https://secure.nba.com/assets/amp/include/video/iframe.html?contentId=2016/10/29/0021600027boschaplay7&adFree=false&profile=71&team=&videoPlayerName=LAMPCVP',
349 'only_matching': True,
350 }]
351
352 def _real_extract(self, url):
4dfbf869 353 qs = parse_qs(url)
29f7c58a 354 content_id = qs['contentId'][0]
355 team = qs.get('team', [None])[0]
356 if not team:
357 return self.url_result(
358 'https://watch.nba.com/video/' + content_id, NBAWatchIE.ie_key())
359 video = self._call_api(team, content_id, {'videoid': content_id}, 'video')[0]
360 return self._extract_video(video, team)
361
362
class NBAIE(NBABaseIE):
    """Extractor for single videos on nba.com team sites."""

    IE_NAME = 'nba'
    # The negative lookahead keeps channel/series paths out of this extractor.
    _VALID_URL = NBABaseIE._VALID_URL_BASE + '(?!%s)video/(?P<id>(?:[^/]+/)*[^/?#&]+)' % NBABaseIE._CHANNEL_PATH_REGEX
    _CONTENT_ID_REGEX = r'videoID'
    _TESTS = [{
        'url': 'https://www.nba.com/bulls/video/teams/bulls/2020/12/04/3478774/1607105587854-20201204schedulereleasefinaldrupal-3478774',
        'info_dict': {
            'id': '45039',
            'ext': 'mp4',
            'title': 'AND WE BACK.',
            'description': 'Part 1 of our 2020-21 schedule is here! Watch our games on NBC Sports Chicago.',
            'duration': 94,
            'timestamp': 1607112000,
            'upload_date': '20201218',
        },
    }, {
        'url': 'https://www.nba.com/bucks/play#/video/teams%2Fbucks%2F2020%2F12%2F17%2F64860%2F1608252863446-Op_Dream_16x9-64860',
        'only_matching': True,
    }, {
        'url': 'https://www.nba.com/bucks/play#/video/wsc%2Fteams%2F2787C911AA1ACD154B5377F7577CCC7134B2A4B0',
        'only_matching': True,
    }]

    def _extract_url_results(self, team, content_id):
        """Resolve a single video by pointing at its embed page."""
        return self._embed_url_result(team, content_id)
388
389
class NBAChannelIE(NBABaseIE):
    """Playlist extractor for channel and series pages on nba.com team sites."""

    IE_NAME = 'nba:channel'
    _VALID_URL = NBABaseIE._VALID_URL_BASE + '(?:%s)/(?P<id>[^/?#&]+)' % NBABaseIE._CHANNEL_PATH_REGEX
    _CONTENT_ID_REGEX = r'videoSubCategory'
    _PAGE_SIZE = 100
    _TESTS = [{
        'url': 'https://www.nba.com/blazers/video/channel/summer_league',
        'info_dict': {
            'title': 'Summer League',
        },
        'playlist_mincount': 138,
    }, {
        'url': 'https://www.nba.com/bucks/play#/series/On%20This%20Date',
        'only_matching': True,
    }]

    def _fetch_page(self, team, channel, page):
        """Yield the entries of one zero-based ``page`` of ``channel``.

        Entries are built with ``extract_all`` disabled.
        """
        query = {
            'channels': channel,
            'count': self._PAGE_SIZE,
            'offset': page * self._PAGE_SIZE,
        }
        resource = 'page %d' % (page + 1)
        for video in self._call_api(team, channel, query, resource):
            yield self._extract_video(video, team, False)

    def _extract_url_results(self, team, content_id):
        """Return a lazily paged playlist titled after the channel id."""
        page_fetcher = functools.partial(self._fetch_page, team, content_id)
        return self.playlist_result(
            OnDemandPagedList(page_fetcher, self._PAGE_SIZE),
            playlist_title=content_id)