]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/espn.py
[compat] Remove deprecated functions from core code
[yt-dlp.git] / yt_dlp / extractor / espn.py
1 import base64
2 import json
3 import re
4 import urllib.parse
5
6 from .adobepass import AdobePassIE
7 from .common import InfoExtractor
8 from .once import OnceIE
9 from ..utils import (
10 determine_ext,
11 dict_get,
12 int_or_none,
13 unified_strdate,
14 unified_timestamp,
15 )
16
17
class ESPNIE(OnceIE):
    """Extractor for ESPN / ESPN FC clip URLs that carry a numeric video ID.

    Clip metadata is read from the ``api-app.espn.com`` clips endpoint and the
    formats are collected from the ``source`` and ``mobile`` link trees of the
    first returned video.
    """

    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            (?:
                                (?:
                                    (?:(?:\w+\.)+)?espn\.go|
                                    (?:www\.)?espn
                                )\.com/
                                (?:
                                    (?:
                                        video/(?:clip|iframe/twitter)|
                                    )
                                    (?:
                                        .*?\?.*?\bid=|
                                        /_/id/
                                    )|
                                    [^/]+/video/
                                )
                            )|
                            (?:www\.)espnfc\.(?:com|us)/(?:video/)?[^/]+/\d+/video/
                        )
                        (?P<id>\d+)
                    '''

    _TESTS = [{
        'url': 'http://espn.go.com/video/clip?id=10365079',
        'info_dict': {
            'id': '10365079',
            'ext': 'mp4',
            'title': '30 for 30 Shorts: Judging Jewell',
            'description': 'md5:39370c2e016cb4ecf498ffe75bef7f0f',
            'timestamp': 1390936111,
            'upload_date': '20140128',
            'duration': 1302,
            'thumbnail': r're:https://.+\.jpg',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://broadband.espn.go.com/video/clip?id=18910086',
        'info_dict': {
            'id': '18910086',
            'ext': 'mp4',
            'title': 'Kyrie spins around defender for two',
            'description': 'md5:2b0f5bae9616d26fba8808350f0d2b9b',
            'timestamp': 1489539155,
            'upload_date': '20170315',
        },
        'params': {
            'skip_download': True,
        },
        'expected_warnings': ['Unable to download f4m manifest'],
    }, {
        'url': 'http://nonredline.sports.espn.go.com/video/clip?id=19744672',
        'only_matching': True,
    }, {
        'url': 'https://cdn.espn.go.com/video/clip/_/id/19771774',
        'only_matching': True,
    }, {
        'url': 'http://www.espn.com/video/clip?id=10365079',
        'only_matching': True,
    }, {
        'url': 'http://www.espn.com/video/clip/_/id/17989860',
        'only_matching': True,
    }, {
        'url': 'https://espn.go.com/video/iframe/twitter/?cms=espn&id=10365079',
        'only_matching': True,
    }, {
        'url': 'http://www.espnfc.us/video/espn-fc-tv/86/video/3319154/nashville-unveiled-as-the-newest-club-in-mls',
        'only_matching': True,
    }, {
        'url': 'http://www.espnfc.com/english-premier-league/23/video/3324163/premier-league-in-90-seconds-golden-tweets',
        'only_matching': True,
    }, {
        'url': 'http://www.espn.com/espnw/video/26066627/arkansas-gibson-completes-hr-cycle-four-innings',
        'only_matching': True,
    }, {
        'url': 'http://www.espn.com/watch/player?id=19141491',
        'only_matching': True,
    }, {
        'url': 'http://www.espn.com/watch/player?bucketId=257&id=19505875',
        'only_matching': True,
    }, ]

    def _real_extract(self, url):
        """Download the clip JSON for the ID in *url* and build the info dict.

        The first entry of the response's ``videos`` list is used. Its
        ``headline`` is read with a plain subscript, so a missing headline
        raises; the remaining metadata fields are read with ``.get``.
        """
        video_id = self._match_id(url)

        clip = self._download_json(
            'http://api-app.espn.com/v1/video/clips/%s' % video_id,
            video_id)['videos'][0]

        title = clip['headline']

        # URLs already turned into formats; the 'source' and 'mobile' trees
        # are both walked, so the same URL may be encountered more than once.
        format_urls = set()
        formats = []

        def traverse_source(source, base_source_id=None):
            # Walk a (possibly nested) mapping of source IDs. String values
            # are treated as media URLs; nested dicts are descended into with
            # the key path joined by '-' as the source ID.
            for source_id, value in source.items():
                if source_id == 'alert':
                    continue
                elif isinstance(value, str):
                    # The format is labelled with the parent path, not with
                    # the leaf key itself.
                    extract_source(value, base_source_id)
                elif isinstance(value, dict):
                    traverse_source(
                        value,
                        '%s-%s' % (base_source_id, source_id)
                        if base_source_id else source_id)

        def extract_source(source_url, source_id=None):
            # Turn one media URL into format entries, dispatching on the URL
            # type (Once, SMIL, f4m, m3u8, or a direct file).
            if source_url in format_urls:
                return
            format_urls.add(source_url)
            ext = determine_ext(source_url)
            if OnceIE.suitable(source_url):
                formats.extend(self._extract_once_formats(source_url))
            elif ext == 'smil':
                formats.extend(self._extract_smil_formats(
                    source_url, video_id, fatal=False))
            elif ext == 'f4m':
                formats.extend(self._extract_f4m_formats(
                    source_url, video_id, f4m_id=source_id, fatal=False))
            elif ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    source_url, video_id, 'mp4', entry_protocol='m3u8_native',
                    m3u8_id=source_id, fatal=False))
            else:
                f = {
                    'url': source_url,
                    'format_id': source_id,
                }
                # Direct file names may embed '<height>p<fps>_<bitrate>k.'
                mobj = re.search(r'(\d+)p(\d+)_(\d+)k\.', source_url)
                if mobj:
                    f.update({
                        'height': int(mobj.group(1)),
                        'fps': int(mobj.group(2)),
                        'tbr': int(mobj.group(3)),
                    })
                if source_id == 'mezzanine':
                    f['quality'] = 1
                formats.append(f)

        # `or {}` rather than a .get() default: the default is only used when
        # the key is absent, so a JSON null would otherwise reach .items()
        # and raise AttributeError.
        links = clip.get('links') or {}
        traverse_source(links.get('source') or {})
        traverse_source(links.get('mobile') or {})
        self._sort_formats(formats)

        description = clip.get('caption') or clip.get('description')
        thumbnail = clip.get('thumbnail')
        duration = int_or_none(clip.get('duration'))
        timestamp = unified_timestamp(clip.get('originalPublishDate'))

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'timestamp': timestamp,
            'duration': duration,
            'formats': formats,
        }
180
181
class ESPNArticleIE(InfoExtractor):
    """Extractor for ESPN article pages that embed a clip.

    The page is scanned for a video play button and the result is delegated
    to ESPNIE through a clip URL.
    """

    _VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/(?:[^/]+/)*(?P<id>[^/]+)'
    _TESTS = [{
        'url': 'http://espn.go.com/nba/recap?gameId=400793786',
        'only_matching': True,
    }, {
        'url': 'http://espn.go.com/blog/golden-state-warriors/post/_/id/593/how-warriors-rapidly-regained-a-winning-edge',
        'only_matching': True,
    }, {
        'url': 'http://espn.go.com/sports/endurance/story/_/id/12893522/dzhokhar-tsarnaev-sentenced-role-boston-marathon-bombings',
        'only_matching': True,
    }, {
        'url': 'http://espn.go.com/nba/playoffs/2015/story/_/id/12887571/john-wall-washington-wizards-no-swelling-left-hand-wrist-game-5-return',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs that ESPNIE or WatchESPNIE accept; otherwise defer to the base check."""
        if ESPNIE.suitable(url) or WatchESPNIE.suitable(url):
            return False
        return super().suitable(url)

    def _real_extract(self, url):
        """Find the embedded clip ID on the article page and hand it to ESPNIE."""
        page_id = self._match_id(url)
        webpage = self._download_webpage(url, page_id)

        # The clip ID is the data-id attribute of the element whose class
        # contains 'video-play-button'.
        clip_id = self._search_regex(
            r'class=(["\']).*?video-play-button.*?\1[^>]+data-id=["\'](?P<id>\d+)',
            webpage, 'video id', group='id')

        clip_url = 'http://espn.go.com/video/clip?id=%s' % clip_id
        return self.url_result(clip_url, ESPNIE.ie_key())
213
214
class FiveThirtyEightIE(InfoExtractor):
    """Extractor for fivethirtyeight.com feature pages that embed a video iframe."""

    _VALID_URL = r'https?://(?:www\.)?fivethirtyeight\.com/features/(?P<id>[^/?#]+)'
    _TEST = {
        'url': 'http://fivethirtyeight.com/features/how-the-6-8-raiders-can-still-make-the-playoffs/',
        'info_dict': {
            'id': '56032156',
            'ext': 'flv',
            'title': 'FiveThirtyEight: The Raiders can still make the playoffs',
            'description': 'Neil Paine breaks down the simplest scenario that will put the Raiders into the playoffs at 8-8.',
        },
        'params': {
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Locate the embed iframe on the page and delegate to the 'AbcNewsVideo' extractor."""
        slug = self._match_id(url)
        page = self._download_webpage(url, slug)

        # Source of the first iframe pointing at the fivethirtyeight embed host.
        iframe_pattern = r'<iframe[^>]+src=["\'](https?://fivethirtyeight\.abcnews\.go\.com/video/embed/\d+/\d+)'
        iframe_src = self._search_regex(iframe_pattern, page, 'embed url')

        return self.url_result(iframe_src, 'AbcNewsVideo')
240
241
class ESPNCricInfoIE(InfoExtractor):
    """Extractor for espncricinfo.com video pages, backed by the hs-consumer-api video-details endpoint."""

    _VALID_URL = r'https?://(?:www\.)?espncricinfo\.com/video/[^#$&?/]+-(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.espncricinfo.com/video/finch-chasing-comes-with-risks-despite-world-cup-trend-1289135',
        'info_dict': {
            'id': '1289135',
            'ext': 'mp4',
            'title': 'Finch: Chasing comes with \'risks\' despite World Cup trend',
            'description': 'md5:ea32373303e25efbb146efdfc8a37829',
            'upload_date': '20211113',
            'duration': 96,
        },
        'params': {'skip_download': True}
    }]

    def _real_extract(self, url):
        """Fetch the video details JSON and build formats from its ``playbacks`` list."""
        video_id = self._match_id(url)
        details = self._download_json(
            f'https://hs-consumer-api.espncricinfo.com/v1/pages/video/video-details?videoId={video_id}',
            video_id)
        video = details['video']

        formats = []
        subtitles = {}
        for playback in video.get('playbacks') or []:
            playback_type = playback.get('type')
            playback_url = playback.get('url')
            # Entries without a URL are skipped, whatever their type.
            if not playback_url:
                continue
            if playback_type == 'HLS':
                hls_formats, hls_subtitles = self._extract_m3u8_formats_and_subtitles(playback_url, video_id)
                formats.extend(hls_formats)
                subtitles = self._merge_subtitles(subtitles, hls_subtitles)
            elif playback_type == 'AUDIO':
                # Audio-only rendition: flagged as having no video codec.
                formats.append({
                    'url': playback_url,
                    'vcodec': 'none',
                })
        self._sort_formats(formats)

        # The first usable key among publishedAt / recordedAt supplies the date.
        date_source = dict_get(video, ('publishedAt', 'recordedAt'))
        return {
            'id': video_id,
            'title': video.get('title'),
            'description': video.get('summary'),
            'upload_date': unified_strdate(date_source),
            'duration': video.get('duration'),
            'formats': formats,
            'subtitles': subtitles,
        }
281
282
class WatchESPNIE(AdobePassIE):
    """Extractor for ``espn.com/watch/player`` URLs identified by a UUID.

    Depending on the ``sourceId`` of the playback state, the stream is
    obtained through one of three paths: ESPN+ (subscription, authenticated
    through cookies), free content (no login), or TV provider authentication.
    """

    # Dots are escaped so that only the literal host www.espn.com matches.
    _VALID_URL = r'https://www\.espn\.com/watch/player/_/id/(?P<id>[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})'
    _TESTS = [{
        'url': 'https://www.espn.com/watch/player/_/id/ba7d17da-453b-4697-bf92-76a99f61642b',
        'info_dict': {
            'id': 'ba7d17da-453b-4697-bf92-76a99f61642b',
            'ext': 'mp4',
            'title': 'Serbia vs. Turkey',
            'thumbnail': 'https://artwork.api.espn.com/artwork/collections/media/ba7d17da-453b-4697-bf92-76a99f61642b/default?width=640&apikey=1ngjw23osgcis1i1vbj96lmfqs',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.espn.com/watch/player/_/id/4e9b5bd1-4ceb-4482-9d28-1dd5f30d2f34',
        'info_dict': {
            'id': '4e9b5bd1-4ceb-4482-9d28-1dd5f30d2f34',
            'ext': 'mp4',
            'title': 'Real Madrid vs. Real Betis (LaLiga)',
            'thumbnail': 'https://s.secure.espncdn.com/stitcher/artwork/collections/media/bd1f3d12-0654-47d9-852e-71b85ea695c7/16x9.jpg?timestamp=202201112217&showBadge=true&cb=12&package=ESPN_PLUS',
        },
        'params': {
            'skip_download': True,
        },
    }]

    _API_KEY = 'ZXNwbiZicm93c2VyJjEuMC4w.ptUt7QxsteaRruuPmGZFaJByOoqKvDP2a5YkInHrc7c'

    def _call_bamgrid_api(self, path, video_id, payload=None, headers=None):
        """POST *payload* to the bamgrid edge API at *path* and return the parsed JSON.

        The payload is form-encoded for the ``token`` path and JSON-encoded
        for every other path. When *headers* carries no ``Authorization``
        entry, a bearer header built from ``_API_KEY`` is added.
        """
        # Work on a private copy: neither a shared default nor the caller's
        # dict may be mutated when the Authorization header is filled in.
        headers = dict(headers or {})
        headers.setdefault('Authorization', f'Bearer {self._API_KEY}')
        parse = urllib.parse.urlencode if path == 'token' else json.dumps
        return self._download_json(
            f'https://espn.api.edge.bamgrid.com/{path}', video_id, headers=headers, data=parse(payload).encode())

    def _real_extract(self, url):
        """Resolve the HLS stream for the event in *url* and build the info dict."""
        video_id = self._match_id(url)
        video_data = self._download_json(
            f'https://watch-cdn.product.api.espn.com/api/product/v3/watchespn/web/playback/event?id={video_id}',
            video_id)['playbackState']

        # Normalised to a string so the membership test below cannot raise
        # TypeError when 'sourceId' is absent or null.
        source_id = video_data.get('sourceId') or ''

        # Asset endpoint shared by the free and TV-provider paths.
        asset_url = f'https://watch.auth.api.espn.com/video/auth/media/{video_id}/asset?apikey=uiqlbgzdwuru14v627vdusswb'

        # ESPN+ subscription required, through cookies
        if 'DTC' in source_id:
            cookie = self._get_cookies(url).get('ESPN-ONESITE.WEB-PROD.token')
            if not cookie:
                self.raise_login_required(method='cookies')

            # Step 1: register a device and exchange its assertion for a
            # device-level access token.
            assertion = self._call_bamgrid_api(
                'devices', video_id,
                headers={'Content-Type': 'application/json; charset=UTF-8'},
                payload={
                    'deviceFamily': 'android',
                    'applicationRuntime': 'android',
                    'deviceProfile': 'tv',
                    'attributes': {},
                })['assertion']
            token = self._call_bamgrid_api(
                'token', video_id, payload={
                    'subject_token': assertion,
                    'subject_token_type': 'urn:bamtech:params:oauth:token-type:device',
                    'platform': 'android',
                    'grant_type': 'urn:ietf:params:oauth:grant-type:token-exchange'
                })['access_token']

            # Step 2: present the second '|'-separated field of the cookie
            # value as id_token, then exchange the resulting account
            # assertion for an account-level access token.
            assertion = self._call_bamgrid_api(
                'accounts/grant', video_id, payload={'id_token': cookie.value.split('|')[1]},
                headers={
                    'Authorization': token,
                    'Content-Type': 'application/json; charset=UTF-8'
                })['assertion']
            token = self._call_bamgrid_api(
                'token', video_id, payload={
                    'subject_token': assertion,
                    'subject_token_type': 'urn:bamtech:params:oauth:token-type:account',
                    'platform': 'android',
                    'grant_type': 'urn:ietf:params:oauth:grant-type:token-exchange'
                })['access_token']

            # Step 3: request the playback description with the account token.
            playback = self._download_json(
                video_data['videoHref'].format(scenario='browser~ssai'), video_id,
                headers={
                    'Accept': 'application/vnd.media-service+json; version=5',
                    'Authorization': token
                })
            m3u8_url, headers = playback['stream']['complete'][0]['url'], {'authorization': token}

        # No login required
        elif source_id == 'ESPN_FREE':
            asset = self._download_json(asset_url, video_id)
            m3u8_url, headers = asset['stream'], {}

        # TV Provider required
        else:
            resource = self._get_mvpd_resource('ESPN', video_data['name'], video_id, None)
            auth = self._extract_mvpd_auth(url, video_id, 'ESPN', resource).encode()

            # The MVPD authorization is sent base64-encoded and URL-quoted
            # as the adobeToken form field.
            asset = self._download_json(
                asset_url, video_id,
                data=f'adobeToken={urllib.parse.quote_plus(base64.b64encode(auth))}&drmSupport=HLS'.encode())
            m3u8_url, headers = asset['stream'], {}

        formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4', m3u8_id='hls')
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': video_data.get('name'),
            'formats': formats,
            'subtitles': subtitles,
            'thumbnail': video_data.get('posterHref'),
            'http_headers': headers,
        }