]>
Commit | Line | Data |
---|---|---|
421a4595 | 1 | import re |
e183bb8c | 2 | import urllib.parse |
3 | import uuid | |
b1b01841 | 4 | |
421a4595 | 5 | from .common import InfoExtractor |
6 | from ..utils import ( | |
7 | determine_ext, | |
8 | int_or_none, | |
e183bb8c | 9 | join_nonempty, |
421a4595 | 10 | parse_duration, |
11 | parse_iso8601, | |
e183bb8c | 12 | traverse_obj, |
421a4595 | 13 | try_get, |
14 | ) | |
b1b01841 | 15 | |
421a4595 | 16 | |
class MLBBaseIE(InfoExtractor):
    """Shared extraction logic for the MLB clip extractors.

    This class does not define ``_TIMESTAMP_KEY``, ``_download_video_data``,
    ``_get_feed`` or ``_extract_mlb_subtitles``; ``_real_extract`` relies on
    the concrete subclass to provide them.
    """

    def _real_extract(self, url):
        """Return the info dict for the clip addressed by *url*.

        The video metadata is fetched through ``_download_video_data``; the
        feed returned by ``_get_feed`` supplies the playbacks (formats),
        image cuts (thumbnails), duration and subtitles.
        """
        display_id = self._match_id(url)
        video = self._download_video_data(display_id)
        video_id = video['id']
        title = video['title']
        feed = self._get_feed(video)

        formats = []
        for playback in (feed.get('playbacks') or []):
            playback_url = playback.get('url')
            if not playback_url:
                continue
            name = playback.get('name')
            ext = determine_ext(playback_url)
            if ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    playback_url, video_id, 'mp4',
                    'm3u8_native', m3u8_id=name, fatal=False))
            else:
                f = {
                    'format_id': name,
                    'url': playback_url,
                }
                # The playback name may be absent; re.search() raises
                # TypeError on None, so only match when it is a string.
                mobj = (
                    re.search(r'_(\d+)K_(\d+)X(\d+)', name)
                    if isinstance(name, str) else None)
                if mobj:
                    f.update({
                        'height': int(mobj.group(3)),
                        'tbr': int(mobj.group(1)),
                        'width': int(mobj.group(2)),
                    })
                # Values parsed from the URL override the name-derived ones
                # because this update runs second.
                mobj = re.search(r'_(\d+)x(\d+)_(\d+)_(\d+)K\.mp4', playback_url)
                if mobj:
                    f.update({
                        'fps': int(mobj.group(3)),
                        'height': int(mobj.group(2)),
                        'tbr': int(mobj.group(4)),
                        'width': int(mobj.group(1)),
                    })
                formats.append(f)

        thumbnails = []
        for cut in (try_get(feed, lambda x: x['image']['cuts'], list) or []):
            src = cut.get('src')
            if not src:
                continue
            thumbnails.append({
                'height': int_or_none(cut.get('height')),
                'url': src,
                'width': int_or_none(cut.get('width')),
            })

        # Subtitles are keyed by the lower-cased language, defaulting to 'en'.
        language = (video.get('language') or 'EN').lower()

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'description': video.get('description'),
            'duration': parse_duration(feed.get('duration')),
            'thumbnails': thumbnails,
            'timestamp': parse_iso8601(video.get(self._TIMESTAMP_KEY)),
            'subtitles': self._extract_mlb_subtitles(feed, language),
        }
81 | ||
82 | ||
class MLBIE(MLBBaseIE):
    """Extractor for MLB URLs that carry a numeric content id.

    Matches ``.../video/<slug>/c-<id>`` pages as well as the embed/``.jsp``
    players that pass the id in a ``content_id`` query parameter.
    """

    _VALID_URL = r'''(?x)
                    https?://
                        (?:[\da-z_-]+\.)*mlb\.com/
                        (?:
                            (?:
                                (?:[^/]+/)*video/[^/]+/c-|
                                (?:
                                    shared/video/embed/(?:embed|m-internal-embed)\.html|
                                    (?:[^/]+/)+(?:play|index)\.jsp|
                                )\?.*?\bcontent_id=
                            )
                            (?P<id>\d+)
                        )
                    '''
    _EMBED_REGEX = [
        r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
        r'data-video-link=["\'](?P<url>http://m\.mlb\.com/video/[^"\']+)',
    ]
    _TESTS = [{
        'url': 'https://www.mlb.com/mariners/video/ackleys-spectacular-catch/c-34698933',
        'md5': '632358dacfceec06bad823b83d21df2d',
        'info_dict': {
            'id': '34698933',
            'ext': 'mp4',
            'title': "Ackley's spectacular catch",
            'description': 'md5:7f5a981eb4f3cbc8daf2aeffa2215bf0',
            'duration': 66,
            'timestamp': 1405995000,
            'upload_date': '20140722',
            'thumbnail': r're:^https?://.*\.jpg$',
        },
    }, {
        'url': 'https://www.mlb.com/video/stanton-prepares-for-derby/c-34496663',
        'md5': 'bf2619bf9cacc0a564fc35e6aeb9219f',
        'info_dict': {
            'id': '34496663',
            'ext': 'mp4',
            'title': 'Stanton prepares for Derby',
            'description': 'md5:d00ce1e5fd9c9069e9c13ab4faedfa57',
            'duration': 46,
            'timestamp': 1405120200,
            'upload_date': '20140711',
            'thumbnail': r're:^https?://.*\.jpg$',
        },
    }, {
        'url': 'https://www.mlb.com/video/cespedes-repeats-as-derby-champ/c-34578115',
        'md5': '99bb9176531adc600b90880fb8be9328',
        'info_dict': {
            'id': '34578115',
            'ext': 'mp4',
            'title': 'Cespedes repeats as Derby champ',
            'description': 'md5:08df253ce265d4cf6fb09f581fafad07',
            'duration': 488,
            'timestamp': 1405414336,
            'upload_date': '20140715',
            'thumbnail': r're:^https?://.*\.jpg$',
        },
    }, {
        'url': 'https://www.mlb.com/video/bautista-on-home-run-derby/c-34577915',
        'md5': 'da8b57a12b060e7663ee1eebd6f330ec',
        'info_dict': {
            'id': '34577915',
            'ext': 'mp4',
            'title': 'Bautista on Home Run Derby',
            'description': 'md5:b80b34031143d0986dddc64a8839f0fb',
            'duration': 52,
            'timestamp': 1405405122,
            'upload_date': '20140715',
            'thumbnail': r're:^https?://.*\.jpg$',
        },
    }, {
        'url': 'https://www.mlb.com/video/hargrove-homers-off-caldwell/c-1352023483?tid=67793694',
        'only_matching': True,
    }, {
        'url': 'http://m.mlb.com/shared/video/embed/embed.html?content_id=35692085&topic_id=6479266&width=400&height=224&property=mlb',
        'only_matching': True,
    }, {
        'url': 'http://mlb.mlb.com/shared/video/embed/embed.html?content_id=36599553',
        'only_matching': True,
    }, {
        'url': 'http://mlb.mlb.com/es/video/play.jsp?content_id=36599553',
        'only_matching': True,
    }, {
        'url': 'https://www.mlb.com/cardinals/video/piscottys-great-sliding-catch/c-51175783',
        'only_matching': True,
    }, {
        # From http://m.mlb.com/news/article/118550098/blue-jays-kevin-pillar-goes-spidey-up-the-wall-to-rob-tim-beckham-of-a-homer
        'url': 'http://mlb.mlb.com/shared/video/embed/m-internal-embed.html?content_id=75609783&property=mlb&autoplay=true&hashmode=false&siteSection=mlb/multimedia/article_118550098/article_embed&club=mlb',
        'only_matching': True,
    }]
    # Key of the video dict holding the ISO 8601 publication date.
    _TIMESTAMP_KEY = 'date'

    @staticmethod
    def _get_feed(video):
        """Return the feed dict; for this endpoint the video itself is the feed."""
        return video

    @staticmethod
    def _extract_mlb_subtitles(feed, language):
        """Collect caption URLs from ``keywordsAll`` entries.

        Only keywords whose ``type`` starts with
        ``closed_captions_location_`` and that have a truthy ``value`` are
        used. Returns ``{language: [{'url': ...}, ...]}``, or ``{}`` when no
        caption location is present.
        """
        caption_tracks = []
        for entry in (feed.get('keywordsAll') or []):
            entry_type = entry.get('type')
            if not entry_type or not entry_type.startswith('closed_captions_location_'):
                continue
            location = entry.get('value')
            if location:
                caption_tracks.append({'url': location})
        return {language: caption_tracks} if caption_tracks else {}

    def _download_video_data(self, display_id):
        """Fetch the JSON details document for the numeric content id."""
        details_url = 'http://content.mlb.com/mlb/item/id/v1/%s/details/web-v1.json' % display_id
        return self._download_json(details_url, display_id)
208 | ||
209 | ||
class MLBVideoIE(MLBBaseIE):
    """Extractor for slug-based ``mlb.com/.../video/<slug>`` pages.

    Metadata comes from the ``fastball-gateway`` GraphQL endpoint rather than
    the numeric-id JSON document used by ``MLBIE``.
    """

    _VALID_URL = r'https?://(?:www\.)?mlb\.com/(?:[^/]+/)*video/(?P<id>[^/?&#]+)'
    _TEST = {
        'url': 'https://www.mlb.com/mariners/video/ackley-s-spectacular-catch-c34698933',
        'md5': '632358dacfceec06bad823b83d21df2d',
        'info_dict': {
            'id': 'c04a8863-f569-42e6-9f87-992393657614',
            'ext': 'mp4',
            'title': "Ackley's spectacular catch",
            'description': 'md5:7f5a981eb4f3cbc8daf2aeffa2215bf0',
            'duration': 66,
            'timestamp': 1405995000,
            'upload_date': '20140722',
            'thumbnail': r're:^https?://.+',
        },
    }
    # Key of the video dict holding the ISO 8601 publication date.
    _TIMESTAMP_KEY = 'timestamp'

    @classmethod
    def suitable(cls, url):
        """Defer to ``MLBIE`` for any URL it already matches."""
        if MLBIE.suitable(url):
            return False
        return super().suitable(url)

    @staticmethod
    def _get_feed(video):
        """Return the first entry of the video's ``feeds`` list."""
        return video['feeds'][0]

    @staticmethod
    def _extract_mlb_subtitles(feed, language):
        """Map every ``closedCaptions`` URL of *feed* to *language*.

        Returns ``{language: [{'url': ...}, ...]}``, or ``{}`` when the feed
        lists no closed captions.
        """
        subtitles = {}
        for cc_location in (feed.get('closedCaptions') or []):
            subtitles.setdefault(language, []).append({
                'url': cc_location,
            })
        # Previously the dict was built but never returned, so callers always
        # received None and the captions were lost.
        return subtitles

    def _download_video_data(self, display_id):
        """Query the GraphQL gateway and return the first ``mediaPlayback`` item."""
        # https://www.mlb.com/data-service/en/videos/[SLUG]
        return self._download_json(
            'https://fastball-gateway.mlb.com/graphql',
            display_id, query={
                'query': '''{
  mediaPlayback(ids: "%s") {
    description
    feeds(types: CMS) {
      closedCaptions
      duration
      image {
        cuts {
          width
          height
          src
        }
      }
      playbacks {
        name
        url
      }
    }
    id
    timestamp
    title
  }
}''' % display_id,
            })['data']['mediaPlayback'][0]
e183bb8c | 273 | |
274 | ||
class MLBTVIE(InfoExtractor):
    """Extractor for MLB.TV game streams (``mlb.com/tv/g<id>``).

    Extraction requires an access token obtained in ``_perform_login``;
    without one ``_real_initialize`` asks the user to log in.
    """

    _VALID_URL = r'https?://(?:www\.)?mlb\.com/tv/g(?P<id>\d{6})'
    _NETRC_MACHINE = 'mlb'

    _TESTS = [{
        'url': 'https://www.mlb.com/tv/g661581/vee2eff5f-a7df-4c20-bdb4-7b926fa12638',
        'info_dict': {
            'id': '661581',
            'ext': 'mp4',
            'title': '2022-07-02 - St. Louis Cardinals @ Philadelphia Phillies',
        },
        'params': {
            'skip_download': True,
        },
    }]
    # Set by _perform_login; stays None until a login succeeds.
    _access_token = None

    def _real_initialize(self):
        """Require a login when no access token has been obtained."""
        if self._access_token:
            return
        self.raise_login_required(
            'All videos are only available to registered users', method='password')

    def _perform_login(self, username, password):
        """Obtain the access token through three chained requests.

        1. Password grant against ``ids.mlb.com`` yields an OAuth token.
        2. That token is used to fetch an entitlement JWT.
        3. The JWT is exchanged at ``us.edge.bamgrid.com`` for the token
           stored in ``self._access_token``.
        """
        android_user_agent = 'okhttp/3.12.1'
        form_content_type = 'application/x-www-form-urlencoded'

        quoted_username = urllib.parse.quote(username)
        quoted_password = urllib.parse.quote(password)
        login_body = f'grant_type=password&username={quoted_username}&password={quoted_password}&scope=openid offline_access&client_id=0oa3e1nutA1HLzAKG356'
        login_response = self._download_json(
            'https://ids.mlb.com/oauth2/aus1m088yK07noBfh356/v1/token', None,
            headers={
                'User-Agent': android_user_agent,
                'Content-Type': form_content_type,
            }, data=login_body.encode())
        oauth_token = login_response['access_token']

        # A fresh random device id is sent with every login.
        device_id = uuid.uuid4()
        entitlement_jwt = self._download_webpage(
            f'https://media-entitlement.mlb.com/api/v3/jwt?os=Android&appname=AtBat&did={device_id}', None,
            headers={
                'User-Agent': android_user_agent,
                'Authorization': f'Bearer {oauth_token}',
            })

        exchange_body = f'grant_type=urn:ietf:params:oauth:grant-type:token-exchange&subject_token={entitlement_jwt}&subject_token_type=urn:ietf:params:oauth:token-type:jwt&platform=android-tv'
        exchange_response = self._download_json(
            'https://us.edge.bamgrid.com/token', None,
            headers={
                'Accept': 'application/json',
                'Authorization': 'Bearer bWxidHYmYW5kcm9pZCYxLjAuMA.6LZMbH2r--rbXcgEabaDdIslpo4RyZrlVfWZhsAgXIk',
                'Content-Type': form_content_type,
            }, data=exchange_body.encode())
        self._access_token = exchange_response['access_token']

    def _real_extract(self, url):
        """Return the info dict, merging formats and subtitles of every airing."""
        video_id = self._match_id(url)
        airings_response = self._download_json(
            f'https://search-api-mlbtv.mlb.com/svc/search/v2/graphql/persisted/query/core/Airings?variables=%7B%22partnerProgramIds%22%3A%5B%22{video_id}%22%5D%2C%22applyEsniMediaRightsLabels%22%3Atrue%7D',
            video_id)
        airings = airings_response['data']['Airings']

        # The playback request sends the raw token, without a 'Bearer ' prefix.
        playback_headers = {
            'Authorization': self._access_token,
            'Accept': 'application/vnd.media-service+json; version=2',
        }

        formats = []
        subtitles = {}
        for airing in airings:
            playback_endpoint = airing['playbackUrls'][0]['href'].format(scenario='browser~csai')
            playback_info = self._download_json(
                playback_endpoint, video_id, headers=playback_headers)
            m3u8_url = playback_info['stream']['complete']

            feed_label = join_nonempty(airing.get('feedType'), airing.get('feedLanguage'))
            airing_formats, airing_subtitles = self._extract_m3u8_formats_and_subtitles(
                m3u8_url, video_id, 'mp4', m3u8_id=feed_label)
            formats.extend(airing_formats)
            self._merge_subtitles(airing_subtitles, target=subtitles)

        episode_name = traverse_obj(airings, (..., 'titles', 0, 'episodeName'), get_all=False)
        product_type = traverse_obj(airings, (..., 'mediaConfig', 'productType'), get_all=False)

        return {
            'id': video_id,
            'title': episode_name,
            'is_live': product_type == 'LIVE',
            'formats': formats,
            'subtitles': subtitles,
            'http_headers': {'Authorization': f'Bearer {self._access_token}'},
        }
e091fb92 H |
349 | |
350 | ||
class MLBArticleIE(InfoExtractor):
    """Extractor turning an ``mlb.com/news/<slug>`` article into a playlist
    of the videos it embeds."""

    _VALID_URL = r'https?://www\.mlb\.com/news/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://www.mlb.com/news/manny-machado-robs-guillermo-heredia-reacts',
        'info_dict': {
            'id': '36db7394-343c-4ea3-b8ca-ead2e61bca9a',
            'title': 'Machado\'s grab draws hilarious irate reaction',
            'modified_timestamp': 1650130737,
            'description': 'md5:a19d4eb0487b2cb304e9a176f6b67676',
            'modified_date': '20220416',
        },
        'playlist_count': 2,
    }]

    def _real_extract(self, url):
        """Return a playlist of the ``Video`` parts found in the article.

        The article data is read from the ``apolloCache`` object embedded in
        the page's ``window.initState`` JSON.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        init_state = self._search_json(
            r'window\.initState\s*=', webpage, 'window.initState', display_id)
        apollo_cache = init_state['apolloCache']

        # The first ROOT_QUERY key starting with 'getForgeContent' carries the
        # cache id of the article entry.
        article_cache_id = traverse_obj(
            apollo_cache,
            ('ROOT_QUERY', lambda k, _: k.startswith('getForgeContent'), 'id'),
            get_all=False)
        article = apollo_cache[article_cache_id]

        video_part_ids = traverse_obj(
            article, ('parts', lambda _, v: v['typename'] == 'Video', 'id'))

        def video_page_url(part_id):
            # Each part id is itself a key into the apollo cache.
            return f'https://www.mlb.com/video/{apollo_cache[part_id]["slug"]}'

        return self.playlist_from_matches(
            video_part_ids,
            getter=video_page_url,
            ie=MLBVideoIE,
            playlist_id=article.get('_translationId'),
            title=self._html_search_meta('og:title', webpage),
            description=article.get('summary'),
            modified_timestamp=parse_iso8601(article.get('lastUpdatedDate')))