jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/mlb.py
[ant1newsgr] Add extractor (#1982)
[yt-dlp.git] / yt_dlp / extractor / mlb.py
1 from __future__ import unicode_literals
2
3 import re
4
5 from .common import InfoExtractor
6 from ..utils import (
7 determine_ext,
8 int_or_none,
9 parse_duration,
10 parse_iso8601,
11 try_get,
12 )
13
14
class MLBBaseIE(InfoExtractor):
    """Shared extraction logic for the MLB extractors.

    ``_real_extract`` relies on the following members, which this class does
    not define itself and which concrete subclasses must supply:

    * ``_download_video_data(display_id)`` -- returns the video metadata dict
      (must contain ``'id'`` and ``'title'``).
    * ``_get_feed(video)`` -- returns the dict holding ``'playbacks'``,
      ``'image'`` and ``'duration'`` for that video.
    * ``_extract_mlb_subtitles(feed, language)`` -- returns the subtitles
      mapping placed in the result.
    * ``_TIMESTAMP_KEY`` -- key of ``video`` holding the ISO 8601 timestamp.
    """

    def _real_extract(self, url):
        """Build the info dict for the video addressed by ``url``.

        Raises ``KeyError`` if the downloaded metadata lacks ``'id'`` or
        ``'title'``; every other field is optional.
        """
        display_id = self._match_id(url)
        video = self._download_video_data(display_id)
        video_id = video['id']
        title = video['title']
        feed = self._get_feed(video)

        formats = []
        for playback in (feed.get('playbacks') or []):
            playback_url = playback.get('url')
            if not playback_url:
                continue
            name = playback.get('name')
            ext = determine_ext(playback_url)
            if ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    playback_url, video_id, 'mp4',
                    'm3u8_native', m3u8_id=name, fatal=False))
            else:
                f = {
                    'format_id': name,
                    'url': playback_url,
                }
                # 'name' is optional in the playback entry; re.search() raises
                # TypeError on None, so match against '' when it is missing.
                mobj = re.search(r'_(\d+)K_(\d+)X(\d+)', name or '')
                if mobj:
                    f.update({
                        'height': int(mobj.group(3)),
                        'tbr': int(mobj.group(1)),
                        'width': int(mobj.group(2)),
                    })
                # The URL pattern additionally carries the frame rate; when it
                # matches, its values override those parsed from the name.
                mobj = re.search(r'_(\d+)x(\d+)_(\d+)_(\d+)K\.mp4', playback_url)
                if mobj:
                    f.update({
                        'fps': int(mobj.group(3)),
                        'height': int(mobj.group(2)),
                        'tbr': int(mobj.group(4)),
                        'width': int(mobj.group(1)),
                    })
                formats.append(f)
        self._sort_formats(formats)

        thumbnails = []
        for cut in (try_get(feed, lambda x: x['image']['cuts'], list) or []):
            src = cut.get('src')
            if not src:
                continue
            thumbnails.append({
                'height': int_or_none(cut.get('height')),
                'url': src,
                'width': int_or_none(cut.get('width')),
            })

        # Subtitles are keyed by the lower-cased language code; 'en' is used
        # when the metadata does not name a language.
        language = (video.get('language') or 'EN').lower()

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'description': video.get('description'),
            'duration': parse_duration(feed.get('duration')),
            'thumbnails': thumbnails,
            'timestamp': parse_iso8601(video.get(self._TIMESTAMP_KEY)),
            'subtitles': self._extract_mlb_subtitles(feed, language),
        }
80
81
class MLBIE(MLBBaseIE):
    """Extractor for mlb.com videos addressed by a numeric content id.

    Matches ``.../video/<slug>/c-<id>`` pages as well as the embed and
    ``play.jsp`` / ``index.jsp`` URLs that carry ``content_id=<id>``.
    """

    _VALID_URL = r'''(?x)
                    https?://
                        (?:[\da-z_-]+\.)*mlb\.com/
                        (?:
                            (?:
                                (?:[^/]+/)*video/[^/]+/c-|
                                (?:
                                    shared/video/embed/(?:embed|m-internal-embed)\.html|
                                    (?:[^/]+/)+(?:play|index)\.jsp|
                                )\?.*?\bcontent_id=
                            )
                            (?P<id>\d+)
                        )
                    '''
    _TESTS = [
        {
            'url': 'https://www.mlb.com/mariners/video/ackleys-spectacular-catch/c-34698933',
            'md5': '632358dacfceec06bad823b83d21df2d',
            'info_dict': {
                'id': '34698933',
                'ext': 'mp4',
                'title': "Ackley's spectacular catch",
                'description': 'md5:7f5a981eb4f3cbc8daf2aeffa2215bf0',
                'duration': 66,
                'timestamp': 1405995000,
                'upload_date': '20140722',
                'thumbnail': r're:^https?://.*\.jpg$',
            },
        },
        {
            'url': 'https://www.mlb.com/video/stanton-prepares-for-derby/c-34496663',
            'md5': 'bf2619bf9cacc0a564fc35e6aeb9219f',
            'info_dict': {
                'id': '34496663',
                'ext': 'mp4',
                'title': 'Stanton prepares for Derby',
                'description': 'md5:d00ce1e5fd9c9069e9c13ab4faedfa57',
                'duration': 46,
                'timestamp': 1405120200,
                'upload_date': '20140711',
                'thumbnail': r're:^https?://.*\.jpg$',
            },
        },
        {
            'url': 'https://www.mlb.com/video/cespedes-repeats-as-derby-champ/c-34578115',
            'md5': '99bb9176531adc600b90880fb8be9328',
            'info_dict': {
                'id': '34578115',
                'ext': 'mp4',
                'title': 'Cespedes repeats as Derby champ',
                'description': 'md5:08df253ce265d4cf6fb09f581fafad07',
                'duration': 488,
                'timestamp': 1405414336,
                'upload_date': '20140715',
                'thumbnail': r're:^https?://.*\.jpg$',
            },
        },
        {
            'url': 'https://www.mlb.com/video/bautista-on-home-run-derby/c-34577915',
            'md5': 'da8b57a12b060e7663ee1eebd6f330ec',
            'info_dict': {
                'id': '34577915',
                'ext': 'mp4',
                'title': 'Bautista on Home Run Derby',
                'description': 'md5:b80b34031143d0986dddc64a8839f0fb',
                'duration': 52,
                'timestamp': 1405405122,
                'upload_date': '20140715',
                'thumbnail': r're:^https?://.*\.jpg$',
            },
        },
        {
            'url': 'https://www.mlb.com/video/hargrove-homers-off-caldwell/c-1352023483?tid=67793694',
            'only_matching': True,
        },
        {
            'url': 'http://m.mlb.com/shared/video/embed/embed.html?content_id=35692085&topic_id=6479266&width=400&height=224&property=mlb',
            'only_matching': True,
        },
        {
            'url': 'http://mlb.mlb.com/shared/video/embed/embed.html?content_id=36599553',
            'only_matching': True,
        },
        {
            'url': 'http://mlb.mlb.com/es/video/play.jsp?content_id=36599553',
            'only_matching': True,
        },
        {
            'url': 'https://www.mlb.com/cardinals/video/piscottys-great-sliding-catch/c-51175783',
            'only_matching': True,
        },
        {
            # From http://m.mlb.com/news/article/118550098/blue-jays-kevin-pillar-goes-spidey-up-the-wall-to-rob-tim-beckham-of-a-homer
            'url': 'http://mlb.mlb.com/shared/video/embed/m-internal-embed.html?content_id=75609783&property=mlb&autoplay=true&hashmode=false&siteSection=mlb/multimedia/article_118550098/article_embed&club=mlb',
            'only_matching': True,
        },
    ]
    # Key of the metadata dict that holds the ISO 8601 publication time.
    _TIMESTAMP_KEY = 'date'

    @staticmethod
    def _get_feed(video):
        """Return the feed dict; here the video metadata itself is the feed."""
        return video

    @staticmethod
    def _extract_mlb_subtitles(feed, language):
        """Collect closed-caption URLs from the feed's ``keywordsAll`` list.

        Every keyword whose ``type`` starts with
        ``closed_captions_location_`` and has a non-empty ``value``
        contributes one ``{'url': value}`` entry under ``language``.
        Returns an empty dict when no such keyword exists.
        """
        tracks = []
        for entry in (feed.get('keywordsAll') or []):
            kind = entry.get('type')
            if not kind or not kind.startswith('closed_captions_location_'):
                continue
            location = entry.get('value')
            if not location:
                continue
            tracks.append({
                'url': location,
            })
        if not tracks:
            return {}
        return {language: tracks}

    def _download_video_data(self, display_id):
        """Download the JSON details document for ``display_id``."""
        details_url = (
            'http://content.mlb.com/mlb/item/id/v1/%s/details/web-v1.json'
            % display_id)
        return self._download_json(details_url, display_id)
203
204
class MLBVideoIE(MLBBaseIE):
    """Extractor for mlb.com video pages addressed by a slug.

    Matches ``https://[www.]mlb.com/[<path>/]video/<slug>`` and fetches the
    metadata through the ``fastball-gateway.mlb.com`` GraphQL endpoint.
    """

    _VALID_URL = r'https?://(?:www\.)?mlb\.com/(?:[^/]+/)*video/(?P<id>[^/?&#]+)'
    _TEST = {
        'url': 'https://www.mlb.com/mariners/video/ackley-s-spectacular-catch-c34698933',
        'md5': '632358dacfceec06bad823b83d21df2d',
        'info_dict': {
            'id': 'c04a8863-f569-42e6-9f87-992393657614',
            'ext': 'mp4',
            'title': "Ackley's spectacular catch",
            'description': 'md5:7f5a981eb4f3cbc8daf2aeffa2215bf0',
            'duration': 66,
            'timestamp': 1405995000,
            'upload_date': '20140722',
            'thumbnail': r're:^https?://.+',
        },
    }
    # Key of the metadata dict that holds the ISO 8601 publication time.
    _TIMESTAMP_KEY = 'timestamp'

    @classmethod
    def suitable(cls, url):
        """Return whether this extractor should handle ``url``.

        URLs that MLBIE accepts are always left to MLBIE, because this
        class's broader pattern would otherwise claim them as well.
        """
        return False if MLBIE.suitable(url) else super(MLBVideoIE, cls).suitable(url)

    @staticmethod
    def _get_feed(video):
        """Return the first entry of the video's ``feeds`` list."""
        return video['feeds'][0]

    @staticmethod
    def _extract_mlb_subtitles(feed, language):
        """Map ``language`` to one ``{'url': ...}`` entry per closed caption.

        Reads the feed's ``closedCaptions`` list and returns an empty dict
        when it is missing or empty.
        """
        subtitles = {}
        for cc_location in (feed.get('closedCaptions') or []):
            subtitles.setdefault(language, []).append({
                'url': cc_location,
            })
        # Previously the mapping was built but never returned, so the
        # caller always received None and the captions were lost.
        return subtitles

    def _download_video_data(self, display_id):
        """Query the GraphQL endpoint for ``display_id``.

        Returns the first ``mediaPlayback`` item of the response.
        """
        # https://www.mlb.com/data-service/en/videos/[SLUG]
        return self._download_json(
            'https://fastball-gateway.mlb.com/graphql',
            display_id, query={
                'query': '''{
  mediaPlayback(ids: "%s") {
    description
    feeds(types: CMS) {
      closedCaptions
      duration
      image {
        cuts {
          width
          height
          src
        }
      }
      playbacks {
        name
        url
      }
    }
    id
    timestamp
    title
  }
}''' % display_id,
            })['data']['mediaPlayback'][0]