]>
Commit | Line | Data |
---|---|---|
b1b01841 CC |
1 | from __future__ import unicode_literals |
2 | ||
421a4595 | 3 | import re |
b1b01841 | 4 | |
421a4595 | 5 | from .common import InfoExtractor |
6 | from ..utils import ( | |
7 | determine_ext, | |
8 | int_or_none, | |
9 | parse_duration, | |
10 | parse_iso8601, | |
11 | try_get, | |
12 | ) | |
b1b01841 | 13 | |
421a4595 | 14 | |
class MLBBaseIE(InfoExtractor):
    """Shared extraction logic for the MLB extractors.

    Subclasses are expected to provide:

    * ``_TIMESTAMP_KEY`` -- key of the video metadata whose value is parsed
      as an ISO 8601 date to produce ``timestamp``.
    * ``_download_video_data(display_id)`` -- returns the video metadata
      dict (must contain ``id`` and ``title``).
    * ``_get_feed(video)`` -- returns the dict that is read for
      ``playbacks``, ``image``/``cuts`` and ``duration``.
    * ``_extract_mlb_subtitles(feed, language)`` -- returns the value used
      for the ``subtitles`` field.
    """

    def _real_extract(self, url):
        """Build the info dict for the video addressed by ``url``."""
        display_id = self._match_id(url)
        video = self._download_video_data(display_id)
        video_id = video['id']
        title = video['title']
        feed = self._get_feed(video)

        formats = []
        for playback in (feed.get('playbacks') or []):
            playback_url = playback.get('url')
            if not playback_url:
                continue
            name = playback.get('name')
            ext = determine_ext(playback_url)
            if ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    playback_url, video_id, 'mp4',
                    'm3u8_native', m3u8_id=name, fatal=False))
            else:
                f = {
                    'format_id': name,
                    'url': playback_url,
                }
                # Bitrate and resolution may be encoded in the playback
                # name as ``_<tbr>K_<width>X<height>``.  The name is
                # optional, so fall back to an empty string: re.search()
                # raises TypeError when handed None.
                mobj = re.search(r'_(\d+)K_(\d+)X(\d+)', name or '')
                if mobj:
                    f.update({
                        'height': int(mobj.group(3)),
                        'tbr': int(mobj.group(1)),
                        'width': int(mobj.group(2)),
                    })
                # The URL may carry ``_<width>x<height>_<fps>_<tbr>K.mp4``;
                # when present these values override the name-derived ones.
                mobj = re.search(r'_(\d+)x(\d+)_(\d+)_(\d+)K\.mp4', playback_url)
                if mobj:
                    f.update({
                        'fps': int(mobj.group(3)),
                        'height': int(mobj.group(2)),
                        'tbr': int(mobj.group(4)),
                        'width': int(mobj.group(1)),
                    })
                formats.append(f)
        self._sort_formats(formats)

        thumbnails = []
        for cut in (try_get(feed, lambda x: x['image']['cuts'], list) or []):
            src = cut.get('src')
            if not src:
                # A cut without a source URL is useless as a thumbnail.
                continue
            thumbnails.append({
                'height': int_or_none(cut.get('height')),
                'url': src,
                'width': int_or_none(cut.get('width')),
            })

        # Subtitles are keyed by the lower-cased language, defaulting to 'en'.
        language = (video.get('language') or 'EN').lower()

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'description': video.get('description'),
            'duration': parse_duration(feed.get('duration')),
            'thumbnails': thumbnails,
            'timestamp': parse_iso8601(video.get(self._TIMESTAMP_KEY)),
            'subtitles': self._extract_mlb_subtitles(feed, language),
        }
80 | ||
81 | ||
class MLBIE(MLBBaseIE):
    """Extractor for mlb.com URLs that carry a numeric content id.

    Matches ``.../video/<slug>/c-<id>`` pages as well as embed/``play.jsp``/
    ``index.jsp`` URLs with a ``content_id=<id>`` query parameter.
    """

    _VALID_URL = r'''(?x)
                    https?://
                        (?:[\da-z_-]+\.)*mlb\.com/
                        (?:
                            (?:
                                (?:[^/]+/)*video/[^/]+/c-|
                                (?:
                                    shared/video/embed/(?:embed|m-internal-embed)\.html|
                                    (?:[^/]+/)+(?:play|index)\.jsp|
                                )\?.*?\bcontent_id=
                            )
                            (?P<id>\d+)
                        )
                    '''
    _TESTS = [
        {
            'url': 'https://www.mlb.com/mariners/video/ackleys-spectacular-catch/c-34698933',
            'md5': '632358dacfceec06bad823b83d21df2d',
            'info_dict': {
                'id': '34698933',
                'ext': 'mp4',
                'title': "Ackley's spectacular catch",
                'description': 'md5:7f5a981eb4f3cbc8daf2aeffa2215bf0',
                'duration': 66,
                'timestamp': 1405995000,
                'upload_date': '20140722',
                'thumbnail': r're:^https?://.*\.jpg$',
            },
        },
        {
            'url': 'https://www.mlb.com/video/stanton-prepares-for-derby/c-34496663',
            'md5': 'bf2619bf9cacc0a564fc35e6aeb9219f',
            'info_dict': {
                'id': '34496663',
                'ext': 'mp4',
                'title': 'Stanton prepares for Derby',
                'description': 'md5:d00ce1e5fd9c9069e9c13ab4faedfa57',
                'duration': 46,
                'timestamp': 1405120200,
                'upload_date': '20140711',
                'thumbnail': r're:^https?://.*\.jpg$',
            },
        },
        {
            'url': 'https://www.mlb.com/video/cespedes-repeats-as-derby-champ/c-34578115',
            'md5': '99bb9176531adc600b90880fb8be9328',
            'info_dict': {
                'id': '34578115',
                'ext': 'mp4',
                'title': 'Cespedes repeats as Derby champ',
                'description': 'md5:08df253ce265d4cf6fb09f581fafad07',
                'duration': 488,
                'timestamp': 1405414336,
                'upload_date': '20140715',
                'thumbnail': r're:^https?://.*\.jpg$',
            },
        },
        {
            'url': 'https://www.mlb.com/video/bautista-on-home-run-derby/c-34577915',
            'md5': 'da8b57a12b060e7663ee1eebd6f330ec',
            'info_dict': {
                'id': '34577915',
                'ext': 'mp4',
                'title': 'Bautista on Home Run Derby',
                'description': 'md5:b80b34031143d0986dddc64a8839f0fb',
                'duration': 52,
                'timestamp': 1405405122,
                'upload_date': '20140715',
                'thumbnail': r're:^https?://.*\.jpg$',
            },
        },
        {
            'url': 'https://www.mlb.com/video/hargrove-homers-off-caldwell/c-1352023483?tid=67793694',
            'only_matching': True,
        },
        {
            'url': 'http://m.mlb.com/shared/video/embed/embed.html?content_id=35692085&topic_id=6479266&width=400&height=224&property=mlb',
            'only_matching': True,
        },
        {
            'url': 'http://mlb.mlb.com/shared/video/embed/embed.html?content_id=36599553',
            'only_matching': True,
        },
        {
            'url': 'http://mlb.mlb.com/es/video/play.jsp?content_id=36599553',
            'only_matching': True,
        },
        {
            'url': 'https://www.mlb.com/cardinals/video/piscottys-great-sliding-catch/c-51175783',
            'only_matching': True,
        },
        {
            # From http://m.mlb.com/news/article/118550098/blue-jays-kevin-pillar-goes-spidey-up-the-wall-to-rob-tim-beckham-of-a-homer
            'url': 'http://mlb.mlb.com/shared/video/embed/m-internal-embed.html?content_id=75609783&property=mlb&autoplay=true&hashmode=false&siteSection=mlb/multimedia/article_118550098/article_embed&club=mlb',
            'only_matching': True,
        },
    ]
    # Metadata key whose value is parsed into the 'timestamp' field.
    _TIMESTAMP_KEY = 'date'

    @staticmethod
    def _get_feed(video):
        """Return the feed dict; here the video metadata itself is the feed."""
        return video

    @staticmethod
    def _extract_mlb_subtitles(feed, language):
        """Collect closed-caption URLs from the feed's ``keywordsAll`` list.

        Every keyword whose ``type`` starts with
        ``closed_captions_location_`` and that has a non-empty ``value``
        contributes one ``{'url': value}`` entry under ``language``.
        Returns an empty dict when nothing matches.
        """
        captions = {}
        for entry in (feed.get('keywordsAll') or []):
            kind = entry.get('type')
            if not kind or not kind.startswith('closed_captions_location_'):
                continue
            location = entry.get('value')
            if not location:
                continue
            captions.setdefault(language, []).append({
                'url': location,
            })
        return captions

    def _download_video_data(self, display_id):
        """Download and return the JSON details document for ``display_id``."""
        details_url = (
            'http://content.mlb.com/mlb/item/id/v1/%s/details/web-v1.json'
            % display_id)
        return self._download_json(details_url, display_id)
203 | ||
204 | ||
class MLBVideoIE(MLBBaseIE):
    """Extractor for mlb.com ``/video/<slug>`` pages backed by the GraphQL API.

    URLs that ``MLBIE`` already accepts are deliberately left to it (see
    ``suitable``).
    """

    _VALID_URL = r'https?://(?:www\.)?mlb\.com/(?:[^/]+/)*video/(?P<id>[^/?&#]+)'
    _TEST = {
        'url': 'https://www.mlb.com/mariners/video/ackley-s-spectacular-catch-c34698933',
        'md5': '632358dacfceec06bad823b83d21df2d',
        'info_dict': {
            'id': 'c04a8863-f569-42e6-9f87-992393657614',
            'ext': 'mp4',
            'title': "Ackley's spectacular catch",
            'description': 'md5:7f5a981eb4f3cbc8daf2aeffa2215bf0',
            'duration': 66,
            'timestamp': 1405995000,
            'upload_date': '20140722',
            'thumbnail': r're:^https?://.+',
        },
    }
    # Metadata key whose value is parsed into the 'timestamp' field.
    _TIMESTAMP_KEY = 'timestamp'

    @classmethod
    def suitable(cls, url):
        """Reject any URL that MLBIE matches; otherwise defer to the base check."""
        return False if MLBIE.suitable(url) else super(MLBVideoIE, cls).suitable(url)

    @staticmethod
    def _get_feed(video):
        """Return the first entry of the video's ``feeds`` list."""
        return video['feeds'][0]

    @staticmethod
    def _extract_mlb_subtitles(feed, language):
        """Map ``language`` to the feed's ``closedCaptions`` URLs.

        Each ``closedCaptions`` entry is used as a subtitle URL.  Returns an
        empty dict when the feed has no closed captions.
        """
        subtitles = {}
        for cc_location in (feed.get('closedCaptions') or []):
            subtitles.setdefault(language, []).append({
                'url': cc_location,
            })
        # Without this return the method yields None and the collected
        # captions never reach the info dict.
        return subtitles

    def _download_video_data(self, display_id):
        """Query the GraphQL gateway and return the first ``mediaPlayback`` item."""
        # https://www.mlb.com/data-service/en/videos/[SLUG]
        return self._download_json(
            'https://fastball-gateway.mlb.com/graphql',
            display_id, query={
                'query': '''{
  mediaPlayback(ids: "%s") {
    description
    feeds(types: CMS) {
      closedCaptions
      duration
      image {
        cuts {
          width
          height
          src
        }
      }
      playbacks {
        name
        url
      }
    }
    id
    timestamp
    title
  }
}''' % display_id,
            })['data']['mediaPlayback'][0]