]>
Commit | Line | Data |
---|---|---|
b1b01841 CC |
1 | from __future__ import unicode_literals |
2 | ||
3 | import re | |
4 | ||
5 | from .common import InfoExtractor | |
7bb49d10 S |
6 | from ..utils import ( |
7 | parse_duration, | |
8 | parse_iso8601, | |
7bb49d10 | 9 | ) |
b1b01841 CC |
10 | |
11 | ||
class MLBIE(InfoExtractor):
    """Information extractor for video pages on m.mlb.com / mlb.mlb.com.

    The content id is taken from the page URL, the matching ``detail`` XML
    document is downloaded, and its headline, blurb, duration, date,
    thumbnails and media URLs are turned into an info dict.
    """

    # Accepts ".../video/[topic/<topic>/]v<id>" pages as well as the
    # "embed.html" and "play.jsp" players addressed via "?content_id=<id>".
    _VALID_URL = r'https?://m(?:lb)?\.mlb\.com/(?:(?:.*?/)?video/(?:topic/[\da-z_-]+/)?v|(?:shared/video/embed/embed\.html|[^/]+/video/play\.jsp)\?.*?\bcontent_id=)(?P<id>n?\d+)'
    _TESTS = [
        {
            'url': 'http://m.mlb.com/sea/video/topic/51231442/v34698933/nymsea-ackley-robs-a-home-run-with-an-amazing-catch/?c_id=sea',
            'md5': 'ff56a598c2cf411a9a38a69709e97079',
            'info_dict': {
                'id': '34698933',
                'ext': 'mp4',
                'title': "Ackley's spectacular catch",
                'description': 'md5:7f5a981eb4f3cbc8daf2aeffa2215bf0',
                'duration': 66,
                'timestamp': 1405980600,
                'upload_date': '20140721',
                # Raw string: "\." is a regex escape, not a string escape.
                'thumbnail': r're:^https?://.*\.jpg$',
            },
        },
        {
            'url': 'http://m.mlb.com/video/topic/81536970/v34496663/mianym-stanton-practices-for-the-home-run-derby',
            'md5': 'd9c022c10d21f849f49c05ae12a8a7e9',
            'info_dict': {
                'id': '34496663',
                'ext': 'mp4',
                'title': 'Stanton prepares for Derby',
                'description': 'md5:d00ce1e5fd9c9069e9c13ab4faedfa57',
                'duration': 46,
                'timestamp': 1405105800,
                'upload_date': '20140711',
                'thumbnail': r're:^https?://.*\.jpg$',
            },
        },
        {
            'url': 'http://m.mlb.com/video/topic/vtp_hrd_sponsor/v34578115/hrd-cespedes-wins-2014-gillette-home-run-derby',
            'md5': '0e6e73d509321e142409b695eadd541f',
            'info_dict': {
                'id': '34578115',
                'ext': 'mp4',
                'title': 'Cespedes repeats as Derby champ',
                'description': 'md5:08df253ce265d4cf6fb09f581fafad07',
                'duration': 488,
                'timestamp': 1405399936,
                'upload_date': '20140715',
                'thumbnail': r're:^https?://.*\.jpg$',
            },
        },
        {
            'url': 'http://m.mlb.com/video/v34577915/bautista-on-derby-captaining-duties-his-performance',
            'md5': 'b8fd237347b844365d74ea61d4245967',
            'info_dict': {
                'id': '34577915',
                'ext': 'mp4',
                'title': 'Bautista on Home Run Derby',
                'description': 'md5:b80b34031143d0986dddc64a8839f0fb',
                'duration': 52,
                'timestamp': 1405390722,
                'upload_date': '20140715',
                'thumbnail': r're:^https?://.*\.jpg$',
            },
        },
        {
            'url': 'http://m.mlb.com/shared/video/embed/embed.html?content_id=35692085&topic_id=6479266&width=400&height=224&property=mlb',
            'only_matching': True,
        },
        {
            'url': 'http://mlb.mlb.com/shared/video/embed/embed.html?content_id=36599553',
            'only_matching': True,
        },
        {
            'url': 'http://mlb.mlb.com/es/video/play.jsp?content_id=36599553',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Build the info dict for the MLB video addressed by *url*.

        Returns a dict with the keys ``id``, ``title``, ``description``,
        ``duration``, ``timestamp``, ``formats`` and ``thumbnails``.
        ``description`` and ``duration`` are None when the detail document
        does not provide them.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        # The detail document path is built from the last three characters
        # of the id followed by the full id.
        detail = self._download_xml(
            'http://m.mlb.com/gen/multimedia/detail/%s/%s/%s/%s.xml'
            % (video_id[-3], video_id[-2], video_id[-1], video_id), video_id)

        # The headline is required: a missing element raises AttributeError.
        title = detail.find('./headline').text

        # Blurb and duration are treated as optional; findtext() returns
        # None for a missing element and '' for an empty one, so both cases
        # are normalised to None instead of raising.
        description = detail.findtext('./big-blurb') or None
        duration_text = detail.findtext('./duration')
        duration = parse_duration(duration_text) if duration_text else None

        # The last five characters of the date attribute are dropped before
        # parsing.
        # NOTE(review): presumably a "+HHMM"-style UTC offset -- confirm.
        timestamp = parse_iso8601(detail.attrib['date'][:-5])

        # Skip thumbnail entries that carry no URL text.
        thumbnails = [
            {'url': thumbnail.text}
            for thumbnail in detail.findall('./thumbnailScenarios/thumbnailScenario')
            if thumbnail.text
        ]

        # Scenario names such as "<vbr>K_<width>X<height>" encode the video
        # bitrate and frame size; compile the pattern once for all entries.
        scenario_re = re.compile(r'(?P<vbr>\d+)K_(?P<width>\d+)X(?P<height>\d+)')

        formats = []
        for media_url in detail.findall('./url'):
            if not media_url.text:
                # A format without a URL cannot be downloaded.
                continue
            playback_scenario = media_url.attrib['playback_scenario']
            fmt = {
                'url': media_url.text,
                'format_id': playback_scenario,
            }
            m = scenario_re.search(playback_scenario)
            if m:
                fmt.update({
                    # NOTE(review): the "K" figure is multiplied by 1000
                    # here; confirm which unit consumers of 'vbr' expect.
                    'vbr': int(m.group('vbr')) * 1000,
                    'width': int(m.group('width')),
                    'height': int(m.group('height')),
                })
            formats.append(fmt)

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'timestamp': timestamp,
            'formats': formats,
            'thumbnails': thumbnails,
        }