]>
Commit | Line | Data |
---|---|---|
632e5684 NJ |
1 | # coding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
4 | import re | |
5 | ||
6 | from .common import InfoExtractor | |
1cc79574 PH |
7 | from ..compat import ( |
8 | compat_urllib_parse_urlparse, | |
9 | ) | |
632e5684 NJ |
10 | from ..utils import ( |
11 | ExtractorError, | |
12 | int_or_none, | |
13 | remove_end, | |
14 | ) | |
15 | ||
16 | ||
class NFLIE(InfoExtractor):
    """Extractor for videos hosted on nfl.com and the team sites matched by
    ``_VALID_URL`` (including ``*.clubs.nfl.com`` hosts).

    The page is scanned for a player config URL and a content id; the player
    config supplies a content URL template and the CDN list, and the content
    JSON supplies the metadata and bitrate list used to build the formats.
    """

    IE_NAME = 'nfl.com'
    _VALID_URL = r'''(?x)
                    https?://
                        (?P<host>
                            (?:www\.)?
                            (?:
                                (?:
                                    nfl|
                                    buffalobills|
                                    miamidolphins|
                                    patriots|
                                    newyorkjets|
                                    baltimoreravens|
                                    bengals|
                                    clevelandbrowns|
                                    steelers|
                                    houstontexans|
                                    colts|
                                    jaguars|
                                    titansonline|
                                    denverbroncos|
                                    kcchiefs|
                                    raiders|
                                    chargers|
                                    dallascowboys|
                                    giants|
                                    philadelphiaeagles|
                                    redskins|
                                    chicagobears|
                                    detroitlions|
                                    packers|
                                    vikings|
                                    atlantafalcons|
                                    panthers|
                                    neworleanssaints|
                                    buccaneers|
                                    azcardinals|
                                    stlouisrams|
                                    49ers|
                                    seahawks
                                )\.com|
                                .+?\.clubs\.nfl\.com
                            )
                        )/
                        (?:.+?/)*
                        (?P<id>[^/#?&]+)
                    '''
    _TESTS = [{
        'url': 'http://www.nfl.com/videos/nfl-game-highlights/0ap3000000398478/Week-3-Redskins-vs-Eagles-highlights',
        'md5': '394ef771ddcd1354f665b471d78ec4c6',
        'info_dict': {
            'id': '0ap3000000398478',
            'ext': 'mp4',
            'title': 'Week 3: Redskins vs. Eagles highlights',
            'description': 'md5:56323bfb0ac4ee5ab24bd05fdf3bf478',
            'upload_date': '20140921',
            'timestamp': 1411337580,
            'thumbnail': r're:^https?://.*\.jpg$',
        }
    }, {
        'url': 'http://prod.www.steelers.clubs.nfl.com/video-and-audio/videos/LIVE_Post_Game_vs_Browns/9d72f26a-9e2b-4718-84d3-09fb4046c266',
        'md5': 'cf85bdb4bc49f6e9d3816d130c78279c',
        'info_dict': {
            'id': '9d72f26a-9e2b-4718-84d3-09fb4046c266',
            'ext': 'mp4',
            'title': 'LIVE: Post Game vs. Browns',
            'description': 'md5:6a97f7e5ebeb4c0e69a418a89e0636e8',
            'upload_date': '20131229',
            'timestamp': 1388354455,
            'thumbnail': r're:^https?://.*\.jpg$',
        }
    }, {
        'url': 'http://www.nfl.com/news/story/0ap3000000467586/article/patriots-seahawks-involved-in-lategame-skirmish',
        'info_dict': {
            'id': '0ap3000000467607',
            'ext': 'mp4',
            'title': 'Frustrations flare on the field',
            'description': 'Emotions ran high at the end of the Super Bowl on both sides of the ball after a dramatic finish.',
            'timestamp': 1422850320,
            'upload_date': '20150202',
        },
    }, {
        'url': 'http://www.patriots.com/video/2015/09/18/10-days-gillette',
        'md5': '4c319e2f625ffd0b481b4382c6fc124c',
        'info_dict': {
            'id': 'n-238346',
            'ext': 'mp4',
            'title': '10 Days at Gillette',
            'description': 'md5:8cd9cd48fac16de596eadc0b24add951',
            'timestamp': 1442618809,
            'upload_date': '20150918',
        },
    }, {
        # lowercase data-contentid
        'url': 'http://www.steelers.com/news/article-1/Tomlin-on-Ben-getting-Vick-ready/56399c96-4160-48cf-a7ad-1d17d4a3aef7',
        'info_dict': {
            'id': '12693586-6ea9-4743-9c1c-02c59e4a5ef2',
            'ext': 'mp4',
            'title': 'Tomlin looks ahead to Ravens on a short week',
            'description': 'md5:32f3f7b139f43913181d5cbb24ecad75',
            'timestamp': 1443459651,
            'upload_date': '20150928',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://www.nfl.com/videos/nfl-network-top-ten/09000d5d810a6bd4/Top-10-Gutsiest-Performances-Jack-Youngblood',
        'only_matching': True,
    }, {
        'url': 'http://www.buffalobills.com/video/videos/Rex_Ryan_Show_World_Wide_Rex/b1dcfab2-3190-4bb1-bfc0-d6e603d6601a',
        'only_matching': True,
    }]

    @staticmethod
    def prepend_host(host, url):
        """Return ``url`` as an absolute URL, resolving it against ``host``.

        * A URL that already starts with an ``http://`` or ``https://``
          scheme (matched case-insensitively) is returned unchanged.
        * A protocol-relative URL (``//other.host/path``) already names its
          own host, so only the ``http:`` scheme is prepended.
        * Anything else is treated as a path on ``host``; a leading ``/`` is
          added when it is missing.
        """
        # Match the full "scheme://" prefix: a bare startswith('http') would
        # also accept relative paths that merely begin with those letters.
        if re.match(r'(?i)https?://', url):
            return url
        if url.startswith('//'):
            return 'http:%s' % url
        if not url.startswith('/'):
            url = '/%s' % url
        return 'http://{0:}{1:}'.format(host, url)

    @staticmethod
    def format_from_stream(stream, protocol, host, path_prefix='',
                           preference=0, note=None):
        """Build one format dict from a bitrate entry.

        ``stream`` is a mapping read for the keys ``path`` and ``rate``.
        The URL is assembled as ``<protocol>://<host>/<path_prefix><path>``,
        so ``path_prefix`` must already end with ``/`` when it is non-empty.
        ``preference`` and ``note`` are passed through as ``preference`` and
        ``format_note``.
        """
        url = '{protocol:}://{host:}/{prefix:}{path:}'.format(
            protocol=protocol,
            host=host,
            prefix=path_prefix,
            path=stream.get('path'),
        )
        return {
            'url': url,
            # NOTE(review): the second argument of int_or_none is presumably
            # a divisor turning the rate into kbit/s; confirm against utils.
            'vbr': int_or_none(stream.get('rate', 0), 1000),
            'preference': preference,
            'format_note': note,
        }

    def _real_extract(self, url):
        """Extract the video described by ``url``.

        Steps:
          1. Download the page and look for the player config URL and the
             content id, falling back to a default config path and to the id
             taken from the URL.
          2. Download the player config JSON and use its
             ``contentURLTemplate`` to fetch the content JSON.
          3. Build formats either from the external stream URI (when
             ``cdnData.format`` is ``EXTERNAL_HTTP_STREAM``) or from every
             usable CDN listed in the player config.

        Returns a dict with ``id``, ``title``, ``formats``, ``description``,
        ``duration``, ``thumbnail`` and ``timestamp``.

        Raises ExtractorError when the stream is not external and the player
        config contains no ``cdns``.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id, host = mobj.group('id'), mobj.group('host')

        webpage = self._download_webpage(url, video_id)

        config_url = NFLIE.prepend_host(host, self._search_regex(
            r'(?:(?:config|configURL)\s*:\s*|<nflcs:avplayer[^>]+data-config\s*=\s*)(["\'])(?P<config>.+?)\1',
            webpage, 'config URL', default='static/content/static/config/video/config.json',
            group='config'))
        # For articles, the id in the url is not the video id
        video_id = self._search_regex(
            r'(?:<nflcs:avplayer[^>]+data-content[Ii]d\s*=\s*|content[Ii]d\s*:\s*)(["\'])(?P<id>(?:(?!\1).)+)\1',
            webpage, 'video id', default=video_id, group='id')
        config = self._download_json(config_url, video_id, 'Downloading player config')
        url_template = NFLIE.prepend_host(
            host, '{contentURLTemplate:}'.format(**config))
        video_data = self._download_json(
            url_template.format(id=video_id), video_id)

        formats = []
        # ``x.get(key) or default`` (rather than ``x.get(key, default)``)
        # also covers keys that are present in the JSON but set to null.
        cdn_data = video_data.get('cdnData') or {}
        streams = cdn_data.get('bitrateInfo') or []
        if cdn_data.get('format') == 'EXTERNAL_HTTP_STREAM':
            # Scheme and host come from the externally hosted stream URI.
            parts = compat_urllib_parse_urlparse(cdn_data.get('uri'))
            protocol, cdn_host = parts.scheme, parts.netloc
            for stream in streams:
                formats.append(
                    NFLIE.format_from_stream(stream, protocol, cdn_host))
        else:
            cdns = config.get('cdns')
            if not cdns:
                raise ExtractorError('Failed to get CDN data', expected=True)

            for name, cdn in cdns.items():
                # LimeLight streams don't seem to work
                if cdn.get('name') == 'LIMELIGHT':
                    continue

                protocol = cdn.get('protocol')
                cdn_host = remove_end(cdn.get('host') or '', '/')
                if not (protocol and cdn_host):
                    continue

                # format_from_stream concatenates prefix and path directly,
                # so make sure a non-empty prefix ends with a slash.
                prefix = cdn.get('pathprefix') or ''
                if prefix and not prefix.endswith('/'):
                    prefix = '%s/' % prefix

                # rtmp entries are ranked lowest; CDN entries whose key
                # contains "prog" are ranked above the default.
                preference = 0
                if protocol == 'rtmp':
                    preference = -2
                elif 'prog' in name.lower():
                    preference = 1

                for stream in streams:
                    formats.append(
                        NFLIE.format_from_stream(stream, protocol, cdn_host,
                                                 prefix, preference, name))

        self._sort_formats(formats)

        # Pick the first available image, trying the size keys from 'xl'
        # down to 'xs'.
        image_paths = video_data.get('imagePaths') or {}
        thumbnail = None
        for q in ('xl', 'l', 'm', 's', 'xs'):
            thumbnail = image_paths.get(q)
            if thumbnail:
                break

        return {
            'id': video_id,
            'title': video_data.get('headline'),
            'formats': formats,
            'description': video_data.get('caption'),
            'duration': video_data.get('duration'),
            'thumbnail': thumbnail,
            # NOTE(review): 'posted' looks like a millisecond timestamp that
            # int_or_none scales down by 1000; confirm against utils.
            'timestamp': int_or_none(video_data.get('posted'), 1000),
        }