]>
Commit | Line | Data |
---|---|---|
1 | # coding: utf-8 | |
2 | from __future__ import unicode_literals | |
3 | ||
4 | import re | |
5 | ||
6 | from .adobepass import AdobePassIE | |
7 | from ..compat import compat_str | |
8 | from ..utils import ( | |
9 | fix_xml_ampersands, | |
10 | xpath_text, | |
11 | int_or_none, | |
12 | determine_ext, | |
13 | float_or_none, | |
14 | parse_duration, | |
15 | xpath_attr, | |
16 | update_url_query, | |
17 | ExtractorError, | |
18 | strip_or_none, | |
19 | url_or_none, | |
20 | ) | |
21 | ||
22 | ||
class TurnerBaseIE(AdobePassIE):
    """Shared helpers for extractors of Turner-operated sites (CNN, TBS,
    Adult Swim, NBA, ...).

    Wraps the legacy CVP XML metadata API, the NGTV JSON streaming API
    and Akamai SPE secure-stream tokenization.
    """

    # Maps a secure path pattern ('.../*') to its SPE auth token so the
    # tokenizer service is queried at most once per path per run.
    _AKAMAI_SPE_TOKEN_CACHE = {}

    def _extract_timestamp(self, video_data):
        # <dateCreated uts="..."> carries a unix timestamp attribute
        return int_or_none(xpath_attr(video_data, 'dateCreated', 'uts'))

    def _add_akamai_spe_token(self, tokenizer_src, video_url, content_id, ap_data, custom_tokenizer_query=None):
        """Return video_url with an Akamai SPE auth token appended as ?hdnea=...

        tokenizer_src -- URL of the SPE tokenizer service
        video_url -- media URL requiring the token; its path (plus '*')
            is used as the cache key
        content_id -- video id sent to the tokenizer (and to MVPD auth)
        ap_data -- Adobe Pass data dict; when 'auth_required' is truthy,
            an MVPD access token is requested and forwarded
        custom_tokenizer_query -- extra query params that replace the
            default 'videoId' parameter

        Returns video_url unchanged when the tokenizer issues no token.
        Raises ExtractorError when the tokenizer reports an error message.
        """
        secure_path = self._search_regex(r'https?://[^/]+(.+/)', video_url, 'secure path') + '*'
        token = self._AKAMAI_SPE_TOKEN_CACHE.get(secure_path)
        if not token:
            query = {
                'path': secure_path,
            }
            if custom_tokenizer_query:
                query.update(custom_tokenizer_query)
            else:
                query['videoId'] = content_id
            if ap_data.get('auth_required'):
                query['accessToken'] = self._extract_mvpd_auth(
                    ap_data['url'], content_id, ap_data['site_name'], ap_data['site_name'])
            auth = self._download_xml(
                tokenizer_src, content_id, query=query)
            error_msg = xpath_text(auth, 'error/msg')
            if error_msg:
                raise ExtractorError(error_msg, expected=True)
            token = xpath_text(auth, 'token')
            if not token:
                return video_url
            self._AKAMAI_SPE_TOKEN_CACHE[secure_path] = token
        return video_url + '?hdnea=' + token

    def _extract_cvp_info(self, data_src, video_id, path_data=None, ap_data=None, fatal=False):
        """Extract an info dict from a CVP XML document fetched from data_src.

        path_data -- per-extension media/tokenizer host configuration
            (keys: 'secure', 'default', per-extension entries, 'f4m')
        ap_data -- Adobe Pass configuration forwarded to the SPE
            tokenizer for secure streams
        fatal -- propagate download/parse errors for the XML document

        Returns {} when the document cannot be downloaded (fatal=False).
        """
        # Fixed: defaults used to be shared mutable dicts ({}); use None
        # sentinels and normalize here instead.
        path_data = path_data or {}
        ap_data = ap_data or {}
        video_data = self._download_xml(
            data_src, video_id,
            transform_source=lambda s: fix_xml_ampersands(s).strip(),
            fatal=fatal)
        if not video_data:
            return {}
        video_id = video_data.attrib['id']
        title = xpath_text(video_data, 'headline', fatal=True)
        content_id = xpath_text(video_data, 'contentId') or video_id
        # rtmp_src = xpath_text(video_data, 'akamai/src')
        # if rtmp_src:
        #     split_rtmp_src = rtmp_src.split(',')
        #     if len(split_rtmp_src) == 2:
        #         rtmp_src = split_rtmp_src[1]
        # aifp = xpath_text(video_data, 'akamai/aifp', default='')

        urls = []
        formats = []
        thumbnails = []
        subtitles = {}
        # Matches WIDTHxHEIGHT(_BITRATE) fragments embedded in media URLs
        rex = re.compile(
            r'(?P<width>[0-9]+)x(?P<height>[0-9]+)(?:_(?P<bitrate>[0-9]+))?')
        # Possible formats locations: files/file, files/groupFiles/files
        # and maybe others
        for video_file in video_data.findall('.//file'):
            # Fixed: guard against empty <file/> elements (.text is None)
            video_url = url_or_none((video_file.text or '').strip())
            if not video_url:
                continue
            ext = determine_ext(video_url)
            if video_url.startswith('/mp4:protected/'):
                continue
                # TODO Correct extraction for these files
                # protected_path_data = path_data.get('protected')
                # if not protected_path_data or not rtmp_src:
                #     continue
                # protected_path = self._search_regex(
                #     r'/mp4:(.+)\.[a-z0-9]', video_url, 'secure path')
                # auth = self._download_webpage(
                #     protected_path_data['tokenizer_src'], query={
                #         'path': protected_path,
                #         'videoId': content_id,
                #         'aifp': aifp,
                #     })
                # token = xpath_text(auth, 'token')
                # if not token:
                #     continue
                # video_url = rtmp_src + video_url + '?' + token
            elif video_url.startswith('/secure/'):
                secure_path_data = path_data.get('secure')
                if not secure_path_data:
                    continue
                video_url = self._add_akamai_spe_token(
                    secure_path_data['tokenizer_src'],
                    secure_path_data['media_src'] + video_url,
                    content_id, ap_data)
            elif not re.match('https?://', video_url):
                # Relative path: prefix the configured media host
                base_path_data = path_data.get(ext, path_data.get('default', {}))
                media_src = base_path_data.get('media_src')
                if not media_src:
                    continue
                video_url = media_src + video_url
            if video_url in urls:
                continue
            urls.append(video_url)
            format_id = video_file.get('bitrate')
            if ext in ('scc', 'srt', 'vtt'):
                subtitles.setdefault('en', []).append({
                    'ext': ext,
                    'url': video_url,
                })
            elif ext == 'png':
                thumbnails.append({
                    'id': format_id,
                    'url': video_url,
                })
            elif ext == 'smil':
                formats.extend(self._extract_smil_formats(
                    video_url, video_id, fatal=False))
            elif re.match(r'https?://[^/]+\.akamaihd\.net/[iz]/', video_url):
                formats.extend(self._extract_akamai_formats(
                    video_url, video_id, {
                        'hds': path_data.get('f4m', {}).get('host'),
                        # nba.cdn.turner.com, ht.cdn.turner.com, ht2.cdn.turner.com
                        # ht3.cdn.turner.com, i.cdn.turner.com, s.cdn.turner.com
                        # ssl.cdn.turner.com
                        'http': 'pmd.cdn.turner.com',
                    }))
            elif ext == 'm3u8':
                m3u8_formats = self._extract_m3u8_formats(
                    video_url, video_id, 'mp4',
                    m3u8_id=format_id or 'hls', fatal=False)
                if '/secure/' in video_url and '?hdnea=' in video_url:
                    # Tokenized secure HLS streams don't support seeking
                    for f in m3u8_formats:
                        f['_seekable'] = False
                formats.extend(m3u8_formats)
            elif ext == 'f4m':
                formats.extend(self._extract_f4m_formats(
                    update_url_query(video_url, {'hdcore': '3.7.0'}),
                    video_id, f4m_id=format_id or 'hds', fatal=False))
            else:
                f = {
                    'format_id': format_id,
                    'url': video_url,
                    'ext': ext,
                }
                mobj = rex.search(video_url)
                if mobj:
                    f.update({
                        'width': int(mobj.group('width')),
                        'height': int(mobj.group('height')),
                        'tbr': int_or_none(mobj.group('bitrate')),
                    })
                elif isinstance(format_id, compat_str):
                    if format_id.isdigit():
                        f['tbr'] = int(format_id)
                    else:
                        mobj = re.match(r'ios_(audio|[0-9]+)$', format_id)
                        if mobj:
                            if mobj.group(1) == 'audio':
                                f.update({
                                    'vcodec': 'none',
                                    'ext': 'm4a',
                                })
                            else:
                                f['tbr'] = int(mobj.group(1))
                formats.append(f)
        self._sort_formats(formats)

        for source in video_data.findall('closedCaptions/source'):
            for track in source.findall('track'):
                track_url = url_or_none(track.get('url'))
                # '/big' URLs are known-bogus caption placeholders
                if not track_url or track_url.endswith('/big'):
                    continue
                lang = track.get('lang') or track.get('label') or 'en'
                subtitles.setdefault(lang, []).append({
                    'url': track_url,
                    'ext': {
                        'scc': 'scc',
                        'webvtt': 'vtt',
                        'smptett': 'tt',
                    }.get(source.get('format'))
                })

        thumbnails.extend({
            'id': image.get('cut') or image.get('name'),
            'url': image.text,
            'width': int_or_none(image.get('width')),
            'height': int_or_none(image.get('height')),
        } for image in video_data.findall('images/image'))

        is_live = xpath_text(video_data, 'isLive') == 'true'

        return {
            'id': video_id,
            'title': self._live_title(title) if is_live else title,
            'formats': formats,
            'subtitles': subtitles,
            'thumbnails': thumbnails,
            'thumbnail': xpath_text(video_data, 'poster'),
            'description': strip_or_none(xpath_text(video_data, 'description')),
            'duration': parse_duration(xpath_text(video_data, 'length') or xpath_text(video_data, 'trt')),
            'timestamp': self._extract_timestamp(video_data),
            'upload_date': xpath_attr(video_data, 'metas', 'version'),
            'series': xpath_text(video_data, 'showTitle'),
            'season_number': int_or_none(xpath_text(video_data, 'seasonNumber')),
            'episode_number': int_or_none(xpath_text(video_data, 'episodeNumber')),
            'is_live': is_live,
        }

    def _extract_ngtv_info(self, media_id, tokenizer_query, ap_data=None):
        """Extract formats, chapters and duration from the NGTV API.

        tokenizer_query -- custom query params for the SPE tokenizer
        ap_data -- optional Adobe Pass data dict (may carry 'is_live')
        """
        # Fixed: ap_data defaults to None but was dereferenced with
        # ap_data.get('is_live') before being normalized, crashing every
        # call that omitted it. Normalize once up front.
        ap_data = ap_data or {}
        is_live = ap_data.get('is_live')
        streams_data = self._download_json(
            'http://medium.ngtv.io/media/%s/tv' % media_id,
            media_id)['media']['tv']
        duration = None
        chapters = []
        formats = []
        for supported_type in ('unprotected', 'bulkaes'):
            stream_data = streams_data.get(supported_type, {})
            m3u8_url = stream_data.get('secureUrl') or stream_data.get('url')
            if not m3u8_url:
                continue
            if stream_data.get('playlistProtection') == 'spe':
                m3u8_url = self._add_akamai_spe_token(
                    'http://token.ngtv.io/token/token_spe',
                    m3u8_url, media_id, ap_data, tokenizer_query)
            formats.extend(self._extract_m3u8_formats(
                m3u8_url, media_id, 'mp4', m3u8_id='hls', live=is_live, fatal=False))

            duration = float_or_none(stream_data.get('totalRuntime'))

            if not chapters and not is_live:
                for chapter in stream_data.get('contentSegments', []):
                    start_time = float_or_none(chapter.get('start'))
                    chapter_duration = float_or_none(chapter.get('duration'))
                    if start_time is None or chapter_duration is None:
                        continue
                    chapters.append({
                        'start_time': start_time,
                        'end_time': start_time + chapter_duration,
                    })
        self._sort_formats(formats)

        return {
            'formats': formats,
            'chapters': chapters,
            'duration': duration,
        }