]>
Commit | Line | Data |
---|---|---|
1 | from __future__ import unicode_literals | |
2 | ||
3 | import re | |
4 | import io | |
5 | import binascii | |
6 | ||
7 | from ..downloader import _get_real_downloader | |
8 | from .fragment import FragmentFD, can_decrypt_frag | |
9 | from .external import FFmpegFD | |
10 | ||
11 | from ..compat import ( | |
12 | compat_urlparse, | |
13 | ) | |
14 | from ..utils import ( | |
15 | parse_m3u8_attributes, | |
16 | update_url_query, | |
17 | bug_reports_message, | |
18 | ) | |
19 | from .. import webvtt | |
20 | ||
21 | ||
class HlsFD(FragmentFD):
    """
    Download segments in a m3u8 manifest. External downloaders can take over
    the fragment downloads by supporting the 'm3u8_frag_urls' protocol and
    re-defining 'supports_manifest' function
    """

    FD_NAME = 'hlsnative'

    @staticmethod
    def can_download(manifest, info_dict, allow_unplayable_formats=False, with_crypto=can_decrypt_frag):
        """
        Tell whether the manifest can be handled by this downloader.

        @param manifest                  Text of the m3u8 media playlist
        @param info_dict                 Format dict; only 'is_live' is read here
        @param allow_unplayable_formats  When falsy, any #EXT-X-KEY method other
                                         than NONE/AES-128 is treated as unsupported
        @param with_crypto               Whether AES-128 fragments can be decrypted.
                                         Defaults to can_decrypt_frag from .fragment
                                         (presumably reflects whether a crypto
                                         backend is installed - confirm there)
        @returns                         True only if every check below passes
        """
        UNSUPPORTED_FEATURES = [
            # r'#EXT-X-BYTERANGE',  # playlists composed of byte ranges of media files [2]

            # Live streams heuristic does not always work (e.g. geo restricted to Germany
            # http://hls-geo.daserste.de/i/videoportal/Film/c_620000/622873/format,716451,716457,716450,716458,716459,.mp4.csmil/index_4_av.m3u8?null=0)
            # r'#EXT-X-MEDIA-SEQUENCE:(?!0$)',  # live streams [3]

            # This heuristic also is not correct since segments may not be appended as well.
            # Twitch vods of finished streams have EXT-X-PLAYLIST-TYPE:EVENT despite
            # no segments will definitely be appended to the end of the playlist.
            # r'#EXT-X-PLAYLIST-TYPE:EVENT',  # media segments may be appended to the end of
            #                                 # event media playlists [4]
            # r'#EXT-X-MAP:',  # media initialization [5]
            # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.4
            # 2. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2
            # 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.2
            # 4. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.5
            # 5. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.5
        ]
        if not allow_unplayable_formats:
            UNSUPPORTED_FEATURES += [
                r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)',  # encrypted streams [1]
            ]

        def check_results():
            # Generator so that all() can stop at the first failed check
            yield not info_dict.get('is_live')
            is_aes128_enc = '#EXT-X-KEY:METHOD=AES-128' in manifest
            yield with_crypto or not is_aes128_enc
            # AES-128 combined with byte ranges is rejected
            yield not (is_aes128_enc and r'#EXT-X-BYTERANGE' in manifest)
            for feature in UNSUPPORTED_FEATURES:
                yield not re.search(feature, manifest)
        return all(check_results())

    def real_download(self, filename, info_dict):
        """
        Download the media playlist at info_dict['url'] into filename.

        The manifest is fetched and checked with can_download(); when it is not
        supported the whole download is handed to FFmpegFD. Otherwise the
        playlist is parsed into a list of fragment dicts ('frag_index', 'url',
        'decrypt_info', 'byte_range', 'media_sequence') which is passed either
        to an external 'm3u8_frag_urls' downloader or to
        download_and_append_fragments(). For 'vtt' formats a pack_fragment
        callback re-times and de-duplicates WebVTT cues across fragments.

        @returns  False when an error was reported here, otherwise whatever the
                  downloader the work was handed to returns
        """
        man_url = info_dict['url']
        self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME)

        urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
        # Relative URIs must be resolved against the final (post-redirect) URL
        man_url = urlh.geturl()
        s = urlh.read().decode('utf-8', 'ignore')

        if not self.can_download(s, info_dict, self.params.get('allow_unplayable_formats')):
            if info_dict.get('extra_param_to_segment_url') or info_dict.get('_decryption_key_url'):
                self.report_error('pycryptodome not found. Please install')
                return False
            if self.can_download(s, info_dict, with_crypto=True):
                # Decryption support is the only thing missing
                self.report_warning('pycryptodome is needed to download this file natively')
            fd = FFmpegFD(self.ydl, self.params)
            self.report_warning(
                '%s detected unsupported features; extraction will be delegated to %s' % (self.FD_NAME, fd.get_basename()))
            # TODO: Make progress updates work without hooking twice
            # for ph in self._progress_hooks:
            #     fd.add_progress_hook(ph)
            return fd.real_download(filename, info_dict)

        is_webvtt = info_dict['ext'] == 'vtt'
        if is_webvtt:
            real_downloader = None  # Packing the fragments is not currently supported for external downloader
        else:
            real_downloader = _get_real_downloader(info_dict, 'm3u8_frag_urls', self.params, None)
        if real_downloader and not real_downloader.supports_manifest(s):
            real_downloader = None
        if real_downloader:
            self.to_screen(
                '[%s] Fragment downloads will be delegated to %s' % (self.FD_NAME, real_downloader.get_basename()))

        def is_ad_fragment_start(s):
            return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s
                    or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad'))

        def is_ad_fragment_end(s):
            return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=master' in s
                    or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',segment'))

        def parse_byte_range(spec, default_start):
            # spec is '<length>[@<offset>]'; without an offset the range starts
            # at default_start (the end of the previously used range)
            length, _, offset = spec.partition('@')
            start = int(offset) if offset else default_start
            return {
                'start': start,
                'end': start + int(length),
            }

        fragments = []

        # First pass: count media and ad fragments for the progress context
        media_frags = 0
        ad_frags = 0
        ad_frag_next = False
        for line in s.splitlines():
            line = line.strip()
            if not line:
                continue
            if line.startswith('#'):
                if is_ad_fragment_start(line):
                    ad_frag_next = True
                elif is_ad_fragment_end(line):
                    ad_frag_next = False
                continue
            if ad_frag_next:
                ad_frags += 1
                continue
            media_frags += 1

        ctx = {
            'filename': filename,
            'total_frags': media_frags,
            'ad_frags': ad_frags,
        }

        if real_downloader:
            self._prepare_external_frag_download(ctx)
        else:
            self._prepare_and_start_frag_download(ctx, info_dict)

        # Persistent scratch space shared with the WebVTT packer below
        extra_state = ctx.setdefault('extra_state', {})

        format_index = info_dict.get('format_index')
        extra_query = None
        extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url')
        if extra_param_to_segment_url:
            extra_query = compat_urlparse.parse_qs(extra_param_to_segment_url)
        media_sequence = 0
        decrypt_info = {'METHOD': 'NONE'}
        byte_range = {}  # range pending for the next URI line only
        byte_range_offset = 0  # end of the most recently used range
        discontinuity_count = 0
        frag_index = 0
        ad_frag_next = False

        # Second pass: build the list of fragments to download
        for line in s.splitlines():
            line = line.strip()
            if not line:
                continue

            if not line.startswith('#'):
                # Every URI line is a media segment. It consumes one media
                # sequence number (used as the implicit AES-128 IV) and the
                # pending byte range even when the segment is skipped below,
                # so that later segments keep correct values.
                segment_sequence = media_sequence
                media_sequence += 1
                segment_range = byte_range
                if byte_range:
                    byte_range_offset = byte_range['end']
                    byte_range = {}

                # "is not None" so that index 0 selects only the first period
                if format_index is not None and discontinuity_count != format_index:
                    continue
                if ad_frag_next:
                    continue
                frag_index += 1
                if frag_index <= ctx['fragment_index']:
                    # Already downloaded in a previous (resumed) run
                    continue
                frag_url = (
                    line
                    if re.match(r'^https?://', line)
                    else compat_urlparse.urljoin(man_url, line))
                if extra_query:
                    frag_url = update_url_query(frag_url, extra_query)

                fragments.append({
                    'frag_index': frag_index,
                    'url': frag_url,
                    'decrypt_info': decrypt_info,
                    'byte_range': segment_range,
                    'media_sequence': segment_sequence,
                })

            elif line.startswith('#EXT-X-MAP'):
                if format_index is not None and discontinuity_count != format_index:
                    continue
                if frag_index > 0:
                    self.report_error(
                        'Initialization fragment found after media fragments, unable to download')
                    return False
                frag_index += 1
                map_info = parse_m3u8_attributes(line[11:])
                frag_url = (
                    map_info.get('URI')
                    if re.match(r'^https?://', map_info.get('URI'))
                    else compat_urlparse.urljoin(man_url, map_info.get('URI')))
                if extra_query:
                    frag_url = update_url_query(frag_url, extra_query)

                # The BYTERANGE attribute describes the initialization section
                # itself, so it has to be resolved before the fragment is
                # recorded; without it the whole resource is fetched.
                map_range = {}
                if map_info.get('BYTERANGE'):
                    map_range = parse_byte_range(map_info.get('BYTERANGE'), byte_range_offset)
                    byte_range_offset = map_range['end']

                # The initialization section is not a media segment, so it does
                # not consume a media sequence number
                fragments.append({
                    'frag_index': frag_index,
                    'url': frag_url,
                    'decrypt_info': decrypt_info,
                    'byte_range': map_range,
                    'media_sequence': media_sequence
                })

            elif line.startswith('#EXT-X-KEY'):
                decrypt_url = decrypt_info.get('URI')
                decrypt_info = parse_m3u8_attributes(line[11:])
                if decrypt_info['METHOD'] == 'AES-128':
                    if 'IV' in decrypt_info:
                        # Drop the '0x' prefix and left-pad to 16 bytes
                        decrypt_info['IV'] = binascii.unhexlify(decrypt_info['IV'][2:].zfill(32))
                    if not re.match(r'^https?://', decrypt_info['URI']):
                        decrypt_info['URI'] = compat_urlparse.urljoin(
                            man_url, decrypt_info['URI'])
                    if extra_query:
                        decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query)
                    if decrypt_url != decrypt_info['URI']:
                        # Key URI changed: mark the key as not yet fetched
                        decrypt_info['KEY'] = None

            elif line.startswith('#EXT-X-MEDIA-SEQUENCE'):
                media_sequence = int(line[22:])
            elif line.startswith('#EXT-X-BYTERANGE'):
                byte_range = parse_byte_range(line[17:], byte_range_offset)
            elif is_ad_fragment_start(line):
                ad_frag_next = True
            elif is_ad_fragment_end(line):
                ad_frag_next = False
            elif line.startswith('#EXT-X-DISCONTINUITY'):
                discontinuity_count += 1

        # We only download the first fragment during the test
        if self.params.get('test', False):
            # Slicing keeps an empty playlist empty instead of producing [None]
            fragments = fragments[:1]

        if real_downloader:
            info_copy = info_dict.copy()
            info_copy['fragments'] = fragments
            fd = real_downloader(self.ydl, self.params)
            # TODO: Make progress updates work without hooking twice
            # for ph in self._progress_hooks:
            #     fd.add_progress_hook(ph)
            return fd.real_download(filename, info_copy)

        if is_webvtt:
            def pack_fragment(frag_content, frag_index):
                # Rewrite one WebVTT fragment so that the fragments can simply
                # be concatenated; returns the UTF-8 encoded result
                output = io.StringIO()
                adjust = 0
                for block in webvtt.parse_fragment(frag_content):
                    if isinstance(block, webvtt.CueBlock):
                        block.start += adjust
                        block.end += adjust

                        dedup_window = extra_state.setdefault('webvtt_dedup_window', [])
                        cue = block.as_json

                        # skip the cue if an identical one appears
                        # in the window of potential duplicates
                        # and prune the window of unviable candidates
                        i = 0
                        skip = True
                        while i < len(dedup_window):
                            window_cue = dedup_window[i]
                            if window_cue == cue:
                                break
                            if window_cue['end'] >= cue['start']:
                                i += 1
                                continue
                            del dedup_window[i]
                        else:
                            # Loop finished without finding a duplicate
                            skip = False

                        if skip:
                            continue

                        # add the cue to the window
                        dedup_window.append(cue)
                    elif isinstance(block, webvtt.Magic):
                        # take care of MPEG PES timestamp overflow
                        if block.mpegts is None:
                            block.mpegts = 0
                        extra_state.setdefault('webvtt_mpegts_adjust', 0)
                        block.mpegts += extra_state['webvtt_mpegts_adjust'] << 33
                        if block.mpegts < extra_state.get('webvtt_mpegts_last', 0):
                            extra_state['webvtt_mpegts_adjust'] += 1
                            block.mpegts += 1 << 33
                        extra_state['webvtt_mpegts_last'] = block.mpegts

                        if frag_index == 1:
                            # The first fragment defines the reference timestamps
                            extra_state['webvtt_mpegts'] = block.mpegts or 0
                            extra_state['webvtt_local'] = block.local or 0
                            # XXX: block.local = block.mpegts = None ?
                        else:
                            if block.mpegts is not None and block.local is not None:
                                adjust = (
                                    (block.mpegts - extra_state.get('webvtt_mpegts', 0))
                                    - (block.local - extra_state.get('webvtt_local', 0))
                                )
                            # Only the first fragment's magic block is written
                            continue
                    elif isinstance(block, webvtt.HeaderBlock):
                        if frag_index != 1:
                            # XXX: this should probably be silent as well
                            # or verify that all segments contain the same data
                            self.report_warning(bug_reports_message(
                                'Discarding a %s block found in the middle of the stream; '
                                'if the subtitles display incorrectly,'
                                % (type(block).__name__)))
                            continue
                    block.write_into(output)

                return output.getvalue().encode('utf-8')
        else:
            pack_fragment = None
        return self.download_and_append_fragments(ctx, fragments, info_dict, pack_fragment)