]>
Commit | Line | Data |
---|---|---|
f0b5d6af PH |
1 | from __future__ import unicode_literals |
2 | ||
f0b5d6af | 3 | import re |
4a2f19ab | 4 | import io |
e154c651 | 5 | import binascii |
3bc2ddcc | 6 | |
dbf5416a | 7 | from ..downloader import get_suitable_downloader |
4c7853de | 8 | from .fragment import FragmentFD, can_decrypt_frag |
0d66bd0e | 9 | from .external import FFmpegFD |
f9a5affa | 10 | |
e154c651 | 11 | from ..compat import ( |
12 | compat_urlparse, | |
e154c651 | 13 | ) |
1cc79574 | 14 | from ..utils import ( |
e154c651 | 15 | parse_m3u8_attributes, |
aaf44a2f | 16 | update_url_query, |
4a2f19ab | 17 | bug_reports_message, |
3bc2ddcc | 18 | ) |
4a2f19ab | 19 | from .. import webvtt |
3bc2ddcc JMF |
20 | |
21 | ||
12b84ac8 | 22 | class HlsFD(FragmentFD): |
0a473f2f | 23 | """ |
24 | Download segments in a m3u8 manifest. External downloaders can take over | |
52a8a1e1 | 25 | the fragment downloads by supporting the 'm3u8_frag_urls' protocol and |
0a473f2f | 26 | re-defining 'supports_manifest' function |
27 | """ | |
f0b5d6af | 28 | |
f9a5affa S |
29 | FD_NAME = 'hlsnative' |
30 | ||
@staticmethod
def can_download(manifest, info_dict, allow_unplayable_formats=False, with_crypto=can_decrypt_frag):
    """
    Tell whether the native HLS downloader can handle the given manifest.

    @param manifest                  Text of the m3u8 media playlist
    @param info_dict                 Format dictionary; only 'is_live' is read
    @param allow_unplayable_formats  When truthy, the check for unsupported
                                     encryption methods is skipped
    @param with_crypto               Truthy when AES-128 encrypted playlists
                                     may be accepted
    @returns                         True if none of the blocking conditions apply
    """
    # Heuristics that are intentionally NOT used to reject a manifest:
    #   r'#EXT-X-BYTERANGE'             playlists composed of byte ranges of media files [2]
    #   r'#EXT-X-MEDIA-SEQUENCE:(?!0$)' live streams [3]; the heuristic does not always
    #                                   work (e.g. geo restricted to Germany
    #                                   http://hls-geo.daserste.de/i/videoportal/Film/c_620000/622873/format,716451,716457,716450,716458,716459,.mp4.csmil/index_4_av.m3u8?null=0)
    #   r'#EXT-X-PLAYLIST-TYPE:EVENT'   media segments may be appended to the end of event
    #                                   media playlists [4]; not reliable either, since
    #                                   Twitch vods of finished streams carry
    #                                   EXT-X-PLAYLIST-TYPE:EVENT although no segments will
    #                                   be appended to the end of the playlist
    #   r'#EXT-X-MAP:'                  media initialization [5]
    #
    # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.4
    # 2. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2
    # 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.2
    # 4. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.5
    # 5. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.5

    # Live streams are never handled here
    if info_dict.get('is_live'):
        return False

    uses_aes128 = '#EXT-X-KEY:METHOD=AES-128' in manifest
    # AES-128 playlists are only acceptable when with_crypto is truthy
    if uses_aes128 and not with_crypto:
        return False
    # The combination of AES-128 and byte ranges is rejected
    if uses_aes128 and r'#EXT-X-BYTERANGE' in manifest:
        return False

    blocked_patterns = []
    if not allow_unplayable_formats:
        # encrypted streams with a method other than NONE/AES-128 [1]
        blocked_patterns.append(r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)')

    return not any(re.search(pattern, manifest) for pattern in blocked_patterns)
0d66bd0e | 65 | |
def real_download(self, filename, info_dict):
    """
    Download the HLS media playlist found at info_dict['url'] into filename.

    The manifest is fetched and checked with can_download(). If it cannot
    be handled natively the whole download is delegated to FFmpegFD (or
    aborted when segment URL parameters / a custom key URL are in use).
    Otherwise the playlist is parsed into a list of fragment dicts with the
    keys 'frag_index', 'url', 'decrypt_info', 'byte_range' and
    'media_sequence'. These are downloaded either by an external downloader
    that supports the 'm3u8_frag_urls' protocol or through
    download_and_append_fragments(). Playlists whose 'ext' is 'vtt' are
    always downloaded natively and their fragments are re-packed into a
    single WebVTT stream.

    @param filename   Output file name ('-' means stdout)
    @param info_dict  Format dictionary; reads 'url', 'ext', 'format_index',
                      'extra_param_to_segment_url' and '_decryption_key_url'
    @returns          False if the manifest cannot be downloaded, otherwise
                      the result of the downloader the work was handed to
    """
    man_url = info_dict['url']
    self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME)

    urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
    # Use the final URL (after redirects) as the base for relative URIs
    man_url = urlh.geturl()
    s = urlh.read().decode('utf-8', 'ignore')

    if not self.can_download(s, info_dict, self.params.get('allow_unplayable_formats')):
        if info_dict.get('extra_param_to_segment_url') or info_dict.get('_decryption_key_url'):
            self.report_error('pycryptodome not found. Please install')
            return False
        # The manifest would have been accepted if decryption were available
        if self.can_download(s, info_dict, with_crypto=True):
            self.report_warning('pycryptodome is needed to download this file natively')
        fd = FFmpegFD(self.ydl, self.params)
        self.report_warning(
            '%s detected unsupported features; extraction will be delegated to %s' % (self.FD_NAME, fd.get_basename()))
        return fd.real_download(filename, info_dict)

    is_webvtt = info_dict['ext'] == 'vtt'
    if is_webvtt:
        real_downloader = None  # Packing the fragments is not currently supported for external downloader
    else:
        real_downloader = get_suitable_downloader(
            info_dict, self.params, None, protocol='m3u8_frag_urls', to_stdout=(filename == '-'))
    if real_downloader and not real_downloader.supports_manifest(s):
        real_downloader = None
    if real_downloader:
        self.to_screen(
            '[%s] Fragment downloads will be delegated to %s' % (self.FD_NAME, real_downloader.get_basename()))

    def is_ad_fragment_start(s):
        return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s
                or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad'))

    def is_ad_fragment_end(s):
        return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=master' in s
                or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',segment'))

    fragments = []

    # First pass: count media and ad fragments for progress reporting
    media_frags = 0
    ad_frags = 0
    ad_frag_next = False
    for line in s.splitlines():
        line = line.strip()
        if not line:
            continue
        if line.startswith('#'):
            if is_ad_fragment_start(line):
                ad_frag_next = True
            elif is_ad_fragment_end(line):
                ad_frag_next = False
            continue
        if ad_frag_next:
            ad_frags += 1
            continue
        media_frags += 1

    ctx = {
        'filename': filename,
        'total_frags': media_frags,
        'ad_frags': ad_frags,
    }

    if real_downloader:
        self._prepare_external_frag_download(ctx)
    else:
        self._prepare_and_start_frag_download(ctx, info_dict)

    # State shared between calls of the WebVTT packing helpers below
    extra_state = ctx.setdefault('extra_state', {})

    format_index = info_dict.get('format_index')
    extra_query = None
    extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url')
    if extra_param_to_segment_url:
        extra_query = compat_urlparse.parse_qs(extra_param_to_segment_url)
    media_sequence = 0
    decrypt_info = {'METHOD': 'NONE'}
    byte_range = {}
    discontinuity_count = 0
    frag_index = 0
    ad_frag_next = False

    # Second pass: build the fragment list
    for line in s.splitlines():
        line = line.strip()
        if not line:
            continue

        if not line.startswith('#'):
            # Every URI line is one media segment and consumes exactly one
            # media sequence number, whether or not it is downloaded; the
            # counter is advanced before any of the skips below so that the
            # numbers of the remaining segments stay correct.
            segment_sequence = media_sequence
            media_sequence += 1

            # NOTE(review): a format_index of 0 is falsy and therefore
            # disables this filter - confirm this is intended.
            if format_index and discontinuity_count != format_index:
                continue
            if ad_frag_next:
                continue
            frag_index += 1
            # Skip fragments up to ctx['fragment_index']
            if frag_index <= ctx['fragment_index']:
                continue
            frag_url = (
                line
                if re.match(r'^https?://', line)
                else compat_urlparse.urljoin(man_url, line))
            if extra_query:
                frag_url = update_url_query(frag_url, extra_query)

            fragments.append({
                'frag_index': frag_index,
                'url': frag_url,
                'decrypt_info': decrypt_info,
                'byte_range': byte_range,
                'media_sequence': segment_sequence,
            })

        elif line.startswith('#EXT-X-MAP'):
            if format_index and discontinuity_count != format_index:
                continue
            if frag_index > 0:
                self.report_error(
                    'Initialization fragment found after media fragments, unable to download')
                return False
            frag_index += 1
            map_info = parse_m3u8_attributes(line[11:])
            frag_url = (
                map_info.get('URI')
                if re.match(r'^https?://', map_info.get('URI'))
                else compat_urlparse.urljoin(man_url, map_info.get('URI')))
            if extra_query:
                frag_url = update_url_query(frag_url, extra_query)

            # The BYTERANGE attribute describes the initialization section
            # itself, so it must be evaluated before the fragment is stored
            if map_info.get('BYTERANGE'):
                splitted_byte_range = map_info.get('BYTERANGE').split('@')
                sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else byte_range['end']
                byte_range = {
                    'start': sub_range_start,
                    'end': sub_range_start + int(splitted_byte_range[0]),
                }

            # The initialization section is not a media segment and does
            # not consume a media sequence number
            fragments.append({
                'frag_index': frag_index,
                'url': frag_url,
                'decrypt_info': decrypt_info,
                'byte_range': byte_range,
                'media_sequence': media_sequence,
            })

        elif line.startswith('#EXT-X-KEY'):
            decrypt_url = decrypt_info.get('URI')
            decrypt_info = parse_m3u8_attributes(line[11:])
            if decrypt_info['METHOD'] == 'AES-128':
                if 'IV' in decrypt_info:
                    # Drop the leading '0x' and left-pad to 32 hex digits
                    decrypt_info['IV'] = binascii.unhexlify(decrypt_info['IV'][2:].zfill(32))
                if not re.match(r'^https?://', decrypt_info['URI']):
                    decrypt_info['URI'] = compat_urlparse.urljoin(
                        man_url, decrypt_info['URI'])
                if extra_query:
                    decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query)
                # A different key URI than the previous EXT-X-KEY: reset 'KEY'
                if decrypt_url != decrypt_info['URI']:
                    decrypt_info['KEY'] = None

        elif line.startswith('#EXT-X-MEDIA-SEQUENCE'):
            # Sequence number of the first segment that follows
            media_sequence = int(line[22:])
        elif line.startswith('#EXT-X-BYTERANGE'):
            splitted_byte_range = line[17:].split('@')
            # Without an explicit offset the range continues where the previous one ended
            sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else byte_range['end']
            byte_range = {
                'start': sub_range_start,
                'end': sub_range_start + int(splitted_byte_range[0]),
            }
        elif is_ad_fragment_start(line):
            ad_frag_next = True
        elif is_ad_fragment_end(line):
            ad_frag_next = False
        elif line.startswith('#EXT-X-DISCONTINUITY'):
            # NOTE(review): this prefix also matches
            # '#EXT-X-DISCONTINUITY-SEQUENCE' - confirm against the code
            # that assigns 'format_index' before tightening the match.
            discontinuity_count += 1

    # We only download the first fragment during the test
    if self.params.get('test', False):
        # Slicing keeps the list empty when nothing was parsed
        fragments = fragments[:1]

    if real_downloader:
        info_copy = info_dict.copy()
        info_copy['fragments'] = fragments
        fd = real_downloader(self.ydl, self.params)
        # TODO: Make progress updates work without hooking twice
        # for ph in self._progress_hooks:
        #     fd.add_progress_hook(ph)
        return fd.real_download(filename, info_copy)

    if is_webvtt:
        def pack_fragment(frag_content, frag_index):
            """Re-pack one WebVTT fragment; returns the UTF-8 encoded bytes to append"""
            output = io.StringIO()
            adjust = 0
            overflow = False
            mpegts_last = None
            for block in webvtt.parse_fragment(frag_content):
                if isinstance(block, webvtt.CueBlock):
                    extra_state['webvtt_mpegts_last'] = mpegts_last
                    if overflow:
                        extra_state['webvtt_mpegts_adjust'] += 1
                        overflow = False
                    block.start += adjust
                    block.end += adjust

                    dedup_window = extra_state.setdefault('webvtt_dedup_window', [])

                    ready = []

                    i = 0
                    is_new = True
                    while i < len(dedup_window):
                        wcue = dedup_window[i]
                        wblock = webvtt.CueBlock.from_json(wcue)
                        i += 1
                        if wblock.hinges(block):
                            # The window cue is extended up to the end of the new cue
                            wcue['end'] = block.end
                            is_new = False
                            continue
                        if wblock == block:
                            is_new = False
                            continue
                        if wblock.end > block.start:
                            continue
                        # The window cue ended before the new cue starts: emit it
                        ready.append(wblock)
                        i -= 1
                        del dedup_window[i]

                    if is_new:
                        dedup_window.append(block.as_json)
                    for ready_block in ready:
                        ready_block.write_into(output)

                    # we only emit cues once they fall out of the duplicate window
                    continue
                elif isinstance(block, webvtt.Magic):
                    # take care of MPEG PES timestamp overflow
                    if block.mpegts is None:
                        block.mpegts = 0
                    extra_state.setdefault('webvtt_mpegts_adjust', 0)
                    block.mpegts += extra_state['webvtt_mpegts_adjust'] << 33
                    if block.mpegts < extra_state.get('webvtt_mpegts_last', 0):
                        overflow = True
                        block.mpegts += 1 << 33
                    mpegts_last = block.mpegts

                    if frag_index == 1:
                        extra_state['webvtt_mpegts'] = block.mpegts or 0
                        extra_state['webvtt_local'] = block.local or 0
                        # XXX: block.local = block.mpegts = None ?
                    else:
                        if block.mpegts is not None and block.local is not None:
                            adjust = (
                                (block.mpegts - extra_state.get('webvtt_mpegts', 0))
                                - (block.local - extra_state.get('webvtt_local', 0))
                            )
                        continue
                elif isinstance(block, webvtt.HeaderBlock):
                    if frag_index != 1:
                        # XXX: this should probably be silent as well
                        # or verify that all segments contain the same data
                        self.report_warning(bug_reports_message(
                            'Discarding a %s block found in the middle of the stream; '
                            'if the subtitles display incorrectly,'
                            % (type(block).__name__)))
                        continue
                block.write_into(output)

            return output.getvalue().encode('utf-8')

        def fin_fragments():
            """Write out the cues still held in the duplicate window"""
            dedup_window = extra_state.get('webvtt_dedup_window')
            if not dedup_window:
                return b''

            output = io.StringIO()
            for cue in dedup_window:
                webvtt.CueBlock.from_json(cue).write_into(output)

            return output.getvalue().encode('utf-8')

        # Propagate the download result just like the non-WebVTT branch does
        return self.download_and_append_fragments(
            ctx, fragments, info_dict, pack_func=pack_fragment, finish_func=fin_fragments)
    else:
        return self.download_and_append_fragments(ctx, fragments, info_dict)