]>
Commit | Line | Data |
---|---|---|
f0b5d6af PH |
1 | from __future__ import unicode_literals |
2 | ||
f0b5d6af | 3 | import re |
e154c651 | 4 | import binascii |
5 | try: | |
6 | from Crypto.Cipher import AES | |
7 | can_decrypt_frag = True | |
8 | except ImportError: | |
9 | can_decrypt_frag = False | |
4cf1e5d2 | 10 | try: |
11 | import concurrent.futures | |
12 | can_threaded_download = True | |
13 | except ImportError: | |
14 | can_threaded_download = False | |
3bc2ddcc | 15 | |
5219cb3e | 16 | from ..downloader import _get_real_downloader |
f9a5affa | 17 | from .fragment import FragmentFD |
0d66bd0e | 18 | from .external import FFmpegFD |
f9a5affa | 19 | |
e154c651 | 20 | from ..compat import ( |
25afc2a7 | 21 | compat_urllib_error, |
e154c651 | 22 | compat_urlparse, |
23 | compat_struct_pack, | |
24 | ) | |
1cc79574 | 25 | from ..utils import ( |
e154c651 | 26 | parse_m3u8_attributes, |
4cf1e5d2 | 27 | sanitize_open, |
aaf44a2f | 28 | update_url_query, |
3bc2ddcc JMF |
29 | ) |
30 | ||
31 | ||
class HlsFD(FragmentFD):
    """
    Download segments in a m3u8 manifest. External downloaders can take over
    the fragment downloads by supporting the 'frag_urls' protocol and
    re-defining 'supports_manifest' function
    """

    FD_NAME = 'hlsnative'

    @staticmethod
    def can_download(manifest, info_dict, allow_unplayable_formats=False, with_crypto=can_decrypt_frag):
        """Tell whether the native HLS downloader can handle this manifest.

        @param manifest                  Text of the m3u8 media playlist
        @param info_dict                 Format dictionary; only 'is_live' is read here
        @param allow_unplayable_formats  When true, unknown '#EXT-X-KEY' methods are
                                         not treated as unsupported
        @param with_crypto               Whether AES-128 decryption is available;
                                         defaults to whether Crypto.Cipher.AES was importable
        @returns                         True if every check passes, False otherwise
        """
        UNSUPPORTED_FEATURES = [
            # r'#EXT-X-BYTERANGE',  # playlists composed of byte ranges of media files [2]

            # Live streams heuristic does not always work (e.g. geo restricted to Germany
            # http://hls-geo.daserste.de/i/videoportal/Film/c_620000/622873/format,716451,716457,716450,716458,716459,.mp4.csmil/index_4_av.m3u8?null=0)
            # r'#EXT-X-MEDIA-SEQUENCE:(?!0$)',  # live streams [3]

            # This heuristic also is not correct since segments may not be appended as well.
            # Twitch vods of finished streams have EXT-X-PLAYLIST-TYPE:EVENT despite
            # no segments will definitely be appended to the end of the playlist.
            # r'#EXT-X-PLAYLIST-TYPE:EVENT',  # media segments may be appended to the end of
            #                                 # event media playlists [4]
            # r'#EXT-X-MAP:',  # media initialization [5]
            # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.4
            # 2. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2
            # 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.2
            # 4. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.5
            # 5. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.5
        ]
        if not allow_unplayable_formats:
            UNSUPPORTED_FEATURES += [
                r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)',  # encrypted streams [1]
            ]

        def check_results():
            # Generator so that all() stops at the first failing check
            yield not info_dict.get('is_live')
            is_aes128_enc = '#EXT-X-KEY:METHOD=AES-128' in manifest
            yield with_crypto or not is_aes128_enc
            # AES-128 combined with byte ranges is not handled
            yield not (is_aes128_enc and r'#EXT-X-BYTERANGE' in manifest)
            for feature in UNSUPPORTED_FEATURES:
                yield not re.search(feature, manifest)
        return all(check_results())

    def real_download(self, filename, info_dict):
        """Download the m3u8 manifest at info_dict['url'] and all its fragments.

        The manifest is parsed into a list of fragment descriptions which is
        either handed to an external 'frag_urls' downloader or downloaded
        natively (optionally with several threads). If the manifest uses
        features can_download() rejects, the download is delegated to FFmpegFD.

        @param filename   Name of the output file
        @param info_dict  Format dictionary of the media being downloaded
        @returns          True on success, False on failure (or whatever the
                          delegated FFmpegFD download returns)
        """
        man_url = info_dict['url']
        self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME)

        urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
        # Relative fragment URLs are resolved against the final (post-redirect) URL
        man_url = urlh.geturl()
        s = urlh.read().decode('utf-8', 'ignore')

        allow_unplayable_formats = self.params.get('allow_unplayable_formats')
        if not self.can_download(s, info_dict, allow_unplayable_formats):
            if info_dict.get('extra_param_to_segment_url') or info_dict.get('_decryption_key_url'):
                self.report_error('pycryptodome not found. Please install it.')
                return False
            # Same feature set as the check above, only assuming crypto is available,
            # so the warning is shown exactly when the missing module is the cause
            if self.can_download(s, info_dict, allow_unplayable_formats, with_crypto=True):
                self.report_warning('pycryptodome is needed to download this file with hlsnative')
            self.report_warning(
                'hlsnative has detected features it does not support, '
                'extraction will be delegated to ffmpeg')
            fd = FFmpegFD(self.ydl, self.params)
            # TODO: Make progress updates work without hooking twice
            # for ph in self._progress_hooks:
            #     fd.add_progress_hook(ph)
            return fd.real_download(filename, info_dict)

        real_downloader = _get_real_downloader(info_dict, 'frag_urls', self.params, None)
        if real_downloader and not real_downloader.supports_manifest(s):
            real_downloader = None

        def is_ad_fragment_start(s):
            return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s
                    or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad'))

        def is_ad_fragment_end(s):
            return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=master' in s
                    or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',segment'))

        fragments = []

        # First pass: count media and ad fragments for the progress context
        media_frags = 0
        ad_frags = 0
        ad_frag_next = False
        for line in s.splitlines():
            line = line.strip()
            if not line:
                continue
            if line.startswith('#'):
                if is_ad_fragment_start(line):
                    ad_frag_next = True
                elif is_ad_fragment_end(line):
                    ad_frag_next = False
                continue
            if ad_frag_next:
                ad_frags += 1
                continue
            media_frags += 1

        ctx = {
            'filename': filename,
            'total_frags': media_frags,
            'ad_frags': ad_frags,
        }

        if real_downloader:
            self._prepare_external_frag_download(ctx)
        else:
            self._prepare_and_start_frag_download(ctx)

        fragment_retries = self.params.get('fragment_retries', 0)
        skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
        test = self.params.get('test', False)

        format_index = info_dict.get('format_index')
        extra_query = None
        extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url')
        if extra_param_to_segment_url:
            extra_query = compat_urlparse.parse_qs(extra_param_to_segment_url)

        # Second pass: build the list of fragments to download
        media_sequence = 0
        decrypt_info = {'METHOD': 'NONE'}
        byte_range = {}
        discontinuity_count = 0
        frag_index = 0
        ad_frag_next = False
        for line in s.splitlines():
            line = line.strip()
            if not line:
                continue

            if not line.startswith('#'):
                # Every URI line is one media segment and consumes one sequence
                # number, even when the segment is skipped below; the sequence
                # number is the implicit IV of AES-128 encrypted segments
                segment_sequence = media_sequence
                media_sequence += 1

                if format_index and discontinuity_count != format_index:
                    continue
                if ad_frag_next:
                    continue
                frag_index += 1
                # ctx['fragment_index'] is set by the _prepare_*_frag_download call above
                if frag_index <= ctx['fragment_index']:
                    continue
                frag_url = (
                    line
                    if re.match(r'^https?://', line)
                    else compat_urlparse.urljoin(man_url, line))
                if extra_query:
                    frag_url = update_url_query(frag_url, extra_query)

                fragments.append({
                    'frag_index': frag_index,
                    'url': frag_url,
                    'decrypt_info': decrypt_info,
                    'byte_range': byte_range,
                    'media_sequence': segment_sequence,
                })

            elif line.startswith('#EXT-X-MAP'):
                if format_index and discontinuity_count != format_index:
                    continue
                if frag_index > 0:
                    self.report_error(
                        'initialization fragment found after media fragments, unable to download')
                    return False
                frag_index += 1
                map_info = parse_m3u8_attributes(line[11:])
                frag_url = (
                    map_info.get('URI')
                    if re.match(r'^https?://', map_info.get('URI'))
                    else compat_urlparse.urljoin(man_url, map_info.get('URI')))
                if extra_query:
                    frag_url = update_url_query(frag_url, extra_query)

                # The BYTERANGE attribute describes the initialization fragment
                # itself, so it has to be resolved before the fragment is recorded
                if map_info.get('BYTERANGE'):
                    splitted_byte_range = map_info.get('BYTERANGE').split('@')
                    sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else byte_range['end']
                    byte_range = {
                        'start': sub_range_start,
                        'end': sub_range_start + int(splitted_byte_range[0]),
                    }

                fragments.append({
                    'frag_index': frag_index,
                    'url': frag_url,
                    'decrypt_info': decrypt_info,
                    'byte_range': byte_range,
                    'media_sequence': media_sequence
                })

            elif line.startswith('#EXT-X-KEY'):
                decrypt_url = decrypt_info.get('URI')
                decrypt_info = parse_m3u8_attributes(line[11:])
                if decrypt_info['METHOD'] == 'AES-128':
                    if 'IV' in decrypt_info:
                        # Drop the two-character prefix and left-pad to 16 bytes of hex
                        decrypt_info['IV'] = binascii.unhexlify(decrypt_info['IV'][2:].zfill(32))
                    if not re.match(r'^https?://', decrypt_info['URI']):
                        decrypt_info['URI'] = compat_urlparse.urljoin(
                            man_url, decrypt_info['URI'])
                    if extra_query:
                        decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query)
                    if decrypt_url != decrypt_info['URI']:
                        decrypt_info['KEY'] = None

            elif line.startswith('#EXT-X-MEDIA-SEQUENCE'):
                media_sequence = int(line[22:])
            elif line.startswith('#EXT-X-BYTERANGE'):
                splitted_byte_range = line[17:].split('@')
                # Without an explicit offset the range continues where the previous one ended
                sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else byte_range['end']
                byte_range = {
                    'start': sub_range_start,
                    'end': sub_range_start + int(splitted_byte_range[0]),
                }
            elif is_ad_fragment_start(line):
                ad_frag_next = True
            elif is_ad_fragment_end(line):
                ad_frag_next = False
            elif line.startswith('#EXT-X-DISCONTINUITY'):
                discontinuity_count += 1

        # We only download the first fragment during the test
        if test:
            fragments = fragments[:1]

        if real_downloader:
            info_copy = info_dict.copy()
            info_copy['fragments'] = fragments
            fd = real_downloader(self.ydl, self.params)
            # TODO: Make progress updates work without hooking twice
            # for ph in self._progress_hooks:
            #     fd.add_progress_hook(ph)
            success = fd.real_download(filename, info_copy)
            if not success:
                return False
        else:
            def download_fragment(fragment):
                """Fetch (and decrypt if needed) one fragment.

                Returns (frag_content, frag_index); frag_content is False
                when the fragment could not be downloaded.
                """
                frag_index = fragment['frag_index']
                frag_url = fragment['url']
                decrypt_info = fragment['decrypt_info']
                byte_range = fragment['byte_range']
                media_sequence = fragment['media_sequence']

                ctx['fragment_index'] = frag_index

                count = 0
                # Work on a copy: info_dict['http_headers'] is shared by all
                # fragments (and threads) and must not pick up this Range header
                headers = dict(info_dict.get('http_headers') or {})
                if byte_range:
                    headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'] - 1)
                while count <= fragment_retries:
                    try:
                        success, frag_content = self._download_fragment(
                            ctx, frag_url, info_dict, headers)
                        if not success:
                            return False, frag_index
                        break
                    except compat_urllib_error.HTTPError as err:
                        # Unavailable (possibly temporary) fragments may be served.
                        # First we try to retry then either skip or abort.
                        # See https://github.com/ytdl-org/youtube-dl/issues/10165,
                        # https://github.com/ytdl-org/youtube-dl/issues/10448).
                        count += 1
                        if count <= fragment_retries:
                            self.report_retry_fragment(err, frag_index, count, fragment_retries)
                if count > fragment_retries:
                    return False, frag_index

                if decrypt_info['METHOD'] == 'AES-128':
                    # Without an explicit IV the media sequence number is used
                    iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence)
                    # The key is cached in the (shared) decrypt_info dict
                    decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen(
                        self._prepare_url(info_dict, info_dict.get('_decryption_key_url') or decrypt_info['URI'])).read()
                    # Don't decrypt the content in tests since the data is explicitly truncated and it's not to a valid block
                    # size (see https://github.com/ytdl-org/youtube-dl/pull/27660). Tests only care that the correct data downloaded,
                    # not what it decrypts to.
                    if not test:
                        frag_content = AES.new(
                            decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content)

                return frag_content, frag_index

            def append_fragment(frag_content, frag_index):
                """Append one downloaded fragment to the output.

                Returns True to continue with the next fragment and False
                when the download has to be aborted.
                """
                if frag_content:
                    fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], frag_index)
                    try:
                        frag_file, frag_sanitized = sanitize_open(fragment_filename, 'rb')
                        ctx['fragment_filename_sanitized'] = frag_sanitized
                        frag_file.close()
                        self._append_fragment(ctx, frag_content)
                        return True
                    except FileNotFoundError:
                        # Handled like a fragment that was never downloaded
                        pass

                if skip_unavailable_fragments:
                    self.report_skip_fragment(frag_index)
                    return True
                self.report_error(
                    'fragment %s not found, unable to continue' % frag_index)
                return False

            max_workers = self.params.get('concurrent_fragment_downloads', 1)
            if can_threaded_download and max_workers > 1:
                self.report_warning('The download speed shown is only of one thread. This is a known issue')
                with concurrent.futures.ThreadPoolExecutor(max_workers) as pool:
                    futures = [pool.submit(download_fragment, fragment) for fragment in fragments]
                    # timeout must be 0 to return instantly
                    done, not_done = concurrent.futures.wait(futures, timeout=0)
                    try:
                        while not_done:
                            # Check every 1 second for KeyboardInterrupt
                            freshly_done, not_done = concurrent.futures.wait(not_done, timeout=1)
                            done |= freshly_done
                    except KeyboardInterrupt:
                        for future in not_done:
                            future.cancel()
                        # timeout must be none to cancel
                        concurrent.futures.wait(not_done, timeout=None)
                        raise
                results = [future.result() for future in futures]

                # Fragments are appended in playlist order once all downloads finished
                for frag_content, frag_index in results:
                    result = append_fragment(frag_content, frag_index)
                    if not result:
                        return False
            else:
                for fragment in fragments:
                    frag_content, frag_index = download_fragment(fragment)
                    result = append_fragment(frag_content, frag_index)
                    if not result:
                        return False

            self._finish_frag_download(ctx)
        return True