# Native HLS (m3u8) downloader: fetches a media playlist and downloads its
# fragments, optionally decrypting AES-128 segments, delegating to an external
# downloader or ffmpeg when the manifest needs features hlsnative lacks.
from __future__ import unicode_literals

import re
import binascii

# pycryptodome is an optional dependency: it is only needed to decrypt
# AES-128-encrypted HLS fragments. The flag records its availability so
# can_download() can fall back to ffmpeg when it is missing.
try:
    from Crypto.Cipher import AES
    can_decrypt_frag = True
except ImportError:
    can_decrypt_frag = False

# concurrent.futures is stdlib on Python 3; the guard presumably keeps
# Python 2 importable (note the __future__ import above). The flag gates
# the threaded fragment-download path.
try:
    import concurrent.futures
    can_threaded_download = True
except ImportError:
    can_threaded_download = False

from ..downloader import _get_real_downloader
from .fragment import FragmentFD
from .external import FFmpegFD

from ..compat import (
    compat_urllib_error,
    compat_urlparse,
    compat_struct_pack,
)
from ..utils import (
    parse_m3u8_attributes,
    sanitize_open,
    update_url_query,
)
31 | ||
class HlsFD(FragmentFD):
    """
    Download segments in a m3u8 manifest. External downloaders can take over
    the fragment downloads by supporting the 'frag_urls' protocol and
    re-defining 'supports_manifest' function
    """

    FD_NAME = 'hlsnative'

    @staticmethod
    def can_download(manifest, info_dict, allow_unplayable_formats=False, with_crypto=can_decrypt_frag):
        """Return True if hlsnative can handle this manifest.

        manifest -- the m3u8 playlist text
        info_dict -- extractor info dict; only 'is_live' is consulted here
        allow_unplayable_formats -- if False, reject non-NONE/AES-128 encryption
        with_crypto -- whether AES-128 decryption support is available
        """
        UNSUPPORTED_FEATURES = [
            # r'#EXT-X-BYTERANGE',  # playlists composed of byte ranges of media files [2]

            # Live streams heuristic does not always work (e.g. geo restricted to Germany
            # http://hls-geo.daserste.de/i/videoportal/Film/c_620000/622873/format,716451,716457,716450,716458,716459,.mp4.csmil/index_4_av.m3u8?null=0)
            # r'#EXT-X-MEDIA-SEQUENCE:(?!0$)',  # live streams [3]

            # This heuristic also is not correct since segments may not be appended as well.
            # Twitch vods of finished streams have EXT-X-PLAYLIST-TYPE:EVENT despite
            # no segments will definitely be appended to the end of the playlist.
            # r'#EXT-X-PLAYLIST-TYPE:EVENT',  # media segments may be appended to the end of
            #                                 # event media playlists [4]
            # r'#EXT-X-MAP:',  # media initialization [5]
            # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.4
            # 2. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2
            # 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.2
            # 4. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.5
            # 5. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.5
        ]
        if not allow_unplayable_formats:
            UNSUPPORTED_FEATURES += [
                r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)',  # encrypted streams [1]
            ]

        def check_results():
            # Each yielded value must be truthy for the manifest to be downloadable.
            yield not info_dict.get('is_live')
            is_aes128_enc = '#EXT-X-KEY:METHOD=AES-128' in manifest
            yield with_crypto or not is_aes128_enc
            # AES-128 combined with byte ranges is not supported natively.
            yield not (is_aes128_enc and r'#EXT-X-BYTERANGE' in manifest)
            for feature in UNSUPPORTED_FEATURES:
                yield not re.search(feature, manifest)
        return all(check_results())

    def real_download(self, filename, info_dict):
        """Download the m3u8 manifest at info_dict['url'] and all of its
        fragments into *filename*.

        Returns True on success, False on failure. Delegates to ffmpeg when
        the manifest uses unsupported features, and to an external fragment
        downloader when one claims the manifest via 'supports_manifest'.
        """
        man_url = info_dict['url']
        self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME)

        urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
        # geturl() follows redirects so relative fragment URLs resolve correctly.
        man_url = urlh.geturl()
        s = urlh.read().decode('utf-8', 'ignore')

        if not self.can_download(s, info_dict, self.params.get('allow_unplayable_formats')):
            if info_dict.get('extra_param_to_segment_url') or info_dict.get('_decryption_key_url'):
                self.report_error('pycryptodome not found. Please install it.')
                return False
            if self.can_download(s, info_dict, with_crypto=True):
                # Only the missing crypto support stands in the way.
                self.report_warning('pycryptodome is needed to download this file with hlsnative')
            self.report_warning(
                'hlsnative has detected features it does not support, '
                'extraction will be delegated to ffmpeg')
            fd = FFmpegFD(self.ydl, self.params)
            # TODO: Make progress updates work without hooking twice
            # for ph in self._progress_hooks:
            #     fd.add_progress_hook(ph)
            return fd.real_download(filename, info_dict)

        real_downloader = _get_real_downloader(info_dict, 'frag_urls', self.params, None)
        if real_downloader and not real_downloader.supports_manifest(s):
            real_downloader = None

        def is_ad_fragment_start(s):
            # Ad markers injected by Anvato / Uplynk SSAI.
            return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s
                    or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad'))

        def is_ad_fragment_end(s):
            return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=master' in s
                    or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',segment'))

        fragments = []

        # First pass: count media vs. ad fragments so the progress context
        # knows the totals before any download starts.
        media_frags = 0
        ad_frags = 0
        ad_frag_next = False
        for line in s.splitlines():
            line = line.strip()
            if not line:
                continue
            if line.startswith('#'):
                if is_ad_fragment_start(line):
                    ad_frag_next = True
                elif is_ad_fragment_end(line):
                    ad_frag_next = False
                continue
            if ad_frag_next:
                ad_frags += 1
                continue
            media_frags += 1

        ctx = {
            'filename': filename,
            'total_frags': media_frags,
            'ad_frags': ad_frags,
        }

        if real_downloader:
            self._prepare_external_frag_download(ctx)
        else:
            self._prepare_and_start_frag_download(ctx)

        fragment_retries = self.params.get('fragment_retries', 0)
        skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
        test = self.params.get('test', False)

        format_index = info_dict.get('format_index')
        extra_query = None
        extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url')
        if extra_param_to_segment_url:
            extra_query = compat_urlparse.parse_qs(extra_param_to_segment_url)
        # Second pass: build the fragment list, tracking decryption keys,
        # byte ranges, media sequence numbers and discontinuities as we go.
        i = 0
        media_sequence = 0
        decrypt_info = {'METHOD': 'NONE'}
        byte_range = {}
        discontinuity_count = 0
        frag_index = 0
        ad_frag_next = False
        for line in s.splitlines():
            line = line.strip()
            if line:
                if not line.startswith('#'):
                    # A media segment URL.
                    if format_index and discontinuity_count != format_index:
                        continue
                    if ad_frag_next:
                        continue
                    frag_index += 1
                    # Skip fragments already downloaded in a previous (resumed) run.
                    if frag_index <= ctx['fragment_index']:
                        continue
                    frag_url = (
                        line
                        if re.match(r'^https?://', line)
                        else compat_urlparse.urljoin(man_url, line))
                    if extra_query:
                        frag_url = update_url_query(frag_url, extra_query)

                    fragments.append({
                        'frag_index': frag_index,
                        'url': frag_url,
                        'decrypt_info': decrypt_info,
                        'byte_range': byte_range,
                        'media_sequence': media_sequence,
                    })

                elif line.startswith('#EXT-X-MAP'):
                    # Media initialization section; must precede all media segments.
                    if format_index and discontinuity_count != format_index:
                        continue
                    if frag_index > 0:
                        self.report_error(
                            'initialization fragment found after media fragments, unable to download')
                        return False
                    frag_index += 1
                    map_info = parse_m3u8_attributes(line[11:])
                    frag_url = (
                        map_info.get('URI')
                        if re.match(r'^https?://', map_info.get('URI'))
                        else compat_urlparse.urljoin(man_url, map_info.get('URI')))
                    if extra_query:
                        frag_url = update_url_query(frag_url, extra_query)

                    fragments.append({
                        'frag_index': frag_index,
                        'url': frag_url,
                        'decrypt_info': decrypt_info,
                        'byte_range': byte_range,
                        'media_sequence': media_sequence
                    })

                    if map_info.get('BYTERANGE'):
                        # BYTERANGE is "length[@offset]"; without an offset the
                        # range continues from the previous range's end.
                        splitted_byte_range = map_info.get('BYTERANGE').split('@')
                        sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else byte_range['end']
                        byte_range = {
                            'start': sub_range_start,
                            'end': sub_range_start + int(splitted_byte_range[0]),
                        }

                elif line.startswith('#EXT-X-KEY'):
                    # New key context applies to all following segments.
                    decrypt_url = decrypt_info.get('URI')
                    decrypt_info = parse_m3u8_attributes(line[11:])
                    if decrypt_info['METHOD'] == 'AES-128':
                        if 'IV' in decrypt_info:
                            # IV attribute is 0x-prefixed hex; pad to 128 bits.
                            decrypt_info['IV'] = binascii.unhexlify(decrypt_info['IV'][2:].zfill(32))
                        if not re.match(r'^https?://', decrypt_info['URI']):
                            decrypt_info['URI'] = compat_urlparse.urljoin(
                                man_url, decrypt_info['URI'])
                        if extra_query:
                            decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query)
                        # Invalidate the cached key when the key URI changed.
                        if decrypt_url != decrypt_info['URI']:
                            decrypt_info['KEY'] = None

                elif line.startswith('#EXT-X-MEDIA-SEQUENCE'):
                    media_sequence = int(line[22:])
                elif line.startswith('#EXT-X-BYTERANGE'):
                    splitted_byte_range = line[17:].split('@')
                    sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else byte_range['end']
                    byte_range = {
                        'start': sub_range_start,
                        'end': sub_range_start + int(splitted_byte_range[0]),
                    }
                elif is_ad_fragment_start(line):
                    ad_frag_next = True
                elif is_ad_fragment_end(line):
                    ad_frag_next = False
                elif line.startswith('#EXT-X-DISCONTINUITY'):
                    discontinuity_count += 1
                # NOTE(review): counters advance for every non-blank manifest
                # line here, as in the original — verify against upstream
                # before changing, since media_sequence feeds the AES IV.
                i += 1
                media_sequence += 1

        # We only download the first fragment during the test
        if test:
            fragments = [fragments[0] if fragments else None]

        if real_downloader:
            info_copy = info_dict.copy()
            info_copy['fragments'] = fragments
            fd = real_downloader(self.ydl, self.params)
            # TODO: Make progress updates work without hooking twice
            # for ph in self._progress_hooks:
            #     fd.add_progress_hook(ph)
            success = fd.real_download(filename, info_copy)
            if not success:
                return False
        else:
            def download_fragment(fragment):
                # Download one fragment; returns (content_or_False, frag_index).
                frag_index = fragment['frag_index']
                frag_url = fragment['url']
                decrypt_info = fragment['decrypt_info']
                byte_range = fragment['byte_range']
                media_sequence = fragment['media_sequence']

                ctx['fragment_index'] = frag_index

                count = 0
                # Copy so the per-fragment Range header cannot leak into the
                # shared http_headers dict (fragments may download concurrently).
                headers = dict(info_dict.get('http_headers') or {})
                if byte_range:
                    headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'] - 1)
                while count <= fragment_retries:
                    try:
                        success, frag_content = self._download_fragment(
                            ctx, frag_url, info_dict, headers)
                        if not success:
                            return False, frag_index
                        break
                    except compat_urllib_error.HTTPError as err:
                        # Unavailable (possibly temporary) fragments may be served.
                        # First we try to retry then either skip or abort.
                        # See https://github.com/ytdl-org/youtube-dl/issues/10165,
                        # https://github.com/ytdl-org/youtube-dl/issues/10448).
                        count += 1
                        if count <= fragment_retries:
                            self.report_retry_fragment(err, frag_index, count, fragment_retries)
                if count > fragment_retries:
                    return False, frag_index

                if decrypt_info['METHOD'] == 'AES-128':
                    # Default IV per the HLS spec: the media sequence number
                    # as a big-endian 128-bit integer.
                    iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence)
                    decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen(
                        self._prepare_url(info_dict, info_dict.get('_decryption_key_url') or decrypt_info['URI'])).read()
                    # Don't decrypt the content in tests since the data is explicitly truncated and it's not to a valid block
                    # size (see https://github.com/ytdl-org/youtube-dl/pull/27660). Tests only care that the correct data downloaded,
                    # not what it decrypts to.
                    if not test:
                        frag_content = AES.new(
                            decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content)

                return frag_content, frag_index

            def append_fragment(frag_content, frag_index):
                # Append downloaded content to the output file, or skip/abort
                # per --skip-unavailable-fragments when the fragment failed.
                if frag_content:
                    fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], frag_index)
                    try:
                        file, frag_sanitized = sanitize_open(fragment_filename, 'rb')
                        ctx['fragment_filename_sanitized'] = frag_sanitized
                        file.close()
                        self._append_fragment(ctx, frag_content)
                        return True
                    except FileNotFoundError:
                        if skip_unavailable_fragments:
                            self.report_skip_fragment(frag_index)
                            return True
                        else:
                            self.report_error(
                                'fragment %s not found, unable to continue' % frag_index)
                            return False
                else:
                    if skip_unavailable_fragments:
                        self.report_skip_fragment(frag_index)
                        return True
                    else:
                        self.report_error(
                            'fragment %s not found, unable to continue' % frag_index)
                        return False

            max_workers = self.params.get('concurrent_fragment_downloads', 1)
            if can_threaded_download and max_workers > 1:
                self.report_warning('The download speed shown is only of one thread. This is a known issue')
                with concurrent.futures.ThreadPoolExecutor(max_workers) as pool:
                    futures = [pool.submit(download_fragment, fragment) for fragment in fragments]
                    # timeout must be 0 to return instantly
                    done, not_done = concurrent.futures.wait(futures, timeout=0)
                    try:
                        while not_done:
                            # Check every 1 second for KeyboardInterrupt
                            freshly_done, not_done = concurrent.futures.wait(not_done, timeout=1)
                            done |= freshly_done
                    except KeyboardInterrupt:
                        for future in not_done:
                            future.cancel()
                        # timeout must be none to cancel
                        concurrent.futures.wait(not_done, timeout=None)
                        raise KeyboardInterrupt
                results = [future.result() for future in futures]

                # Append in playlist order regardless of completion order.
                for frag_content, frag_index in results:
                    result = append_fragment(frag_content, frag_index)
                    if not result:
                        return False
            else:
                for fragment in fragments:
                    frag_content, frag_index = download_fragment(fragment)
                    result = append_fragment(frag_content, frag_index)
                    if not result:
                        return False

        self._finish_frag_download(ctx)
        return True