]>
Commit | Line | Data |
---|---|---|
e154c651 | 1 | import binascii |
f8271158 | 2 | import io |
3 | import re | |
14f25df2 | 4 | import urllib.parse |
3bc2ddcc | 5 | |
c487cf00 | 6 | from . import get_suitable_downloader |
0d66bd0e | 7 | from .external import FFmpegFD |
f8271158 | 8 | from .fragment import FragmentFD |
4a2f19ab | 9 | from .. import webvtt |
f6a765ce | 10 | from ..dependencies import Cryptodome |
7e68567e | 11 | from ..utils import ( |
12 | bug_reports_message, | |
13 | parse_m3u8_attributes, | |
14 | remove_start, | |
15 | traverse_obj, | |
16 | update_url_query, | |
17 | urljoin, | |
18 | ) | |
3bc2ddcc JMF |
19 | |
20 | ||
class HlsFD(FragmentFD):
    """
    Download segments in a m3u8 manifest. External downloaders can take over
    the fragment downloads by supporting the 'm3u8_frag_urls' protocol and
    re-defining 'supports_manifest' function
    """

    FD_NAME = 'hlsnative'

    @staticmethod
    def _has_drm(manifest):  # TODO: https://github.com/yt-dlp/yt-dlp/pull/5039
        """Return True if the manifest text contains one of the known DRM markers."""
        return bool(re.search('|'.join((
            r'#EXT-X-(?:SESSION-)?KEY:.*?URI="skd://',  # Apple FairPlay
            r'#EXT-X-(?:SESSION-)?KEY:.*?KEYFORMAT="com\.apple\.streamingkeydelivery"',  # Apple FairPlay
            r'#EXT-X-(?:SESSION-)?KEY:.*?KEYFORMAT="com\.microsoft\.playready"',  # Microsoft PlayReady
            r'#EXT-X-FAXS-CM:',  # Adobe Flash Access
        )), manifest))

    @classmethod
    def can_download(cls, manifest, info_dict, allow_unplayable_formats=False):
        """
        Tell whether this downloader can handle the given manifest.

        @param manifest                  Text of the m3u8 media playlist
        @param info_dict                 Format dictionary; only 'is_live' is read here
        @param allow_unplayable_formats  When truthy, the encryption-method and DRM
                                         checks are skipped
        @returns                         True only if every check passes
        """
        UNSUPPORTED_FEATURES = [
            # r'#EXT-X-BYTERANGE',  # playlists composed of byte ranges of media files [2]

            # Live streams heuristic does not always work (e.g. geo restricted to Germany
            # http://hls-geo.daserste.de/i/videoportal/Film/c_620000/622873/format,716451,716457,716450,716458,716459,.mp4.csmil/index_4_av.m3u8?null=0)
            # r'#EXT-X-MEDIA-SEQUENCE:(?!0$)',  # live streams [3]

            # This heuristic also is not correct since segments may not be appended as well.
            # Twitch vods of finished streams have EXT-X-PLAYLIST-TYPE:EVENT despite
            # no segments will definitely be appended to the end of the playlist.
            # r'#EXT-X-PLAYLIST-TYPE:EVENT',  # media segments may be appended to the end of
            #                                 # event media playlists [4]
            # r'#EXT-X-MAP:',  # media initialization [5]
            # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.4
            # 2. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2
            # 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.2
            # 4. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.5
            # 5. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.5
        ]
        if not allow_unplayable_formats:
            UNSUPPORTED_FEATURES += [
                r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)',  # encrypted streams [1], but not necessarily DRM
            ]

        def check_results():
            # Lazily yield one boolean per check so that all() stops at the first failure
            yield not info_dict.get('is_live')
            for feature in UNSUPPORTED_FEATURES:
                yield not re.search(feature, manifest)
            if not allow_unplayable_formats:
                yield not cls._has_drm(manifest)
        return all(check_results())

    def real_download(self, filename, info_dict):
        """
        Fetch the m3u8 playlist at info_dict['url'] and download its fragments.

        The playlist is delegated to FFmpegFD when can_download() rejects it (or when
        AES-128 is used, pycryptodomex is missing and ffmpeg is available). Otherwise
        the fragment list is built here and either handed to an external downloader
        that supports 'm3u8_frag_urls', or downloaded through
        download_and_append_fragments().

        @param filename   Output file name ('-' is passed on as to_stdout)
        @param info_dict  Format dictionary; 'url' and 'ext' are required
        @returns          False if the format is DRM protected or an initialization
                          fragment appears after media fragments; otherwise whatever
                          the downloader that did the work returned
        """
        man_url = info_dict['url']
        self.to_screen(f'[{self.FD_NAME}] Downloading m3u8 manifest')

        urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
        # Use the final URL of the response as the base for relative fragment URLs
        man_url = urlh.url
        s = urlh.read().decode('utf-8', 'ignore')

        can_download, message = self.can_download(s, info_dict, self.params.get('allow_unplayable_formats')), None
        if can_download:
            has_ffmpeg = FFmpegFD.available()
            no_crypto = not Cryptodome.AES and '#EXT-X-KEY:METHOD=AES-128' in s
            if no_crypto and has_ffmpeg:
                can_download, message = False, 'The stream has AES-128 encryption and pycryptodomex is not available'
            elif no_crypto:
                message = ('The stream has AES-128 encryption and neither ffmpeg nor pycryptodomex are available; '
                           'Decryption will be performed natively, but will be extremely slow')
            elif info_dict.get('extractor_key') == 'Generic' and re.search(r'(?m)#EXT-X-MEDIA-SEQUENCE:(?!0$)', s):
                install_ffmpeg = '' if has_ffmpeg else 'install ffmpeg and '
                message = ('Live HLS streams are not supported by the native downloader. If this is a livestream, '
                           f'please {install_ffmpeg}add "--downloader ffmpeg --hls-use-mpegts" to your command')
        if not can_download:
            if self._has_drm(s) and not self.params.get('allow_unplayable_formats'):
                if info_dict.get('has_drm') and self.params.get('test'):
                    self.to_screen(f'[{self.FD_NAME}] This format is DRM protected', skip_eol=True)
                else:
                    self.report_error(
                        'This format is DRM protected; Try selecting another format with --format or '
                        'add --check-formats to automatically fallback to the next best format', tb=False)
                return False
            message = message or 'Unsupported features have been detected'
            fd = FFmpegFD(self.ydl, self.params)
            self.report_warning(f'{message}; extraction will be delegated to {fd.get_basename()}')
            return fd.real_download(filename, info_dict)
        elif message:
            self.report_warning(message)

        is_webvtt = info_dict['ext'] == 'vtt'
        if is_webvtt:
            real_downloader = None  # Packing the fragments is not currently supported for external downloader
        else:
            real_downloader = get_suitable_downloader(
                info_dict, self.params, None, protocol='m3u8_frag_urls', to_stdout=(filename == '-'))
        if real_downloader and not real_downloader.supports_manifest(s):
            real_downloader = None
        if real_downloader:
            self.to_screen(f'[{self.FD_NAME}] Fragment downloads will be delegated to {real_downloader.get_basename()}')

        def is_ad_fragment_start(s):
            return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s
                    or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad'))

        def is_ad_fragment_end(s):
            return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=master' in s
                    or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',segment'))

        def parse_byte_range(spec, previous):
            # spec is '<length>' or '<length>@<offset>'. Without an offset the range
            # starts where the previous one ended (KeyError if there is none).
            parts = spec.split('@')
            start = int(parts[1]) if len(parts) == 2 else previous['end']
            return {
                'start': start,
                'end': start + int(parts[0]),
            }

        fragments = []

        # First pass: count media and ad fragments so the totals are known up front
        media_frags = 0
        ad_frags = 0
        ad_frag_next = False
        for line in s.splitlines():
            line = line.strip()
            if not line:
                continue
            if line.startswith('#'):
                if is_ad_fragment_start(line):
                    ad_frag_next = True
                elif is_ad_fragment_end(line):
                    ad_frag_next = False
                continue
            if ad_frag_next:
                ad_frags += 1
                continue
            media_frags += 1

        ctx = {
            'filename': filename,
            'total_frags': media_frags,
            'ad_frags': ad_frags,
        }

        if real_downloader:
            self._prepare_external_frag_download(ctx)
        else:
            self._prepare_and_start_frag_download(ctx, info_dict)

        # Mutable state shared with the WebVTT packing helpers below
        extra_state = ctx.setdefault('extra_state', {})

        format_index = info_dict.get('format_index')
        extra_query = None
        extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url')
        if extra_param_to_segment_url:
            extra_query = urllib.parse.parse_qs(extra_param_to_segment_url)
        media_sequence = 0
        decrypt_info = {'METHOD': 'NONE'}
        # A key/IV supplied through info_dict['hls_aes'] overrides the playlist's values
        external_aes_key = traverse_obj(info_dict, ('hls_aes', 'key'))
        if external_aes_key:
            external_aes_key = binascii.unhexlify(remove_start(external_aes_key, '0x'))
            assert len(external_aes_key) in (16, 24, 32), 'Invalid length for HLS AES-128 key'
        external_aes_iv = traverse_obj(info_dict, ('hls_aes', 'iv'))
        if external_aes_iv:
            external_aes_iv = binascii.unhexlify(remove_start(external_aes_iv, '0x').zfill(32))
        byte_range = {}
        discontinuity_count = 0
        frag_index = 0
        ad_frag_next = False
        # Second pass: build the fragment list, tracking the tag state that applies to each URI
        for line in s.splitlines():
            line = line.strip()
            if line:
                if not line.startswith('#'):
                    # With a format_index, keep only the section after that many discontinuities
                    if format_index and discontinuity_count != format_index:
                        continue
                    if ad_frag_next:
                        continue
                    frag_index += 1
                    # Skip fragments at or below the index already recorded in ctx
                    # (presumably set by the _prepare_* helpers when resuming - confirm)
                    if frag_index <= ctx['fragment_index']:
                        continue
                    frag_url = urljoin(man_url, line)
                    if extra_query:
                        frag_url = update_url_query(frag_url, extra_query)

                    fragments.append({
                        'frag_index': frag_index,
                        'url': frag_url,
                        'decrypt_info': decrypt_info,
                        'byte_range': byte_range,
                        'media_sequence': media_sequence,
                    })
                    media_sequence += 1

                elif line.startswith('#EXT-X-MAP'):
                    if format_index and discontinuity_count != format_index:
                        continue
                    if frag_index > 0:
                        self.report_error(
                            'Initialization fragment found after media fragments, unable to download')
                        return False
                    frag_index += 1
                    map_info = parse_m3u8_attributes(line[11:])
                    frag_url = urljoin(man_url, map_info.get('URI'))
                    if extra_query:
                        frag_url = update_url_query(frag_url, extra_query)

                    if map_info.get('BYTERANGE'):
                        byte_range = parse_byte_range(map_info.get('BYTERANGE'), byte_range)

                    fragments.append({
                        'frag_index': frag_index,
                        'url': frag_url,
                        'decrypt_info': decrypt_info,
                        'byte_range': byte_range,
                        'media_sequence': media_sequence,
                    })
                    media_sequence += 1

                elif line.startswith('#EXT-X-KEY'):
                    decrypt_url = decrypt_info.get('URI')
                    decrypt_info = parse_m3u8_attributes(line[11:])
                    if decrypt_info['METHOD'] == 'AES-128':
                        if external_aes_iv:
                            decrypt_info['IV'] = external_aes_iv
                        elif 'IV' in decrypt_info:
                            # Drop the first two characters (the '0x' prefix) before decoding
                            decrypt_info['IV'] = binascii.unhexlify(decrypt_info['IV'][2:].zfill(32))
                        if external_aes_key:
                            decrypt_info['KEY'] = external_aes_key
                        else:
                            decrypt_info['URI'] = urljoin(man_url, decrypt_info['URI'])
                            if extra_query:
                                decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query)
                            if decrypt_url != decrypt_info['URI']:
                                # Key URI differs from the previous one: no key known for it yet
                                decrypt_info['KEY'] = None

                elif line.startswith('#EXT-X-MEDIA-SEQUENCE'):
                    media_sequence = int(line[22:])
                elif line.startswith('#EXT-X-BYTERANGE'):
                    byte_range = parse_byte_range(line[17:], byte_range)
                elif is_ad_fragment_start(line):
                    ad_frag_next = True
                elif is_ad_fragment_end(line):
                    ad_frag_next = False
                elif line.startswith('#EXT-X-DISCONTINUITY'):
                    discontinuity_count += 1

        # We only download the first fragment during the test
        if self.params.get('test', False):
            # Slicing keeps an empty list empty instead of producing [None]
            fragments = fragments[:1]

        if real_downloader:
            info_dict['fragments'] = fragments
            fd = real_downloader(self.ydl, self.params)
            # TODO: Make progress updates work without hooking twice
            # for ph in self._progress_hooks:
            #     fd.add_progress_hook(ph)
            return fd.real_download(filename, info_dict)

        if is_webvtt:
            def pack_fragment(frag_content, frag_index):
                """Re-time and de-duplicate the cues of one WebVTT fragment; return encoded text."""
                output = io.StringIO()
                adjust = 0
                overflow = False
                mpegts_last = None
                for block in webvtt.parse_fragment(frag_content):
                    if isinstance(block, webvtt.CueBlock):
                        extra_state['webvtt_mpegts_last'] = mpegts_last
                        if overflow:
                            extra_state['webvtt_mpegts_adjust'] += 1
                            overflow = False
                        block.start += adjust
                        block.end += adjust

                        dedup_window = extra_state.setdefault('webvtt_dedup_window', [])

                        ready = []

                        i = 0
                        is_new = True
                        while i < len(dedup_window):
                            wcue = dedup_window[i]
                            wblock = webvtt.CueBlock.from_json(wcue)
                            i += 1
                            if wblock.hinges(block):
                                # Extend the stored cue instead of adding a new one
                                wcue['end'] = block.end
                                is_new = False
                                continue
                            if wblock == block:
                                is_new = False
                                continue
                            if wblock.end > block.start:
                                continue
                            # The stored cue ended before this one starts: emit it and
                            # remove it from the window (step back so no entry is skipped)
                            ready.append(wblock)
                            i -= 1
                            del dedup_window[i]

                        if is_new:
                            dedup_window.append(block.as_json)
                        for ready_block in ready:
                            ready_block.write_into(output)

                        # we only emit cues once they fall out of the duplicate window
                        continue
                    elif isinstance(block, webvtt.Magic):
                        # take care of MPEG PES timestamp overflow
                        if block.mpegts is None:
                            block.mpegts = 0
                        extra_state.setdefault('webvtt_mpegts_adjust', 0)
                        block.mpegts += extra_state['webvtt_mpegts_adjust'] << 33
                        if block.mpegts < extra_state.get('webvtt_mpegts_last', 0):
                            overflow = True
                            block.mpegts += 1 << 33
                        mpegts_last = block.mpegts

                        if frag_index == 1:
                            extra_state['webvtt_mpegts'] = block.mpegts or 0
                            extra_state['webvtt_local'] = block.local or 0
                            # XXX: block.local = block.mpegts = None ?
                        else:
                            if block.mpegts is not None and block.local is not None:
                                adjust = (
                                    (block.mpegts - extra_state.get('webvtt_mpegts', 0))
                                    - (block.local - extra_state.get('webvtt_local', 0))
                                )
                        continue
                    elif isinstance(block, webvtt.HeaderBlock):
                        if frag_index != 1:
                            # XXX: this should probably be silent as well
                            # or verify that all segments contain the same data
                            self.report_warning(bug_reports_message(
                                'Discarding a %s block found in the middle of the stream; '
                                'if the subtitles display incorrectly,'
                                % (type(block).__name__)))
                            continue
                    block.write_into(output)

                return output.getvalue().encode()

            def fin_fragments():
                """Flush the cues still held in the de-duplication window; return encoded text."""
                dedup_window = extra_state.get('webvtt_dedup_window')
                if not dedup_window:
                    return b''

                output = io.StringIO()
                for cue in dedup_window:
                    webvtt.CueBlock.from_json(cue).write_into(output)

                return output.getvalue().encode()

            # A single fragment is written unmodified; packing is only applied
            # when several fragments are downloaded
            if len(fragments) == 1:
                return self.download_and_append_fragments(ctx, fragments, info_dict)
            return self.download_and_append_fragments(
                ctx, fragments, info_dict, pack_func=pack_fragment, finish_func=fin_fragments)
        else:
            return self.download_and_append_fragments(ctx, fragments, info_dict)