from __future__ import division, unicode_literals

import http.client
import json
import math
import os
import time

try:
    import concurrent.futures
    can_threaded_download = True
except ImportError:
    can_threaded_download = False

from .common import FileDownloader
from .http import HttpFD
from ..aes import aes_cbc_decrypt_bytes
from ..compat import (
    compat_os_name,
    compat_urllib_error,
    compat_struct_pack,
)
from ..utils import (
    DownloadError,
    error_to_compat_str,
    encodeFilename,
    sanitize_open,
    sanitized_Request,
)


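# Quiet variant of HttpFD used for the individual fragment requests: it suppresses the
# normal per-download screen output so that only the aggregate fragment progress is shown.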
class HttpQuietDownloader(HttpFD):
    def to_screen(self, *args, **kargs):
        pass

    def report_retry(self, err, count, retries):
        super().to_screen(
            f'[download] Got server HTTP error: {err}. Retrying (attempt {count} of {self.format_retries(retries)}) ...')


class FragmentFD(FileDownloader):
    """
    A base file downloader class for fragmented media (e.g. f4m/m3u8 manifests).

    Available options:

    fragment_retries:   Number of times to retry a fragment for HTTP error (DASH
                        and hlsnative only)
    skip_unavailable_fragments:
                        Skip unavailable fragments (DASH and hlsnative only)
    keep_fragments:     Keep downloaded fragments on disk after downloading is
                        finished
    concurrent_fragment_downloads:  The number of threads to use for native hls and dash downloads
    _no_ytdl_file:      Don't use .ytdl file

    For each incomplete fragment download yt-dlp keeps on disk a special
    bookkeeping file with download state and metadata (in future such files will
    be used for any incomplete download handled by yt-dlp). This file is
    used to properly handle resuming, check download file consistency and detect
    potential errors. The file has a .ytdl extension and represents a standard
    JSON file of the following format:

    extractor:
        Dictionary of extractor related data. TBD.

    downloader:
        Dictionary of downloader related data. May contain following data:
            current_fragment:
                Dictionary with current (being downloaded) fragment data:
                    index:  0-based index of current fragment among all fragments
            fragment_count:
                Total count of fragments

    This feature is experimental and file format may change in future.
    """
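
    # Illustrative sketch only (not part of the original file; the numbers are hypothetical):
    # for a download that has completed 42 of 120 fragments, the .ytdl file written by
    # _write_ytdl_file() below would look roughly like
    #     {"downloader": {"current_fragment": {"index": 42}, "fragment_count": 120}}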

    def report_retry_fragment(self, err, frag_index, count, retries):
        self.to_screen(
            '\r[download] Got server HTTP error: %s. Retrying fragment %d (attempt %d of %s) ...'
            % (error_to_compat_str(err), frag_index, count, self.format_retries(retries)))

    def report_skip_fragment(self, frag_index, err=None):
        err = f' {err};' if err else ''
        self.to_screen(f'[download]{err} Skipping fragment {frag_index:d} ...')

    def _prepare_url(self, info_dict, url):
        headers = info_dict.get('http_headers')
        return sanitized_Request(url, None, headers) if headers else url

    def _prepare_and_start_frag_download(self, ctx, info_dict):
        self._prepare_frag_download(ctx)
        self._start_frag_download(ctx, info_dict)

    def __do_ytdl_file(self, ctx):
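        # The .ytdl bookkeeping file is only used when the download is not live, is not
        # being written to stdout ('-') and has not been disabled via _no_ytdl_file.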
        return ctx['live'] is not True and ctx['tmpfilename'] != '-' and not self.params.get('_no_ytdl_file')

    def _read_ytdl_file(self, ctx):
        assert 'ytdl_corrupt' not in ctx
        stream, _ = sanitize_open(self.ytdl_filename(ctx['filename']), 'r')
        try:
            ytdl_data = json.loads(stream.read())
            ctx['fragment_index'] = ytdl_data['downloader']['current_fragment']['index']
            if 'extra_state' in ytdl_data['downloader']:
                ctx['extra_state'] = ytdl_data['downloader']['extra_state']
        except Exception:
            ctx['ytdl_corrupt'] = True
        finally:
            stream.close()

    def _write_ytdl_file(self, ctx):
        frag_index_stream, _ = sanitize_open(self.ytdl_filename(ctx['filename']), 'w')
        try:
            downloader = {
                'current_fragment': {
                    'index': ctx['fragment_index'],
                },
            }
            if 'extra_state' in ctx:
                downloader['extra_state'] = ctx['extra_state']
            if ctx.get('fragment_count') is not None:
                downloader['fragment_count'] = ctx['fragment_count']
            frag_index_stream.write(json.dumps({'downloader': downloader}))
        finally:
            frag_index_stream.close()

    def _download_fragment(self, ctx, frag_url, info_dict, headers=None, request_data=None):
        fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], ctx['fragment_index'])
        fragment_info_dict = {
            'url': frag_url,
            'http_headers': headers or info_dict.get('http_headers'),
            'request_data': request_data,
            'ctx_id': ctx.get('ctx_id'),
        }
        success = ctx['dl'].download(fragment_filename, fragment_info_dict)
        if not success:
            return False, None
        if fragment_info_dict.get('filetime'):
            ctx['fragment_filetime'] = fragment_info_dict.get('filetime')
        ctx['fragment_filename_sanitized'] = fragment_filename
        return True, self._read_fragment(ctx)

    def _read_fragment(self, ctx):
        down, frag_sanitized = sanitize_open(ctx['fragment_filename_sanitized'], 'rb')
        ctx['fragment_filename_sanitized'] = frag_sanitized
        frag_content = down.read()
        down.close()
        return frag_content

    def _append_fragment(self, ctx, frag_content):
        try:
            ctx['dest_stream'].write(frag_content)
            ctx['dest_stream'].flush()
        finally:
            if self.__do_ytdl_file(ctx):
                self._write_ytdl_file(ctx)
            if not self.params.get('keep_fragments', False):
                os.remove(encodeFilename(ctx['fragment_filename_sanitized']))
            del ctx['fragment_filename_sanitized']

    def _prepare_frag_download(self, ctx):
        if 'live' not in ctx:
            ctx['live'] = False
        if not ctx['live']:
            total_frags_str = '%d' % ctx['total_frags']
            ad_frags = ctx.get('ad_frags', 0)
            if ad_frags:
                total_frags_str += ' (not including %d ad)' % ad_frags
        else:
            total_frags_str = 'unknown (live)'
        self.to_screen(
            '[%s] Total fragments: %s' % (self.FD_NAME, total_frags_str))
        self.report_destination(ctx['filename'])
        dl = HttpQuietDownloader(
            self.ydl,
            {
                'continuedl': True,
                'quiet': self.params.get('quiet'),
                'noprogress': True,
                'ratelimit': self.params.get('ratelimit'),
                'retries': self.params.get('retries', 0),
                'nopart': self.params.get('nopart', False),
                'test': self.params.get('test', False),
            }
        )
        tmpfilename = self.temp_name(ctx['filename'])
        open_mode = 'wb'
        resume_len = 0

        # Establish possible resume length
        if os.path.isfile(encodeFilename(tmpfilename)):
            open_mode = 'ab'
            resume_len = os.path.getsize(encodeFilename(tmpfilename))

        # Should be initialized before ytdl file check
        ctx.update({
            'tmpfilename': tmpfilename,
            'fragment_index': 0,
        })

        if self.__do_ytdl_file(ctx):
            if os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename']))):
                self._read_ytdl_file(ctx)
                is_corrupt = ctx.get('ytdl_corrupt') is True
                is_inconsistent = ctx['fragment_index'] > 0 and resume_len == 0
                if is_corrupt or is_inconsistent:
                    message = (
                        '.ytdl file is corrupt' if is_corrupt else
                        'Inconsistent state of incomplete fragment download')
                    self.report_warning(
                        '%s. Restarting from the beginning ...' % message)
                    ctx['fragment_index'] = resume_len = 0
                    if 'ytdl_corrupt' in ctx:
                        del ctx['ytdl_corrupt']
                    self._write_ytdl_file(ctx)
            else:
                self._write_ytdl_file(ctx)
                assert ctx['fragment_index'] == 0

        dest_stream, tmpfilename = sanitize_open(tmpfilename, open_mode)

        ctx.update({
            'dl': dl,
            'dest_stream': dest_stream,
            'tmpfilename': tmpfilename,
            # Total complete fragments downloaded so far in bytes
            'complete_frags_downloaded_bytes': resume_len,
        })

    def _start_frag_download(self, ctx, info_dict):
        resume_len = ctx['complete_frags_downloaded_bytes']
        total_frags = ctx['total_frags']
        ctx_id = ctx.get('ctx_id')
        # This dict stores the download progress, it's updated by the progress
        # hook
        state = {
            'status': 'downloading',
            'downloaded_bytes': resume_len,
            'fragment_index': ctx['fragment_index'],
            'fragment_count': total_frags,
            'filename': ctx['filename'],
            'tmpfilename': ctx['tmpfilename'],
        }

        start = time.time()
        ctx.update({
            'started': start,
            'fragment_started': start,
            # Amount of fragment's bytes downloaded by the time of the previous
            # frag progress hook invocation
            'prev_frag_downloaded_bytes': 0,
        })

        def frag_progress_hook(s):
            if s['status'] not in ('downloading', 'finished'):
                return

            if ctx_id is not None and s.get('ctx_id') != ctx_id:
                return

            state['max_progress'] = ctx.get('max_progress')
            state['progress_idx'] = ctx.get('progress_idx')

            time_now = time.time()
            state['elapsed'] = time_now - start
            frag_total_bytes = s.get('total_bytes') or 0
            s['fragment_info_dict'] = s.pop('info_dict', {})
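            # Estimate the final size by extrapolating the average fragment size seen so
            # far (completed bytes plus the current fragment) to the total fragment count.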
            if not ctx['live']:
                estimated_size = (
                    (ctx['complete_frags_downloaded_bytes'] + frag_total_bytes)
                    / (state['fragment_index'] + 1) * total_frags)
                state['total_bytes_estimate'] = estimated_size

            if s['status'] == 'finished':
                state['fragment_index'] += 1
                ctx['fragment_index'] = state['fragment_index']
                state['downloaded_bytes'] += frag_total_bytes - ctx['prev_frag_downloaded_bytes']
                ctx['complete_frags_downloaded_bytes'] = state['downloaded_bytes']
                ctx['speed'] = state['speed'] = self.calc_speed(
                    ctx['fragment_started'], time_now, frag_total_bytes)
                ctx['fragment_started'] = time.time()
                ctx['prev_frag_downloaded_bytes'] = 0
            else:
                frag_downloaded_bytes = s['downloaded_bytes']
                state['downloaded_bytes'] += frag_downloaded_bytes - ctx['prev_frag_downloaded_bytes']
                if not ctx['live']:
                    state['eta'] = self.calc_eta(
                        start, time_now, estimated_size - resume_len,
                        state['downloaded_bytes'] - resume_len)
                ctx['speed'] = state['speed'] = self.calc_speed(
                    ctx['fragment_started'], time_now, frag_downloaded_bytes)
                ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes
            self._hook_progress(state, info_dict)

        ctx['dl'].add_progress_hook(frag_progress_hook)

        return start

    def _finish_frag_download(self, ctx, info_dict):
        ctx['dest_stream'].close()
        if self.__do_ytdl_file(ctx):
            ytdl_filename = encodeFilename(self.ytdl_filename(ctx['filename']))
            if os.path.isfile(ytdl_filename):
                os.remove(ytdl_filename)
        elapsed = time.time() - ctx['started']

        if ctx['tmpfilename'] == '-':
            downloaded_bytes = ctx['complete_frags_downloaded_bytes']
        else:
            self.try_rename(ctx['tmpfilename'], ctx['filename'])
            if self.params.get('updatetime', True):
                filetime = ctx.get('fragment_filetime')
                if filetime:
                    try:
                        os.utime(ctx['filename'], (time.time(), filetime))
                    except Exception:
                        pass
            downloaded_bytes = os.path.getsize(encodeFilename(ctx['filename']))

        self._hook_progress({
            'downloaded_bytes': downloaded_bytes,
            'total_bytes': downloaded_bytes,
            'filename': ctx['filename'],
            'status': 'finished',
            'elapsed': elapsed,
            'ctx_id': ctx.get('ctx_id'),
            'max_progress': ctx.get('max_progress'),
            'progress_idx': ctx.get('progress_idx'),
        }, info_dict)

    def _prepare_external_frag_download(self, ctx):
        if 'live' not in ctx:
            ctx['live'] = False
        if not ctx['live']:
            total_frags_str = '%d' % ctx['total_frags']
            ad_frags = ctx.get('ad_frags', 0)
            if ad_frags:
                total_frags_str += ' (not including %d ad)' % ad_frags
        else:
            total_frags_str = 'unknown (live)'
        self.to_screen(
            '[%s] Total fragments: %s' % (self.FD_NAME, total_frags_str))

        tmpfilename = self.temp_name(ctx['filename'])

        # Should be initialized before ytdl file check
        ctx.update({
            'tmpfilename': tmpfilename,
            'fragment_index': 0,
        })

    def decrypter(self, info_dict):
        _key_cache = {}

        def _get_key(url):
            if url not in _key_cache:
                _key_cache[url] = self.ydl.urlopen(self._prepare_url(info_dict, url)).read()
            return _key_cache[url]

        def decrypt_fragment(fragment, frag_content):
            decrypt_info = fragment.get('decrypt_info')
            if not decrypt_info or decrypt_info['METHOD'] != 'AES-128':
                return frag_content
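            # Per the HLS spec (RFC 8216), when no IV attribute is provided the IV is the
            # 16-byte big-endian representation of the fragment's media sequence number,
            # which is what the struct format '>8xq' (8 padding bytes + 64-bit int) packs.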
            iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', fragment['media_sequence'])
            decrypt_info['KEY'] = decrypt_info.get('KEY') or _get_key(info_dict.get('_decryption_key_url') or decrypt_info['URI'])
            # Don't decrypt the content in tests since the data is explicitly truncated and is not padded
            # to a valid block size (see https://github.com/ytdl-org/youtube-dl/pull/27660). Tests only
            # care that the correct data was downloaded, not what it decrypts to.
            if self.params.get('test', False):
                return frag_content
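            # The last byte of the decrypted data gives the PKCS#7 padding length; strip
            # that many bytes from the end to recover the original fragment.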
            decrypted_data = aes_cbc_decrypt_bytes(frag_content, decrypt_info['KEY'], iv)
            return decrypted_data[:-decrypted_data[-1]]

        return decrypt_fragment

    def download_and_append_fragments_multiple(self, *args, pack_func=None, finish_func=None):
        '''
        @params (ctx1, fragments1, info_dict1), (ctx2, fragments2, info_dict2), ...
        all args must be either tuple or list
        '''
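        # Illustrative call shape only (the variable names below are hypothetical):
        #     self.download_and_append_fragments_multiple(
        #         (video_ctx, video_frags, video_info_dict),
        #         (audio_ctx, audio_frags, audio_info_dict))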
        interrupt_trigger = [True]
        max_progress = len(args)
        if max_progress == 1:
            return self.download_and_append_fragments(*args[0], pack_func=pack_func, finish_func=finish_func)
        max_workers = self.params.get('concurrent_fragment_downloads', 1)
        if max_progress > 1:
            self._prepare_multiline_status(max_progress)

        def thread_func(idx, ctx, fragments, info_dict, tpe):
            ctx['max_progress'] = max_progress
            ctx['progress_idx'] = idx
            return self.download_and_append_fragments(
                ctx, fragments, info_dict, pack_func=pack_func, finish_func=finish_func,
                tpe=tpe, interrupt_trigger=interrupt_trigger)

        class FTPE(concurrent.futures.ThreadPoolExecutor):
            # Override __exit__ with a no-op: the default shutdown(wait=True) would make
            # the executor wait on the worker thread itself
            def __exit__(self, exc_type, exc_val, exc_tb):
                pass

        spins = []
        if compat_os_name == 'nt':
            self.report_warning('Ctrl+C does not work on Windows when used with parallel threads. '
                                'This is a known issue and patches are welcome')
        for idx, (ctx, fragments, info_dict) in enumerate(args):
            tpe = FTPE(math.ceil(max_workers / max_progress))
            job = tpe.submit(thread_func, idx, ctx, fragments, info_dict, tpe)
            spins.append((tpe, job))

        result = True
        for tpe, job in spins:
            try:
                result = result and job.result()
            except KeyboardInterrupt:
                interrupt_trigger[0] = False
            finally:
                tpe.shutdown(wait=True)
        if not interrupt_trigger[0]:
            raise KeyboardInterrupt()
        return result

    def download_and_append_fragments(
            self, ctx, fragments, info_dict, *, pack_func=None, finish_func=None,
            tpe=None, interrupt_trigger=None):
        if not interrupt_trigger:
            interrupt_trigger = (True, )

        fragment_retries = self.params.get('fragment_retries', 0)
        is_fatal = (
            ((lambda _: False) if info_dict.get('is_live') else (lambda idx: idx == 0))
            if self.params.get('skip_unavailable_fragments', True) else (lambda _: True))
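        # With skip_unavailable_fragments (the default), only the very first fragment is
        # fatal, and no fragment is fatal for live streams; with skipping disabled, every
        # fragment is fatal.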

        if not pack_func:
            pack_func = lambda frag_content, _: frag_content

        def download_fragment(fragment, ctx):
            frag_index = ctx['fragment_index'] = fragment['frag_index']
            if not interrupt_trigger[0]:
                return False, frag_index
            headers = info_dict.get('http_headers', {}).copy()
            byte_range = fragment.get('byte_range')
            if byte_range:
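                # The fragment's byte_range uses an exclusive end offset, while the HTTP
                # Range header is inclusive, hence the "- 1".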
                headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'] - 1)

            # Never skip the first fragment
            fatal = is_fatal(fragment.get('index') or (frag_index - 1))
            count, frag_content = 0, None
            while count <= fragment_retries:
                try:
                    success, frag_content = self._download_fragment(ctx, fragment['url'], info_dict, headers)
                    if not success:
                        return False, frag_index
                    break
                except (compat_urllib_error.HTTPError, http.client.IncompleteRead) as err:
                    # Unavailable (possibly temporary) fragments may be served.
                    # Retry first, then either skip or abort.
                    # See https://github.com/ytdl-org/youtube-dl/issues/10165 and
                    # https://github.com/ytdl-org/youtube-dl/issues/10448
                    count += 1
                    if count <= fragment_retries:
                        self.report_retry_fragment(err, frag_index, count, fragment_retries)
                except DownloadError:
                    # Don't retry a fragment if the error occurred during HTTP downloading
                    # itself, since that has its own retry settings
                    if not fatal:
                        break
                    raise

            if count > fragment_retries:
                if not fatal:
                    return False, frag_index
                ctx['dest_stream'].close()
                self.report_error('Giving up after %s fragment retries' % fragment_retries)
                return False, frag_index
            return frag_content, frag_index

        def append_fragment(frag_content, frag_index, ctx):
            if not frag_content:
                if not is_fatal(frag_index - 1):
                    self.report_skip_fragment(frag_index, 'fragment not found')
                    return True
                else:
                    ctx['dest_stream'].close()
                    self.report_error(
                        'fragment %s not found, unable to continue' % frag_index)
                    return False
            self._append_fragment(ctx, pack_func(frag_content, frag_index))
            return True

        decrypt_fragment = self.decrypter(info_dict)

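        # Divide the configured concurrency evenly between the simultaneous progress
        # contexts (e.g. when download_and_append_fragments_multiple drives several
        # fragmented streams at once).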
        max_workers = math.ceil(
            self.params.get('concurrent_fragment_downloads', 1) / ctx.get('max_progress', 1))
        if can_threaded_download and max_workers > 1:

            def _download_fragment(fragment):
                ctx_copy = ctx.copy()
                frag_content, frag_index = download_fragment(fragment, ctx_copy)
                return fragment, frag_content, frag_index, ctx_copy.get('fragment_filename_sanitized')

            self.report_warning('The download speed shown is only that of one thread. This is a known issue and patches are welcome')
            with tpe or concurrent.futures.ThreadPoolExecutor(max_workers) as pool:
                for fragment, frag_content, frag_index, frag_filename in pool.map(_download_fragment, fragments):
                    if not interrupt_trigger[0]:
                        break
                    ctx['fragment_filename_sanitized'] = frag_filename
                    ctx['fragment_index'] = frag_index
                    result = append_fragment(decrypt_fragment(fragment, frag_content), frag_index, ctx)
                    if not result:
                        return False
        else:
            for fragment in fragments:
                if not interrupt_trigger[0]:
                    break
                frag_content, frag_index = download_fragment(fragment, ctx)
                result = append_fragment(decrypt_fragment(fragment, frag_content), frag_index, ctx)
                if not result:
                    return False

        if finish_func is not None:
            ctx['dest_stream'].write(finish_func())
            ctx['dest_stream'].flush()
        self._finish_frag_download(ctx, info_dict)
        return True