]> jfr.im git - yt-dlp.git/blob - yt_dlp/downloader/external.py
Add option `--file-access-retries` (#2066)
[yt-dlp.git] / yt_dlp / downloader / external.py
1 from __future__ import unicode_literals
2
3 import os.path
4 import re
5 import subprocess
6 import sys
7 import time
8
9 from .fragment import FragmentFD
10 from ..compat import (
11 compat_setenv,
12 compat_str,
13 )
14 from ..postprocessor.ffmpeg import FFmpegPostProcessor, EXT_TO_OUT_FORMATS
15 from ..utils import (
16 cli_option,
17 cli_valueless_option,
18 cli_bool_option,
19 _configuration_args,
20 encodeFilename,
21 encodeArgument,
22 handle_youtubedl_headers,
23 check_executable,
24 Popen,
25 )
26
27
class ExternalFD(FragmentFD):
    """Base class for downloaders that hand the transfer to an external program.

    Subclasses either implement ``_make_cmd(tmpfilename, info_dict)`` (it must
    return the command line as a list of strings) or override
    ``_call_downloader`` entirely.  Unless they also override ``available``,
    they must define ``AVAILABLE_OPT``: the option passed to the executable
    when probing whether it can be run.
    """
    SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps')
    can_download_to_stdout = False

    def real_download(self, filename, info_dict):
        """Run the external downloader and finalize its output.

        The data is downloaded under a temporary name.  When the downloader
        reports exit code 0 the temporary file is renamed to *filename*
        (skipped when *filename* is ``'-'``) and the progress hooks receive a
        ``finished`` status.

        Returns True on success and False when the downloader failed.
        """
        self.report_destination(filename)
        tmpfilename = self.temp_name(filename)

        try:
            started = time.time()
            retval = self._call_downloader(tmpfilename, info_dict)
        except KeyboardInterrupt:
            if not info_dict.get('is_live'):
                raise
            # Live stream downloading cancellation should be considered as
            # correct and expected termination thus all postprocessing
            # should take place
            retval = 0
            self.to_screen('[%s] Interrupted by user' % self.get_basename())

        if retval == 0:
            status = {
                'filename': filename,
                'status': 'finished',
                'elapsed': time.time() - started,
            }
            if filename != '-':
                fsize = os.path.getsize(encodeFilename(tmpfilename))
                self.to_screen('\r[%s] Downloaded %s bytes' % (self.get_basename(), fsize))
                self.try_rename(tmpfilename, filename)
                status.update({
                    'downloaded_bytes': fsize,
                    'total_bytes': fsize,
                })
            self._hook_progress(status, info_dict)
            return True
        else:
            self.to_stderr('\n')
            self.report_error('%s exited with code %d' % (
                self.get_basename(), retval))
            return False

    @classmethod
    def get_basename(cls):
        """Return the downloader name: the class name minus its last two
        characters (the ``FD`` suffix), lowercased."""
        return cls.__name__[:-2].lower()

    @property
    def exe(self):
        """Executable to run; defaults to the downloader's basename."""
        return self.get_basename()

    @classmethod
    def available(cls, path=None):
        """Probe for the executable and remember where it was found.

        Returns the value reported by ``check_executable`` when it is truthy,
        otherwise False.
        """
        path = check_executable(path or cls.get_basename(), [cls.AVAILABLE_OPT])
        if path:
            # Replaces the `exe` property on the class with the resolved value
            cls.exe = path
            return path
        return False

    @classmethod
    def supports(cls, info_dict):
        """Tell whether this downloader can handle *info_dict*'s protocol
        (and writing to stdout, if that was requested)."""
        return (
            (cls.can_download_to_stdout or not info_dict.get('to_stdout'))
            and info_dict['protocol'] in cls.SUPPORTED_PROTOCOLS)

    @classmethod
    def can_download(cls, info_dict, path=None):
        """True-ish when the executable is available and the format is supported."""
        return cls.available(path) and cls.supports(info_dict)

    def _option(self, command_option, param):
        """Shortcut for ``cli_option`` applied to this downloader's params."""
        return cli_option(self.params, command_option, param)

    def _bool_option(self, command_option, param, true_value='true', false_value='false', separator=None):
        """Shortcut for ``cli_bool_option`` applied to this downloader's params."""
        return cli_bool_option(self.params, command_option, param, true_value, false_value, separator)

    def _valueless_option(self, command_option, param, expected_value=True):
        """Shortcut for ``cli_valueless_option`` applied to this downloader's params."""
        return cli_valueless_option(self.params, command_option, param, expected_value)

    def _configuration_args(self, keys=None, *args, **kwargs):
        """Look up the user-supplied ``external_downloader_args`` for this downloader."""
        return _configuration_args(
            self.get_basename(), self.params.get('external_downloader_args'), self.get_basename(),
            keys, *args, **kwargs)

    def _call_downloader(self, tmpfilename, info_dict):
        """ Either overwrite this or implement _make_cmd

        Runs the command built by ``_make_cmd`` and returns the process exit
        code.  When *info_dict* has ``fragments`` the command is retried up to
        ``fragment_retries`` times and the downloaded fragment files are then
        concatenated into *tmpfilename*; 0 is returned on success and -1 when
        the fragments could not be fetched or assembled.
        """
        cmd = [encodeArgument(a) for a in self._make_cmd(tmpfilename, info_dict)]

        self._debug_cmd(cmd)

        if 'fragments' not in info_dict:
            p = Popen(cmd, stderr=subprocess.PIPE)
            _, stderr = p.communicate_or_kill()
            if p.returncode != 0:
                self.to_stderr(stderr.decode('utf-8', 'replace'))
            return p.returncode

        fragment_retries = self.params.get('fragment_retries', 0)
        skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)

        count = 0
        while count <= fragment_retries:
            p = Popen(cmd, stderr=subprocess.PIPE)
            _, stderr = p.communicate_or_kill()
            if p.returncode == 0:
                break
            # TODO: Decide whether to retry based on error code
            # https://aria2.github.io/manual/en/html/aria2c.html#exit-status
            self.to_stderr(stderr.decode('utf-8', 'replace'))
            count += 1
            if count <= fragment_retries:
                self.to_screen(
                    '[%s] Got error. Retrying fragments (attempt %d of %s)...'
                    % (self.get_basename(), count, self.format_retries(fragment_retries)))
        if count > fragment_retries:
            if not skip_unavailable_fragments:
                self.report_error('Giving up after %s fragment retries' % fragment_retries)
                return -1

        decrypt_fragment = self.decrypter(info_dict)
        dest, _ = self.sanitize_open(tmpfilename, 'wb')
        # try/finally guarantees the output (and each fragment) is closed even
        # on the early `return -1` or when an error is raised part-way through
        try:
            for frag_index, fragment in enumerate(info_dict['fragments']):
                fragment_filename = '%s-Frag%d' % (tmpfilename, frag_index)
                try:
                    src, _ = self.sanitize_open(fragment_filename, 'rb')
                except IOError as err:
                    if skip_unavailable_fragments and frag_index > 1:
                        self.report_skip_fragment(frag_index, err)
                        continue
                    self.report_error(f'Unable to open fragment {frag_index}; {err}')
                    return -1
                try:
                    dest.write(decrypt_fragment(fragment, src.read()))
                finally:
                    src.close()
                if not self.params.get('keep_fragments', False):
                    os.remove(encodeFilename(fragment_filename))
        finally:
            dest.close()
        os.remove(encodeFilename('%s.frag.urls' % tmpfilename))
        return 0
164
165
class CurlFD(ExternalFD):
    """External downloader that runs curl."""
    AVAILABLE_OPT = '-V'

    def _make_cmd(self, tmpfilename, info_dict):
        """Build the curl command line that saves the URL into *tmpfilename*."""
        argv = [self.exe, '--location', '-o', tmpfilename]

        headers = info_dict.get('http_headers')
        if headers is not None:
            for name, value in headers.items():
                argv.extend(['--header', '%s: %s' % (name, value)])

        argv.extend(self._bool_option('--continue-at', 'continuedl', '-', '0'))
        argv.extend(self._valueless_option('--silent', 'noprogress'))
        argv.extend(self._valueless_option('--verbose', 'verbose'))
        argv.extend(self._option('--limit-rate', 'ratelimit'))

        retry_opt = self._option('--retry', 'retries')
        if len(retry_opt) == 2:
            flag, attempts = retry_opt
            if attempts in ('inf', 'infinite'):
                # Stand-in for "unlimited": the largest signed 32-bit integer
                attempts = '2147483647'
            argv.extend([flag, attempts])

        argv.extend(self._option('--max-filesize', 'max_filesize'))
        argv.extend(self._option('--interface', 'source_address'))
        argv.extend(self._option('--proxy', 'proxy'))
        argv.extend(self._valueless_option('--insecure', 'nocheckcertificate'))
        argv.extend(self._configuration_args())
        argv.extend(['--', info_dict['url']])
        return argv

    def _call_downloader(self, tmpfilename, info_dict):
        """Run curl and return its exit code."""
        argv = [encodeArgument(arg) for arg in self._make_cmd(tmpfilename, info_dict)]
        self._debug_cmd(argv)

        # curl writes the progress to stderr so don't capture it.
        process = Popen(argv)
        process.communicate_or_kill()
        return process.returncode
201
202
class AxelFD(ExternalFD):
    """External downloader that runs axel."""
    AVAILABLE_OPT = '-V'

    def _make_cmd(self, tmpfilename, info_dict):
        """Build the axel command line that saves the URL into *tmpfilename*."""
        argv = [self.exe, '-o', tmpfilename]

        headers = info_dict.get('http_headers')
        if headers is not None:
            for name, value in headers.items():
                argv.extend(['-H', '%s: %s' % (name, value)])

        argv.extend(self._configuration_args())
        argv.extend(['--', info_dict['url']])
        return argv
214
215
class WgetFD(ExternalFD):
    """External downloader that runs wget."""
    AVAILABLE_OPT = '--version'

    def _make_cmd(self, tmpfilename, info_dict):
        """Build the wget command line that saves the URL into *tmpfilename*."""
        argv = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies']

        headers = info_dict.get('http_headers')
        if headers is not None:
            for name, value in headers.items():
                argv.extend(['--header', '%s: %s' % (name, value)])

        argv.extend(self._option('--limit-rate', 'ratelimit'))

        retry_opt = self._option('--tries', 'retries')
        if len(retry_opt) == 2:
            flag, attempts = retry_opt
            if attempts in ('inf', 'infinite'):
                # wget is handed 0 in place of inf/infinite
                # (presumably its spelling of "unlimited" -- see the wget manual)
                attempts = '0'
            argv.extend([flag, attempts])

        argv.extend(self._option('--bind-address', 'source_address'))
        argv.extend(self._option('--proxy', 'proxy'))
        argv.extend(self._valueless_option('--no-check-certificate', 'nocheckcertificate'))
        argv.extend(self._configuration_args())
        argv.extend(['--', info_dict['url']])
        return argv
236
237
class Aria2cFD(ExternalFD):
    """External downloader that runs aria2c, including fragmented downloads."""
    AVAILABLE_OPT = '-v'
    SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps', 'dash_frag_urls', 'm3u8_frag_urls')

    @staticmethod
    def supports_manifest(manifest):
        """Return True when *manifest* uses none of the unsupported features."""
        UNSUPPORTED_FEATURES = [
            r'#EXT-X-BYTERANGE',  # playlists composed of byte ranges of media files [1]
            # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2
        ]
        return not any(re.search(pattern, manifest) for pattern in UNSUPPORTED_FEATURES)

    def _make_cmd(self, tmpfilename, info_dict):
        """Build the aria2c command line.

        For fragmented downloads this also writes the ``<tmpfilename>.frag.urls``
        input file that lists every fragment URL with its output name.
        """
        fragmented = 'fragments' in info_dict

        argv = [
            self.exe, '-c',
            '--console-log-level=warn', '--summary-interval=0', '--download-result=hide',
            '--file-allocation=none', '-x16', '-j16', '-s16',
        ]
        if fragmented:
            argv.extend(['--allow-overwrite=true', '--allow-piece-length-change=true'])
        else:
            argv.extend(['--min-split-size', '1M'])

        headers = info_dict.get('http_headers')
        if headers is not None:
            for name, value in headers.items():
                argv.extend(['--header', '%s: %s' % (name, value)])
        argv.extend(self._option('--max-overall-download-limit', 'ratelimit'))
        argv.extend(self._option('--interface', 'source_address'))
        argv.extend(self._option('--all-proxy', 'proxy'))
        argv.extend(self._bool_option('--check-certificate', 'nocheckcertificate', 'false', 'true', '='))
        argv.extend(self._bool_option('--remote-time', 'updatetime', 'true', 'false', '='))
        argv.extend(self._configuration_args())

        # aria2c strips out spaces from the beginning/end of filenames and paths.
        # We work around this issue by adding a "./" to the beginning of the
        # filename and relative path, and adding a "/" at the end of the path.
        # See: https://github.com/yt-dlp/yt-dlp/issues/276
        # https://github.com/ytdl-org/youtube-dl/issues/20312
        # https://github.com/aria2/aria2/issues/1373
        sep = os.path.sep
        base_name = os.path.basename(tmpfilename)
        out_dir = os.path.dirname(tmpfilename)
        if out_dir:
            if not os.path.isabs(out_dir):
                out_dir = '.%s%s' % (sep, out_dir)
            argv.extend(['--dir', out_dir + sep])
        if not fragmented:
            argv.extend(['--out', '.%s%s' % (sep, base_name)])
        argv.append('--auto-file-renaming=false')

        if not fragmented:
            argv.extend(['--', info_dict['url']])
            return argv

        argv.extend(['--file-allocation=none', '--uri-selector=inorder'])
        url_list_file = '%s.frag.urls' % tmpfilename
        # One entry per fragment: the URL, then an indented `out=` line naming its file
        entries = [
            '%s\n\tout=%s' % (fragment['url'], '%s-Frag%d' % (base_name, frag_index))
            for frag_index, fragment in enumerate(info_dict['fragments'])
        ]
        stream, _ = self.sanitize_open(url_list_file, 'wb')
        stream.write('\n'.join(entries).encode('utf-8'))
        stream.close()
        argv.extend(['-i', url_list_file])
        return argv
299
300
class HttpieFD(ExternalFD):
    """External downloader that runs HTTPie, invoked through its ``http`` command."""
    AVAILABLE_OPT = '--version'

    @classmethod
    def available(cls, path=None):
        """Probe for the executable, defaulting to ``http`` rather than the
        class-derived basename (``httpie``).

        Returns whatever ``ExternalFD.available`` returns for that path.
        """
        # `ExternalFD.available` is already bound to ExternalFD, so passing
        # `cls` explicitly supplied one positional argument too many and
        # raised TypeError.  super() forwards the call with `cls` kept as
        # this class.
        return super().available(path or 'http')

    def _make_cmd(self, tmpfilename, info_dict):
        """Build the ``http --download`` command line that saves the URL into *tmpfilename*."""
        cmd = ['http', '--download', '--output', tmpfilename, info_dict['url']]

        if info_dict.get('http_headers') is not None:
            # Each header is passed as a `Name:value` request item after the URL
            for key, val in info_dict['http_headers'].items():
                cmd += ['%s:%s' % (key, val)]
        return cmd
315
316
class FFmpegFD(ExternalFD):
    """External downloader that lets ffmpeg fetch (and, for several requested
    formats, merge) the streams with ``-c copy``."""
    SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps', 'm3u8', 'm3u8_native', 'rtsp', 'rtmp', 'rtmp_ffmpeg', 'mms', 'http_dash_segments')
    can_download_to_stdout = True

    @classmethod
    def available(cls, path=None):
        """Report whether ffmpeg is usable; *path* is currently ignored."""
        # TODO: Fix path for ffmpeg
        # Fixme: This may be wrong when --ffmpeg-location is used
        return FFmpegPostProcessor().available

    @classmethod
    def supports(cls, info_dict):
        """True when every ``+``-separated protocol of the format is supported."""
        return all(proto in cls.SUPPORTED_PROTOCOLS for proto in info_dict['protocol'].split('+'))

    def on_process_started(self, proc, stdin):
        """ Override this in subclasses

        Called with the ffmpeg process and its stdin right after launch, but
        only when the input URL is ``-`` or ``pipe:``.
        """
        pass

    @classmethod
    def can_merge_formats(cls, info_dict, params):
        """Truthy when ffmpeg may download and merge the requested formats directly."""
        return (
            info_dict.get('requested_formats')
            and info_dict.get('protocol')
            and not params.get('allow_unplayable_formats')
            and 'no-direct-merge' not in params.get('compat_opts', [])
            and cls.can_download(info_dict))

    def _call_downloader(self, tmpfilename, info_dict):
        """Build the ffmpeg command line, run it and return its exit code.

        Returns -1 (after reporting an error) when ffmpeg cannot be found.
        Any exception raised while waiting for ffmpeg is re-raised after the
        process has been stopped.
        """
        urls = [f['url'] for f in info_dict.get('requested_formats', [])] or [info_dict['url']]
        ffpp = FFmpegPostProcessor(downloader=self)
        if not ffpp.available:
            self.report_error('m3u8 download detected but ffmpeg could not be found. Please install')
            # Must be a non-zero code: the caller tests `retval == 0`, and
            # `False == 0` is true, so returning False was treated as success
            return -1
        ffpp.check_version()

        args = [ffpp.executable, '-y']

        for log_level in ('quiet', 'verbose'):
            if self.params.get(log_level, False):
                args += ['-loglevel', log_level]
                break
        if not self.params.get('verbose'):
            args += ['-hide_banner']

        args += info_dict.get('_ffmpeg_args', [])

        # This option exists only for compatibility. Extractors should use `_ffmpeg_args` instead
        seekable = info_dict.get('_seekable')
        if seekable is not None:
            # setting -seekable prevents ffmpeg from guessing if the server
            # supports seeking(by adding the header `Range: bytes=0-`), which
            # can cause problems in some cases
            # https://github.com/ytdl-org/youtube-dl/issues/11800#issuecomment-275037127
            # http://trac.ffmpeg.org/ticket/6125#comment:10
            args += ['-seekable', '1' if seekable else '0']

        # start_time = info_dict.get('start_time') or 0
        # if start_time:
        #     args += ['-ss', compat_str(start_time)]
        # end_time = info_dict.get('end_time')
        # if end_time:
        #     args += ['-t', compat_str(end_time - start_time)]

        if info_dict.get('http_headers') is not None and re.match(r'^https?://', urls[0]):
            # Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv:
            # [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.
            headers = handle_youtubedl_headers(info_dict['http_headers'])
            args += [
                '-headers',
                ''.join('%s: %s\r\n' % (key, val) for key, val in headers.items())]

        env = None
        proxy = self.params.get('proxy')
        if proxy:
            if not re.match(r'^[\da-zA-Z]+://', proxy):
                proxy = 'http://%s' % proxy

            if proxy.startswith('socks'):
                self.report_warning(
                    '%s does not support SOCKS proxies. Downloading is likely to fail. '
                    'Consider adding --hls-prefer-native to your command.' % self.get_basename())

            # Since December 2015 ffmpeg supports -http_proxy option (see
            # http://git.videolan.org/?p=ffmpeg.git;a=commit;h=b4eb1f29ebddd60c41a2eb39f5af701e38e0d3fd)
            # We could switch to the following code if we are able to detect version properly
            # args += ['-http_proxy', proxy]
            env = os.environ.copy()
            compat_setenv('HTTP_PROXY', proxy, env=env)
            compat_setenv('http_proxy', proxy, env=env)

        protocol = info_dict.get('protocol')

        if protocol == 'rtmp':
            player_url = info_dict.get('player_url')
            page_url = info_dict.get('page_url')
            app = info_dict.get('app')
            play_path = info_dict.get('play_path')
            tc_url = info_dict.get('tc_url')
            flash_version = info_dict.get('flash_version')
            live = info_dict.get('rtmp_live', False)
            conn = info_dict.get('rtmp_conn')
            if player_url is not None:
                args += ['-rtmp_swfverify', player_url]
            if page_url is not None:
                args += ['-rtmp_pageurl', page_url]
            if app is not None:
                args += ['-rtmp_app', app]
            if play_path is not None:
                args += ['-rtmp_playpath', play_path]
            if tc_url is not None:
                args += ['-rtmp_tcurl', tc_url]
            if flash_version is not None:
                args += ['-rtmp_flashver', flash_version]
            if live:
                args += ['-rtmp_live', 'live']
            if isinstance(conn, list):
                for entry in conn:
                    args += ['-rtmp_conn', entry]
            elif isinstance(conn, compat_str):
                args += ['-rtmp_conn', conn]

        # Per-input user arguments go right before the `-i` they apply to
        for i, url in enumerate(urls):
            args += self._configuration_args((f'_i{i + 1}', '_i')) + ['-i', url]

        args += ['-c', 'copy']
        if info_dict.get('requested_formats') or protocol == 'http_dash_segments':
            for (i, fmt) in enumerate(info_dict.get('requested_formats') or [info_dict]):
                stream_number = fmt.get('manifest_stream_number', 0)
                args.extend(['-map', f'{i}:{stream_number}'])

        if self.params.get('test', False):
            args += ['-fs', compat_str(self._TEST_FILE_SIZE)]

        ext = info_dict['ext']
        if protocol in ('m3u8', 'm3u8_native'):
            use_mpegts = (tmpfilename == '-') or self.params.get('hls_use_mpegts')
            if use_mpegts is None:
                use_mpegts = info_dict.get('is_live')
            if use_mpegts:
                args += ['-f', 'mpegts']
            else:
                args += ['-f', 'mp4']
                if (ffpp.basename == 'ffmpeg' and ffpp._features.get('needs_adtstoasc')) and (not info_dict.get('acodec') or info_dict['acodec'].split('.')[0] in ('aac', 'mp4a')):
                    args += ['-bsf:a', 'aac_adtstoasc']
        elif protocol == 'rtmp':
            args += ['-f', 'flv']
        elif ext == 'mp4' and tmpfilename == '-':
            args += ['-f', 'mpegts']
        else:
            args += ['-f', EXT_TO_OUT_FORMATS.get(ext, ext)]

        args += self._configuration_args(('_o1', '_o', ''))

        args = [encodeArgument(opt) for opt in args]
        args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True))
        self._debug_cmd(args)

        # The last input decides whether ffmpeg reads from our pipe (this is
        # the value the loop variable `url` was left holding after the loop)
        reads_from_pipe = urls[-1] in ('-', 'pipe:')

        proc = Popen(args, stdin=subprocess.PIPE, env=env)
        if reads_from_pipe:
            self.on_process_started(proc, proc.stdin)
        try:
            retval = proc.wait()
        except BaseException as e:
            # subprocess.run would send the SIGKILL signal to ffmpeg and the
            # mp4 file couldn't be played, but if we ask ffmpeg to quit it
            # produces a file that is playable (this is mostly useful for live
            # streams). Note that Windows is not affected and produces playable
            # files (see https://github.com/ytdl-org/youtube-dl/issues/8300).
            if isinstance(e, KeyboardInterrupt) and sys.platform != 'win32' and not reads_from_pipe:
                proc.communicate_or_kill(b'q')
            else:
                proc.kill()
                proc.wait()
            raise
        return retval
492
493
class AVconvFD(FFmpegFD):
    """FFmpegFD under another class name; no behavior is overridden.

    Only the class name differs, so the derived basename is ``avconv``.
    """
496
497
# Registry mapping each downloader's basename to its class.  It is built from
# every module-level name ending in "FD", except the two base classes.
# Iterates over a snapshot so the module namespace is not walked while in use.
_BY_NAME = {
    klass.get_basename(): klass
    for name, klass in list(globals().items())
    if name.endswith('FD') and name not in ('ExternalFD', 'FragmentFD')
}
503
504
def list_external_downloaders():
    """Return the names of all registered external downloaders, sorted."""
    names = list(_BY_NAME)
    names.sort()
    return names
507
508
def get_external_downloader(external_downloader):
    """ Given the name of the executable, see whether we support the given
        downloader .

    Returns the matching downloader class, or None when there is none.
    """
    executable = os.path.basename(external_downloader)
    # Drop .exe extension on Windows
    stem, _ext = os.path.splitext(executable)
    return _BY_NAME.get(stem)