]> jfr.im git - yt-dlp.git/blame - youtube_dl/downloader/external.py
[yahoo] Extract all <iframe>s
[yt-dlp.git] / youtube_dl / downloader / external.py
CommitLineData
222516d9
PH
1from __future__ import unicode_literals
2
3import os.path
4import subprocess
12b84ac8 5import sys
6import re
222516d9
PH
7
8from .common import FileDownloader
a755f825 9from ..postprocessor.ffmpeg import FFmpegPostProcessor, EXT_TO_OUT_FORMATS
222516d9 10from ..utils import (
1195a38f
S
11 cli_option,
12 cli_valueless_option,
13 cli_bool_option,
14 cli_configuration_args,
222516d9 15 encodeFilename,
74f8654a 16 encodeArgument,
12b84ac8 17 handle_youtubedl_headers,
99cbe98c 18 check_executable,
222516d9
PH
19)
20
21
class ExternalFD(FileDownloader):
    """Base class for downloaders that hand the transfer to an external program.

    Subclasses either implement ``_make_cmd`` (returning the argument list to
    execute) or override ``_call_downloader`` completely. Subclasses that rely
    on the default ``available`` must define ``AVAILABLE_OPT``, the option
    passed to the executable when probing for its presence.
    """

    def real_download(self, filename, info_dict):
        """Run the external downloader and report the outcome.

        Returns True when the downloader signalled success (exit code 0) and
        False otherwise.
        """
        self.report_destination(filename)
        tmpfilename = self.temp_name(filename)

        retval = self._call_downloader(tmpfilename, info_dict)
        if retval is False:
            # FFmpegFD._call_downloader returns False after reporting the
            # error itself. bool is an int subclass and False == 0, so
            # without this guard the failure would be mistaken for a
            # successful exit code.
            return False
        if retval != 0:
            self.to_stderr('\n')
            self.report_error('%s exited with code %d' % (
                self.get_basename(), retval))
            return False

        status = {
            'filename': filename,
            'status': 'finished',
        }
        # '-' is special-cased by FFmpegFD as a non-file target (presumably
        # stdout), so there is nothing on disk to stat or rename.
        if filename != '-':
            fsize = os.path.getsize(encodeFilename(tmpfilename))
            self.to_screen('\r[%s] Downloaded %s bytes' % (self.get_basename(), fsize))
            self.try_rename(tmpfilename, filename)
            status.update({
                'downloaded_bytes': fsize,
                'total_bytes': fsize,
            })
        self._hook_progress(status)
        return True

    @classmethod
    def get_basename(cls):
        """Return the lower-cased class name without its two-letter 'FD' suffix."""
        return cls.__name__[:-2].lower()

    @property
    def exe(self):
        """The value of the 'external_downloader' parameter."""
        return self.params.get('external_downloader')

    @classmethod
    def available(cls):
        """Probe for the executable by running it with ``AVAILABLE_OPT``."""
        return check_executable(cls.get_basename(), [cls.AVAILABLE_OPT])

    @classmethod
    def supports(cls, info_dict):
        """Return True when the format's protocol is http, https, ftp or ftps."""
        return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps')

    @classmethod
    def can_download(cls, info_dict):
        """Return True when the executable is available and the protocol is supported."""
        return cls.available() and cls.supports(info_dict)

    def _option(self, command_option, param):
        """Forward to ``cli_option`` using this downloader's params."""
        return cli_option(self.params, command_option, param)

    def _bool_option(self, command_option, param, true_value='true', false_value='false', separator=None):
        """Forward to ``cli_bool_option`` using this downloader's params."""
        return cli_bool_option(self.params, command_option, param, true_value, false_value, separator)

    def _valueless_option(self, command_option, param, expected_value=True):
        """Forward to ``cli_valueless_option`` using this downloader's params."""
        return cli_valueless_option(self.params, command_option, param, expected_value)

    def _configuration_args(self, default=None):
        """Look up 'external_downloader_args' via ``cli_configuration_args``.

        ``default`` is the fallback argument list; omitting it (or passing
        None) means an empty list. A fresh list is created on every call so
        that no mutable default object is shared between calls.
        """
        if default is None:
            default = []
        return cli_configuration_args(self.params, 'external_downloader_args', default)

    def _call_downloader(self, tmpfilename, info_dict):
        """ Either overwrite this or implement _make_cmd """
        cmd = [encodeArgument(a) for a in self._make_cmd(tmpfilename, info_dict)]

        self._debug_cmd(cmd)

        p = subprocess.Popen(
            cmd, stderr=subprocess.PIPE)
        _, stderr = p.communicate()
        if p.returncode != 0:
            # communicate() yields bytes for a piped stream; decode leniently
            # so that to_stderr receives text like every other call site and
            # undecodable output cannot raise.
            self.to_stderr(stderr.decode('utf-8', 'replace'))
        return p.returncode
89
90
class CurlFD(ExternalFD):
    """Downloader that delegates the transfer to the curl executable."""

    # Option handed to the executable when probing whether it is installed.
    AVAILABLE_OPT = '-V'

    def _make_cmd(self, tmpfilename, info_dict):
        """Assemble the argument list that saves info_dict['url'] to tmpfilename."""
        command = [self.exe, '--location', '-o', tmpfilename]
        for name, value in info_dict['http_headers'].items():
            command.extend(['--header', '%s: %s' % (name, value)])
        command.extend(self._option('--interface', 'source_address'))
        command.extend(self._option('--proxy', 'proxy'))
        command.extend(self._valueless_option('--insecure', 'nocheckcertificate'))
        command.extend(self._configuration_args())
        # The URL goes last, after a '--' separator.
        command.extend(['--', info_dict['url']])
        return command
104
105
class AxelFD(ExternalFD):
    """Downloader that delegates the transfer to the axel executable."""

    # Option handed to the executable when probing whether it is installed.
    AVAILABLE_OPT = '-V'

    def _make_cmd(self, tmpfilename, info_dict):
        """Assemble the argument list that saves info_dict['url'] to tmpfilename."""
        command = [self.exe, '-o', tmpfilename]
        for name, value in info_dict['http_headers'].items():
            command.extend(['-H', '%s: %s' % (name, value)])
        command.extend(self._configuration_args())
        # The URL goes last, after a '--' separator.
        command.extend(['--', info_dict['url']])
        return command
116
117
class WgetFD(ExternalFD):
    """Downloader that delegates the transfer to the wget executable."""

    # Option handed to the executable when probing whether it is installed.
    AVAILABLE_OPT = '--version'

    def _make_cmd(self, tmpfilename, info_dict):
        """Assemble the argument list that saves info_dict['url'] to tmpfilename."""
        command = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies']
        for name, value in info_dict['http_headers'].items():
            command.extend(['--header', '%s: %s' % (name, value)])
        command.extend(self._option('--bind-address', 'source_address'))
        command.extend(self._option('--proxy', 'proxy'))
        command.extend(self._valueless_option('--no-check-certificate', 'nocheckcertificate'))
        command.extend(self._configuration_args())
        # The URL goes last, after a '--' separator.
        command.extend(['--', info_dict['url']])
        return command
131
132
class Aria2cFD(ExternalFD):
    """Downloader that delegates the transfer to the aria2c executable."""

    # Option handed to the executable when probing whether it is installed.
    AVAILABLE_OPT = '-v'

    def _make_cmd(self, tmpfilename, info_dict):
        """Assemble the argument list that saves info_dict['url'] to tmpfilename."""
        command = [self.exe, '-c']
        # These tuning flags are only the fallback used when no
        # 'external_downloader_args' are configured.
        command.extend(self._configuration_args([
            '--min-split-size', '1M', '--max-connection-per-server', '4']))
        # The target is passed as separate directory and file name options.
        directory, basename = os.path.split(tmpfilename)
        if directory:
            command.extend(['--dir', directory])
        command.extend(['--out', basename])
        for name, value in info_dict['http_headers'].items():
            command.extend(['--header', '%s: %s' % (name, value)])
        command.extend(self._option('--interface', 'source_address'))
        command.extend(self._option('--all-proxy', 'proxy'))
        # The param is "no check", the flag is "check": hence the swapped
        # 'false'/'true' values.
        command.extend(self._bool_option(
            '--check-certificate', 'nocheckcertificate', 'false', 'true', '='))
        command.extend(['--', info_dict['url']])
        return command
151
906e2f0e
JMF
152
class HttpieFD(ExternalFD):
    """Downloader that delegates the transfer to HTTPie's ``http`` command."""

    @classmethod
    def available(cls):
        """Probe for the ``http`` executable by asking for its version."""
        # The executable is called 'http', which differs from get_basename(),
        # hence the override instead of relying on AVAILABLE_OPT.
        return check_executable('http', ['--version'])

    def _make_cmd(self, tmpfilename, info_dict):
        """Assemble the argument list that saves info_dict['url'] to tmpfilename."""
        command = ['http', '--download', '--output', tmpfilename, info_dict['url']]
        # Request headers follow the URL as 'Name:value' items.
        command.extend(
            '%s:%s' % (name, value)
            for name, value in info_dict['http_headers'].items())
        return command
163
12b84ac8 164
class FFmpegFD(ExternalFD):
    """Downloader that lets ffmpeg/avconv read the stream and copy it into a file."""

    @classmethod
    def supports(cls, info_dict):
        """Return True when the format's protocol is one of those listed below."""
        supported_protocols = ('http', 'https', 'ftp', 'ftps', 'm3u8', 'rtsp', 'rtmp', 'mms')
        return info_dict['protocol'] in supported_protocols

    @classmethod
    def available(cls):
        """Report whether FFmpegPostProcessor found a usable executable."""
        return FFmpegPostProcessor().available

    def _call_downloader(self, tmpfilename, info_dict):
        """Build the ffmpeg/avconv command line, run it and return its exit code.

        A KeyboardInterrupt raised while waiting is re-raised after asking the
        process to quit (except on Windows).
        """
        url = info_dict['url']
        ffpp = FFmpegPostProcessor(downloader=self)
        if not ffpp.available:
            self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. Please install one.')
            # NOTE(review): False compares equal to 0, so a caller that only
            # checks ``retval == 0`` would take this for success.
            return False
        ffpp.check_version()

        args = [ffpp.executable, '-y']

        args.extend(self._configuration_args())

        # start_time = info_dict.get('start_time') or 0
        # if start_time:
        #     args += ['-ss', compat_str(start_time)]
        # end_time = info_dict.get('end_time')
        # if end_time:
        #     args += ['-t', compat_str(end_time - start_time)]

        http_headers = info_dict['http_headers']
        if http_headers and re.match(r'^https?://', url):
            # Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv:
            # [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.
            headers = handle_youtubedl_headers(http_headers)
            header_blob = ''.join(
                '%s: %s\r\n' % (key, val) for key, val in headers.items())
            args.extend(['-headers', header_blob])

        protocol = info_dict.get('protocol')

        if protocol == 'rtmp':
            # (command line flag, info_dict key) pairs, emitted in this order
            # whenever the corresponding value is present.
            rtmp_options = (
                ('-rtmp_swfverify', 'player_url'),
                ('-rtmp_pageurl', 'page_url'),
                ('-rtmp_app', 'app'),
                ('-rtmp_playpath', 'play_path'),
                ('-rtmp_tcurl', 'tc_url'),
                ('-rtmp_flashver', 'flash_version'),
            )
            for flag, key in rtmp_options:
                value = info_dict.get(key)
                if value is not None:
                    args.extend([flag, value])
            if info_dict.get('rtmp_live', False):
                args.extend(['-rtmp_live', 'live'])

        # Everything appended so far precedes '-i'; the output format follows.
        args.extend(['-i', url, '-c', 'copy'])
        if protocol == 'm3u8':
            use_mpegts = self.params.get('hls_use_mpegts', False) or tmpfilename == '-'
            if use_mpegts:
                args.extend(['-f', 'mpegts'])
            else:
                args.extend(['-f', 'mp4', '-bsf:a', 'aac_adtstoasc'])
        elif protocol == 'rtmp':
            args.extend(['-f', 'flv'])
        else:
            # Fall back to the extension itself when it has no mapped format name.
            ext = info_dict['ext']
            args.extend(['-f', EXT_TO_OUT_FORMATS.get(ext, ext)])

        args = [encodeArgument(opt) for opt in args]
        output_argument = ffpp._ffmpeg_filename_argument(tmpfilename)
        args.append(encodeFilename(output_argument, True))

        self._debug_cmd(args)

        proc = subprocess.Popen(args, stdin=subprocess.PIPE)
        try:
            retval = proc.wait()
        except KeyboardInterrupt:
            # subprocess.run would send the SIGKILL signal to ffmpeg and the
            # mp4 file couldn't be played, but if we ask ffmpeg to quit it
            # produces a file that is playable (this is mostly useful for live
            # streams). Note that Windows is not affected and produces playable
            # files (see https://github.com/rg3/youtube-dl/issues/8300).
            if sys.platform != 'win32':
                proc.communicate(b'q')
            raise
        return retval
255
256
class AVconvFD(FFmpegFD):
    """FFmpegFD under another class name, so that its basename is 'avconv'."""
259
222516d9
PH
# Maps each downloader's basename (e.g. 'curl') to its class. Every
# module-level name ending in 'FD' is registered, except the ExternalFD base
# class itself.
_BY_NAME = dict(
    (downloader_cls.get_basename(), downloader_cls)
    for symbol, downloader_cls in globals().items()
    if symbol != 'ExternalFD' and symbol.endswith('FD')
)
265
266
def list_external_downloaders():
    """Return the names of all registered external downloaders, sorted."""
    return sorted(_BY_NAME)
269
270
def get_external_downloader(external_downloader):
    """ Return the downloader class registered for the given executable.

    external_downloader may be a bare name or a path; only the file name
    without its extension is used for the lookup. Raises KeyError when no
    downloader is registered under that name. """
    executable = os.path.basename(external_downloader)
    # Drop .exe extension on Windows
    stem, _extension = os.path.splitext(executable)
    return _BY_NAME[stem]