]>
Commit | Line | Data |
---|---|---|
1 | from __future__ import unicode_literals | |
2 | ||
3 | import os.path | |
4 | import re | |
5 | import subprocess | |
6 | import sys | |
7 | import time | |
8 | ||
9 | try: | |
10 | from Crypto.Cipher import AES | |
11 | can_decrypt_frag = True | |
12 | except ImportError: | |
13 | can_decrypt_frag = False | |
14 | ||
15 | from .common import FileDownloader | |
16 | from ..compat import ( | |
17 | compat_setenv, | |
18 | compat_str, | |
19 | ) | |
20 | from ..postprocessor.ffmpeg import FFmpegPostProcessor, EXT_TO_OUT_FORMATS | |
21 | from ..utils import ( | |
22 | cli_option, | |
23 | cli_valueless_option, | |
24 | cli_bool_option, | |
25 | cli_configuration_args, | |
26 | encodeFilename, | |
27 | encodeArgument, | |
28 | handle_youtubedl_headers, | |
29 | check_executable, | |
30 | is_outdated_version, | |
31 | process_communicate_or_kill, | |
32 | sanitized_Request, | |
33 | sanitize_open, | |
34 | ) | |
35 | ||
36 | ||
class ExternalFD(FileDownloader):
    """Base class for downloaders that hand the transfer to an external program.

    Subclasses normally implement ``_make_cmd`` (returning the argument list
    to execute) and define ``AVAILABLE_OPT``, the option ``available`` passes
    to the executable to probe for it.  Alternatively a subclass may override
    ``_call_downloader`` completely.
    """
    SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps')

    def real_download(self, filename, info_dict):
        """Run the external downloader and finalize the downloaded file.

        The program writes to a temporary name which is renamed to *filename*
        on success.  Returns True when the downloader reported status 0 (or a
        live stream was interrupted by the user), False otherwise.
        """
        self.report_destination(filename)
        tmpfilename = self.temp_name(filename)

        try:
            started = time.time()
            retval = self._call_downloader(tmpfilename, info_dict)
        except KeyboardInterrupt:
            if not info_dict.get('is_live'):
                raise
            # Live stream downloading cancellation should be considered as
            # correct and expected termination thus all postprocessing
            # should take place
            retval = 0
            self.to_screen('[%s] Interrupted by user' % self.get_basename())

        if retval == 0:
            status = {
                'filename': filename,
                'status': 'finished',
                'elapsed': time.time() - started,
            }
            # '-' means the data went to stdout, so there is no file to
            # measure or rename
            if filename != '-':
                fsize = os.path.getsize(encodeFilename(tmpfilename))
                self.to_screen('\r[%s] Downloaded %s bytes' % (self.get_basename(), fsize))
                self.try_rename(tmpfilename, filename)
                status.update({
                    'downloaded_bytes': fsize,
                    'total_bytes': fsize,
                })
            self._hook_progress(status)
            return True
        else:
            self.to_stderr('\n')
            self.report_error('%s exited with code %d' % (
                self.get_basename(), retval))
            return False

    @classmethod
    def get_basename(cls):
        """Return the downloader name: the class name minus its last two characters ('FD'), lowercased."""
        return cls.__name__[:-2].lower()

    @property
    def exe(self):
        """The value of the 'external_downloader' parameter, used as the executable."""
        return self.params.get('external_downloader')

    @classmethod
    def available(cls, path=None):
        """Probe the executable (*path* or the basename) with ``AVAILABLE_OPT``."""
        return check_executable(path or cls.get_basename(), [cls.AVAILABLE_OPT])

    @classmethod
    def supports(cls, info_dict):
        """Return whether the protocol of *info_dict* is in ``SUPPORTED_PROTOCOLS``."""
        return info_dict['protocol'] in cls.SUPPORTED_PROTOCOLS

    @classmethod
    def can_download(cls, info_dict, path=None):
        """Return whether the program is available and supports *info_dict*."""
        return cls.available(path) and cls.supports(info_dict)

    def _option(self, command_option, param):
        """Shortcut for ``cli_option`` bound to this downloader's params."""
        return cli_option(self.params, command_option, param)

    def _bool_option(self, command_option, param, true_value='true', false_value='false', separator=None):
        """Shortcut for ``cli_bool_option`` bound to this downloader's params."""
        return cli_bool_option(self.params, command_option, param, true_value, false_value, separator)

    def _valueless_option(self, command_option, param, expected_value=True):
        """Shortcut for ``cli_valueless_option`` bound to this downloader's params."""
        return cli_valueless_option(self.params, command_option, param, expected_value)

    def _configuration_args(self, *args, **kwargs):
        """Return the user supplied extra arguments ('external_downloader_args').

        The lookup keys passed on are this downloader's basename and 'default'.
        """
        return cli_configuration_args(
            self.params.get('external_downloader_args'),
            [self.get_basename(), 'default'],
            *args, **kwargs)

    def _call_downloader(self, tmpfilename, info_dict):
        """ Either overwrite this or implement _make_cmd

        Runs the command built by ``_make_cmd`` and returns the exit status
        of the program, or -1 when fragment retrieval/merging failed.
        """
        cmd = [encodeArgument(a) for a in self._make_cmd(tmpfilename, info_dict)]

        self._debug_cmd(cmd)

        if 'fragments' not in info_dict:
            p = subprocess.Popen(
                cmd, stderr=subprocess.PIPE)
            _, stderr = process_communicate_or_kill(p)
            if p.returncode != 0:
                self.to_stderr(stderr.decode('utf-8', 'replace'))
            return p.returncode

        fragment_retries = self.params.get('fragment_retries', 0)
        skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)

        count = 0
        while count <= fragment_retries:
            p = subprocess.Popen(
                cmd, stderr=subprocess.PIPE)
            _, stderr = process_communicate_or_kill(p)
            if p.returncode == 0:
                break
            # TODO: Decide whether to retry based on error code
            # https://aria2.github.io/manual/en/html/aria2c.html#exit-status
            self.to_stderr(stderr.decode('utf-8', 'replace'))
            count += 1
            if count <= fragment_retries:
                self.to_screen(
                    '[%s] Got error. Retrying fragments (attempt %d of %s)...'
                    % (self.get_basename(), count, self.format_retries(fragment_retries)))
        if count > fragment_retries:
            if not skip_unavailable_fragments:
                self.report_error('Giving up after %s fragment retries' % fragment_retries)
                return -1

        # Concatenate the downloaded fragment files into the destination.
        # try/finally guarantees the handles are released on every exit path
        # (early error return or exception), not just on success.
        dest, _ = sanitize_open(tmpfilename, 'wb')
        try:
            for frag_index, fragment in enumerate(info_dict['fragments']):
                fragment_filename = '%s-Frag%d' % (tmpfilename, frag_index)
                try:
                    src, _ = sanitize_open(fragment_filename, 'rb')
                except IOError:
                    if skip_unavailable_fragments and frag_index > 1:
                        self.to_screen('[%s] Skipping fragment %d ...' % (self.get_basename(), frag_index))
                        continue
                    self.report_error('Unable to open fragment %d' % frag_index)
                    return -1
                try:
                    fragment_data = src.read()
                finally:
                    src.close()

                decrypt_info = fragment.get('decrypt_info')
                if decrypt_info and decrypt_info['METHOD'] == 'AES-128':
                    if not can_decrypt_frag:
                        # The optional Crypto import at module level failed,
                        # so the name AES does not exist
                        self.report_error(
                            'Unable to decrypt fragment %d: the Crypto.Cipher.AES module is not available' % frag_index)
                        return -1
                    iv = decrypt_info.get('IV')
                    # The key is fetched once and cached in decrypt_info
                    decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen(
                        self._prepare_url(info_dict, info_dict.get('_decryption_key_url') or decrypt_info['URI'])).read()
                    fragment_data = AES.new(
                        decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(fragment_data)
                dest.write(fragment_data)

                if not self.params.get('keep_fragments', False):
                    os.remove(encodeFilename(fragment_filename))
        finally:
            dest.close()
        os.remove(encodeFilename('%s.frag.urls' % tmpfilename))
        return p.returncode

    def _prepare_url(self, info_dict, url):
        """Wrap *url* in a request carrying the 'http_headers' of *info_dict*, if any."""
        headers = info_dict.get('http_headers')
        return sanitized_Request(url, None, headers) if headers else url
186 | ||
187 | ||
class CurlFD(ExternalFD):
    """External downloader driving the ``curl`` command line program."""
    AVAILABLE_OPT = '-V'

    def _make_cmd(self, tmpfilename, info_dict):
        """Assemble the curl argument list that downloads into *tmpfilename*."""
        args = [self.exe, '--location', '-o', tmpfilename]

        http_headers = info_dict.get('http_headers')
        if http_headers is not None:
            for name, value in http_headers.items():
                args.extend(['--header', '%s: %s' % (name, value)])

        args.extend(self._bool_option('--continue-at', 'continuedl', '-', '0'))
        args.extend(self._valueless_option('--silent', 'noprogress'))
        args.extend(self._valueless_option('--verbose', 'verbose'))
        args.extend(self._option('--limit-rate', 'ratelimit'))

        # An "infinite" retry count is passed to curl as the largest
        # 32-bit signed integer
        retry_args = self._option('--retry', 'retries')
        if len(retry_args) == 2:
            if retry_args[1] in ('inf', 'infinite'):
                retry_args[1] = '2147483647'
            args.extend(retry_args)

        args.extend(self._option('--max-filesize', 'max_filesize'))
        args.extend(self._option('--interface', 'source_address'))
        args.extend(self._option('--proxy', 'proxy'))
        args.extend(self._valueless_option('--insecure', 'nocheckcertificate'))
        args.extend(self._configuration_args())
        args.extend(['--', info_dict['url']])
        return args

    def _call_downloader(self, tmpfilename, info_dict):
        """Run curl and return its exit status."""
        argv = [
            encodeArgument(arg)
            for arg in self._make_cmd(tmpfilename, info_dict)]

        self._debug_cmd(argv)

        # curl writes the progress to stderr so don't capture it.
        process = subprocess.Popen(argv)
        process_communicate_or_kill(process)
        return process.returncode
223 | ||
224 | ||
class AxelFD(ExternalFD):
    """External downloader driving the ``axel`` command line program."""
    AVAILABLE_OPT = '-V'

    def _make_cmd(self, tmpfilename, info_dict):
        """Assemble the axel argument list that downloads into *tmpfilename*."""
        args = [self.exe, '-o', tmpfilename]

        http_headers = info_dict.get('http_headers')
        if http_headers is not None:
            for name, value in http_headers.items():
                args.extend(['-H', '%s: %s' % (name, value)])

        args.extend(self._configuration_args())
        args.extend(['--', info_dict['url']])
        return args
236 | ||
237 | ||
class WgetFD(ExternalFD):
    """External downloader driving the GNU ``wget`` command line program."""
    AVAILABLE_OPT = '--version'

    def _make_cmd(self, tmpfilename, info_dict):
        """Assemble the wget argument list that downloads into *tmpfilename*."""
        cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies']
        if info_dict.get('http_headers') is not None:
            for key, val in info_dict['http_headers'].items():
                cmd += ['--header', '%s: %s' % (key, val)]
        cmd += self._option('--limit-rate', 'ratelimit')
        retry = self._option('--tries', 'retries')
        if len(retry) == 2:
            # wget uses 0 to mean "retry forever"
            if retry[1] in ('inf', 'infinite'):
                retry[1] = '0'
            cmd += retry
        cmd += self._option('--bind-address', 'source_address')
        # wget has no option that accepts a proxy URL; the proxy is set with
        # the http_proxy/https_proxy wgetrc commands, which --execute runs as
        # if they were part of the startup file.
        proxy = self.params.get('proxy')
        if proxy:
            for var in ('http_proxy', 'https_proxy'):
                cmd += ['--execute', '%s=%s' % (var, proxy)]
        cmd += self._valueless_option('--no-check-certificate', 'nocheckcertificate')
        cmd += self._configuration_args()
        cmd += ['--', info_dict['url']]
        return cmd
258 | ||
259 | ||
class Aria2cFD(ExternalFD):
    """External downloader driving ``aria2c``; also handles fragment URL lists."""
    AVAILABLE_OPT = '-v'
    SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps', 'frag_urls')

    @staticmethod
    def supports_manifest(manifest):
        """Return False if *manifest* uses a feature listed as unsupported."""
        UNSUPPORTED_FEATURES = [
            r'#EXT-X-BYTERANGE',  # playlists composed of byte ranges of media files [1]
            # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2
        ]
        for pattern in UNSUPPORTED_FEATURES:
            if re.search(pattern, manifest):
                return False
        return True

    def _make_cmd(self, tmpfilename, info_dict):
        """Assemble the aria2c argument list.

        For fragmented downloads this also writes the '<tmpfilename>.frag.urls'
        input file listing every fragment URL with its output file name.
        """
        fragmented = 'fragments' in info_dict
        target_dir = os.path.dirname(tmpfilename)
        target_name = os.path.basename(tmpfilename)

        args = [
            self.exe, '-c',
            '--console-log-level=warn', '--summary-interval=0', '--download-result=hide',
            '--file-allocation=none', '-x16', '-j16', '-s16',
        ]
        if fragmented:
            args.extend(['--allow-overwrite=true', '--allow-piece-length-change=true'])

        http_headers = info_dict.get('http_headers')
        if http_headers is not None:
            for name, value in http_headers.items():
                args.extend(['--header', '%s: %s' % (name, value)])
        args.extend(self._option('--interface', 'source_address'))
        args.extend(self._option('--all-proxy', 'proxy'))
        args.extend(self._bool_option('--check-certificate', 'nocheckcertificate', 'false', 'true', '='))
        args.extend(self._bool_option('--remote-time', 'updatetime', 'true', 'false', '='))
        args.extend(self._configuration_args())

        if target_dir:
            args.extend(['--dir', target_dir])
        if not fragmented:
            args.extend(['--out', target_name])
        args.append('--auto-file-renaming=false')

        if not fragmented:
            args.extend(['--', info_dict['url']])
            return args

        args.extend(['--file-allocation=none', '--uri-selector=inorder'])
        url_list_file = '%s.frag.urls' % tmpfilename
        # One entry per fragment: the URL, then an indented "out=" option line
        entries = [
            '%s\n\tout=%s' % (frag['url'], '%s-Frag%d' % (target_name, index))
            for index, frag in enumerate(info_dict['fragments'])]
        stream, _ = sanitize_open(url_list_file, 'wb')
        stream.write('\n'.join(entries).encode('utf-8'))
        stream.close()
        args.extend(['-i', url_list_file])
        return args
310 | ||
311 | ||
class HttpieFD(ExternalFD):
    """External downloader driving HTTPie, whose executable is named ``http``."""

    @classmethod
    def available(cls, path=None):
        """Probe *path* (or ``http``) with ``--version``."""
        executable = path or 'http'
        return check_executable(executable, ['--version'])

    def _make_cmd(self, tmpfilename, info_dict):
        """Assemble the http argument list that downloads into *tmpfilename*."""
        args = ['http', '--download', '--output', tmpfilename, info_dict['url']]

        http_headers = info_dict.get('http_headers')
        if http_headers is not None:
            # Headers are passed as trailing "Name:value" items
            args.extend(
                '%s:%s' % (name, value)
                for name, value in http_headers.items())
        return args
324 | ||
325 | ||
class FFmpegFD(ExternalFD):
    """External downloader that lets ffmpeg/avconv fetch and remux the stream."""
    SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps', 'm3u8', 'rtsp', 'rtmp', 'mms')

    @classmethod
    def available(cls, path=None):  # path is ignored for ffmpeg
        """Return whether FFmpegPostProcessor reports a usable executable."""
        return FFmpegPostProcessor().available

    def _call_downloader(self, tmpfilename, info_dict):
        """Build the ffmpeg command line, run it and return its exit status.

        Returns -1 when no ffmpeg executable is available.  On
        KeyboardInterrupt (non-Windows) ffmpeg is asked to quit gracefully
        before the exception is re-raised; on any other exception the process
        is killed.
        """
        url = info_dict['url']
        ffpp = FFmpegPostProcessor(downloader=self)
        if not ffpp.available:
            self.report_error('m3u8 download detected but ffmpeg could not be found. Please install')
            # Must be a non-zero status: real_download() treats any value
            # equal to 0 as success, and False == 0 in Python.
            return -1
        ffpp.check_version()

        args = [ffpp.executable, '-y']

        for log_level in ('quiet', 'verbose'):
            if self.params.get(log_level, False):
                args += ['-loglevel', log_level]
                break

        seekable = info_dict.get('_seekable')
        if seekable is not None:
            # setting -seekable prevents ffmpeg from guessing if the server
            # supports seeking(by adding the header `Range: bytes=0-`), which
            # can cause problems in some cases
            # https://github.com/ytdl-org/youtube-dl/issues/11800#issuecomment-275037127
            # http://trac.ffmpeg.org/ticket/6125#comment:10
            args += ['-seekable', '1' if seekable else '0']

        args += self._configuration_args()

        # start_time = info_dict.get('start_time') or 0
        # if start_time:
        #     args += ['-ss', compat_str(start_time)]
        # end_time = info_dict.get('end_time')
        # if end_time:
        #     args += ['-t', compat_str(end_time - start_time)]

        if info_dict.get('http_headers') is not None and re.match(r'^https?://', url):
            # Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv:
            # [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.
            headers = handle_youtubedl_headers(info_dict['http_headers'])
            args += [
                '-headers',
                ''.join('%s: %s\r\n' % (key, val) for key, val in headers.items())]

        env = None
        proxy = self.params.get('proxy')
        if proxy:
            if not re.match(r'^[\da-zA-Z]+://', proxy):
                proxy = 'http://%s' % proxy

            if proxy.startswith('socks'):
                self.report_warning(
                    '%s does not support SOCKS proxies. Downloading is likely to fail. '
                    'Consider adding --hls-prefer-native to your command.' % self.get_basename())

            # Since December 2015 ffmpeg supports -http_proxy option (see
            # http://git.videolan.org/?p=ffmpeg.git;a=commit;h=b4eb1f29ebddd60c41a2eb39f5af701e38e0d3fd)
            # We could switch to the following code if we are able to detect version properly
            # args += ['-http_proxy', proxy]
            env = os.environ.copy()
            compat_setenv('HTTP_PROXY', proxy, env=env)
            compat_setenv('http_proxy', proxy, env=env)

        protocol = info_dict.get('protocol')

        if protocol == 'rtmp':
            # (ffmpeg option, info_dict key) pairs; options are emitted in
            # this order for every key that is present
            rtmp_options = (
                ('-rtmp_swfverify', 'player_url'),
                ('-rtmp_pageurl', 'page_url'),
                ('-rtmp_app', 'app'),
                ('-rtmp_playpath', 'play_path'),
                ('-rtmp_tcurl', 'tc_url'),
                ('-rtmp_flashver', 'flash_version'),
            )
            for option, key in rtmp_options:
                value = info_dict.get(key)
                if value is not None:
                    args += [option, value]
            if info_dict.get('rtmp_live', False):
                args += ['-rtmp_live', 'live']
            conn = info_dict.get('rtmp_conn')
            if isinstance(conn, list):
                for entry in conn:
                    args += ['-rtmp_conn', entry]
            elif isinstance(conn, compat_str):
                args += ['-rtmp_conn', conn]

        args += ['-i', url, '-c', 'copy']

        if self.params.get('test', False):
            args += ['-fs', compat_str(self._TEST_FILE_SIZE)]

        if protocol in ('m3u8', 'm3u8_native'):
            use_mpegts = (tmpfilename == '-') or self.params.get('hls_use_mpegts')
            if use_mpegts is None:
                use_mpegts = info_dict.get('is_live')
            if use_mpegts:
                args += ['-f', 'mpegts']
            else:
                args += ['-f', 'mp4']
                # ffmpeg older than 3.2 gets the aac_adtstoasc bitstream
                # filter when the audio codec is unknown or AAC
                outdated_ffmpeg = (
                    ffpp.basename == 'ffmpeg'
                    and is_outdated_version(ffpp._versions['ffmpeg'], '3.2', False))
                if outdated_ffmpeg and (
                        not info_dict.get('acodec')
                        or info_dict['acodec'].split('.')[0] in ('aac', 'mp4a')):
                    args += ['-bsf:a', 'aac_adtstoasc']
        elif protocol == 'rtmp':
            args += ['-f', 'flv']
        else:
            args += ['-f', EXT_TO_OUT_FORMATS.get(info_dict['ext'], info_dict['ext'])]

        args = [encodeArgument(opt) for opt in args]
        args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True))

        self._debug_cmd(args)

        # stdin is a pipe so that 'q' can be sent to ffmpeg on interruption
        proc = subprocess.Popen(args, stdin=subprocess.PIPE, env=env)
        try:
            retval = proc.wait()
        except BaseException as e:
            # subprocess.run would send the SIGKILL signal to ffmpeg and the
            # mp4 file couldn't be played, but if we ask ffmpeg to quit it
            # produces a file that is playable (this is mostly useful for live
            # streams). Note that Windows is not affected and produces playable
            # files (see https://github.com/ytdl-org/youtube-dl/issues/8300).
            if isinstance(e, KeyboardInterrupt) and sys.platform != 'win32':
                process_communicate_or_kill(proc, b'q')
            else:
                proc.kill()
                proc.wait()
            raise
        return retval
465 | ||
466 | ||
class AVconvFD(FFmpegFD):
    # Adds nothing to FFmpegFD; only the class name differs, so the inherited
    # get_basename() yields 'avconv' for this class.
    pass
469 | ||
470 | ||
# Maps each downloader's basename to its class, collected from every
# module-level name ending in 'FD' other than the ExternalFD base itself.
# Kept as a generator expression: its loop variables live in their own scope,
# so globals() is not modified while it is being iterated.
_BY_NAME = dict(
    (fd_class.get_basename(), fd_class)
    for identifier, fd_class in globals().items()
    if identifier != 'ExternalFD' and identifier.endswith('FD')
)
476 | ||
477 | ||
def list_external_downloaders():
    """Return the basenames of all registered external downloaders, sorted."""
    names = list(_BY_NAME)
    names.sort()
    return names
480 | ||
481 | ||
def get_external_downloader(external_downloader):
    """ Given the name of the executable, return the downloader class
    registered for it. Raises KeyError if the name is not supported. """
    executable = os.path.basename(external_downloader)
    # Drop .exe extension on Windows
    stem, _ext = os.path.splitext(executable)
    return _BY_NAME[stem]