]> jfr.im git - yt-dlp.git/blob - youtube_dl/YoutubeDL.py
[ffmpeg] Make available a property
[yt-dlp.git] / youtube_dl / YoutubeDL.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import datetime
8 import errno
9 import io
10 import itertools
11 import json
12 import locale
13 import operator
14 import os
15 import platform
16 import re
17 import shutil
18 import subprocess
19 import socket
20 import sys
21 import time
22 import traceback
23
24 if os.name == 'nt':
25 import ctypes
26
27 from .compat import (
28 compat_basestring,
29 compat_cookiejar,
30 compat_expanduser,
31 compat_http_client,
32 compat_kwargs,
33 compat_str,
34 compat_urllib_error,
35 compat_urllib_request,
36 )
37 from .utils import (
38 escape_url,
39 ContentTooShortError,
40 date_from_str,
41 DateRange,
42 DEFAULT_OUTTMPL,
43 determine_ext,
44 DownloadError,
45 encodeFilename,
46 ExtractorError,
47 format_bytes,
48 formatSeconds,
49 get_term_width,
50 locked_file,
51 make_HTTPS_handler,
52 MaxDownloadsReached,
53 PagedList,
54 parse_filesize,
55 PostProcessingError,
56 platform_name,
57 preferredencoding,
58 render_table,
59 SameFileError,
60 sanitize_filename,
61 std_headers,
62 subtitles_filename,
63 takewhile_inclusive,
64 UnavailableVideoError,
65 url_basename,
66 version_tuple,
67 write_json_file,
68 write_string,
69 YoutubeDLHandler,
70 prepend_extension,
71 args_to_str,
72 age_restricted,
73 )
74 from .cache import Cache
75 from .extractor import get_info_extractor, gen_extractors
76 from .downloader import get_suitable_downloader
77 from .downloader.rtmp import rtmpdump_version
78 from .postprocessor import (
79 FFmpegFixupM4aPP,
80 FFmpegFixupStretchedPP,
81 FFmpegMergerPP,
82 FFmpegPostProcessor,
83 get_postprocessor,
84 )
85 from .version import __version__
86
87
88 class YoutubeDL(object):
89 """YoutubeDL class.
90
91 YoutubeDL objects are the ones responsible of downloading the
92 actual video file and writing it to disk if the user has requested
93 it, among some other tasks. In most cases there should be one per
94 program. As, given a video URL, the downloader doesn't know how to
95 extract all the needed information, task that InfoExtractors do, it
96 has to pass the URL to one of them.
97
98 For this, YoutubeDL objects have a method that allows
99 InfoExtractors to be registered in a given order. When it is passed
100 a URL, the YoutubeDL object handles it to the first InfoExtractor it
101 finds that reports being able to handle it. The InfoExtractor extracts
102 all the information about the video or videos the URL refers to, and
103 YoutubeDL process the extracted information, possibly using a File
104 Downloader to download the video.
105
106 YoutubeDL objects accept a lot of parameters. In order not to saturate
107 the object constructor with arguments, it receives a dictionary of
108 options instead. These options are available through the params
109 attribute for the InfoExtractors to use. The YoutubeDL also
110 registers itself as the downloader in charge for the InfoExtractors
111 that are added to it, so this is a "mutual registration".
112
113 Available options:
114
115 username: Username for authentication purposes.
116 password: Password for authentication purposes.
117 videopassword:   Password for access to a video.
118 usenetrc: Use netrc for authentication instead.
119 verbose: Print additional info to stdout.
120 quiet: Do not print messages to stdout.
121 no_warnings: Do not print out anything for warnings.
122 forceurl: Force printing final URL.
123 forcetitle: Force printing title.
124 forceid: Force printing ID.
125 forcethumbnail: Force printing thumbnail URL.
126 forcedescription: Force printing description.
127 forcefilename: Force printing final filename.
128 forceduration: Force printing duration.
129 forcejson: Force printing info_dict as JSON.
130 dump_single_json: Force printing the info_dict of the whole playlist
131 (or video) as a single JSON line.
132 simulate: Do not download the video files.
133 format: Video format code. See options.py for more information.
134 format_limit: Highest quality format to try.
135 outtmpl: Template for output names.
136 restrictfilenames: Do not allow "&" and spaces in file names
137 ignoreerrors: Do not stop on download errors.
138 nooverwrites: Prevent overwriting files.
139 playliststart: Playlist item to start at.
140 playlistend: Playlist item to end at.
141 playlist_items: Specific indices of playlist to download.
142 playlistreverse: Download playlist items in reverse order.
143 matchtitle: Download only matching titles.
144 rejecttitle: Reject downloads for matching titles.
145 logger: Log messages to a logging.Logger instance.
146 logtostderr: Log messages to stderr instead of stdout.
147 writedescription: Write the video description to a .description file
148 writeinfojson: Write the video description to a .info.json file
149 writeannotations: Write the video annotations to a .annotations.xml file
150 writethumbnail: Write the thumbnail image to a file
151 write_all_thumbnails: Write all thumbnail formats to files
152 writesubtitles: Write the video subtitles to a file
153 writeautomaticsub: Write the automatic subtitles to a file
154 allsubtitles: Downloads all the subtitles of the video
155 (requires writesubtitles or writeautomaticsub)
156 listsubtitles: Lists all available subtitles for the video
157 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
158 subtitleslangs: List of languages of the subtitles to download
159 keepvideo: Keep the video file after post-processing
160 daterange: A DateRange object, download only if the upload_date is in the range.
161 skip_download: Skip the actual download of the video file
162 cachedir: Location of the cache files in the filesystem.
163 False to disable filesystem cache.
164 noplaylist: Download single video instead of a playlist if in doubt.
165 age_limit: An integer representing the user's age in years.
166 Unsuitable videos for the given age are skipped.
167 min_views: An integer representing the minimum view count the video
168 must have in order to not be skipped.
169 Videos without view count information are always
170 downloaded. None for no limit.
171 max_views: An integer representing the maximum view count.
172 Videos that are more popular than that are not
173 downloaded.
174 Videos without view count information are always
175 downloaded. None for no limit.
176 download_archive: File name of a file where all downloads are recorded.
177 Videos already present in the file are not downloaded
178 again.
179 cookiefile: File name where cookies should be read from and dumped to.
180 nocheckcertificate:Do not verify SSL certificates
181 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
182 At the moment, this is only supported by YouTube.
183 proxy: URL of the proxy server to use
184 socket_timeout: Time to wait for unresponsive hosts, in seconds
185 bidi_workaround: Work around buggy terminals without bidirectional text
186 support, using fribidi
187 debug_printtraffic:Print out sent and received HTTP traffic
188 include_ads: Download ads as well
189 default_search: Prepend this string if an input url is not valid.
190 'auto' for elaborate guessing
191 encoding: Use this encoding instead of the system-specified.
192 extract_flat: Do not resolve URLs, return the immediate result.
193 Pass in 'in_playlist' to only show this behavior for
194 playlist items.
195 postprocessors: A list of dictionaries, each with an entry
196 * key: The name of the postprocessor. See
197 youtube_dl/postprocessor/__init__.py for a list.
198 as well as any further keyword arguments for the
199 postprocessor.
200 progress_hooks: A list of functions that get called on download
201 progress, with a dictionary with the entries
202 * status: One of "downloading" and "finished".
203 Check this first and ignore unknown values.
204
205 If status is one of "downloading" or "finished", the
206 following properties may also be present:
207 * filename: The final filename (always present)
208 * downloaded_bytes: Bytes on disk
209 * total_bytes: Size of the whole file, None if unknown
210 * tmpfilename: The filename we're currently writing to
211 * eta: The estimated time in seconds, None if unknown
212 * speed: The download speed in bytes/second, None if
213 unknown
214
215 Progress hooks are guaranteed to be called at least once
216 (with status "finished") if the download is successful.
217 merge_output_format: Extension to use when merging formats.
218 fixup: Automatically correct known faults of the file.
219 One of:
220 - "never": do nothing
221 - "warn": only emit a warning
222 - "detect_or_warn": check whether we can do anything
223 about it, warn otherwise (default)
224 source_address: (Experimental) Client-side IP address to bind to.
225 call_home: Boolean, true iff we are allowed to contact the
226 youtube-dl servers for debugging.
227 sleep_interval: Number of seconds to sleep before each download.
228 listformats: Print an overview of available video formats and exit.
229 list_thumbnails: Print a table of all thumbnails and exit.
230 match_filter: A function that gets called with the info_dict of
231 every video.
232 If it returns a message, the video is ignored.
233 If it returns None, the video is downloaded.
234 match_filter_func in utils.py is one example for this.
235 no_color: Do not emit color codes in output.
236
237 The following options determine which downloader is picked:
238 external_downloader: Executable of the external downloader to call.
239 None or unset for standard (built-in) downloader.
240 hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
241
242 The following parameters are not used by YoutubeDL itself, they are used by
243 the FileDownloader:
244 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
245 noresizebuffer, retries, continuedl, noprogress, consoletitle,
246 xattr_set_filesize.
247
248 The following options are used by the post processors:
249 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
250 otherwise prefer avconv.
251 exec_cmd: Arbitrary command to run after downloading
252 """
253
    # Class-level defaults; every instance assigns real values in __init__.
    params = None              # Option dictionary supplied by the caller
    _ies = []                  # Registered InfoExtractor instances
    _pps = []                  # Registered PostProcessor instances
    _download_retcode = None   # Exit code accumulated across downloads
    _num_downloads = None      # Counter backing the %(autonumber)s template
    _screen_file = None        # Stream used for screen output (stdout or stderr)
260
    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options."""
        if params is None:
            params = {}
        self._ies = []
        self._ies_instances = {}
        self._pps = []
        self._progress_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        # Screen output goes to stderr instead of stdout when --logtostderr is set
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr
        self.params = params
        self.cache = Cache(self)

        if params.get('bidi_workaround', False):
            try:
                import pty
                # Route screen output through an external bidi filter
                # (bidiv, falling back to fribidi) attached to a pty.
                master, slave = pty.openpty()
                width = get_term_width()
                if width is None:
                    width_args = []
                else:
                    width_args = ['-w', str(width)]
                sp_kwargs = dict(
                    stdin=subprocess.PIPE,
                    stdout=slave,
                    stderr=self._err_file)
                try:
                    self._output_process = subprocess.Popen(
                        ['bidiv'] + width_args, **sp_kwargs
                    )
                except OSError:
                    # bidiv not found; try fribidi instead
                    self._output_process = subprocess.Popen(
                        ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == 2:
                    # ENOENT: neither executable exists; degrade gracefully
                    self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        if (sys.version_info >= (3,) and sys.platform != 'win32' and
                sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not params.get('restrictfilenames', False)):
            # On Python 3, the Unicode filesystem API will throw errors (#1474)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        if '%(stitle)s' in self.params.get('outtmpl', ''):
            self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')

        self._setup_opener()

        if auto_init:
            self.print_debug_header()
            self.add_default_info_extractors()

        # Instantiate and register the configured postprocessors;
        # everything but 'key' is forwarded as constructor kwargs.
        for pp_def_raw in self.params.get('postprocessors', []):
            pp_class = get_postprocessor(pp_def_raw['key'])
            pp_def = dict(pp_def_raw)
            del pp_def['key']
            pp = pp_class(self, **compat_kwargs(pp_def))
            self.add_post_processor(pp)

        for ph in self.params.get('progress_hooks', []):
            self.add_progress_hook(ph)
331
332 def warn_if_short_id(self, argv):
333 # short YouTube ID starting with dash?
334 idxs = [
335 i for i, a in enumerate(argv)
336 if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
337 if idxs:
338 correct_argv = (
339 ['youtube-dl'] +
340 [a for i, a in enumerate(argv) if i not in idxs] +
341 ['--'] + [argv[i] for i in idxs]
342 )
343 self.report_warning(
344 'Long argument string detected. '
345 'Use -- to separate parameters and URLs, like this:\n%s\n' %
346 args_to_str(correct_argv))
347
348 def add_info_extractor(self, ie):
349 """Add an InfoExtractor object to the end of the list."""
350 self._ies.append(ie)
351 self._ies_instances[ie.ie_key()] = ie
352 ie.set_downloader(self)
353
354 def get_info_extractor(self, ie_key):
355 """
356 Get an instance of an IE with name ie_key, it will try to get one from
357 the _ies list, if there's no instance it will create a new one and add
358 it to the extractor list.
359 """
360 ie = self._ies_instances.get(ie_key)
361 if ie is None:
362 ie = get_info_extractor(ie_key)()
363 self.add_info_extractor(ie)
364 return ie
365
366 def add_default_info_extractors(self):
367 """
368 Add the InfoExtractors returned by gen_extractors to the end of the list
369 """
370 for ie in gen_extractors():
371 self.add_info_extractor(ie)
372
373 def add_post_processor(self, pp):
374 """Add a PostProcessor object to the end of the chain."""
375 self._pps.append(pp)
376 pp.set_downloader(self)
377
    def add_progress_hook(self, ph):
        """Add the progress hook (currently only for the file downloader)"""
        # Hooks are called by the FileDownloader with a status dictionary.
        self._progress_hooks.append(ph)
381
382 def _bidi_workaround(self, message):
383 if not hasattr(self, '_output_channel'):
384 return message
385
386 assert hasattr(self, '_output_process')
387 assert isinstance(message, compat_str)
388 line_count = message.count('\n') + 1
389 self._output_process.stdin.write((message + '\n').encode('utf-8'))
390 self._output_process.stdin.flush()
391 res = ''.join(self._output_channel.readline().decode('utf-8')
392 for _ in range(line_count))
393 return res[:-len('\n')]
394
    def to_screen(self, message, skip_eol=False):
        """Print message to stdout if not in quiet mode."""
        # Delegates to to_stdout with quiet-mode checking enabled.
        return self.to_stdout(message, skip_eol, check_quiet=True)
398
    def _write_string(self, s, out=None):
        # Thin wrapper over utils.write_string applying the user-selected
        # output encoding (--encoding), if any.
        write_string(s, out=out, encoding=self.params.get('encoding'))
401
402 def to_stdout(self, message, skip_eol=False, check_quiet=False):
403 """Print message to stdout if not in quiet mode."""
404 if self.params.get('logger'):
405 self.params['logger'].debug(message)
406 elif not check_quiet or not self.params.get('quiet', False):
407 message = self._bidi_workaround(message)
408 terminator = ['\n', ''][skip_eol]
409 output = message + terminator
410
411 self._write_string(output, self._screen_file)
412
413 def to_stderr(self, message):
414 """Print message to stderr."""
415 assert isinstance(message, compat_str)
416 if self.params.get('logger'):
417 self.params['logger'].error(message)
418 else:
419 message = self._bidi_workaround(message)
420 output = message + '\n'
421 self._write_string(output, self._err_file)
422
    def to_console_title(self, message):
        # Nothing to do unless the user enabled console-title updates.
        if not self.params.get('consoletitle', False):
            return
        if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
            # c_wchar_p() might not be necessary if `message` is
            # already of type unicode()
            ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        elif 'TERM' in os.environ:
            # xterm escape sequence: set the window/icon title
            self._write_string('\033]0;%s\007' % message, self._screen_file)
432
433 def save_console_title(self):
434 if not self.params.get('consoletitle', False):
435 return
436 if 'TERM' in os.environ:
437 # Save the title on stack
438 self._write_string('\033[22;0t', self._screen_file)
439
440 def restore_console_title(self):
441 if not self.params.get('consoletitle', False):
442 return
443 if 'TERM' in os.environ:
444 # Restore the title from stack
445 self._write_string('\033[23;0t', self._screen_file)
446
    def __enter__(self):
        # Entering a `with` block saves the console title so that
        # __exit__ can restore it.
        self.save_console_title()
        return self
450
    def __exit__(self, *args):
        # Restore the console title and persist cookies on context exit.
        self.restore_console_title()

        if self.params.get('cookiefile') is not None:
            self.cookiejar.save()
456
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    # Prefer the wrapped exception's own traceback when the
                    # current exception carries one (e.g. ExtractorError).
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += compat_str(traceback.format_exc())
                else:
                    # Not inside an except block: show the current call stack
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            self.to_stderr(tb)
        if not self.params.get('ignoreerrors', False):
            # Re-raise as DownloadError, preserving the original exc_info
            # of a wrapped exception when available.
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1
486
487 def report_warning(self, message):
488 '''
489 Print the message to stderr, it will be prefixed with 'WARNING:'
490 If stderr is a tty file the 'WARNING:' will be colored
491 '''
492 if self.params.get('logger') is not None:
493 self.params['logger'].warning(message)
494 else:
495 if self.params.get('no_warnings'):
496 return
497 if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
498 _msg_header = '\033[0;33mWARNING:\033[0m'
499 else:
500 _msg_header = 'WARNING:'
501 warning_message = '%s %s' % (_msg_header, message)
502 self.to_stderr(warning_message)
503
504 def report_error(self, message, tb=None):
505 '''
506 Do the same as trouble, but prefixes the message with 'ERROR:', colored
507 in red if stderr is a tty file.
508 '''
509 if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
510 _msg_header = '\033[0;31mERROR:\033[0m'
511 else:
512 _msg_header = 'ERROR:'
513 error_message = '%s %s' % (_msg_header, message)
514 self.trouble(error_message, tb)
515
    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        try:
            self.to_screen('[download] %s has already been downloaded' % file_name)
        except UnicodeEncodeError:
            # The filename contains characters the output encoding cannot
            # represent; fall back to a generic notice.
            self.to_screen('[download] The file has already been downloaded')
522
    def prepare_filename(self, info_dict):
        """Generate the output filename."""
        try:
            template_dict = dict(info_dict)

            template_dict['epoch'] = int(time.time())
            autonumber_size = self.params.get('autonumber_size')
            if autonumber_size is None:
                autonumber_size = 5
            autonumber_templ = '%0' + str(autonumber_size) + 'd'
            template_dict['autonumber'] = autonumber_templ % self._num_downloads
            if template_dict.get('playlist_index') is not None:
                # Zero-pad the index to the width of the playlist size
                template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
            if template_dict.get('resolution') is None:
                # Synthesize a resolution string from whatever dimensions exist
                if template_dict.get('width') and template_dict.get('height'):
                    template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
                elif template_dict.get('height'):
                    template_dict['resolution'] = '%sp' % template_dict['height']
                elif template_dict.get('width'):
                    # NOTE(review): this renders width as '?x<width>', i.e. in
                    # the height position — looks like it should be '%dx?';
                    # confirm against upstream before changing.
                    template_dict['resolution'] = '?x%d' % template_dict['width']

            # Sanitize every value for filesystem use; ids get laxer rules
            sanitize = lambda k, v: sanitize_filename(
                compat_str(v),
                restricted=self.params.get('restrictfilenames'),
                is_id=(k == 'id'))
            template_dict = dict((k, sanitize(k, v))
                                 for k, v in template_dict.items()
                                 if v is not None)
            # Missing template fields render as 'NA' instead of raising
            template_dict = collections.defaultdict(lambda: 'NA', template_dict)

            outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
            tmpl = compat_expanduser(outtmpl)
            filename = tmpl % template_dict
            # Temporary fix for #4787
            # 'Treat' all problem characters by passing filename through preferredencoding
            # to workaround encoding issues with subprocess on python2 @ Windows
            if sys.version_info < (3, 0) and sys.platform == 'win32':
                filename = encodeFilename(filename, True).decode(preferredencoding())
            return filename
        except ValueError as err:
            self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
            return None
565
566 def _match_entry(self, info_dict, incomplete):
567 """ Returns None iff the file should be downloaded """
568
569 video_title = info_dict.get('title', info_dict.get('id', 'video'))
570 if 'title' in info_dict:
571 # This can happen when we're just evaluating the playlist
572 title = info_dict['title']
573 matchtitle = self.params.get('matchtitle', False)
574 if matchtitle:
575 if not re.search(matchtitle, title, re.IGNORECASE):
576 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
577 rejecttitle = self.params.get('rejecttitle', False)
578 if rejecttitle:
579 if re.search(rejecttitle, title, re.IGNORECASE):
580 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
581 date = info_dict.get('upload_date', None)
582 if date is not None:
583 dateRange = self.params.get('daterange', DateRange())
584 if date not in dateRange:
585 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
586 view_count = info_dict.get('view_count', None)
587 if view_count is not None:
588 min_views = self.params.get('min_views')
589 if min_views is not None and view_count < min_views:
590 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
591 max_views = self.params.get('max_views')
592 if max_views is not None and view_count > max_views:
593 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
594 if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
595 return 'Skipping "%s" because it is age restricted' % video_title
596 if self.in_download_archive(info_dict):
597 return '%s has already been recorded in archive' % video_title
598
599 if not incomplete:
600 match_filter = self.params.get('match_filter')
601 if match_filter is not None:
602 ret = match_filter(info_dict)
603 if ret is not None:
604 return ret
605
606 return None
607
608 @staticmethod
609 def add_extra_info(info_dict, extra_info):
610 '''Set the keys from extra_info in info dict if they are missing'''
611 for key, value in extra_info.items():
612 info_dict.setdefault(key, value)
613
614 def extract_info(self, url, download=True, ie_key=None, extra_info={},
615 process=True):
616 '''
617 Returns a list with a dictionary for each video we find.
618 If 'download', also downloads the videos.
619 extra_info is a dict containing the extra values to add to each result
620 '''
621
622 if ie_key:
623 ies = [self.get_info_extractor(ie_key)]
624 else:
625 ies = self._ies
626
627 for ie in ies:
628 if not ie.suitable(url):
629 continue
630
631 if not ie.working():
632 self.report_warning('The program functionality for this site has been marked as broken, '
633 'and will probably not work.')
634
635 try:
636 ie_result = ie.extract(url)
637 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
638 break
639 if isinstance(ie_result, list):
640 # Backwards compatibility: old IE result format
641 ie_result = {
642 '_type': 'compat_list',
643 'entries': ie_result,
644 }
645 self.add_default_extra_info(ie_result, ie, url)
646 if process:
647 return self.process_ie_result(ie_result, download, extra_info)
648 else:
649 return ie_result
650 except ExtractorError as de: # An error we somewhat expected
651 self.report_error(compat_str(de), de.format_traceback())
652 break
653 except MaxDownloadsReached:
654 raise
655 except Exception as e:
656 if self.params.get('ignoreerrors', False):
657 self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
658 break
659 else:
660 raise
661 else:
662 self.report_error('no suitable InfoExtractor for URL %s' % url)
663
664 def add_default_extra_info(self, ie_result, ie, url):
665 self.add_extra_info(ie_result, {
666 'extractor': ie.IE_NAME,
667 'webpage_url': url,
668 'webpage_url_basename': url_basename(url),
669 'extractor_key': ie.ie_key(),
670 })
671
    def process_ie_result(self, ie_result, download=True, extra_info={}):
        """
        Take the result of the ie(may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download'.
        Returns the resolved ie_result.
        """

        result_type = ie_result.get('_type', 'video')

        if result_type in ('url', 'url_transparent'):
            # --flat-playlist: return the unresolved reference as-is
            extract_flat = self.params.get('extract_flat', False)
            if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
                    extract_flat is True):
                if self.params.get('forcejson', False):
                    self.to_stdout(json.dumps(ie_result))
                return ie_result

        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            return self.process_video_result(ie_result, download=download)
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            return self.extract_info(ie_result['url'],
                                     download,
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
        elif result_type == 'url_transparent':
            # Use the information from the embedding page
            info = self.extract_info(
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)

            # Non-None fields of the embedding result override the target's,
            # except '_type' and 'url' which must come from the target.
            force_properties = dict(
                (k, v) for k, v in ie_result.items() if v is not None)
            for f in ('_type', 'url'):
                if f in force_properties:
                    del force_properties[f]
            new_result = info.copy()
            new_result.update(force_properties)

            assert new_result.get('_type') != 'url_transparent'

            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type == 'playlist' or result_type == 'multi_video':
            # We process each entry in the playlist
            playlist = ie_result.get('title', None) or ie_result.get('id', None)
            self.to_screen('[download] Downloading playlist: %s' % playlist)

            playlist_results = []

            # playliststart is 1-based on the command line; 0-based here
            playliststart = self.params.get('playliststart', 1) - 1
            playlistend = self.params.get('playlistend', None)
            # For backwards compatibility, interpret -1 as whole list
            if playlistend == -1:
                playlistend = None

            playlistitems_str = self.params.get('playlist_items', None)
            playlistitems = None
            if playlistitems_str is not None:
                # Expand '1,3,5-7' style specs into a generator of 1-based indices
                def iter_playlistitems(format):
                    for string_segment in format.split(','):
                        if '-' in string_segment:
                            start, end = string_segment.split('-')
                            for item in range(int(start), int(end) + 1):
                                yield int(item)
                        else:
                            yield int(string_segment)
                playlistitems = iter_playlistitems(playlistitems_str)

            # Entries may be a list, a lazily-paged list, or a plain iterable;
            # each case gets its own slicing strategy.
            ie_entries = ie_result['entries']
            if isinstance(ie_entries, list):
                n_all_entries = len(ie_entries)
                if playlistitems:
                    entries = [ie_entries[i - 1] for i in playlistitems]
                else:
                    entries = ie_entries[playliststart:playlistend]
                n_entries = len(entries)
                self.to_screen(
                    "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
                    (ie_result['extractor'], playlist, n_all_entries, n_entries))
            elif isinstance(ie_entries, PagedList):
                if playlistitems:
                    entries = []
                    for item in playlistitems:
                        entries.extend(ie_entries.getslice(
                            item - 1, item
                        ))
                else:
                    entries = ie_entries.getslice(
                        playliststart, playlistend)
                n_entries = len(entries)
                self.to_screen(
                    "[%s] playlist %s: Downloading %d videos" %
                    (ie_result['extractor'], playlist, n_entries))
            else:  # iterable
                if playlistitems:
                    entry_list = list(ie_entries)
                    entries = [entry_list[i - 1] for i in playlistitems]
                else:
                    entries = list(itertools.islice(
                        ie_entries, playliststart, playlistend))
                n_entries = len(entries)
                self.to_screen(
                    "[%s] playlist %s: Downloading %d videos" %
                    (ie_result['extractor'], playlist, n_entries))

            if self.params.get('playlistreverse', False):
                entries = entries[::-1]

            for i, entry in enumerate(entries, 1):
                self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
                extra = {
                    'n_entries': n_entries,
                    'playlist': playlist,
                    'playlist_id': ie_result.get('id'),
                    'playlist_title': ie_result.get('title'),
                    'playlist_index': i + playliststart,
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }

                # incomplete=True: the entry may not be fully extracted yet
                reason = self._match_entry(entry, incomplete=True)
                if reason is not None:
                    self.to_screen('[download] ' + reason)
                    continue

                entry_result = self.process_ie_result(entry,
                                                      download=download,
                                                      extra_info=extra)
                playlist_results.append(entry_result)
            ie_result['entries'] = playlist_results
            return ie_result
        elif result_type == 'compat_list':
            self.report_warning(
                'Extractor %s returned a compat_list result. '
                'It needs to be updated.' % ie_result.get('extractor'))

            # Annotate each legacy entry with the playlist-level metadata
            def _fixup(r):
                self.add_extra_info(
                    r,
                    {
                        'extractor': ie_result['extractor'],
                        'webpage_url': ie_result['webpage_url'],
                        'webpage_url_basename': url_basename(ie_result['webpage_url']),
                        'extractor_key': ie_result['extractor_key'],
                    }
                )
                return r
            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
            ]
            return ie_result
        else:
            raise Exception('Invalid result type: %s' % result_type)
833
    def _apply_format_filter(self, format_spec, available_formats):
        " Returns a tuple of the remaining format_spec and filtered formats "

        # Numeric comparison operators for [key<op>value] filters
        OPERATORS = {
            '<': operator.lt,
            '<=': operator.le,
            '>': operator.gt,
            '>=': operator.ge,
            '=': operator.eq,
            '!=': operator.ne,
        }
        # Matches a trailing numeric filter such as '[height<=480]';
        # a '?' after the operator includes formats missing the key.
        operator_rex = re.compile(r'''(?x)\s*\[
            (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
            \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
            (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
            \]$
            ''' % '|'.join(map(re.escape, OPERATORS.keys())))
        m = operator_rex.search(format_spec)
        if m:
            try:
                comparison_value = int(m.group('value'))
            except ValueError:
                # Not a plain integer: try to parse it as a size ('500K'),
                # with and without an implied 'B' suffix.
                comparison_value = parse_filesize(m.group('value'))
                if comparison_value is None:
                    comparison_value = parse_filesize(m.group('value') + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid value %r in format specification %r' % (
                            m.group('value'), format_spec))
            op = OPERATORS[m.group('op')]

        if not m:
            # Fall back to string equality filters such as '[ext=mp4]'
            STR_OPERATORS = {
                '=': operator.eq,
                '!=': operator.ne,
            }
            str_operator_rex = re.compile(r'''(?x)\s*\[
                \s*(?P<key>ext|acodec|vcodec|container|protocol)
                \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
                \s*(?P<value>[a-zA-Z0-9_-]+)
                \s*\]$
                ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
            m = str_operator_rex.search(format_spec)
            if m:
                comparison_value = m.group('value')
                op = STR_OPERATORS[m.group('op')]

        if not m:
            raise ValueError('Invalid format specification %r' % format_spec)

        # Closure over m/op/comparison_value decides per-format inclusion;
        # formats lacking the key pass only when '?' was given.
        def _filter(f):
            actual_value = f.get(m.group('key'))
            if actual_value is None:
                return m.group('none_inclusive')
            return op(actual_value, comparison_value)
        new_formats = [f for f in available_formats if _filter(f)]

        # Strip the consumed filter off the spec; an empty remainder
        # means the default selector 'best'.
        new_format_spec = format_spec[:-len(m.group(0))]
        if not new_format_spec:
            new_format_spec = 'best'

        return (new_format_spec, new_formats)
896
897 def select_format(self, format_spec, available_formats):
898 while format_spec.endswith(']'):
899 format_spec, available_formats = self._apply_format_filter(
900 format_spec, available_formats)
901 if not available_formats:
902 return None
903
904 if format_spec == 'best' or format_spec is None:
905 return available_formats[-1]
906 elif format_spec == 'worst':
907 return available_formats[0]
908 elif format_spec == 'bestaudio':
909 audio_formats = [
910 f for f in available_formats
911 if f.get('vcodec') == 'none']
912 if audio_formats:
913 return audio_formats[-1]
914 elif format_spec == 'worstaudio':
915 audio_formats = [
916 f for f in available_formats
917 if f.get('vcodec') == 'none']
918 if audio_formats:
919 return audio_formats[0]
920 elif format_spec == 'bestvideo':
921 video_formats = [
922 f for f in available_formats
923 if f.get('acodec') == 'none']
924 if video_formats:
925 return video_formats[-1]
926 elif format_spec == 'worstvideo':
927 video_formats = [
928 f for f in available_formats
929 if f.get('acodec') == 'none']
930 if video_formats:
931 return video_formats[0]
932 else:
933 extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
934 if format_spec in extensions:
935 filter_f = lambda f: f['ext'] == format_spec
936 else:
937 filter_f = lambda f: f['format_id'] == format_spec
938 matches = list(filter(filter_f, available_formats))
939 if matches:
940 return matches[-1]
941 return None
942
943 def _calc_headers(self, info_dict):
944 res = std_headers.copy()
945
946 add_headers = info_dict.get('http_headers')
947 if add_headers:
948 res.update(add_headers)
949
950 cookies = self._calc_cookies(info_dict)
951 if cookies:
952 res['Cookie'] = cookies
953
954 return res
955
956 def _calc_cookies(self, info_dict):
957 class _PseudoRequest(object):
958 def __init__(self, url):
959 self.url = url
960 self.headers = {}
961 self.unverifiable = False
962
963 def add_unredirected_header(self, k, v):
964 self.headers[k] = v
965
966 def get_full_url(self):
967 return self.url
968
969 def is_unverifiable(self):
970 return self.unverifiable
971
972 def has_header(self, h):
973 return h in self.headers
974
975 def get_header(self, h, default=None):
976 return self.headers.get(h, default)
977
978 pr = _PseudoRequest(info_dict['url'])
979 self.cookiejar.add_cookie_header(pr)
980 return pr.headers.get('Cookie')
981
    def process_video_result(self, info_dict, download=True):
        """Normalize a single video result, run format selection and
        (when download=True) hand the selected format(s) to process_info.

        Fills in derived fields (thumbnails, display_id, upload_date,
        per-format ids/extensions/headers), honours the 'format',
        'format_limit', 'listformats' and 'list_thumbnails' options, and
        returns the (mutated) info_dict updated with the last selected
        format.  Raises ExtractorError when mandatory fields are missing
        or no requested format is available.
        """
        assert info_dict.get('_type', 'video') == 'video'

        # 'id' and 'title' are the only fields every extractor must supply
        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result')

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        # Normalize thumbnails: promote a single 'thumbnail' URL into the
        # 'thumbnails' list, sort them (worst first), and give every entry
        # an id plus a WxH resolution string when dimensions are known.
        thumbnails = info_dict.get('thumbnails')
        if thumbnails is None:
            thumbnail = info_dict.get('thumbnail')
            if thumbnail:
                info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
        if thumbnails:
            thumbnails.sort(key=lambda t: (
                t.get('preference'), t.get('width'), t.get('height'),
                t.get('id'), t.get('url')))
            for i, t in enumerate(thumbnails):
                if 'width' in t and 'height' in t:
                    t['resolution'] = '%dx%d' % (t['width'], t['height'])
                if t.get('id') is None:
                    t['id'] = '%d' % i

        if thumbnails and 'thumbnail' not in info_dict:
            # After the sort above, the last entry is the preferred one
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if 'display_id' not in info_dict and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        # Derive 'upload_date' from 'timestamp' when only the latter is set
        if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
            # Working around negative timestamps in Windows
            # (see http://bugs.python.org/issue1646728)
            if info_dict['timestamp'] < 0 and os.name == 'nt':
                info_dict['timestamp'] = 0
            upload_date = datetime.datetime.utcfromtimestamp(
                info_dict['timestamp'])
            info_dict['upload_date'] = upload_date.strftime('%Y%m%d')

        # These extractors handle format selection themselves
        if info_dict['extractor'] in ['Youku']:
            if download:
                self.process_info(info_dict)
            return info_dict

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        if not formats:
            raise ExtractorError('No video formats found!')

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            if 'url' not in format:
                raise ExtractorError('Missing "url" key in result (index %d)' % i)

            if format.get('format_id') is None:
                format['format_id'] = compat_str(i)
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
                )
            # Automatically determine file extension if missing
            if 'ext' not in format:
                format['ext'] = determine_ext(format['url']).lower()
            # Add HTTP headers, so that external programs can use them from the
            # json output
            full_format_info = info_dict.copy()
            full_format_info.update(format)
            format['http_headers'] = self._calc_headers(full_format_info)

        # --format-limit: keep formats up to (and including) the given one
        format_limit = self.params.get('format_limit', None)
        if format_limit:
            formats = list(takewhile_inclusive(
                lambda f: f['format_id'] != format_limit, formats
            ))

        # TODO Central sorting goes here

        if formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats
        if self.params.get('listformats'):
            self.list_formats(info_dict)
            return
        if self.params.get('list_thumbnails'):
            self.list_thumbnails(info_dict)
            return

        req_format = self.params.get('format')
        if req_format is None:
            req_format = 'best'
        formats_to_download = []
        # The -1 is for supporting YoutubeIE
        if req_format in ('-1', 'all'):
            formats_to_download = formats
        else:
            for rfstr in req_format.split(','):
                # We can accept formats requested in the format: 34/5/best, we pick
                # the first that is available, starting from left
                req_formats = rfstr.split('/')
                for rf in req_formats:
                    if re.match(r'.+?\+.+?', rf) is not None:
                        # Two formats have been requested like '137+139'
                        format_1, format_2 = rf.split('+')
                        formats_info = (self.select_format(format_1, formats),
                                        self.select_format(format_2, formats))
                        if all(formats_info):
                            # The first format must contain the video and the
                            # second the audio
                            if formats_info[0].get('vcodec') == 'none':
                                self.report_error('The first format must '
                                                  'contain the video, try using '
                                                  '"-f %s+%s"' % (format_2, format_1))
                                return
                            # The merged container defaults to the video
                            # stream's extension unless overridden
                            output_ext = (
                                formats_info[0]['ext']
                                if self.params.get('merge_output_format') is None
                                else self.params['merge_output_format'])
                            # Synthesize a combined format entry: video
                            # properties from the first, audio from the second
                            selected_format = {
                                'requested_formats': formats_info,
                                'format': '%s+%s' % (formats_info[0].get('format'),
                                                     formats_info[1].get('format')),
                                'format_id': '%s+%s' % (formats_info[0].get('format_id'),
                                                        formats_info[1].get('format_id')),
                                'width': formats_info[0].get('width'),
                                'height': formats_info[0].get('height'),
                                'resolution': formats_info[0].get('resolution'),
                                'fps': formats_info[0].get('fps'),
                                'vcodec': formats_info[0].get('vcodec'),
                                'vbr': formats_info[0].get('vbr'),
                                'stretched_ratio': formats_info[0].get('stretched_ratio'),
                                'acodec': formats_info[1].get('acodec'),
                                'abr': formats_info[1].get('abr'),
                                'ext': output_ext,
                            }
                        else:
                            selected_format = None
                    else:
                        selected_format = self.select_format(rf, formats)
                    if selected_format is not None:
                        formats_to_download.append(selected_format)
                        break
        if not formats_to_download:
            raise ExtractorError('requested format not available',
                                 expected=True)

        if download:
            if len(formats_to_download) > 1:
                self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
            for format in formats_to_download:
                new_info = dict(info_dict)
                new_info.update(format)
                self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        info_dict.update(formats_to_download[-1])
        return info_dict
1152
    def process_info(self, info_dict):
        """Process a single resolved IE result.

        Handles forced printing, simulate mode, writing sidecar files
        (description, annotations, subtitles, info JSON, thumbnails),
        the actual download (including multi-format merge), post-download
        fixups and recording in the download archive.
        """

        assert info_dict.get('_type', 'video') == 'video'

        max_downloads = self.params.get('max_downloads')
        if max_downloads is not None:
            if self._num_downloads >= int(max_downloads):
                raise MaxDownloadsReached()

        # Keep the full title in 'fulltitle'; the working title is capped
        # at 200 characters for filename friendliness
        info_dict['fulltitle'] = info_dict['title']
        if len(info_dict['title']) > 200:
            info_dict['title'] = info_dict['title'][:197] + '...'

        # Keep for backwards compatibility
        info_dict['stitle'] = info_dict['title']

        if 'format' not in info_dict:
            info_dict['format'] = info_dict['ext']

        # Apply match filters (date range, title match, archive, ...)
        reason = self._match_entry(info_dict, incomplete=False)
        if reason is not None:
            self.to_screen('[download] ' + reason)
            return

        self._num_downloads += 1

        info_dict['_filename'] = filename = self.prepare_filename(info_dict)

        # Forced printings
        if self.params.get('forcetitle', False):
            self.to_stdout(info_dict['fulltitle'])
        if self.params.get('forceid', False):
            self.to_stdout(info_dict['id'])
        if self.params.get('forceurl', False):
            if info_dict.get('requested_formats') is not None:
                for f in info_dict['requested_formats']:
                    self.to_stdout(f['url'] + f.get('play_path', ''))
            else:
                # For RTMP URLs, also include the playpath
                self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
        if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
            self.to_stdout(info_dict['thumbnail'])
        if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
            self.to_stdout(info_dict['description'])
        if self.params.get('forcefilename', False) and filename is not None:
            self.to_stdout(filename)
        if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
            self.to_stdout(formatSeconds(info_dict['duration']))
        if self.params.get('forceformat', False):
            self.to_stdout(info_dict['format'])
        if self.params.get('forcejson', False):
            self.to_stdout(json.dumps(info_dict))

        # Do nothing else if in simulate mode
        if self.params.get('simulate', False):
            return

        if filename is None:
            return

        # Make sure the destination directory exists
        try:
            dn = os.path.dirname(encodeFilename(filename))
            if dn and not os.path.exists(dn):
                os.makedirs(dn)
        except (OSError, IOError) as err:
            self.report_error('unable to create directory ' + compat_str(err))
            return

        # Sidecar file: plain-text description
        if self.params.get('writedescription', False):
            descfn = filename + '.description'
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
                self.to_screen('[info] Video description is already present')
            elif info_dict.get('description') is None:
                self.report_warning('There\'s no description to write.')
            else:
                try:
                    self.to_screen('[info] Writing video description to: ' + descfn)
                    with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                        descfile.write(info_dict['description'])
                except (OSError, IOError):
                    self.report_error('Cannot write description file ' + descfn)
                    return

        # Sidecar file: annotations XML
        if self.params.get('writeannotations', False):
            annofn = filename + '.annotations.xml'
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
                self.to_screen('[info] Video annotations are already present')
            else:
                try:
                    self.to_screen('[info] Writing video annotations to: ' + annofn)
                    with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                        annofile.write(info_dict['annotations'])
                except (KeyError, TypeError):
                    # 'annotations' missing or not a string
                    self.report_warning('There are no annotations to write.')
                except (OSError, IOError):
                    self.report_error('Cannot write annotations file: ' + annofn)
                    return

        subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                       self.params.get('writeautomaticsub')])

        # Sidecar files: one subtitle file per requested language
        if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
            # subtitles download errors are already managed as troubles in relevant IE
            # that way it will silently go on when used with unsupporting IE
            subtitles = info_dict['subtitles']
            sub_format = self.params.get('subtitlesformat', 'srt')
            for sub_lang in subtitles.keys():
                sub = subtitles[sub_lang]
                if sub is None:
                    continue
                try:
                    sub_filename = subtitles_filename(filename, sub_lang, sub_format)
                    if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
                        self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
                    else:
                        self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                        with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                            subfile.write(sub)
                except (OSError, IOError):
                    self.report_error('Cannot write subtitles file ' + sub_filename)
                    return

        # Sidecar file: the full info dict as JSON
        if self.params.get('writeinfojson', False):
            infofn = os.path.splitext(filename)[0] + '.info.json'
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
                self.to_screen('[info] Video description metadata is already present')
            else:
                self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
                try:
                    write_json_file(info_dict, infofn)
                except (OSError, IOError):
                    self.report_error('Cannot write metadata to JSON file ' + infofn)
                    return

        self._write_thumbnails(info_dict, filename)

        if not self.params.get('skip_download', False):
            try:
                def dl(name, info):
                    # Run the suitable FileDownloader for this format
                    fd = get_suitable_downloader(info, self.params)(self, self.params)
                    for ph in self._progress_hooks:
                        fd.add_progress_hook(ph)
                    if self.params.get('verbose'):
                        self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
                    return fd.download(name, info)

                if info_dict.get('requested_formats') is not None:
                    # Multiple formats (e.g. '137+139'): download each to a
                    # 'f<id>'-suffixed file, then merge with ffmpeg/avconv
                    downloaded = []
                    success = True
                    merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
                    if not merger.available:
                        postprocessors = []
                        self.report_warning('You have requested multiple '
                                            'formats but ffmpeg or avconv are not installed.'
                                            ' The formats won\'t be merged')
                    else:
                        postprocessors = [merger]
                    for f in info_dict['requested_formats']:
                        new_info = dict(info_dict)
                        new_info.update(f)
                        fname = self.prepare_filename(new_info)
                        fname = prepend_extension(fname, 'f%s' % f['format_id'])
                        downloaded.append(fname)
                        partial_success = dl(fname, new_info)
                        success = success and partial_success
                    info_dict['__postprocessors'] = postprocessors
                    info_dict['__files_to_merge'] = downloaded
                else:
                    # Just a single file
                    success = dl(filename, info_dict)
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_error('unable to download video data: %s' % str(err))
                return
            except (OSError, IOError) as err:
                raise UnavailableVideoError(err)
            except (ContentTooShortError, ) as err:
                self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                return

            if success:
                # Fixup content
                fixup_policy = self.params.get('fixup')
                if fixup_policy is None:
                    fixup_policy = 'detect_or_warn'

                # Non-square pixels: queue an ffmpeg fixup or warn
                stretched_ratio = info_dict.get('stretched_ratio')
                if stretched_ratio is not None and stretched_ratio != 1:
                    if fixup_policy == 'warn':
                        self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
                            info_dict['id'], stretched_ratio))
                    elif fixup_policy == 'detect_or_warn':
                        stretched_pp = FFmpegFixupStretchedPP(self)
                        if stretched_pp.available:
                            info_dict.setdefault('__postprocessors', [])
                            info_dict['__postprocessors'].append(stretched_pp)
                        else:
                            self.report_warning(
                                '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
                                    info_dict['id'], stretched_ratio))
                    else:
                        assert fixup_policy in ('ignore', 'never')

                # DASH m4a container: queue an ffmpeg fixup or warn
                if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
                    if fixup_policy == 'warn':
                        self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
                            info_dict['id']))
                    elif fixup_policy == 'detect_or_warn':
                        fixup_pp = FFmpegFixupM4aPP(self)
                        if fixup_pp.available:
                            info_dict.setdefault('__postprocessors', [])
                            info_dict['__postprocessors'].append(fixup_pp)
                        else:
                            self.report_warning(
                                '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
                                    info_dict['id']))
                    else:
                        assert fixup_policy in ('ignore', 'never')

                try:
                    self.post_process(filename, info_dict)
                except (PostProcessingError) as err:
                    self.report_error('postprocessing: %s' % str(err))
                    return
        self.record_download_archive(info_dict)
1378
1379 def download(self, url_list):
1380 """Download a given list of URLs."""
1381 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1382 if (len(url_list) > 1 and
1383 '%' not in outtmpl
1384 and self.params.get('max_downloads') != 1):
1385 raise SameFileError(outtmpl)
1386
1387 for url in url_list:
1388 try:
1389 # It also downloads the videos
1390 res = self.extract_info(url)
1391 except UnavailableVideoError:
1392 self.report_error('unable to download video')
1393 except MaxDownloadsReached:
1394 self.to_screen('[info] Maximum number of downloaded files reached.')
1395 raise
1396 else:
1397 if self.params.get('dump_single_json', False):
1398 self.to_stdout(json.dumps(res))
1399
1400 return self._download_retcode
1401
1402 def download_with_info_file(self, info_filename):
1403 with io.open(info_filename, 'r', encoding='utf-8') as f:
1404 info = json.load(f)
1405 try:
1406 self.process_ie_result(info, download=True)
1407 except DownloadError:
1408 webpage_url = info.get('webpage_url')
1409 if webpage_url is not None:
1410 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1411 return self.download([webpage_url])
1412 else:
1413 raise
1414 return self._download_retcode
1415
    def post_process(self, filename, ie_info):
        """Run all the postprocessors on the given file."""
        info = dict(ie_info)
        info['filepath'] = filename
        # Per-download postprocessors (merger, fixups) run before the
        # globally configured ones.
        pps_chain = []
        if ie_info.get('__postprocessors') is not None:
            pps_chain.extend(ie_info['__postprocessors'])
        pps_chain.extend(self._pps)
        for pp in pps_chain:
            # keep_video records the postprocessor's verdict on whether
            # the original file must be kept.
            # NOTE(review): it is reset to None for every postprocessor,
            # so the elif below always sees keep_video is None -- the
            # net effect is keep_video = keep_video_wish per iteration.
            keep_video = None
            old_filename = info['filepath']
            try:
                # pp.run returns (keep_video_wish, possibly-updated info)
                keep_video_wish, info = pp.run(info)
                if keep_video_wish is not None:
                    if keep_video_wish:
                        keep_video = keep_video_wish
                    elif keep_video is None:
                        # No clear decision yet, let IE decide
                        keep_video = keep_video_wish
            except PostProcessingError as e:
                self.report_error(e.msg)
            # Delete the input file when the PP produced a new one and
            # --keep-video (-k) was not given
            if keep_video is False and not self.params.get('keepvideo', False):
                try:
                    self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
                    os.remove(encodeFilename(old_filename))
                except (IOError, OSError):
                    self.report_warning('Unable to remove downloaded video file')
1443
1444 def _make_archive_id(self, info_dict):
1445 # Future-proof against any change in case
1446 # and backwards compatibility with prior versions
1447 extractor = info_dict.get('extractor_key')
1448 if extractor is None:
1449 if 'id' in info_dict:
1450 extractor = info_dict.get('ie_key') # key in a playlist
1451 if extractor is None:
1452 return None # Incomplete video information
1453 return extractor.lower() + ' ' + info_dict['id']
1454
1455 def in_download_archive(self, info_dict):
1456 fn = self.params.get('download_archive')
1457 if fn is None:
1458 return False
1459
1460 vid_id = self._make_archive_id(info_dict)
1461 if vid_id is None:
1462 return False # Incomplete video information
1463
1464 try:
1465 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1466 for line in archive_file:
1467 if line.strip() == vid_id:
1468 return True
1469 except IOError as ioe:
1470 if ioe.errno != errno.ENOENT:
1471 raise
1472 return False
1473
1474 def record_download_archive(self, info_dict):
1475 fn = self.params.get('download_archive')
1476 if fn is None:
1477 return
1478 vid_id = self._make_archive_id(info_dict)
1479 assert vid_id
1480 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1481 archive_file.write(vid_id + '\n')
1482
1483 @staticmethod
1484 def format_resolution(format, default='unknown'):
1485 if format.get('vcodec') == 'none':
1486 return 'audio only'
1487 if format.get('resolution') is not None:
1488 return format['resolution']
1489 if format.get('height') is not None:
1490 if format.get('width') is not None:
1491 res = '%sx%s' % (format['width'], format['height'])
1492 else:
1493 res = '%sp' % format['height']
1494 elif format.get('width') is not None:
1495 res = '?x%d' % format['width']
1496 else:
1497 res = default
1498 return res
1499
1500 def _format_note(self, fdict):
1501 res = ''
1502 if fdict.get('ext') in ['f4f', 'f4m']:
1503 res += '(unsupported) '
1504 if fdict.get('format_note') is not None:
1505 res += fdict['format_note'] + ' '
1506 if fdict.get('tbr') is not None:
1507 res += '%4dk ' % fdict['tbr']
1508 if fdict.get('container') is not None:
1509 if res:
1510 res += ', '
1511 res += '%s container' % fdict['container']
1512 if (fdict.get('vcodec') is not None and
1513 fdict.get('vcodec') != 'none'):
1514 if res:
1515 res += ', '
1516 res += fdict['vcodec']
1517 if fdict.get('vbr') is not None:
1518 res += '@'
1519 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1520 res += 'video@'
1521 if fdict.get('vbr') is not None:
1522 res += '%4dk' % fdict['vbr']
1523 if fdict.get('fps') is not None:
1524 res += ', %sfps' % fdict['fps']
1525 if fdict.get('acodec') is not None:
1526 if res:
1527 res += ', '
1528 if fdict['acodec'] == 'none':
1529 res += 'video only'
1530 else:
1531 res += '%-5s' % fdict['acodec']
1532 elif fdict.get('abr') is not None:
1533 if res:
1534 res += ', '
1535 res += 'audio'
1536 if fdict.get('abr') is not None:
1537 res += '@%3dk' % fdict['abr']
1538 if fdict.get('asr') is not None:
1539 res += ' (%5dHz)' % fdict['asr']
1540 if fdict.get('filesize') is not None:
1541 if res:
1542 res += ', '
1543 res += format_bytes(fdict['filesize'])
1544 elif fdict.get('filesize_approx') is not None:
1545 if res:
1546 res += ', '
1547 res += '~' + format_bytes(fdict['filesize_approx'])
1548 return res
1549
1550 def list_formats(self, info_dict):
1551 def line(format, idlen=20):
1552 return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
1553 format['format_id'],
1554 format['ext'],
1555 self.format_resolution(format),
1556 self._format_note(format),
1557 ))
1558
1559 formats = info_dict.get('formats', [info_dict])
1560 idlen = max(len('format code'),
1561 max(len(f['format_id']) for f in formats))
1562 formats_s = [
1563 line(f, idlen) for f in formats
1564 if f.get('preference') is None or f['preference'] >= -1000]
1565 if len(formats) > 1:
1566 formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
1567
1568 header_line = line({
1569 'format_id': 'format code', 'ext': 'extension',
1570 'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
1571 self.to_screen(
1572 '[info] Available formats for %s:\n%s\n%s' %
1573 (info_dict['id'], header_line, '\n'.join(formats_s)))
1574
1575 def list_thumbnails(self, info_dict):
1576 thumbnails = info_dict.get('thumbnails')
1577 if not thumbnails:
1578 tn_url = info_dict.get('thumbnail')
1579 if tn_url:
1580 thumbnails = [{'id': '0', 'url': tn_url}]
1581 else:
1582 self.to_screen(
1583 '[info] No thumbnails present for %s' % info_dict['id'])
1584 return
1585
1586 self.to_screen(
1587 '[info] Thumbnails for %s:' % info_dict['id'])
1588 self.to_screen(render_table(
1589 ['ID', 'width', 'height', 'URL'],
1590 [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
1591
1592 def urlopen(self, req):
1593 """ Start an HTTP download """
1594
1595 # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1596 # always respected by websites, some tend to give out URLs with non percent-encoded
1597 # non-ASCII characters (see telemb.py, ard.py [#3412])
1598 # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1599 # To work around aforementioned issue we will replace request's original URL with
1600 # percent-encoded one
1601 req_is_string = isinstance(req, compat_basestring)
1602 url = req if req_is_string else req.get_full_url()
1603 url_escaped = escape_url(url)
1604
1605 # Substitute URL if any change after escaping
1606 if url != url_escaped:
1607 if req_is_string:
1608 req = url_escaped
1609 else:
1610 req = compat_urllib_request.Request(
1611 url_escaped, data=req.data, headers=req.headers,
1612 origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
1613
1614 return self._opener.open(req, timeout=self._socket_timeout)
1615
1616 def print_debug_header(self):
1617 if not self.params.get('verbose'):
1618 return
1619
1620 if type('') is not compat_str:
1621 # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1622 self.report_warning(
1623 'Your Python is broken! Update to a newer and supported version')
1624
1625 stdout_encoding = getattr(
1626 sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
1627 encoding_str = (
1628 '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1629 locale.getpreferredencoding(),
1630 sys.getfilesystemencoding(),
1631 stdout_encoding,
1632 self.get_encoding()))
1633 write_string(encoding_str, encoding=None)
1634
1635 self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
1636 try:
1637 sp = subprocess.Popen(
1638 ['git', 'rev-parse', '--short', 'HEAD'],
1639 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1640 cwd=os.path.dirname(os.path.abspath(__file__)))
1641 out, err = sp.communicate()
1642 out = out.decode().strip()
1643 if re.match('[0-9a-f]+', out):
1644 self._write_string('[debug] Git HEAD: ' + out + '\n')
1645 except:
1646 try:
1647 sys.exc_clear()
1648 except:
1649 pass
1650 self._write_string('[debug] Python version %s - %s\n' % (
1651 platform.python_version(), platform_name()))
1652
1653 exe_versions = FFmpegPostProcessor.get_versions(self)
1654 exe_versions['rtmpdump'] = rtmpdump_version()
1655 exe_str = ', '.join(
1656 '%s %s' % (exe, v)
1657 for exe, v in sorted(exe_versions.items())
1658 if v
1659 )
1660 if not exe_str:
1661 exe_str = 'none'
1662 self._write_string('[debug] exe versions: %s\n' % exe_str)
1663
1664 proxy_map = {}
1665 for handler in self._opener.handlers:
1666 if hasattr(handler, 'proxies'):
1667 proxy_map.update(handler.proxies)
1668 self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1669
1670 if self.params.get('call_home', False):
1671 ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
1672 self._write_string('[debug] Public IP address: %s\n' % ipaddr)
1673 latest_version = self.urlopen(
1674 'https://yt-dl.org/latest/version').read().decode('utf-8')
1675 if version_tuple(latest_version) > version_tuple(__version__):
1676 self.report_warning(
1677 'You are using an outdated version (newest version: %s)! '
1678 'See https://yt-dl.org/update if you need help updating.' %
1679 latest_version)
1680
1681 def _setup_opener(self):
1682 timeout_val = self.params.get('socket_timeout')
1683 self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1684
1685 opts_cookiefile = self.params.get('cookiefile')
1686 opts_proxy = self.params.get('proxy')
1687
1688 if opts_cookiefile is None:
1689 self.cookiejar = compat_cookiejar.CookieJar()
1690 else:
1691 self.cookiejar = compat_cookiejar.MozillaCookieJar(
1692 opts_cookiefile)
1693 if os.access(opts_cookiefile, os.R_OK):
1694 self.cookiejar.load()
1695
1696 cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1697 self.cookiejar)
1698 if opts_proxy is not None:
1699 if opts_proxy == '':
1700 proxies = {}
1701 else:
1702 proxies = {'http': opts_proxy, 'https': opts_proxy}
1703 else:
1704 proxies = compat_urllib_request.getproxies()
1705 # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1706 if 'http' in proxies and 'https' not in proxies:
1707 proxies['https'] = proxies['http']
1708 proxy_handler = compat_urllib_request.ProxyHandler(proxies)
1709
1710 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1711 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
1712 ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
1713 opener = compat_urllib_request.build_opener(
1714 https_handler, proxy_handler, cookie_processor, ydlh)
1715 # Delete the default user-agent header, which would otherwise apply in
1716 # cases where our custom HTTP handler doesn't come into play
1717 # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1718 opener.addheaders = []
1719 self._opener = opener
1720
1721 def encode(self, s):
1722 if isinstance(s, bytes):
1723 return s # Already encoded
1724
1725 try:
1726 return s.encode(self.get_encoding())
1727 except UnicodeEncodeError as err:
1728 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1729 raise
1730
1731 def get_encoding(self):
1732 encoding = self.params.get('encoding')
1733 if encoding is None:
1734 encoding = preferredencoding()
1735 return encoding
1736
    def _write_thumbnails(self, info_dict, filename):
        """Download thumbnail files next to the video file, honouring the
        'writethumbnail' (best one only) and 'write_all_thumbnails'
        options; a no-op when neither is set."""
        if self.params.get('writethumbnail', False):
            thumbnails = info_dict.get('thumbnails')
            if thumbnails:
                # Thumbnails are sorted worst-first; keep only the best
                thumbnails = [thumbnails[-1]]
        elif self.params.get('write_all_thumbnails', False):
            thumbnails = info_dict.get('thumbnails')
        else:
            return

        if not thumbnails:
            # No thumbnails present, so return immediately
            return

        for t in thumbnails:
            thumb_ext = determine_ext(t['url'], 'jpg')
            # Disambiguate filenames/messages only when several
            # thumbnails are being written
            suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
            thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
            thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext

            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
                self.to_screen('[%s] %s: Thumbnail %sis already present' %
                               (info_dict['extractor'], info_dict['id'], thumb_display_id))
            else:
                self.to_screen('[%s] %s: Downloading thumbnail %s...' %
                               (info_dict['extractor'], info_dict['id'], thumb_display_id))
                try:
                    uf = self.urlopen(t['url'])
                    with open(thumb_filename, 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                    self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
                                   (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                    # Thumbnail failures are not fatal to the download
                    self.report_warning('Unable to download thumbnail "%s": %s' %
                                        (t['url'], compat_str(err)))