]> jfr.im git - yt-dlp.git/blame - youtube_dl/YoutubeDL.py
Merge remote-tracking branch 'petrkutalek/dvtv'
[yt-dlp.git] / youtube_dl / YoutubeDL.py
CommitLineData
8222d8de
JMF
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3
6febd1c1 4from __future__ import absolute_import, unicode_literals
8222d8de 5
26e63931 6import collections
9d2ecdbc 7import datetime
c1c9a79c 8import errno
8222d8de 9import io
b82f815f 10import itertools
8694c600 11import json
62fec3b2 12import locale
8222d8de 13import os
dca08720 14import platform
8222d8de
JMF
15import re
16import shutil
dca08720 17import subprocess
8222d8de
JMF
18import socket
19import sys
20import time
21import traceback
22
1e5b9a95
PH
23if os.name == 'nt':
24 import ctypes
25
8c25f81b 26from .compat import (
dca08720 27 compat_cookiejar,
4644ac55 28 compat_expanduser,
ce02ed60 29 compat_http_client,
4f026faf 30 compat_kwargs,
ce02ed60
PH
31 compat_str,
32 compat_urllib_error,
33 compat_urllib_request,
8c25f81b
PH
34)
35from .utils import (
d05cfe06 36 escape_url,
ce02ed60
PH
37 ContentTooShortError,
38 date_from_str,
39 DateRange,
acd69589 40 DEFAULT_OUTTMPL,
ce02ed60
PH
41 determine_ext,
42 DownloadError,
43 encodeFilename,
44 ExtractorError,
02dbf93f 45 format_bytes,
525ef922 46 formatSeconds,
1c088fa8 47 get_term_width,
ce02ed60 48 locked_file,
dca08720 49 make_HTTPS_handler,
ce02ed60 50 MaxDownloadsReached,
b7ab0590 51 PagedList,
ce02ed60 52 PostProcessingError,
dca08720 53 platform_name,
ce02ed60
PH
54 preferredencoding,
55 SameFileError,
56 sanitize_filename,
57 subtitles_filename,
58 takewhile_inclusive,
59 UnavailableVideoError,
29eb5174 60 url_basename,
ce02ed60
PH
61 write_json_file,
62 write_string,
dca08720 63 YoutubeDLHandler,
6350728b 64 prepend_extension,
7d4111ed 65 args_to_str,
ce02ed60 66)
a0e07d31 67from .cache import Cache
023fa8c4 68from .extractor import get_info_extractor, gen_extractors
3bc2ddcc 69from .downloader import get_suitable_downloader
4c83c967 70from .downloader.rtmp import rtmpdump_version
4f026faf
PH
71from .postprocessor import (
72 FFmpegMergerPP,
73 FFmpegPostProcessor,
74 get_postprocessor,
75)
dca08720 76from .version import __version__
8222d8de
JMF
77
78
79class YoutubeDL(object):
80 """YoutubeDL class.
81
82 YoutubeDL objects are the ones responsible for downloading the
83 actual video file and writing it to disk if the user has requested
84 it, among some other tasks. In most cases there should be one per
85 program. As, given a video URL, the downloader doesn't know how to
86 extract all the needed information, task that InfoExtractors do, it
87 has to pass the URL to one of them.
88
89 For this, YoutubeDL objects have a method that allows
90 InfoExtractors to be registered in a given order. When it is passed
91 a URL, the YoutubeDL object hands it to the first InfoExtractor it
92 finds that reports being able to handle it. The InfoExtractor extracts
93 all the information about the video or videos the URL refers to, and
94 YoutubeDL processes the extracted information, possibly using a File
95 Downloader to download the video.
96
97 YoutubeDL objects accept a lot of parameters. In order not to saturate
98 the object constructor with arguments, it receives a dictionary of
99 options instead. These options are available through the params
100 attribute for the InfoExtractors to use. The YoutubeDL also
101 registers itself as the downloader in charge for the InfoExtractors
102 that are added to it, so this is a "mutual registration".
103
104 Available options:
105
106 username: Username for authentication purposes.
107 password: Password for authentication purposes.
c6c19746 108 videopassword: Password for accessing a video.
8222d8de
JMF
109 usenetrc: Use netrc for authentication instead.
110 verbose: Print additional info to stdout.
111 quiet: Do not print messages to stdout.
ad8915b7 112 no_warnings: Do not print out anything for warnings.
8222d8de
JMF
113 forceurl: Force printing final URL.
114 forcetitle: Force printing title.
115 forceid: Force printing ID.
116 forcethumbnail: Force printing thumbnail URL.
117 forcedescription: Force printing description.
118 forcefilename: Force printing final filename.
525ef922 119 forceduration: Force printing duration.
8694c600 120 forcejson: Force printing info_dict as JSON.
63e0be34
PH
121 dump_single_json: Force printing the info_dict of the whole playlist
122 (or video) as a single JSON line.
8222d8de 123 simulate: Do not download the video files.
d8600787 124 format: Video format code. See options.py for more information.
8222d8de
JMF
125 format_limit: Highest quality format to try.
126 outtmpl: Template for output names.
127 restrictfilenames: Do not allow "&" and spaces in file names
128 ignoreerrors: Do not stop on download errors.
129 nooverwrites: Prevent overwriting files.
130 playliststart: Playlist item to start at.
131 playlistend: Playlist item to end at.
ff815fe6 132 playlistreverse: Download playlist items in reverse order.
8222d8de
JMF
133 matchtitle: Download only matching titles.
134 rejecttitle: Reject downloads for matching titles.
8bf9319e 135 logger: Log messages to a logging.Logger instance.
8222d8de
JMF
136 logtostderr: Log messages to stderr instead of stdout.
137 writedescription: Write the video description to a .description file
138 writeinfojson: Write the video metadata to a .info.json file
1fb07d10 139 writeannotations: Write the video annotations to a .annotations.xml file
8222d8de
JMF
140 writethumbnail: Write the thumbnail image to a file
141 writesubtitles: Write the video subtitles to a file
b004821f 142 writeautomaticsub: Write the automatic subtitles to a file
8222d8de 143 allsubtitles: Downloads all the subtitles of the video
0b7f3118 144 (requires writesubtitles or writeautomaticsub)
8222d8de 145 listsubtitles: Lists all available subtitles for the video
b98a6b2f 146 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
aa6a10c4 147 subtitleslangs: List of languages of the subtitles to download
8222d8de
JMF
148 keepvideo: Keep the video file after post-processing
149 daterange: A DateRange object, download only if the upload_date is in the range.
150 skip_download: Skip the actual download of the video file
c35f9e72 151 cachedir: Location of the cache files in the filesystem.
a0e07d31 152 False to disable filesystem cache.
47192f92 153 noplaylist: Download single video instead of a playlist if in doubt.
8dbe9899
PH
154 age_limit: An integer representing the user's age in years.
155 Unsuitable videos for the given age are skipped.
5fe18bdb
PH
156 min_views: An integer representing the minimum view count the video
157 must have in order to not be skipped.
158 Videos without view count information are always
159 downloaded. None for no limit.
160 max_views: An integer representing the maximum view count.
161 Videos that are more popular than that are not
162 downloaded.
163 Videos without view count information are always
164 downloaded. None for no limit.
165 download_archive: File name of a file where all downloads are recorded.
c1c9a79c
PH
166 Videos already present in the file are not downloaded
167 again.
dca08720 168 cookiefile: File name where cookies should be read from and dumped to.
a1ee09e8 169 nocheckcertificate:Do not verify SSL certificates
7e8c0af0
PH
170 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
171 At the moment, this is only supported by YouTube.
a1ee09e8 172 proxy: URL of the proxy server to use
e344693b 173 socket_timeout: Time to wait for unresponsive hosts, in seconds
0783b09b
PH
174 bidi_workaround: Work around buggy terminals without bidirectional text
175 support, using fribidi
a0ddb8a2 176 debug_printtraffic:Print out sent and received HTTP traffic
7b0817e8 177 include_ads: Download ads as well
04b4d394
PH
178 default_search: Prepend this string if an input url is not valid.
179 'auto' for elaborate guessing
62fec3b2 180 encoding: Use this encoding instead of the system-specified.
e8ee972c 181 extract_flat: Do not resolve URLs, return the immediate result.
057a5206
PH
182 Pass in 'in_playlist' to only show this behavior for
183 playlist items.
4f026faf 184 postprocessors: A list of dictionaries, each with an entry
71b640cc
PH
185 * key: The name of the postprocessor. See
186 youtube_dl/postprocessor/__init__.py for a list.
4f026faf
PH
187 as well as any further keyword arguments for the
188 postprocessor.
71b640cc
PH
189 progress_hooks: A list of functions that get called on download
190 progress, with a dictionary with the entries
191 * filename: The final filename
192 * status: One of "downloading" and "finished"
193
194 The dict may also have some of the following entries:
195
196 * downloaded_bytes: Bytes on disk
197 * total_bytes: Size of the whole file, None if unknown
198 * tmpfilename: The filename we're currently writing to
199 * eta: The estimated time in seconds, None if unknown
200 * speed: The download speed in bytes/second, None if
201 unknown
202
203 Progress hooks are guaranteed to be called at least once
204 (with status "finished") if the download is successful.
205
fe7e0c98 206
8222d8de
JMF
207 The following parameters are not used by YoutubeDL itself, they are used by
208 the FileDownloader:
209 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
210 noresizebuffer, retries, continuedl, noprogress, consoletitle
76b1bd67
JMF
211
212 The following options are used by the post processors:
213 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
214 otherwise prefer avconv.
8d31fa3c 215 exec_cmd: Arbitrary command to run after downloading
8222d8de
JMF
216 """
217
218 params = None
219 _ies = []
220 _pps = []
221 _download_retcode = None
222 _num_downloads = None
223 _screen_file = None
224
def __init__(self, params=None, auto_init=True):
    """Create a YoutubeDL object with the given options.

    params is the options dictionary described in the class docstring
    (None is treated as an empty dictionary).  When auto_init is true,
    print_debug_header() and add_default_info_extractors() are called
    before the configured postprocessors and progress hooks are added.
    """
    options = {} if params is None else params

    self._ies = []
    self._ies_instances = {}
    self._pps = []
    self._progress_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    # The boolean option is used as a list index: False -> stdout, True -> stderr
    self._screen_file = [sys.stdout, sys.stderr][options.get('logtostderr', False)]
    self._err_file = sys.stderr
    self.params = options
    self.cache = Cache(self)

    if options.get('bidi_workaround', False):
        try:
            import pty
            master_fd, slave_fd = pty.openpty()
            term_width = get_term_width()
            width_args = [] if term_width is None else ['-w', str(term_width)]
            # Built with dict() so that the keyword names stay native strings
            popen_kwargs = dict(
                stdin=subprocess.PIPE,
                stdout=slave_fd,
                stderr=self._err_file)
            try:
                self._output_process = subprocess.Popen(
                    ['bidiv'] + width_args, **popen_kwargs)
            except OSError:
                # bidiv could not be started, fall back to fribidi
                self._output_process = subprocess.Popen(
                    ['fribidi', '-c', 'UTF-8'] + width_args, **popen_kwargs)
            self._output_channel = os.fdopen(master_fd, 'rb')
        except OSError as ose:
            if ose.errno != errno.ENOENT:
                raise
            self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')

    ascii_only_filesystem = (
        sys.version_info >= (3,) and sys.platform != 'win32' and
        sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'])
    if ascii_only_filesystem and not options.get('restrictfilenames', False):
        # On Python 3, the Unicode filesystem API will throw errors (#1474)
        self.report_warning(
            'Assuming --restrict-filenames since file system encoding '
            'cannot encode all characters. '
            'Set the LC_ALL environment variable to fix this.')
        self.params['restrictfilenames'] = True

    if '%(stitle)s' in self.params.get('outtmpl', ''):
        self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')

    self._setup_opener()

    if auto_init:
        self.print_debug_header()
        self.add_default_info_extractors()

    for pp_spec in self.params.get('postprocessors', []):
        pp_class = get_postprocessor(pp_spec['key'])
        # Everything except 'key' is forwarded to the postprocessor constructor
        pp_kwargs = dict((k, v) for k, v in pp_spec.items() if k != 'key')
        self.add_post_processor(pp_class(self, **compat_kwargs(pp_kwargs)))

    for hook in self.params.get('progress_hooks', []):
        self.add_progress_hook(hook)
295
7d4111ed
PH
def warn_if_short_id(self, argv):
    """Warn when an argument looks like a short YouTube ID starting with a dash.

    Such arguments are listed after a '--' separator in the command line
    suggested by the warning.
    """
    dashed_ids = []
    remaining = []
    for arg in argv:
        if re.match(r'^-[0-9A-Za-z_-]{10}$', arg):
            dashed_ids.append(arg)
        else:
            remaining.append(arg)
    if not dashed_ids:
        return

    correct_argv = ['youtube-dl'] + remaining + ['--'] + dashed_ids
    self.report_warning(
        'Long argument string detected. '
        'Use -- to separate parameters and URLs, like this:\n%s\n' %
        args_to_str(correct_argv))
311
8222d8de
JMF
def add_info_extractor(self, ie):
    """Register an InfoExtractor: append it to the list, index it by its
    ie_key() and set this object as its downloader."""
    self._ies.append(ie)
    instances = self._ies_instances
    instances[ie.ie_key()] = ie
    ie.set_downloader(self)
317
56c73665
JMF
def get_info_extractor(self, ie_key):
    """
    Return the InfoExtractor instance registered under ie_key.

    If no instance is registered yet, a new one is created, added to the
    extractor list and returned.
    """
    known = self._ies_instances.get(ie_key)
    if known is not None:
        return known
    created = get_info_extractor(ie_key)()
    self.add_info_extractor(created)
    return created
329
023fa8c4
JMF
def add_default_info_extractors(self):
    """
    Register every InfoExtractor returned by gen_extractors(), in order.
    """
    for extractor in gen_extractors():
        self.add_info_extractor(extractor)
336
8222d8de
JMF
def add_post_processor(self, pp):
    """Append a PostProcessor to the end of the chain and set this object
    as its downloader."""
    chain = self._pps
    chain.append(pp)
    pp.set_downloader(self)
341
933605d7
JMF
def add_progress_hook(self, ph):
    """Register a progress hook (currently only for the file downloader)."""
    hooks = self._progress_hooks
    hooks.append(ph)
8ab470f1 345
1c088fa8 346 def _bidi_workaround(self, message):
5d681e96 347 if not hasattr(self, '_output_channel'):
1c088fa8
PH
348 return message
349
5d681e96 350 assert hasattr(self, '_output_process')
11b85ce6 351 assert isinstance(message, compat_str)
6febd1c1
PH
352 line_count = message.count('\n') + 1
353 self._output_process.stdin.write((message + '\n').encode('utf-8'))
5d681e96 354 self._output_process.stdin.flush()
6febd1c1 355 res = ''.join(self._output_channel.readline().decode('utf-8')
9e1a5b84 356 for _ in range(line_count))
6febd1c1 357 return res[:-len('\n')]
1c088fa8 358
def to_screen(self, message, skip_eol=False):
    """Print message via to_stdout(), unless the 'quiet' option is set."""
    result = self.to_stdout(message, skip_eol, check_quiet=True)
    return result
362
def _write_string(self, s, out=None):
    """Write s to out with write_string(), using the 'encoding' option."""
    encoding = self.params.get('encoding')
    write_string(s, out=out, encoding=encoding)
734f90bb 365
def to_stdout(self, message, skip_eol=False, check_quiet=False):
    """Print message to the screen file, or hand it to the logger.

    If a 'logger' option is set, the message goes to its debug() method.
    Otherwise it is written to the screen file, followed by a newline
    unless skip_eol is set.  With check_quiet, nothing is written when
    the 'quiet' option is set.
    """
    logger = self.params.get('logger')
    if logger:
        logger.debug(message)
        return
    if check_quiet and self.params.get('quiet', False):
        return

    text = self._bidi_workaround(message)
    line_end = '' if skip_eol else '\n'
    self._write_string(text + line_end, self._screen_file)
8222d8de
JMF
376
def to_stderr(self, message):
    """Print message to the error file, or to the logger's error() method
    when a 'logger' option is set."""
    assert isinstance(message, compat_str)
    logger = self.params.get('logger')
    if logger:
        logger.error(message)
        return
    text = self._bidi_workaround(message)
    self._write_string(text + '\n', self._err_file)
8222d8de 386
1e5b9a95
PH
def to_console_title(self, message):
    """Set the console title to message when the 'consoletitle' option is set."""
    if not self.params.get('consoletitle', False):
        return
    if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
        # c_wchar_p() might not be necessary if `message` is
        # already of type unicode()
        kernel32 = ctypes.windll.kernel32
        kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        return
    if 'TERM' in os.environ:
        title_sequence = '\033]0;%s\007' % message
        self._write_string(title_sequence, self._screen_file)
1e5b9a95 396
bdde425c
PH
def save_console_title(self):
    """Push the current console title on the terminal's title stack.

    Does nothing unless the 'consoletitle' option is set and TERM is
    present in the environment.
    """
    wanted = self.params.get('consoletitle', False)
    if wanted and 'TERM' in os.environ:
        # Save the title on stack
        self._write_string('\033[22;0t', self._screen_file)
bdde425c
PH
403
def restore_console_title(self):
    """Pop the console title from the terminal's title stack.

    Does nothing unless the 'consoletitle' option is set and TERM is
    present in the environment.
    """
    wanted = self.params.get('consoletitle', False)
    if wanted and 'TERM' in os.environ:
        # Restore the title from stack
        self._write_string('\033[23;0t', self._screen_file)
bdde425c
PH
410
411 def __enter__(self):
412 self.save_console_title()
413 return self
414
415 def __exit__(self, *args):
416 self.restore_console_title()
f89197d7 417
dca08720
PH
418 if self.params.get('cookiefile') is not None:
419 self.cookiejar.save()
bdde425c 420
8222d8de
JMF
def trouble(self, message=None, tb=None):
    """Decide what to do when a download problem appears.

    The message (if any) is printed to stderr first.  With the 'verbose'
    option, traceback information is printed as well: tb if given,
    otherwise the traceback of the exception being handled, otherwise
    the current call stack.

    Unless the 'ignoreerrors' option is set, a DownloadError is raised;
    otherwise the download return code is set to 1.
    """
    if message is not None:
        self.to_stderr(message)

    if self.params.get('verbose'):
        if tb is None:
            handled = sys.exc_info()[1]
            if sys.exc_info()[0]:  # .trouble has been called from an except block
                pieces = []
                if hasattr(handled, 'exc_info') and handled.exc_info[0]:
                    # The exception wraps another one: show that traceback first
                    pieces.append(''.join(traceback.format_exception(*handled.exc_info)))
                pieces.append(compat_str(traceback.format_exc()))
                tb = ''.join(pieces)
            else:
                tb = ''.join(traceback.format_list(traceback.extract_stack()))
        self.to_stderr(tb)

    if self.params.get('ignoreerrors', False):
        self._download_retcode = 1
        return

    current = sys.exc_info()
    if current[0] and hasattr(current[1], 'exc_info') and current[1].exc_info[0]:
        exc_info = current[1].exc_info
    else:
        exc_info = current
    raise DownloadError(message, exc_info)
450
def report_warning(self, message):
    '''
    Print the message to stderr, prefixed with 'WARNING:'.

    The prefix is colored when the error file is a tty (not on Windows).
    If a 'logger' option is set, the message goes to its warning()
    method instead; otherwise the 'no_warnings' option suppresses it.
    '''
    logger = self.params.get('logger')
    if logger is not None:
        logger.warning(message)
        return
    if self.params.get('no_warnings'):
        return

    use_color = self._err_file.isatty() and os.name != 'nt'
    prefix = '\033[0;33mWARNING:\033[0m' if use_color else 'WARNING:'
    self.to_stderr('%s %s' % (prefix, message))
8222d8de
JMF
467
def report_error(self, message, tb=None):
    '''
    Call trouble() with the message prefixed with 'ERROR:'.

    The prefix is colored in red when the error file is a tty (not on
    Windows).  tb is passed on to trouble() unchanged.
    '''
    use_color = self._err_file.isatty() and os.name != 'nt'
    prefix = '\033[0;31mERROR:\033[0m' if use_color else 'ERROR:'
    self.trouble('%s %s' % (prefix, message), tb)
479
8222d8de
JMF
def report_file_already_downloaded(self, file_name):
    """Tell the user that file_name has already been fully downloaded.

    Falls back to a message without the file name when printing it
    raises UnicodeEncodeError.
    """
    try:
        notice = '[download] %s has already been downloaded' % file_name
        self.to_screen(notice)
    except UnicodeEncodeError:
        self.to_screen('[download] The file has already been downloaded')
8222d8de 486
8222d8de
JMF
def prepare_filename(self, info_dict):
    """Generate the output filename from the 'outtmpl' option.

    The template is filled from a copy of info_dict extended with
    'epoch', 'autonumber', a zero-padded 'playlist_index' and, when
    missing, a 'resolution' derived from width/height.  Values are
    sanitized; fields that are missing or None expand to 'NA'.

    Returns the filename, or None after reporting an error if the
    template raised ValueError.
    """
    try:
        fields = dict(info_dict)

        fields['epoch'] = int(time.time())
        digits = self.params.get('autonumber_size')
        if digits is None:
            digits = 5
        fields['autonumber'] = ('%0' + str(digits) + 'd') % self._num_downloads
        if fields.get('playlist_index') is not None:
            # Pad the index to the number of digits of the playlist size
            index_width = len(str(fields['n_entries']))
            fields['playlist_index'] = '%0*d' % (index_width, fields['playlist_index'])
        if fields.get('resolution') is None:
            width, height = fields.get('width'), fields.get('height')
            if width and height:
                fields['resolution'] = '%dx%d' % (width, height)
            elif height:
                fields['resolution'] = '%sp' % height
            elif width:
                fields['resolution'] = '?x%d' % width

        restricted = self.params.get('restrictfilenames')
        sanitized = collections.defaultdict(lambda: 'NA')
        for key, value in fields.items():
            if value is None:
                continue
            sanitized[key] = sanitize_filename(
                compat_str(value),
                restricted=restricted,
                is_id=(key == 'id'))

        tmpl = compat_expanduser(self.params.get('outtmpl', DEFAULT_OUTTMPL))
        return tmpl % sanitized
    except ValueError as err:
        self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
        return None
524
525 def _match_entry(self, info_dict):
526 """ Returns None iff the file should be downloaded """
527
6febd1c1 528 video_title = info_dict.get('title', info_dict.get('id', 'video'))
7012b23c
PH
529 if 'title' in info_dict:
530 # This can happen when we're just evaluating the playlist
531 title = info_dict['title']
532 matchtitle = self.params.get('matchtitle', False)
533 if matchtitle:
534 if not re.search(matchtitle, title, re.IGNORECASE):
6febd1c1 535 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
7012b23c
PH
536 rejecttitle = self.params.get('rejecttitle', False)
537 if rejecttitle:
538 if re.search(rejecttitle, title, re.IGNORECASE):
6febd1c1 539 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
8222d8de
JMF
540 date = info_dict.get('upload_date', None)
541 if date is not None:
542 dateRange = self.params.get('daterange', DateRange())
543 if date not in dateRange:
6febd1c1 544 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
5fe18bdb
PH
545 view_count = info_dict.get('view_count', None)
546 if view_count is not None:
547 min_views = self.params.get('min_views')
548 if min_views is not None and view_count < min_views:
6febd1c1 549 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
5fe18bdb
PH
550 max_views = self.params.get('max_views')
551 if max_views is not None and view_count > max_views:
6febd1c1 552 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
8dbe9899
PH
553 age_limit = self.params.get('age_limit')
554 if age_limit is not None:
be843678
PH
555 actual_age_limit = info_dict.get('age_limit')
556 if actual_age_limit is None:
557 actual_age_limit = 0
558 if age_limit < actual_age_limit:
6febd1c1 559 return 'Skipping "' + title + '" because it is age restricted'
c1c9a79c 560 if self.in_download_archive(info_dict):
6febd1c1 561 return '%s has already been recorded in archive' % video_title
8222d8de 562 return None
fe7e0c98 563
b6c45014
JMF
564 @staticmethod
565 def add_extra_info(info_dict, extra_info):
566 '''Set the keys from extra_info in info dict if they are missing'''
567 for key, value in extra_info.items():
568 info_dict.setdefault(key, value)
569
7fc3fa05
PH
def extract_info(self, url, download=True, ie_key=None, extra_info=None,
                 process=True):
    '''
    Extract the information for url with the first suitable InfoExtractor.

    url        -- the URL to extract.
    download   -- passed on to process_ie_result(): also download the videos.
    ie_key     -- if given, only the InfoExtractor with that key is tried.
    extra_info -- dict with extra values to add to each result
                  (None means no extra values).
    process    -- if false, the extractor result is returned as it is,
                  without passing it to process_ie_result().

    Returns the (processed) extractor result.  Returns None when the
    extractor produced no result, when an error was reported instead of
    raised, or when no InfoExtractor is suitable for the URL.
    '''
    # None replaces a shared mutable default; callees expect a dict
    if extra_info is None:
        extra_info = {}

    if ie_key:
        ies = [self.get_info_extractor(ie_key)]
    else:
        ies = self._ies

    for ie in ies:
        if not ie.suitable(url):
            continue

        if not ie.working():
            self.report_warning('The program functionality for this site has been marked as broken, '
                                'and will probably not work.')

        try:
            ie_result = ie.extract(url)
            if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
                break
            if isinstance(ie_result, list):
                # Backwards compatibility: old IE result format
                ie_result = {
                    '_type': 'compat_list',
                    'entries': ie_result,
                }
            self.add_default_extra_info(ie_result, ie, url)
            if process:
                return self.process_ie_result(ie_result, download, extra_info)
            else:
                return ie_result
        except ExtractorError as de:  # An error we somewhat expected
            self.report_error(compat_str(de), de.format_traceback())
            break
        except MaxDownloadsReached:
            raise
        except Exception as e:
            if self.params.get('ignoreerrors', False):
                self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
                break
            else:
                raise
    else:
        # Only reached when the loop was never left with break or return,
        # i.e. when no extractor was suitable
        self.report_error('no suitable InfoExtractor for URL %s' % url)
fe7e0c98 619
ea38e55f
PH
def add_default_extra_info(self, ie_result, ie, url):
    """Add the extractor name and key and the webpage URL (with its
    basename) to ie_result, for the keys it does not have yet."""
    defaults = {
        'extractor': ie.IE_NAME,
        'webpage_url': url,
        'webpage_url_basename': url_basename(url),
        'extractor_key': ie.ie_key(),
    }
    self.add_extra_info(ie_result, defaults)
627
8222d8de
JMF
def process_ie_result(self, ie_result, download=True, extra_info=None):
    """
    Take the result of the ie(may be modified) and resolve all unresolved
    references (URLs, playlist items).

    ie_result  -- the extractor result; its '_type' (default 'video')
                  selects how it is handled.
    download   -- also download the videos.
    extra_info -- dict with extra values to add to the results
                  (None means no extra values).

    Returns the resolved ie_result.  For a 'url_transparent' result whose
    embedded URL yields no information, that empty value is returned.
    Raises Exception for an unknown '_type'.
    """

    # None replaces a shared mutable default
    if extra_info is None:
        extra_info = {}

    result_type = ie_result.get('_type', 'video')

    if result_type in ('url', 'url_transparent'):
        extract_flat = self.params.get('extract_flat', False)
        if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
                extract_flat is True):
            if self.params.get('forcejson', False):
                self.to_stdout(json.dumps(ie_result))
            return ie_result

    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        return self.process_video_result(ie_result, download=download)
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(ie_result['url'],
                                 download,
                                 ie_key=ie_result.get('ie_key'),
                                 extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        # extract_info returns None when it only reported an error (or
        # found nothing); there is nothing to merge in that case
        if not info:
            return info

        force_properties = dict(
            (k, v) for k, v in ie_result.items() if v is not None)
        for f in ('_type', 'url'):
            if f in force_properties:
                del force_properties[f]
        new_result = info.copy()
        new_result.update(force_properties)

        assert new_result.get('_type') != 'url_transparent'

        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    elif result_type == 'playlist' or result_type == 'multi_video':
        # We process each entry in the playlist
        playlist = ie_result.get('title', None) or ie_result.get('id', None)
        self.to_screen('[download] Downloading playlist: %s' % playlist)

        playlist_results = []

        # The option is 1-based, slicing is 0-based
        playliststart = self.params.get('playliststart', 1) - 1
        playlistend = self.params.get('playlistend', None)
        # For backwards compatibility, interpret -1 as whole list
        if playlistend == -1:
            playlistend = None

        ie_entries = ie_result['entries']
        if isinstance(ie_entries, list):
            n_all_entries = len(ie_entries)
            entries = ie_entries[playliststart:playlistend]
            n_entries = len(entries)
            self.to_screen(
                "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
                (ie_result['extractor'], playlist, n_all_entries, n_entries))
        elif isinstance(ie_entries, PagedList):
            entries = ie_entries.getslice(
                playliststart, playlistend)
            n_entries = len(entries)
            self.to_screen(
                "[%s] playlist %s: Downloading %d videos" %
                (ie_result['extractor'], playlist, n_entries))
        else:  # iterable
            entries = list(itertools.islice(
                ie_entries, playliststart, playlistend))
            n_entries = len(entries)
            self.to_screen(
                "[%s] playlist %s: Downloading %d videos" %
                (ie_result['extractor'], playlist, n_entries))

        if self.params.get('playlistreverse', False):
            entries = entries[::-1]

        for i, entry in enumerate(entries, 1):
            self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
            extra = {
                'n_entries': n_entries,
                'playlist': playlist,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_index': i + playliststart,
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            }

            reason = self._match_entry(entry)
            if reason is not None:
                self.to_screen('[download] ' + reason)
                continue

            entry_result = self.process_ie_result(entry,
                                                  download=download,
                                                  extra_info=extra)
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results
        return ie_result
    elif result_type == 'compat_list':
        self.report_warning(
            'Extractor %s returned a compat_list result. '
            'It needs to be updated.' % ie_result.get('extractor'))

        def _fixup(r):
            # Give each old-style entry the extractor fields of its container
            self.add_extra_info(
                r,
                {
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }
            )
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)
762
a9c58ad9
JMF
def select_format(self, format_spec, available_formats):
    """Pick the format described by format_spec from available_formats.

    'best' (or None) and 'worst' take the last / first available format.
    'bestaudio' / 'worstaudio' consider formats whose vcodec is 'none',
    'bestvideo' / 'worstvideo' those whose acodec is 'none'.  Any other
    spec is matched against the extension if it is one of the known
    extensions, else against the format_id; the last match wins.

    Returns the selected format, or None if nothing matches.
    """
    if format_spec == 'best' or format_spec is None:
        return available_formats[-1]
    if format_spec == 'worst':
        return available_formats[0]

    if format_spec in ('bestaudio', 'worstaudio'):
        candidates = [
            fmt for fmt in available_formats
            if fmt.get('vcodec') == 'none']
    elif format_spec in ('bestvideo', 'worstvideo'):
        candidates = [
            fmt for fmt in available_formats
            if fmt.get('acodec') == 'none']
    else:
        extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a']
        field = 'ext' if format_spec in extensions else 'format_id'
        candidates = [
            fmt for fmt in available_formats
            if fmt[field] == format_spec]

    if not candidates:
        return None
    if format_spec in ('worstaudio', 'worstvideo'):
        return candidates[0]
    return candidates[-1]
802
dd82ffea
JMF
def process_video_result(self, info_dict, download=True):
    """Normalize a single video result and select the formats to download.

    Fills in derived fields on info_dict (playlist placeholders, thumbnail,
    display_id, upload_date, per-format format_id/format/ext), resolves the
    'format' parameter into concrete format dicts and, when download is
    true, passes each selected format to process_info.

    Returns info_dict updated with the last selected format.  Returns None
    when only a format listing was requested or when a merge request puts
    an audio-only format first.  Raises ExtractorError for missing mandatory
    fields, an empty format list or an unavailable requested format.
    """
    assert info_dict.get('_type', 'video') == 'video'

    if 'id' not in info_dict:
        raise ExtractorError('Missing "id" field in extractor result')
    if 'title' not in info_dict:
        raise ExtractorError('Missing "title" field in extractor result')

    if 'playlist' not in info_dict:
        # It isn't part of a playlist
        info_dict['playlist'] = None
        info_dict['playlist_index'] = None

    thumbnails = info_dict.get('thumbnails')
    if thumbnails:
        def _thumbnail_key(t):
            # Missing dimensions sort first; None is not orderable against
            # ints on Python 3, so substitute a sentinel.
            width = t.get('width')
            height = t.get('height')
            return (
                -1 if width is None else width,
                -1 if height is None else height,
                t.get('url') or '')

        thumbnails.sort(key=_thumbnail_key)
        for t in thumbnails:
            if t.get('width') is not None and t.get('height') is not None:
                t['resolution'] = '%dx%d' % (t['width'], t['height'])

    if thumbnails and 'thumbnail' not in info_dict:
        info_dict['thumbnail'] = thumbnails[-1]['url']

    if 'display_id' not in info_dict and 'id' in info_dict:
        info_dict['display_id'] = info_dict['id']

    if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
        # Working around negative timestamps in Windows
        # (see http://bugs.python.org/issue1646728)
        if info_dict['timestamp'] < 0 and os.name == 'nt':
            info_dict['timestamp'] = 0
        upload_date = datetime.datetime.utcfromtimestamp(
            info_dict['timestamp'])
        info_dict['upload_date'] = upload_date.strftime('%Y%m%d')

    # These extractors handle format selection themselves
    if info_dict['extractor'] in ['Youku']:
        if download:
            self.process_info(info_dict)
        return info_dict

    # We now pick which formats have to be downloaded
    if info_dict.get('formats') is None:
        # There's only one format available
        formats = [info_dict]
    else:
        formats = info_dict['formats']

    if not formats:
        raise ExtractorError('No video formats found!')

    # We check that all the formats have the format and format_id fields
    for i, fmt in enumerate(formats):
        if 'url' not in fmt:
            raise ExtractorError('Missing "url" key in result (index %d)' % i)

        if fmt.get('format_id') is None:
            fmt['format_id'] = compat_str(i)
        if fmt.get('format') is None:
            fmt['format'] = '{id} - {res}{note}'.format(
                id=fmt['format_id'],
                res=self.format_resolution(fmt),
                note=' ({0})'.format(fmt['format_note']) if fmt.get('format_note') is not None else '',
            )
        # Automatically determine file extension if missing
        if 'ext' not in fmt:
            fmt['ext'] = determine_ext(fmt['url']).lower()

    format_limit = self.params.get('format_limit', None)
    if format_limit:
        formats = list(takewhile_inclusive(
            lambda f: f['format_id'] != format_limit, formats
        ))

    # TODO Central sorting goes here

    if formats[0] is not info_dict:
        # only set the 'formats' fields if the original info_dict list them
        # otherwise we end up with a circular reference, the first (and unique)
        # element in the 'formats' field in info_dict is info_dict itself,
        # which can't be exported to json
        info_dict['formats'] = formats
    if self.params.get('listformats', None):
        self.list_formats(info_dict)
        return

    req_format = self.params.get('format')
    if req_format is None:
        req_format = 'best'
    formats_to_download = []
    # The -1 is for supporting YoutubeIE
    if req_format in ('-1', 'all'):
        formats_to_download = formats
    else:
        for rfstr in req_format.split(','):
            # We can accept formats requested in the format: 34/5/best, we pick
            # the first that is available, starting from left
            req_formats = rfstr.split('/')
            for rf in req_formats:
                if re.match(r'.+?\+.+?', rf) is not None:
                    # Two formats have been requested like '137+139'
                    format_1, format_2 = rf.split('+')
                    formats_info = (self.select_format(format_1, formats),
                                    self.select_format(format_2, formats))
                    if all(formats_info):
                        # The first format must contain the video and the
                        # second the audio
                        if formats_info[0].get('vcodec') == 'none':
                            self.report_error('The first format must '
                                              'contain the video, try using '
                                              '"-f %s+%s"' % (format_2, format_1))
                            return
                        selected_format = {
                            'requested_formats': formats_info,
                            'format': rf,
                            'ext': formats_info[0]['ext'],
                        }
                    else:
                        selected_format = None
                else:
                    selected_format = self.select_format(rf, formats)
                if selected_format is not None:
                    formats_to_download.append(selected_format)
                    break
    if not formats_to_download:
        raise ExtractorError('requested format not available',
                             expected=True)

    if download:
        if len(formats_to_download) > 1:
            self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
        for fmt in formats_to_download:
            new_info = dict(info_dict)
            new_info.update(fmt)
            self.process_info(new_info)
    # We update the info dict with the best quality format (backwards compatibility)
    info_dict.update(formats_to_download[-1])
    return info_dict
942
8222d8de
JMF
def process_info(self, info_dict):
    """Process a single resolved IE result.

    Enforces the 'max_downloads' limit, derives the title fields, applies
    the entry filter, emits the forced printings and, unless simulating,
    writes the requested side files (description, annotations, subtitles,
    info JSON, thumbnail), downloads the media, runs the postprocessors and
    records the video in the download archive.

    Raises MaxDownloadsReached when the limit has been hit and
    UnavailableVideoError when the download fails with an OS-level error.
    """

    assert info_dict.get('_type', 'video') == 'video'

    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None:
        if self._num_downloads >= int(max_downloads):
            raise MaxDownloadsReached()

    info_dict['fulltitle'] = info_dict['title']
    if len(info_dict['title']) > 200:
        info_dict['title'] = info_dict['title'][:197] + '...'

    # Keep for backwards compatibility
    info_dict['stitle'] = info_dict['title']

    if 'format' not in info_dict:
        info_dict['format'] = info_dict['ext']

    reason = self._match_entry(info_dict)
    if reason is not None:
        self.to_screen('[download] ' + reason)
        return

    self._num_downloads += 1

    filename = self.prepare_filename(info_dict)

    # Forced printings
    if self.params.get('forcetitle', False):
        self.to_stdout(info_dict['fulltitle'])
    if self.params.get('forceid', False):
        self.to_stdout(info_dict['id'])
    if self.params.get('forceurl', False):
        if info_dict.get('requested_formats') is not None:
            for f in info_dict['requested_formats']:
                self.to_stdout(f['url'] + f.get('play_path', ''))
        else:
            # For RTMP URLs, also include the playpath
            self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
    if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
        self.to_stdout(info_dict['thumbnail'])
    if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
        self.to_stdout(info_dict['description'])
    if self.params.get('forcefilename', False) and filename is not None:
        self.to_stdout(filename)
    if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
        self.to_stdout(formatSeconds(info_dict['duration']))
    if self.params.get('forceformat', False):
        self.to_stdout(info_dict['format'])
    if self.params.get('forcejson', False):
        info_dict['_filename'] = filename
        self.to_stdout(json.dumps(info_dict))
    if self.params.get('dump_single_json', False):
        info_dict['_filename'] = filename

    # Do nothing else if in simulate mode
    if self.params.get('simulate', False):
        return

    if filename is None:
        return

    try:
        dn = os.path.dirname(encodeFilename(filename))
        if dn and not os.path.exists(dn):
            os.makedirs(dn)
    except (OSError, IOError) as err:
        self.report_error('unable to create directory ' + compat_str(err))
        return

    if self.params.get('writedescription', False):
        descfn = filename + '.description'
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
            self.to_screen('[info] Video description is already present')
        elif info_dict.get('description') is None:
            # Check before opening so that no empty file is left behind
            self.report_warning('There\'s no description to write.')
        else:
            try:
                self.to_screen('[info] Writing video description to: ' + descfn)
                with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                    descfile.write(info_dict['description'])
            except (KeyError, TypeError):
                self.report_warning('There\'s no description to write.')
            except (OSError, IOError):
                self.report_error('Cannot write description file ' + descfn)
                return

    if self.params.get('writeannotations', False):
        annofn = filename + '.annotations.xml'
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
            self.to_screen('[info] Video annotations are already present')
        elif info_dict.get('annotations') is None:
            # Check before opening so that no empty file is left behind
            self.report_warning('There are no annotations to write.')
        else:
            try:
                self.to_screen('[info] Writing video annotations to: ' + annofn)
                with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                    annofile.write(info_dict['annotations'])
            except (KeyError, TypeError):
                self.report_warning('There are no annotations to write.')
            except (OSError, IOError):
                self.report_error('Cannot write annotations file: ' + annofn)
                return

    subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                   self.params.get('writeautomaticsub')])

    if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        subtitles = info_dict['subtitles']
        sub_format = self.params.get('subtitlesformat', 'srt')
        for sub_lang, sub in subtitles.items():
            if sub is None:
                continue
            try:
                sub_filename = subtitles_filename(filename, sub_lang, sub_format)
                if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
                    self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
                else:
                    self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                    with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                        subfile.write(sub)
            except (OSError, IOError):
                self.report_error('Cannot write subtitles file ' + sub_filename)
                return

    if self.params.get('writeinfojson', False):
        infofn = os.path.splitext(filename)[0] + '.info.json'
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
            self.to_screen('[info] Video description metadata is already present')
        else:
            self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
            try:
                write_json_file(info_dict, infofn)
            except (OSError, IOError):
                self.report_error('Cannot write metadata to JSON file ' + infofn)
                return

    if self.params.get('writethumbnail', False):
        if info_dict.get('thumbnail') is not None:
            thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
            thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
                self.to_screen('[%s] %s: Thumbnail is already present' %
                               (info_dict['extractor'], info_dict['id']))
            else:
                self.to_screen('[%s] %s: Downloading thumbnail ...' %
                               (info_dict['extractor'], info_dict['id']))
                try:
                    uf = self.urlopen(info_dict['thumbnail'])
                    # Encode the name like every other file written here
                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                    self.to_screen('[%s] %s: Writing thumbnail to: %s' %
                                   (info_dict['extractor'], info_dict['id'], thumb_filename))
                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                    self.report_warning('Unable to download thumbnail "%s": %s' %
                                        (info_dict['thumbnail'], compat_str(err)))

    if not self.params.get('skip_download', False):
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
            success = True
        else:
            try:
                def dl(name, info):
                    fd = get_suitable_downloader(info)(self, self.params)
                    for ph in self._progress_hooks:
                        fd.add_progress_hook(ph)
                    if self.params.get('verbose'):
                        self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
                    return fd.download(name, info)
                if info_dict.get('requested_formats') is not None:
                    downloaded = []
                    success = True
                    merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
                    if not merger._executable:
                        postprocessors = []
                        self.report_warning('You have requested multiple '
                                            'formats but ffmpeg or avconv are not installed.'
                                            ' The formats won\'t be merged')
                    else:
                        postprocessors = [merger]
                    for f in info_dict['requested_formats']:
                        new_info = dict(info_dict)
                        new_info.update(f)
                        fname = self.prepare_filename(new_info)
                        fname = prepend_extension(fname, 'f%s' % f['format_id'])
                        downloaded.append(fname)
                        partial_success = dl(fname, new_info)
                        success = success and partial_success
                    info_dict['__postprocessors'] = postprocessors
                    info_dict['__files_to_merge'] = downloaded
                else:
                    # Just a single file
                    success = dl(filename, info_dict)
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_error('unable to download video data: %s' % str(err))
                return
            except (OSError, IOError) as err:
                raise UnavailableVideoError(err)
            except (ContentTooShortError, ) as err:
                self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                return

        if success:
            try:
                self.post_process(filename, info_dict)
            except (PostProcessingError) as err:
                self.report_error('postprocessing: %s' % str(err))
                return
            self.record_download_archive(info_dict)
8222d8de
JMF
1153
def download(self, url_list):
    """Download a given list of URLs.

    Raises SameFileError when several URLs would be written through an
    output template without any '%' field, unless 'max_downloads' is 1.
    Returns the accumulated download return code.
    """
    template = self.params.get('outtmpl', DEFAULT_OUTTMPL)
    if len(url_list) > 1 and '%' not in template:
        if self.params.get('max_downloads') != 1:
            raise SameFileError(template)

    dump_json = None
    for url in url_list:
        try:
            # extract_info also performs the download
            result = self.extract_info(url)
        except UnavailableVideoError:
            self.report_error('unable to download video')
            continue
        except MaxDownloadsReached:
            self.to_screen('[info] Maximum number of downloaded files reached.')
            raise
        dump_json = self.params.get('dump_single_json', False)
        if dump_json:
            self.to_stdout(json.dumps(result))

    return self._download_retcode
1176
def download_with_info_file(self, info_filename):
    """Download from a previously saved info JSON file.

    The file is read as UTF-8 JSON and handed to process_ie_result.  If that
    raises DownloadError and the info carries a 'webpage_url', the download
    is retried from that URL; otherwise the error is re-raised.  Returns the
    download return code.
    """
    with io.open(info_filename, 'r', encoding='utf-8') as info_file:
        info = json.load(info_file)

    try:
        self.process_ie_result(info, download=True)
    except DownloadError:
        fallback_url = info.get('webpage_url')
        if fallback_url is None:
            raise
        self.report_warning('The info failed to download, trying with "%s"' % fallback_url)
        return self.download([fallback_url])
    return self._download_retcode
1dcc4c0c 1190
8222d8de
JMF
def post_process(self, filename, ie_info):
    """Run all the postprocessors on the given file.

    The chain consists of the per-download '__postprocessors' stored in
    ie_info (if any) followed by the registered self._pps.  Each
    postprocessor receives the info dict returned by the previous one, so
    changes such as an updated 'filepath' propagate along the chain.  The
    original file is removed when the chain decided not to keep it and the
    'keepvideo' parameter is unset.
    """
    info = dict(ie_info)
    info['filepath'] = filename
    keep_video = None
    pps_chain = []
    if ie_info.get('__postprocessors') is not None:
        pps_chain.extend(ie_info['__postprocessors'])
    pps_chain.extend(self._pps)
    for pp in pps_chain:
        try:
            keep_video_wish, new_info = pp.run(info)
            if new_info is not None:
                # Hand the updated information to the next postprocessor
                info = new_info
            if keep_video_wish is not None:
                if keep_video_wish:
                    keep_video = keep_video_wish
                elif keep_video is None:
                    # No clear decision yet, let IE decide
                    keep_video = keep_video_wish
        except PostProcessingError as e:
            self.report_error(e.msg)
    if keep_video is False and not self.params.get('keepvideo', False):
        try:
            self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
            os.remove(encodeFilename(filename))
        except (IOError, OSError):
            self.report_warning('Unable to remove downloaded video file')
c1c9a79c 1217
5db07df6
PH
1218 def _make_archive_id(self, info_dict):
1219 # Future-proof against any change in case
1220 # and backwards compatibility with prior versions
d31209a1 1221 extractor = info_dict.get('extractor_key')
7012b23c
PH
1222 if extractor is None:
1223 if 'id' in info_dict:
1224 extractor = info_dict.get('ie_key') # key in a playlist
1225 if extractor is None:
5db07df6 1226 return None # Incomplete video information
6febd1c1 1227 return extractor.lower() + ' ' + info_dict['id']
5db07df6
PH
1228
def in_download_archive(self, info_dict):
    """Return True if info_dict is already listed in the download archive.

    Returns False when no 'download_archive' parameter is set, when the
    video information is incomplete, or when the archive file does not
    exist yet.  Other IOErrors are propagated.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return False

    entry = self._make_archive_id(info_dict)
    if entry is None:
        return False  # Incomplete video information

    try:
        with locked_file(archive_path, 'r', encoding='utf-8') as archive_file:
            if any(line.strip() == entry for line in archive_file):
                return True
    except IOError as ioe:
        # A missing archive simply means nothing was recorded so far
        if ioe.errno != errno.ENOENT:
            raise
    return False
1247
def record_download_archive(self, info_dict):
    """Append info_dict's archive id to the download archive file.

    Does nothing when no 'download_archive' parameter is set.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is not None:
        entry = self._make_archive_id(info_dict)
        assert entry
        with locked_file(archive_path, 'a', encoding='utf-8') as archive_file:
            archive_file.write(entry + '\n')
dd82ffea 1256
8c51aa65 1257 @staticmethod
8abeeb94 1258 def format_resolution(format, default='unknown'):
fb04e403
PH
1259 if format.get('vcodec') == 'none':
1260 return 'audio only'
f49d89ee
PH
1261 if format.get('resolution') is not None:
1262 return format['resolution']
8c51aa65
JMF
1263 if format.get('height') is not None:
1264 if format.get('width') is not None:
6febd1c1 1265 res = '%sx%s' % (format['width'], format['height'])
8c51aa65 1266 else:
6febd1c1 1267 res = '%sp' % format['height']
f49d89ee 1268 elif format.get('width') is not None:
6febd1c1 1269 res = '?x%d' % format['width']
8c51aa65 1270 else:
8abeeb94 1271 res = default
8c51aa65
JMF
1272 return res
1273
c57f7757
PH
1274 def _format_note(self, fdict):
1275 res = ''
1276 if fdict.get('ext') in ['f4f', 'f4m']:
1277 res += '(unsupported) '
1278 if fdict.get('format_note') is not None:
1279 res += fdict['format_note'] + ' '
1280 if fdict.get('tbr') is not None:
1281 res += '%4dk ' % fdict['tbr']
1282 if fdict.get('container') is not None:
1283 if res:
1284 res += ', '
1285 res += '%s container' % fdict['container']
1286 if (fdict.get('vcodec') is not None and
1287 fdict.get('vcodec') != 'none'):
1288 if res:
1289 res += ', '
1290 res += fdict['vcodec']
91c7271a 1291 if fdict.get('vbr') is not None:
c57f7757
PH
1292 res += '@'
1293 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1294 res += 'video@'
1295 if fdict.get('vbr') is not None:
1296 res += '%4dk' % fdict['vbr']
fbb21cf5
PH
1297 if fdict.get('fps') is not None:
1298 res += ', %sfps' % fdict['fps']
c57f7757
PH
1299 if fdict.get('acodec') is not None:
1300 if res:
1301 res += ', '
1302 if fdict['acodec'] == 'none':
1303 res += 'video only'
1304 else:
1305 res += '%-5s' % fdict['acodec']
1306 elif fdict.get('abr') is not None:
1307 if res:
1308 res += ', '
1309 res += 'audio'
1310 if fdict.get('abr') is not None:
1311 res += '@%3dk' % fdict['abr']
1312 if fdict.get('asr') is not None:
1313 res += ' (%5dHz)' % fdict['asr']
1314 if fdict.get('filesize') is not None:
1315 if res:
1316 res += ', '
1317 res += format_bytes(fdict['filesize'])
9732d77e
PH
1318 elif fdict.get('filesize_approx') is not None:
1319 if res:
1320 res += ', '
1321 res += '~' + format_bytes(fdict['filesize_approx'])
c57f7757 1322 return res
91c7271a 1323
def list_formats(self, info_dict):
    """Print a table of the formats available for info_dict.

    Columns are format code, extension, resolution and note.  When more
    than one format exists, the first row is tagged '(worst)' and the last
    '(best)'.
    """
    formats = info_dict.get('formats', [info_dict])
    code_width = max(
        len('format code'),
        max(len(f['format_id']) for f in formats))

    def render_row(fmt):
        # The first column is sized to the longest format id plus one space
        return '%-*s%-10s%-12s%s' % (
            code_width + 1,
            fmt['format_id'],
            fmt['ext'],
            self.format_resolution(fmt),
            self._format_note(fmt),
        )

    rows = [render_row(f) for f in formats]
    if len(formats) > 1:
        worst_gap = ' ' if self._format_note(formats[0]) else ''
        rows[0] += worst_gap + '(worst)'
        best_gap = ' ' if self._format_note(formats[-1]) else ''
        rows[-1] += best_gap + '(best)'

    header_row = render_row({
        'format_id': 'format code', 'ext': 'extension',
        'resolution': 'resolution', 'format_note': 'note'})
    self.to_screen('[info] Available formats for %s:\n%s\n%s' %
                   (info_dict['id'], header_row, '\n'.join(rows)))
dca08720
PH
1346
def urlopen(self, req):
    """ Start an HTTP download """

    # According to RFC 3986, URLs can not contain non-ASCII characters; however
    # some websites hand out URLs with raw (not percent-encoded) non-ASCII
    # characters (see telemb.py, ard.py [#3412]) and urllib chokes on them
    # (see http://bugs.python.org/issue3991).  As a workaround the request's
    # URL is replaced by its percent-encoded form whenever the two differ.
    string_types = basestring if sys.version_info < (3, 0) else compat_str
    if isinstance(req, string_types):
        escaped = escape_url(req)
        if escaped != req:
            req = escaped
    else:
        original_url = req.get_full_url()
        escaped = escape_url(original_url)
        if escaped != original_url:
            # Rebuild the request around the escaped URL
            req = compat_urllib_request.Request(
                escaped, data=req.data, headers=req.headers,
                origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)

    return self._opener.open(req, timeout=self._socket_timeout)
dca08720
PH
1370
def print_debug_header(self):
    """Write the '[debug]' diagnostic header; a no-op unless 'verbose' is set.

    Reports the encodings in use, the youtube-dl version, the git HEAD of
    the source checkout (best effort), the Python version and platform, the
    versions of the external executables and the proxy map of the opener.
    """
    if not self.params.get('verbose'):
        return

    if type('') is not compat_str:
        # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
        self.report_warning(
            'Your Python is broken! Update to a newer and supported version')

    stdout_encoding = getattr(
        sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
    encoding_str = (
        '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
            locale.getpreferredencoding(),
            sys.getfilesystemencoding(),
            stdout_encoding,
            self.get_encoding()))
    write_string(encoding_str, encoding=None)

    self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
    try:
        sp = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out, _ = sp.communicate()
        out = out.decode().strip()
        # Only report output that consists entirely of hex digits
        if re.match(r'[0-9a-f]+$', out):
            self._write_string('[debug] Git HEAD: ' + out + '\n')
    except Exception:
        # Best effort only (git may be missing); do not swallow
        # KeyboardInterrupt or SystemExit though.
        try:
            sys.exc_clear()
        except Exception:
            # sys.exc_clear is not available on every Python version
            pass
    self._write_string('[debug] Python version %s - %s\n' % (
        platform.python_version(), platform_name()))

    exe_versions = FFmpegPostProcessor.get_versions()
    exe_versions['rtmpdump'] = rtmpdump_version()
    exe_str = ', '.join(
        '%s %s' % (exe, v)
        for exe, v in sorted(exe_versions.items())
        if v
    )
    if not exe_str:
        exe_str = 'none'
    self._write_string('[debug] exe versions: %s\n' % exe_str)

    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
dca08720 1424
def _setup_opener(self):
    """Build the URL opener from the socket, cookie and proxy parameters.

    Sets self._socket_timeout (600 when 'socket_timeout' is unset),
    self.cookiejar and self._opener.
    """
    socket_timeout = self.params.get('socket_timeout')
    if socket_timeout is None:
        self._socket_timeout = 600
    else:
        self._socket_timeout = float(socket_timeout)

    cookie_path = self.params.get('cookiefile')
    if cookie_path is not None:
        self.cookiejar = compat_cookiejar.MozillaCookieJar(cookie_path)
        if os.access(cookie_path, os.R_OK):
            self.cookiejar.load()
    else:
        self.cookiejar = compat_cookiejar.CookieJar()
    cookie_processor = compat_urllib_request.HTTPCookieProcessor(
        self.cookiejar)

    proxy_url = self.params.get('proxy')
    if proxy_url is None:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    elif proxy_url == '':
        proxies = {}
    else:
        proxies = {'http': proxy_url, 'https': proxy_url}
    proxy_handler = compat_urllib_request.ProxyHandler(proxies)

    traffic_debuglevel = 1 if self.params.get('debug_printtraffic') else 0
    https_handler = make_HTTPS_handler(
        self.params.get('nocheckcertificate', False),
        debuglevel=traffic_debuglevel)
    ydl_handler = YoutubeDLHandler(debuglevel=traffic_debuglevel)

    self._opener = compat_urllib_request.build_opener(
        https_handler, proxy_handler, cookie_processor, ydl_handler)
    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
    self._opener.addheaders = []
62fec3b2
PH
1465
def encode(self, s):
    """Encode s with the configured encoding; bytes are returned untouched.

    On UnicodeEncodeError the exception's reason is extended with a hint
    about the --encoding option before it is re-raised.
    """
    if not isinstance(s, bytes):
        try:
            return s.encode(self.get_encoding())
        except UnicodeEncodeError as err:
            err.reason += '. Check your system encoding configuration or use the --encoding option.'
            raise
    return s  # Already encoded
1475
def get_encoding(self):
    """Return the 'encoding' parameter, or the preferred encoding if unset."""
    configured = self.params.get('encoding')
    return preferredencoding() if configured is None else configured