]> jfr.im git - yt-dlp.git/blame - youtube_dl/YoutubeDL.py
[xuite] Fix _VALID_URL
[yt-dlp.git] / youtube_dl / YoutubeDL.py
CommitLineData
8222d8de
JMF
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3
6febd1c1 4from __future__ import absolute_import, unicode_literals
8222d8de 5
26e63931 6import collections
9d2ecdbc 7import datetime
c1c9a79c 8import errno
8222d8de 9import io
b82f815f 10import itertools
8694c600 11import json
62fec3b2 12import locale
083c9df9 13import operator
8222d8de 14import os
dca08720 15import platform
8222d8de
JMF
16import re
17import shutil
dca08720 18import subprocess
8222d8de
JMF
19import socket
20import sys
21import time
22import traceback
23
1e5b9a95
PH
24if os.name == 'nt':
25 import ctypes
26
8c25f81b 27from .compat import (
dca08720 28 compat_cookiejar,
4644ac55 29 compat_expanduser,
ce02ed60 30 compat_http_client,
4f026faf 31 compat_kwargs,
ce02ed60
PH
32 compat_str,
33 compat_urllib_error,
34 compat_urllib_request,
8c25f81b
PH
35)
36from .utils import (
d05cfe06 37 escape_url,
ce02ed60
PH
38 ContentTooShortError,
39 date_from_str,
40 DateRange,
acd69589 41 DEFAULT_OUTTMPL,
ce02ed60
PH
42 determine_ext,
43 DownloadError,
44 encodeFilename,
45 ExtractorError,
02dbf93f 46 format_bytes,
525ef922 47 formatSeconds,
1c088fa8 48 get_term_width,
ce02ed60 49 locked_file,
dca08720 50 make_HTTPS_handler,
ce02ed60 51 MaxDownloadsReached,
b7ab0590 52 PagedList,
083c9df9 53 parse_filesize,
ce02ed60 54 PostProcessingError,
dca08720 55 platform_name,
ce02ed60 56 preferredencoding,
cfb56d1a 57 render_table,
ce02ed60
PH
58 SameFileError,
59 sanitize_filename,
e5660ee6 60 std_headers,
ce02ed60
PH
61 subtitles_filename,
62 takewhile_inclusive,
63 UnavailableVideoError,
29eb5174 64 url_basename,
58b1f00d 65 version_tuple,
ce02ed60
PH
66 write_json_file,
67 write_string,
dca08720 68 YoutubeDLHandler,
6350728b 69 prepend_extension,
7d4111ed 70 args_to_str,
05900629 71 age_restricted,
ce02ed60 72)
a0e07d31 73from .cache import Cache
023fa8c4 74from .extractor import get_info_extractor, gen_extractors
3bc2ddcc 75from .downloader import get_suitable_downloader
4c83c967 76from .downloader.rtmp import rtmpdump_version
4f026faf 77from .postprocessor import (
62cd676c 78 FFmpegFixupM4aPP,
6271f1ca 79 FFmpegFixupStretchedPP,
4f026faf
PH
80 FFmpegMergerPP,
81 FFmpegPostProcessor,
82 get_postprocessor,
83)
dca08720 84from .version import __version__
8222d8de
JMF
85
86
87class YoutubeDL(object):
88 """YoutubeDL class.
89
90 YoutubeDL objects are the ones responsible for downloading the
91 actual video file and writing it to disk if the user has requested
92 it, among some other tasks. In most cases there should be one per
93 program. As, given a video URL, the downloader doesn't know how to
94 extract all the needed information, task that InfoExtractors do, it
95 has to pass the URL to one of them.
96
97 For this, YoutubeDL objects have a method that allows
98 InfoExtractors to be registered in a given order. When it is passed
99 a URL, the YoutubeDL object handles it to the first InfoExtractor it
100 finds that reports being able to handle it. The InfoExtractor extracts
101 all the information about the video or videos the URL refers to, and
102 YoutubeDL processes the extracted information, possibly using a File
103 Downloader to download the video.
104
105 YoutubeDL objects accept a lot of parameters. In order not to saturate
106 the object constructor with arguments, it receives a dictionary of
107 options instead. These options are available through the params
108 attribute for the InfoExtractors to use. The YoutubeDL also
109 registers itself as the downloader in charge for the InfoExtractors
110 that are added to it, so this is a "mutual registration".
111
112 Available options:
113
114 username: Username for authentication purposes.
115 password: Password for authentication purposes.
c6c19746 116 videopassword: Password for accessing a video.
8222d8de
JMF
117 usenetrc: Use netrc for authentication instead.
118 verbose: Print additional info to stdout.
119 quiet: Do not print messages to stdout.
ad8915b7 120 no_warnings: Do not print out anything for warnings.
8222d8de
JMF
121 forceurl: Force printing final URL.
122 forcetitle: Force printing title.
123 forceid: Force printing ID.
124 forcethumbnail: Force printing thumbnail URL.
125 forcedescription: Force printing description.
126 forcefilename: Force printing final filename.
525ef922 127 forceduration: Force printing duration.
8694c600 128 forcejson: Force printing info_dict as JSON.
63e0be34
PH
129 dump_single_json: Force printing the info_dict of the whole playlist
130 (or video) as a single JSON line.
8222d8de 131 simulate: Do not download the video files.
d8600787 132 format: Video format code. See options.py for more information.
8222d8de
JMF
133 format_limit: Highest quality format to try.
134 outtmpl: Template for output names.
135 restrictfilenames: Do not allow "&" and spaces in file names
136 ignoreerrors: Do not stop on download errors.
137 nooverwrites: Prevent overwriting files.
138 playliststart: Playlist item to start at.
139 playlistend: Playlist item to end at.
c14e88f0 140 playlist_items: Specific indices of playlist to download.
ff815fe6 141 playlistreverse: Download playlist items in reverse order.
8222d8de
JMF
142 matchtitle: Download only matching titles.
143 rejecttitle: Reject downloads for matching titles.
8bf9319e 144 logger: Log messages to a logging.Logger instance.
8222d8de
JMF
145 logtostderr: Log messages to stderr instead of stdout.
146 writedescription: Write the video description to a .description file
147 writeinfojson: Write the video description to a .info.json file
1fb07d10 148 writeannotations: Write the video annotations to a .annotations.xml file
8222d8de 149 writethumbnail: Write the thumbnail image to a file
ec82d85a 150 write_all_thumbnails: Write all thumbnail formats to files
8222d8de 151 writesubtitles: Write the video subtitles to a file
b004821f 152 writeautomaticsub: Write the automatic subtitles to a file
8222d8de 153 allsubtitles: Downloads all the subtitles of the video
0b7f3118 154 (requires writesubtitles or writeautomaticsub)
8222d8de 155 listsubtitles: Lists all available subtitles for the video
b98a6b2f 156 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
aa6a10c4 157 subtitleslangs: List of languages of the subtitles to download
8222d8de
JMF
158 keepvideo: Keep the video file after post-processing
159 daterange: A DateRange object, download only if the upload_date is in the range.
160 skip_download: Skip the actual download of the video file
c35f9e72 161 cachedir: Location of the cache files in the filesystem.
a0e07d31 162 False to disable filesystem cache.
47192f92 163 noplaylist: Download single video instead of a playlist if in doubt.
8dbe9899
PH
164 age_limit: An integer representing the user's age in years.
165 Unsuitable videos for the given age are skipped.
5fe18bdb
PH
166 min_views: An integer representing the minimum view count the video
167 must have in order to not be skipped.
168 Videos without view count information are always
169 downloaded. None for no limit.
170 max_views: An integer representing the maximum view count.
171 Videos that are more popular than that are not
172 downloaded.
173 Videos without view count information are always
174 downloaded. None for no limit.
175 download_archive: File name of a file where all downloads are recorded.
c1c9a79c
PH
176 Videos already present in the file are not downloaded
177 again.
dca08720 178 cookiefile: File name where cookies should be read from and dumped to.
a1ee09e8 179 nocheckcertificate:Do not verify SSL certificates
7e8c0af0
PH
180 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
181 At the moment, this is only supported by YouTube.
a1ee09e8 182 proxy: URL of the proxy server to use
e344693b 183 socket_timeout: Time to wait for unresponsive hosts, in seconds
0783b09b
PH
184 bidi_workaround: Work around buggy terminals without bidirectional text
185 support, using fribidi
a0ddb8a2 186 debug_printtraffic:Print out sent and received HTTP traffic
7b0817e8 187 include_ads: Download ads as well
04b4d394
PH
188 default_search: Prepend this string if an input url is not valid.
189 'auto' for elaborate guessing
62fec3b2 190 encoding: Use this encoding instead of the system-specified.
e8ee972c 191 extract_flat: Do not resolve URLs, return the immediate result.
057a5206
PH
192 Pass in 'in_playlist' to only show this behavior for
193 playlist items.
4f026faf 194 postprocessors: A list of dictionaries, each with an entry
71b640cc
PH
195 * key: The name of the postprocessor. See
196 youtube_dl/postprocessor/__init__.py for a list.
4f026faf
PH
197 as well as any further keyword arguments for the
198 postprocessor.
71b640cc
PH
199 progress_hooks: A list of functions that get called on download
200 progress, with a dictionary with the entries
ee69b99a
PH
201 * status: One of "downloading" and "finished".
202 Check this first and ignore unknown values.
71b640cc 203
ee69b99a
PH
204 If status is one of "downloading" or "finished", the
205 following properties may also be present:
206 * filename: The final filename (always present)
71b640cc
PH
207 * downloaded_bytes: Bytes on disk
208 * total_bytes: Size of the whole file, None if unknown
209 * tmpfilename: The filename we're currently writing to
210 * eta: The estimated time in seconds, None if unknown
211 * speed: The download speed in bytes/second, None if
212 unknown
213
214 Progress hooks are guaranteed to be called at least once
215 (with status "finished") if the download is successful.
45598f15 216 merge_output_format: Extension to use when merging formats.
6271f1ca
PH
217 fixup: Automatically correct known faults of the file.
218 One of:
219 - "never": do nothing
220 - "warn": only emit a warning
221 - "detect_or_warn": check whether we can do anything
62cd676c 222 about it, warn otherwise (default)
be4a824d 223 source_address: (Experimental) Client-side IP address to bind to.
8bfa7545
PH
224 call_home: Boolean, true iff we are allowed to contact the
225 youtube-dl servers for debugging.
5f0d813d 226 sleep_interval: Number of seconds to sleep before each download.
222516d9 227 external_downloader: Executable of the external downloader to call.
cfb56d1a
PH
228 listformats: Print an overview of available video formats and exit.
229 list_thumbnails: Print a table of all thumbnails and exit.
71b640cc 230
fe7e0c98 231
8222d8de
JMF
232 The following parameters are not used by YoutubeDL itself, they are used by
233 the FileDownloader:
234 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
881e6a1f
PH
235 noresizebuffer, retries, continuedl, noprogress, consoletitle,
236 xattr_set_filesize.
76b1bd67
JMF
237
238 The following options are used by the post processors:
239 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
240 otherwise prefer avconv.
8d31fa3c 241 exec_cmd: Arbitrary command to run after downloading
8222d8de
JMF
242 """
243
244 params = None
245 _ies = []
246 _pps = []
247 _download_retcode = None
248 _num_downloads = None
249 _screen_file = None
250
    def __init__(self, params=None, auto_init=True):
        """Create a YoutubeDL object with the given options.

        params is the options dictionary (an empty dict is used when it is
        None); it is stored as self.params.  When auto_init is true, the
        debug header is printed and the default info extractors are added.
        Post processors and progress hooks listed in params are registered
        in either case.
        """
        if params is None:
            params = {}
        self._ies = []
        self._ies_instances = {}
        self._pps = []
        self._progress_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        # Index the pair with the boolean option: stderr if 'logtostderr' is set
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr
        self.params = params
        self.cache = Cache(self)

        if params.get('bidi_workaround', False):
            # Start a helper process (bidiv, falling back to fribidi) that
            # reads messages on its stdin and writes to the slave end of a
            # pty; the converted text is read back from the master end
            # (see _bidi_workaround).
            try:
                import pty
                master, slave = pty.openpty()
                width = get_term_width()
                if width is None:
                    width_args = []
                else:
                    width_args = ['-w', str(width)]
                sp_kwargs = dict(
                    stdin=subprocess.PIPE,
                    stdout=slave,
                    stderr=self._err_file)
                try:
                    self._output_process = subprocess.Popen(
                        ['bidiv'] + width_args, **sp_kwargs
                    )
                except OSError:
                    # bidiv could not be started; try fribidi instead
                    self._output_process = subprocess.Popen(
                        ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                # errno 2 is ENOENT (executable not found): only warn in that
                # case, any other OS error is propagated
                if ose.errno == 2:
                    self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        if (sys.version_info >= (3,) and sys.platform != 'win32' and
                sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not params.get('restrictfilenames', False)):
            # On Python 3, the Unicode filesystem API will throw errors (#1474)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        if '%(stitle)s' in self.params.get('outtmpl', ''):
            self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')

        self._setup_opener()

        if auto_init:
            self.print_debug_header()
            self.add_default_info_extractors()

        # Each postprocessor definition is a dict: 'key' selects the class,
        # the remaining entries are passed to it as keyword arguments.
        for pp_def_raw in self.params.get('postprocessors', []):
            pp_class = get_postprocessor(pp_def_raw['key'])
            # Work on a copy so the caller's dictionary keeps its 'key' entry
            pp_def = dict(pp_def_raw)
            del pp_def['key']
            pp = pp_class(self, **compat_kwargs(pp_def))
            self.add_post_processor(pp)

        for ph in self.params.get('progress_hooks', []):
            self.add_progress_hook(ph)
321
7d4111ed
PH
322 def warn_if_short_id(self, argv):
323 # short YouTube ID starting with dash?
324 idxs = [
325 i for i, a in enumerate(argv)
326 if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
327 if idxs:
328 correct_argv = (
329 ['youtube-dl'] +
330 [a for i, a in enumerate(argv) if i not in idxs] +
331 ['--'] + [argv[i] for i in idxs]
332 )
333 self.report_warning(
334 'Long argument string detected. '
335 'Use -- to separate parameters and URLs, like this:\n%s\n' %
336 args_to_str(correct_argv))
337
8222d8de
JMF
338 def add_info_extractor(self, ie):
339 """Add an InfoExtractor object to the end of the list."""
340 self._ies.append(ie)
56c73665 341 self._ies_instances[ie.ie_key()] = ie
8222d8de
JMF
342 ie.set_downloader(self)
343
56c73665
JMF
344 def get_info_extractor(self, ie_key):
345 """
346 Get an instance of an IE with name ie_key, it will try to get one from
347 the _ies list, if there's no instance it will create a new one and add
348 it to the extractor list.
349 """
350 ie = self._ies_instances.get(ie_key)
351 if ie is None:
352 ie = get_info_extractor(ie_key)()
353 self.add_info_extractor(ie)
354 return ie
355
023fa8c4
JMF
356 def add_default_info_extractors(self):
357 """
358 Add the InfoExtractors returned by gen_extractors to the end of the list
359 """
360 for ie in gen_extractors():
361 self.add_info_extractor(ie)
362
8222d8de
JMF
363 def add_post_processor(self, pp):
364 """Add a PostProcessor object to the end of the chain."""
365 self._pps.append(pp)
366 pp.set_downloader(self)
367
933605d7
JMF
368 def add_progress_hook(self, ph):
369 """Add the progress hook (currently only for the file downloader)"""
370 self._progress_hooks.append(ph)
8ab470f1 371
1c088fa8 372 def _bidi_workaround(self, message):
5d681e96 373 if not hasattr(self, '_output_channel'):
1c088fa8
PH
374 return message
375
5d681e96 376 assert hasattr(self, '_output_process')
11b85ce6 377 assert isinstance(message, compat_str)
6febd1c1
PH
378 line_count = message.count('\n') + 1
379 self._output_process.stdin.write((message + '\n').encode('utf-8'))
5d681e96 380 self._output_process.stdin.flush()
6febd1c1 381 res = ''.join(self._output_channel.readline().decode('utf-8')
9e1a5b84 382 for _ in range(line_count))
6febd1c1 383 return res[:-len('\n')]
1c088fa8 384
8222d8de 385 def to_screen(self, message, skip_eol=False):
0783b09b
PH
386 """Print message to stdout if not in quiet mode."""
387 return self.to_stdout(message, skip_eol, check_quiet=True)
388
734f90bb 389 def _write_string(self, s, out=None):
b58ddb32 390 write_string(s, out=out, encoding=self.params.get('encoding'))
734f90bb 391
0783b09b 392 def to_stdout(self, message, skip_eol=False, check_quiet=False):
8222d8de 393 """Print message to stdout if not in quiet mode."""
8bf9319e 394 if self.params.get('logger'):
43afe285 395 self.params['logger'].debug(message)
0783b09b 396 elif not check_quiet or not self.params.get('quiet', False):
1c088fa8 397 message = self._bidi_workaround(message)
6febd1c1 398 terminator = ['\n', ''][skip_eol]
8222d8de 399 output = message + terminator
1c088fa8 400
734f90bb 401 self._write_string(output, self._screen_file)
8222d8de
JMF
402
403 def to_stderr(self, message):
404 """Print message to stderr."""
11b85ce6 405 assert isinstance(message, compat_str)
8bf9319e 406 if self.params.get('logger'):
43afe285
IB
407 self.params['logger'].error(message)
408 else:
1c088fa8 409 message = self._bidi_workaround(message)
6febd1c1 410 output = message + '\n'
734f90bb 411 self._write_string(output, self._err_file)
8222d8de 412
1e5b9a95
PH
413 def to_console_title(self, message):
414 if not self.params.get('consoletitle', False):
415 return
416 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
417 # c_wchar_p() might not be necessary if `message` is
418 # already of type unicode()
419 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
420 elif 'TERM' in os.environ:
734f90bb 421 self._write_string('\033]0;%s\007' % message, self._screen_file)
1e5b9a95 422
bdde425c
PH
423 def save_console_title(self):
424 if not self.params.get('consoletitle', False):
425 return
426 if 'TERM' in os.environ:
efd6c574 427 # Save the title on stack
734f90bb 428 self._write_string('\033[22;0t', self._screen_file)
bdde425c
PH
429
430 def restore_console_title(self):
431 if not self.params.get('consoletitle', False):
432 return
433 if 'TERM' in os.environ:
efd6c574 434 # Restore the title from stack
734f90bb 435 self._write_string('\033[23;0t', self._screen_file)
bdde425c
PH
436
437 def __enter__(self):
438 self.save_console_title()
439 return self
440
441 def __exit__(self, *args):
442 self.restore_console_title()
f89197d7 443
dca08720
PH
444 if self.params.get('cookiefile') is not None:
445 self.cookiejar.save()
bdde425c 446
8222d8de
JMF
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.

        Raises DownloadError unless the 'ignoreerrors' option is set; in
        that case the download return code is set to 1 instead.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            # In verbose mode also print a traceback, building one if the
            # caller did not supply it
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    # The exception being handled may carry the exc_info of
                    # an underlying exception; print that one first
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += compat_str(traceback.format_exc())
                else:
                    # No active exception: show the current call stack
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            self.to_stderr(tb)
        if not self.params.get('ignoreerrors', False):
            # Prefer the wrapped exc_info (if any) over the active one
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1
476
477 def report_warning(self, message):
478 '''
479 Print the message to stderr, it will be prefixed with 'WARNING:'
480 If stderr is a tty file the 'WARNING:' will be colored
481 '''
6d07ce01
JMF
482 if self.params.get('logger') is not None:
483 self.params['logger'].warning(message)
8222d8de 484 else:
ad8915b7
PH
485 if self.params.get('no_warnings'):
486 return
6d07ce01
JMF
487 if self._err_file.isatty() and os.name != 'nt':
488 _msg_header = '\033[0;33mWARNING:\033[0m'
489 else:
490 _msg_header = 'WARNING:'
491 warning_message = '%s %s' % (_msg_header, message)
492 self.to_stderr(warning_message)
8222d8de
JMF
493
494 def report_error(self, message, tb=None):
495 '''
496 Do the same as trouble, but prefixes the message with 'ERROR:', colored
497 in red if stderr is a tty file.
498 '''
0783b09b 499 if self._err_file.isatty() and os.name != 'nt':
6febd1c1 500 _msg_header = '\033[0;31mERROR:\033[0m'
8222d8de 501 else:
6febd1c1
PH
502 _msg_header = 'ERROR:'
503 error_message = '%s %s' % (_msg_header, message)
8222d8de
JMF
504 self.trouble(error_message, tb)
505
8222d8de
JMF
506 def report_file_already_downloaded(self, file_name):
507 """Report file has already been fully downloaded."""
508 try:
6febd1c1 509 self.to_screen('[download] %s has already been downloaded' % file_name)
ce02ed60 510 except UnicodeEncodeError:
6febd1c1 511 self.to_screen('[download] The file has already been downloaded')
8222d8de 512
8222d8de
JMF
513 def prepare_filename(self, info_dict):
514 """Generate the output filename."""
515 try:
516 template_dict = dict(info_dict)
517
518 template_dict['epoch'] = int(time.time())
519 autonumber_size = self.params.get('autonumber_size')
520 if autonumber_size is None:
521 autonumber_size = 5
6febd1c1 522 autonumber_templ = '%0' + str(autonumber_size) + 'd'
8222d8de 523 template_dict['autonumber'] = autonumber_templ % self._num_downloads
702665c0 524 if template_dict.get('playlist_index') is not None:
c6b4132a 525 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
17b75c0d
PH
526 if template_dict.get('resolution') is None:
527 if template_dict.get('width') and template_dict.get('height'):
528 template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
529 elif template_dict.get('height'):
805ef3c6 530 template_dict['resolution'] = '%sp' % template_dict['height']
17b75c0d 531 elif template_dict.get('width'):
805ef3c6 532 template_dict['resolution'] = '?x%d' % template_dict['width']
8222d8de 533
586a91b6 534 sanitize = lambda k, v: sanitize_filename(
45598aab 535 compat_str(v),
8222d8de 536 restricted=self.params.get('restrictfilenames'),
6febd1c1 537 is_id=(k == 'id'))
586a91b6 538 template_dict = dict((k, sanitize(k, v))
45598aab
PH
539 for k, v in template_dict.items()
540 if v is not None)
6febd1c1 541 template_dict = collections.defaultdict(lambda: 'NA', template_dict)
8222d8de 542
acd69589 543 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
4644ac55 544 tmpl = compat_expanduser(outtmpl)
586a91b6 545 filename = tmpl % template_dict
3a0d2f52
S
546 # Temporary fix for #4787
547 # 'Treat' all problem characters by passing filename through preferredencoding
548 # to workaround encoding issues with subprocess on python2 @ Windows
549 if sys.version_info < (3, 0) and sys.platform == 'win32':
550 filename = encodeFilename(filename, True).decode(preferredencoding())
8222d8de 551 return filename
8222d8de 552 except ValueError as err:
6febd1c1 553 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
8222d8de
JMF
554 return None
555
556 def _match_entry(self, info_dict):
557 """ Returns None iff the file should be downloaded """
558
6febd1c1 559 video_title = info_dict.get('title', info_dict.get('id', 'video'))
7012b23c
PH
560 if 'title' in info_dict:
561 # This can happen when we're just evaluating the playlist
562 title = info_dict['title']
563 matchtitle = self.params.get('matchtitle', False)
564 if matchtitle:
565 if not re.search(matchtitle, title, re.IGNORECASE):
6febd1c1 566 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
7012b23c
PH
567 rejecttitle = self.params.get('rejecttitle', False)
568 if rejecttitle:
569 if re.search(rejecttitle, title, re.IGNORECASE):
6febd1c1 570 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
8222d8de
JMF
571 date = info_dict.get('upload_date', None)
572 if date is not None:
573 dateRange = self.params.get('daterange', DateRange())
574 if date not in dateRange:
6febd1c1 575 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
5fe18bdb
PH
576 view_count = info_dict.get('view_count', None)
577 if view_count is not None:
578 min_views = self.params.get('min_views')
579 if min_views is not None and view_count < min_views:
6febd1c1 580 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
5fe18bdb
PH
581 max_views = self.params.get('max_views')
582 if max_views is not None and view_count > max_views:
6febd1c1 583 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
05900629
PH
584 if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
585 return 'Skipping "%s" because it is age restricted' % title
c1c9a79c 586 if self.in_download_archive(info_dict):
6febd1c1 587 return '%s has already been recorded in archive' % video_title
8222d8de 588 return None
fe7e0c98 589
b6c45014
JMF
590 @staticmethod
591 def add_extra_info(info_dict, extra_info):
592 '''Set the keys from extra_info in info dict if they are missing'''
593 for key, value in extra_info.items():
594 info_dict.setdefault(key, value)
595
7fc3fa05
PH
596 def extract_info(self, url, download=True, ie_key=None, extra_info={},
597 process=True):
8222d8de
JMF
598 '''
599 Returns a list with a dictionary for each video we find.
600 If 'download', also downloads the videos.
601 extra_info is a dict containing the extra values to add to each result
602 '''
fe7e0c98 603
8222d8de 604 if ie_key:
56c73665 605 ies = [self.get_info_extractor(ie_key)]
8222d8de
JMF
606 else:
607 ies = self._ies
608
609 for ie in ies:
610 if not ie.suitable(url):
611 continue
612
613 if not ie.working():
6febd1c1
PH
614 self.report_warning('The program functionality for this site has been marked as broken, '
615 'and will probably not work.')
8222d8de
JMF
616
617 try:
618 ie_result = ie.extract(url)
5f6a1245 619 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
8222d8de
JMF
620 break
621 if isinstance(ie_result, list):
622 # Backwards compatibility: old IE result format
8222d8de
JMF
623 ie_result = {
624 '_type': 'compat_list',
625 'entries': ie_result,
626 }
ea38e55f 627 self.add_default_extra_info(ie_result, ie, url)
7fc3fa05
PH
628 if process:
629 return self.process_ie_result(ie_result, download, extra_info)
630 else:
631 return ie_result
5f6a1245 632 except ExtractorError as de: # An error we somewhat expected
8222d8de
JMF
633 self.report_error(compat_str(de), de.format_traceback())
634 break
d3e5bbf4
PH
635 except MaxDownloadsReached:
636 raise
8222d8de
JMF
637 except Exception as e:
638 if self.params.get('ignoreerrors', False):
639 self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
640 break
641 else:
642 raise
643 else:
1a489545 644 self.report_error('no suitable InfoExtractor for URL %s' % url)
fe7e0c98 645
ea38e55f
PH
646 def add_default_extra_info(self, ie_result, ie, url):
647 self.add_extra_info(ie_result, {
648 'extractor': ie.IE_NAME,
649 'webpage_url': url,
650 'webpage_url_basename': url_basename(url),
651 'extractor_key': ie.ie_key(),
652 })
653
8222d8de
JMF
654 def process_ie_result(self, ie_result, download=True, extra_info={}):
655 """
656 Take the result of the ie(may be modified) and resolve all unresolved
657 references (URLs, playlist items).
658
659 It will also download the videos if 'download'.
660 Returns the resolved ie_result.
661 """
662
e8ee972c
PH
663 result_type = ie_result.get('_type', 'video')
664
057a5206
PH
665 if result_type in ('url', 'url_transparent'):
666 extract_flat = self.params.get('extract_flat', False)
667 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
668 extract_flat is True):
057a5206
PH
669 if self.params.get('forcejson', False):
670 self.to_stdout(json.dumps(ie_result))
e8ee972c
PH
671 return ie_result
672
8222d8de 673 if result_type == 'video':
b6c45014 674 self.add_extra_info(ie_result, extra_info)
feee2ecf 675 return self.process_video_result(ie_result, download=download)
8222d8de
JMF
676 elif result_type == 'url':
677 # We have to add extra_info to the results because it may be
678 # contained in a playlist
679 return self.extract_info(ie_result['url'],
680 download,
681 ie_key=ie_result.get('ie_key'),
682 extra_info=extra_info)
7fc3fa05
PH
683 elif result_type == 'url_transparent':
684 # Use the information from the embedding page
685 info = self.extract_info(
686 ie_result['url'], ie_key=ie_result.get('ie_key'),
687 extra_info=extra_info, download=False, process=False)
688
412c617d
PH
689 force_properties = dict(
690 (k, v) for k, v in ie_result.items() if v is not None)
691 for f in ('_type', 'url'):
692 if f in force_properties:
693 del force_properties[f]
694 new_result = info.copy()
695 new_result.update(force_properties)
7fc3fa05
PH
696
697 assert new_result.get('_type') != 'url_transparent'
7fc3fa05
PH
698
699 return self.process_ie_result(
700 new_result, download=download, extra_info=extra_info)
42e12102 701 elif result_type == 'playlist' or result_type == 'multi_video':
8222d8de
JMF
702 # We process each entry in the playlist
703 playlist = ie_result.get('title', None) or ie_result.get('id', None)
6febd1c1 704 self.to_screen('[download] Downloading playlist: %s' % playlist)
8222d8de
JMF
705
706 playlist_results = []
707
8222d8de 708 playliststart = self.params.get('playliststart', 1) - 1
a19fd00c
PH
709 playlistend = self.params.get('playlistend', None)
710 # For backwards compatibility, interpret -1 as whole list
8222d8de 711 if playlistend == -1:
a19fd00c 712 playlistend = None
8222d8de 713
c14e88f0
PH
714 playlistitems_str = self.params.get('playlist_items', None)
715 playlistitems = None
716 if playlistitems_str is not None:
717 def iter_playlistitems(format):
718 for string_segment in format.split(','):
719 if '-' in string_segment:
720 start, end = string_segment.split('-')
721 for item in range(int(start), int(end) + 1):
722 yield int(item)
723 else:
724 yield int(string_segment)
725 playlistitems = iter_playlistitems(playlistitems_str)
726
b82f815f
PH
727 ie_entries = ie_result['entries']
728 if isinstance(ie_entries, list):
729 n_all_entries = len(ie_entries)
c14e88f0
PH
730 if playlistitems:
731 entries = [ie_entries[i - 1] for i in playlistitems]
732 else:
733 entries = ie_entries[playliststart:playlistend]
b7ab0590
PH
734 n_entries = len(entries)
735 self.to_screen(
736 "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
737 (ie_result['extractor'], playlist, n_all_entries, n_entries))
b82f815f 738 elif isinstance(ie_entries, PagedList):
c14e88f0
PH
739 if playlistitems:
740 entries = []
741 for item in playlistitems:
742 entries.extend(ie_entries.getslice(
743 item - 1, item
744 ))
745 else:
746 entries = ie_entries.getslice(
747 playliststart, playlistend)
b7ab0590
PH
748 n_entries = len(entries)
749 self.to_screen(
750 "[%s] playlist %s: Downloading %d videos" %
751 (ie_result['extractor'], playlist, n_entries))
b82f815f 752 else: # iterable
c14e88f0
PH
753 if playlistitems:
754 entry_list = list(ie_entries)
755 entries = [entry_list[i - 1] for i in playlistitems]
756 else:
757 entries = list(itertools.islice(
758 ie_entries, playliststart, playlistend))
b82f815f
PH
759 n_entries = len(entries)
760 self.to_screen(
761 "[%s] playlist %s: Downloading %d videos" %
762 (ie_result['extractor'], playlist, n_entries))
8222d8de 763
ff815fe6
MS
764 if self.params.get('playlistreverse', False):
765 entries = entries[::-1]
766
fe7e0c98 767 for i, entry in enumerate(entries, 1):
734ea11e 768 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
8222d8de 769 extra = {
c6b4132a 770 'n_entries': n_entries,
fe7e0c98 771 'playlist': playlist,
a1cf99d0
PH
772 'playlist_id': ie_result.get('id'),
773 'playlist_title': ie_result.get('title'),
fe7e0c98 774 'playlist_index': i + playliststart,
b6c45014 775 'extractor': ie_result['extractor'],
9103bbc5 776 'webpage_url': ie_result['webpage_url'],
29eb5174 777 'webpage_url_basename': url_basename(ie_result['webpage_url']),
be97abc2 778 'extractor_key': ie_result['extractor_key'],
fe7e0c98 779 }
7012b23c
PH
780
781 reason = self._match_entry(entry)
782 if reason is not None:
6febd1c1 783 self.to_screen('[download] ' + reason)
7012b23c
PH
784 continue
785
8222d8de
JMF
786 entry_result = self.process_ie_result(entry,
787 download=download,
788 extra_info=extra)
789 playlist_results.append(entry_result)
790 ie_result['entries'] = playlist_results
791 return ie_result
792 elif result_type == 'compat_list':
c9bf4114
PH
793 self.report_warning(
794 'Extractor %s returned a compat_list result. '
795 'It needs to be updated.' % ie_result.get('extractor'))
5f6a1245 796
8222d8de 797 def _fixup(r):
9e1a5b84
JW
798 self.add_extra_info(
799 r,
9103bbc5
JMF
800 {
801 'extractor': ie_result['extractor'],
802 'webpage_url': ie_result['webpage_url'],
29eb5174 803 'webpage_url_basename': url_basename(ie_result['webpage_url']),
be97abc2 804 'extractor_key': ie_result['extractor_key'],
9e1a5b84
JW
805 }
806 )
8222d8de
JMF
807 return r
808 ie_result['entries'] = [
b6c45014 809 self.process_ie_result(_fixup(r), download, extra_info)
8222d8de
JMF
810 for r in ie_result['entries']
811 ]
812 return ie_result
813 else:
814 raise Exception('Invalid result type: %s' % result_type)
815
083c9df9
PH
816 def _apply_format_filter(self, format_spec, available_formats):
817 " Returns a tuple of the remaining format_spec and filtered formats "
818
819 OPERATORS = {
820 '<': operator.lt,
821 '<=': operator.le,
822 '>': operator.gt,
823 '>=': operator.ge,
824 '=': operator.eq,
825 '!=': operator.ne,
826 }
827 operator_rex = re.compile(r'''(?x)\s*\[
9f0df77a 828 (?P<key>width|height|tbr|abr|vbr|filesize|fps)
083c9df9
PH
829 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
830 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
831 \]$
832 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
833 m = operator_rex.search(format_spec)
834 if not m:
835 raise ValueError('Invalid format specification %r' % format_spec)
836
837 try:
838 comparison_value = int(m.group('value'))
839 except ValueError:
840 comparison_value = parse_filesize(m.group('value'))
841 if comparison_value is None:
842 comparison_value = parse_filesize(m.group('value') + 'B')
843 if comparison_value is None:
844 raise ValueError(
845 'Invalid value %r in format specification %r' % (
846 m.group('value'), format_spec))
847 op = OPERATORS[m.group('op')]
848
849 def _filter(f):
850 actual_value = f.get(m.group('key'))
851 if actual_value is None:
852 return m.group('none_inclusive')
853 return op(actual_value, comparison_value)
854 new_formats = [f for f in available_formats if _filter(f)]
855
856 new_format_spec = format_spec[:-len(m.group(0))]
857 if not new_format_spec:
858 new_format_spec = 'best'
859
860 return (new_format_spec, new_formats)
861
def select_format(self, format_spec, available_formats):
    """Pick one format dict out of available_formats.

    format_spec may be None or 'best' (last entry), 'worst' (first entry),
    'bestaudio'/'worstaudio' (entries whose vcodec is 'none'),
    'bestvideo'/'worstvideo' (entries whose acodec is 'none'), a known
    file extension, or a format_id.  Any number of trailing ``[...]``
    filters are applied first via _apply_format_filter.

    Returns the selected format dict, or None when nothing matches.
    """
    # Normalise up front: None means 'best', and the filter loop below
    # needs a string to call endswith() on.
    if format_spec is None:
        format_spec = 'best'

    while format_spec.endswith(']'):
        format_spec, available_formats = self._apply_format_filter(
            format_spec, available_formats)
    if not available_formats:
        return None

    if format_spec in ('best', 'worst'):
        candidates = available_formats
    elif format_spec in ('bestaudio', 'worstaudio'):
        candidates = [
            f for f in available_formats
            if f.get('vcodec') == 'none']
    elif format_spec in ('bestvideo', 'worstvideo'):
        candidates = [
            f for f in available_formats
            if f.get('acodec') == 'none']
    else:
        extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
        field = 'ext' if format_spec in extensions else 'format_id'
        # .get() so that a format dict lacking the field is simply not a
        # match instead of raising KeyError.
        matches = [f for f in available_formats if f.get(field) == format_spec]
        return matches[-1] if matches else None

    if not candidates:
        return None
    # The list is treated as ordered worst -> best.
    return candidates[0] if format_spec.startswith('worst') else candidates[-1]
907
e5660ee6
JMF
def _calc_headers(self, info_dict):
    """Build the HTTP headers dict for the given info_dict.

    Starts from a copy of std_headers, overlays info_dict['http_headers']
    when present and non-empty, and sets 'Cookie' when _calc_cookies
    returns a non-empty value.
    """
    headers = std_headers.copy()

    extra_headers = info_dict.get('http_headers')
    if extra_headers:
        headers.update(extra_headers)

    cookie_header = self._calc_cookies(info_dict)
    if cookie_header:
        headers['Cookie'] = cookie_header

    return headers
920
921 def _calc_cookies(self, info_dict):
922 class _PseudoRequest(object):
923 def __init__(self, url):
924 self.url = url
925 self.headers = {}
926 self.unverifiable = False
927
928 def add_unredirected_header(self, k, v):
929 self.headers[k] = v
930
931 def get_full_url(self):
932 return self.url
1070711d
JMF
933
934 def is_unverifiable(self):
935 return self.unverifiable
e5660ee6 936
4b405cfc
JMF
937 def has_header(self, h):
938 return h in self.headers
939
e5660ee6
JMF
940 pr = _PseudoRequest(info_dict['url'])
941 self.cookiejar.add_cookie_header(pr)
942 return pr.headers.get('Cookie')
943
dd82ffea
JMF
def process_video_result(self, info_dict, download=True):
    """Normalise a single video result, select formats and optionally download.

    Fills in derived fields (playlist, thumbnails, display_id, upload_date,
    per-format format_id/format/ext/http_headers), selects the formats
    requested via the 'format' param and, when download is true, hands each
    selected format to process_info.

    Returns info_dict updated with the last selected format.  Returns None
    after listing formats/thumbnails, or after reporting that a merged
    request has the audio format first.
    Raises ExtractorError when 'id', 'title' or a format 'url' is missing,
    when there are no formats, or when no requested format is available.
    """
    assert info_dict.get('_type', 'video') == 'video'

    if 'id' not in info_dict:
        raise ExtractorError('Missing "id" field in extractor result')
    if 'title' not in info_dict:
        raise ExtractorError('Missing "title" field in extractor result')

    if 'playlist' not in info_dict:
        # It isn't part of a playlist
        info_dict['playlist'] = None
        info_dict['playlist_index'] = None

    thumbnails = info_dict.get('thumbnails')
    if thumbnails is None:
        thumbnail = info_dict.get('thumbnail')
        if thumbnail:
            thumbnails = [{'url': thumbnail}]
    if thumbnails:
        def _or(value, default):
            return default if value is None else value

        # Missing fields are mapped to a low default instead of None:
        # None cannot be ordered against numbers/strings on Python 3.
        thumbnails.sort(key=lambda t: (
            _or(t.get('preference'), -1), _or(t.get('width'), -1),
            _or(t.get('height'), -1), _or(t.get('id'), ''),
            _or(t.get('url'), '')))
        for t in thumbnails:
            # Keys may be present but None; only format real dimensions.
            if t.get('width') is not None and t.get('height') is not None:
                t['resolution'] = '%dx%d' % (t['width'], t['height'])

    if thumbnails and 'thumbnail' not in info_dict:
        info_dict['thumbnail'] = thumbnails[-1]['url']

    if 'display_id' not in info_dict and 'id' in info_dict:
        info_dict['display_id'] = info_dict['id']

    if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
        # Working around negative timestamps in Windows
        # (see http://bugs.python.org/issue1646728)
        if info_dict['timestamp'] < 0 and os.name == 'nt':
            info_dict['timestamp'] = 0
        upload_date = datetime.datetime.utcfromtimestamp(
            info_dict['timestamp'])
        info_dict['upload_date'] = upload_date.strftime('%Y%m%d')

    # These extractors handle format selection themselves
    if info_dict.get('extractor') in ['Youku']:
        if download:
            self.process_info(info_dict)
        return info_dict

    # We now pick which formats have to be downloaded
    if info_dict.get('formats') is None:
        # There's only one format available
        formats = [info_dict]
    else:
        formats = info_dict['formats']

    if not formats:
        raise ExtractorError('No video formats found!')

    # We check that all the formats have the format and format_id fields
    for i, fmt in enumerate(formats):
        if 'url' not in fmt:
            raise ExtractorError('Missing "url" key in result (index %d)' % i)

        if fmt.get('format_id') is None:
            fmt['format_id'] = compat_str(i)
        if fmt.get('format') is None:
            fmt['format'] = '{id} - {res}{note}'.format(
                id=fmt['format_id'],
                res=self.format_resolution(fmt),
                note=' ({0})'.format(fmt['format_note']) if fmt.get('format_note') is not None else '',
            )
        # Automatically determine file extension if missing
        if 'ext' not in fmt:
            fmt['ext'] = determine_ext(fmt['url']).lower()
        # Add HTTP headers, so that external programs can use them from the
        # json output
        full_format_info = info_dict.copy()
        full_format_info.update(fmt)
        fmt['http_headers'] = self._calc_headers(full_format_info)

    format_limit = self.params.get('format_limit', None)
    if format_limit:
        formats = list(takewhile_inclusive(
            lambda f: f['format_id'] != format_limit, formats
        ))

    # TODO Central sorting goes here

    if formats[0] is not info_dict:
        # Only set the 'formats' field if the original info_dict lists them;
        # otherwise we end up with a circular reference: the first (and only)
        # element of 'formats' would be info_dict itself, which can't be
        # exported to json
        info_dict['formats'] = formats
    if self.params.get('listformats'):
        self.list_formats(info_dict)
        return
    if self.params.get('list_thumbnails'):
        self.list_thumbnails(info_dict)
        return

    req_format = self.params.get('format')
    if req_format is None:
        req_format = 'best'
    formats_to_download = []
    # The -1 is for supporting YoutubeIE
    if req_format in ('-1', 'all'):
        formats_to_download = formats
    else:
        for rfstr in req_format.split(','):
            # We can accept formats requested in the format: 34/5/best, we pick
            # the first that is available, starting from left
            req_formats = rfstr.split('/')
            for rf in req_formats:
                if re.match(r'.+?\+.+?', rf) is not None:
                    # Two formats have been requested like '137+139'
                    format_1, format_2 = rf.split('+')
                    formats_info = (self.select_format(format_1, formats),
                                    self.select_format(format_2, formats))
                    if all(formats_info):
                        # The first format must contain the video and the
                        # second the audio
                        if formats_info[0].get('vcodec') == 'none':
                            self.report_error('The first format must '
                                              'contain the video, try using '
                                              '"-f %s+%s"' % (format_2, format_1))
                            return
                        output_ext = (
                            formats_info[0]['ext']
                            if self.params.get('merge_output_format') is None
                            else self.params['merge_output_format'])
                        selected_format = {
                            'requested_formats': formats_info,
                            'format': rf,
                            'format_id': rf,
                            # Extension of the merged file (the literal used
                            # to list 'ext' twice; this value is the one
                            # that took effect).
                            'ext': output_ext,
                            'width': formats_info[0].get('width'),
                            'height': formats_info[0].get('height'),
                            'resolution': formats_info[0].get('resolution'),
                            'fps': formats_info[0].get('fps'),
                            'vcodec': formats_info[0].get('vcodec'),
                            'vbr': formats_info[0].get('vbr'),
                            'stretched_ratio': formats_info[0].get('stretched_ratio'),
                            'acodec': formats_info[1].get('acodec'),
                            'abr': formats_info[1].get('abr'),
                        }
                    else:
                        selected_format = None
                else:
                    selected_format = self.select_format(rf, formats)
                if selected_format is not None:
                    formats_to_download.append(selected_format)
                    break
    if not formats_to_download:
        raise ExtractorError('requested format not available',
                             expected=True)

    if download:
        if len(formats_to_download) > 1:
            self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
        for fmt in formats_to_download:
            new_info = dict(info_dict)
            new_info.update(fmt)
            self.process_info(new_info)
    # We update the info dict with the best quality format (backwards compatibility)
    info_dict.update(formats_to_download[-1])
    return info_dict
1111
8222d8de
JMF
def process_info(self, info_dict):
    """Process a single resolved IE result.

    In order: enforces the 'max_downloads' limit, normalises the title
    fields, applies _match_entry, computes the output filename, performs
    the forced printings, and - unless 'simulate' is set - writes the
    requested side files (description, annotations, subtitles, info JSON,
    thumbnails), downloads the media, schedules fixup postprocessors,
    runs post_process and records the video in the download archive.

    Raises MaxDownloadsReached when the download limit has been hit and
    UnavailableVideoError when the download fails with OSError/IOError.
    Most other failures are reported through report_error and end the
    method early.
    """

    assert info_dict.get('_type', 'video') == 'video'

    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None:
        if self._num_downloads >= int(max_downloads):
            raise MaxDownloadsReached()

    # Keep the untruncated title around; 'title' itself is capped at 200
    # characters (197 plus an ellipsis).
    info_dict['fulltitle'] = info_dict['title']
    if len(info_dict['title']) > 200:
        info_dict['title'] = info_dict['title'][:197] + '...'

    # Keep for backwards compatibility
    info_dict['stitle'] = info_dict['title']

    if 'format' not in info_dict:
        info_dict['format'] = info_dict['ext']

    # A non-None reason means this entry must be skipped.
    reason = self._match_entry(info_dict)
    if reason is not None:
        self.to_screen('[download] ' + reason)
        return

    # Counted before any simulate/skip check, so simulated runs also count
    # towards 'max_downloads'.
    self._num_downloads += 1

    info_dict['_filename'] = filename = self.prepare_filename(info_dict)

    # Forced printings
    if self.params.get('forcetitle', False):
        self.to_stdout(info_dict['fulltitle'])
    if self.params.get('forceid', False):
        self.to_stdout(info_dict['id'])
    if self.params.get('forceurl', False):
        if info_dict.get('requested_formats') is not None:
            for f in info_dict['requested_formats']:
                self.to_stdout(f['url'] + f.get('play_path', ''))
        else:
            # For RTMP URLs, also include the playpath
            self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
    if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
        self.to_stdout(info_dict['thumbnail'])
    if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
        self.to_stdout(info_dict['description'])
    if self.params.get('forcefilename', False) and filename is not None:
        self.to_stdout(filename)
    if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
        self.to_stdout(formatSeconds(info_dict['duration']))
    if self.params.get('forceformat', False):
        self.to_stdout(info_dict['format'])
    if self.params.get('forcejson', False):
        self.to_stdout(json.dumps(info_dict))

    # Do nothing else if in simulate mode
    if self.params.get('simulate', False):
        return

    # Without a filename there is nowhere to write to.
    if filename is None:
        return

    # Make sure the target directory exists.
    try:
        dn = os.path.dirname(encodeFilename(filename))
        if dn and not os.path.exists(dn):
            os.makedirs(dn)
    except (OSError, IOError) as err:
        self.report_error('unable to create directory ' + compat_str(err))
        return

    if self.params.get('writedescription', False):
        descfn = filename + '.description'
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
            self.to_screen('[info] Video description is already present')
        elif info_dict.get('description') is None:
            self.report_warning('There\'s no description to write.')
        else:
            try:
                self.to_screen('[info] Writing video description to: ' + descfn)
                with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                    descfile.write(info_dict['description'])
            except (OSError, IOError):
                self.report_error('Cannot write description file ' + descfn)
                return

    if self.params.get('writeannotations', False):
        annofn = filename + '.annotations.xml'
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
            self.to_screen('[info] Video annotations are already present')
        else:
            try:
                self.to_screen('[info] Writing video annotations to: ' + annofn)
                with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                    annofile.write(info_dict['annotations'])
            except (KeyError, TypeError):
                # KeyError: no 'annotations' entry; TypeError: the value
                # could not be written as text. Only a warning - the
                # download continues.
                self.report_warning('There are no annotations to write.')
            except (OSError, IOError):
                self.report_error('Cannot write annotations file: ' + annofn)
                return

    subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                   self.params.get('writeautomaticsub')])

    if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        subtitles = info_dict['subtitles']
        sub_format = self.params.get('subtitlesformat', 'srt')
        for sub_lang in subtitles.keys():
            sub = subtitles[sub_lang]
            # Languages without subtitle data are skipped.
            if sub is None:
                continue
            try:
                sub_filename = subtitles_filename(filename, sub_lang, sub_format)
                if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
                    self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
                else:
                    self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                    with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                        subfile.write(sub)
            except (OSError, IOError):
                self.report_error('Cannot write subtitles file ' + sub_filename)
                return

    if self.params.get('writeinfojson', False):
        # Unlike the other side files, the info JSON name replaces the
        # media extension instead of being appended to the full filename.
        infofn = os.path.splitext(filename)[0] + '.info.json'
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
            self.to_screen('[info] Video description metadata is already present')
        else:
            self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
            try:
                write_json_file(info_dict, infofn)
            except (OSError, IOError):
                self.report_error('Cannot write metadata to JSON file ' + infofn)
                return

    self._write_thumbnails(info_dict, filename)

    if not self.params.get('skip_download', False):
        try:
            def dl(name, info):
                # Build a downloader for this info, attach the progress
                # hooks and run it; the result of fd.download is returned.
                fd = get_suitable_downloader(info, self.params)(self, self.params)
                for ph in self._progress_hooks:
                    fd.add_progress_hook(ph)
                if self.params.get('verbose'):
                    self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
                return fd.download(name, info)

            if info_dict.get('requested_formats') is not None:
                # Several formats were requested together: download each
                # one to its own file and schedule a merge when possible.
                downloaded = []
                success = True
                merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
                if not merger._executable:
                    postprocessors = []
                    self.report_warning('You have requested multiple '
                                        'formats but ffmpeg or avconv are not installed.'
                                        ' The formats won\'t be merged')
                else:
                    postprocessors = [merger]
                for f in info_dict['requested_formats']:
                    new_info = dict(info_dict)
                    new_info.update(f)
                    fname = self.prepare_filename(new_info)
                    # Each part gets an 'f<format_id>' marker added to its
                    # name via prepend_extension.
                    fname = prepend_extension(fname, 'f%s' % f['format_id'])
                    downloaded.append(fname)
                    partial_success = dl(fname, new_info)
                    success = success and partial_success
                info_dict['__postprocessors'] = postprocessors
                info_dict['__files_to_merge'] = downloaded
            else:
                # Just a single file
                success = dl(filename, info_dict)
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self.report_error('unable to download video data: %s' % str(err))
            return
        except (OSError, IOError) as err:
            raise UnavailableVideoError(err)
        except (ContentTooShortError, ) as err:
            self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
            return

        if success:
            # Fixup content
            fixup_policy = self.params.get('fixup')
            if fixup_policy is None:
                fixup_policy = 'detect_or_warn'

            # Fixup 1: non-uniform pixel ratio.
            stretched_ratio = info_dict.get('stretched_ratio')
            if stretched_ratio is not None and stretched_ratio != 1:
                if fixup_policy == 'warn':
                    self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
                        info_dict['id'], stretched_ratio))
                elif fixup_policy == 'detect_or_warn':
                    stretched_pp = FFmpegFixupStretchedPP(self)
                    if stretched_pp.available:
                        info_dict.setdefault('__postprocessors', [])
                        info_dict['__postprocessors'].append(stretched_pp)
                    else:
                        self.report_warning(
                            '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
                                info_dict['id'], stretched_ratio))
                else:
                    assert fixup_policy in ('ignore', 'never')

            # Fixup 2: DASH m4a container, only for single-format downloads.
            if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
                if fixup_policy == 'warn':
                    self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
                        info_dict['id']))
                elif fixup_policy == 'detect_or_warn':
                    fixup_pp = FFmpegFixupM4aPP(self)
                    if fixup_pp.available:
                        info_dict.setdefault('__postprocessors', [])
                        info_dict['__postprocessors'].append(fixup_pp)
                    else:
                        self.report_warning(
                            '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
                                info_dict['id']))
                else:
                    assert fixup_policy in ('ignore', 'never')

            try:
                self.post_process(filename, info_dict)
            except (PostProcessingError) as err:
                self.report_error('postprocessing: %s' % str(err))
                return
            # Only reached when download and postprocessing both succeeded.
            self.record_download_archive(info_dict)
8222d8de
JMF
1337
def download(self, url_list):
    """Download a given list of URLs.

    Raises SameFileError when several URLs would be written through an
    output template without any '%' placeholder (unless 'max_downloads'
    is 1).  Returns self._download_retcode.
    """
    outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
    if len(url_list) > 1:
        # The template is only inspected when more than one URL is given.
        if '%' not in outtmpl and self.params.get('max_downloads') != 1:
            raise SameFileError(outtmpl)

    for url in url_list:
        try:
            # It also downloads the videos
            result = self.extract_info(url)
        except UnavailableVideoError:
            self.report_error('unable to download video')
            continue
        except MaxDownloadsReached:
            self.to_screen('[info] Maximum number of downloaded files reached.')
            raise

        if self.params.get('dump_single_json', False):
            self.to_stdout(json.dumps(result))

    return self._download_retcode
1360
def download_with_info_file(self, info_filename):
    """Process the result stored in a UTF-8 JSON info file.

    The loaded info is passed to process_ie_result with download=True.  If
    that raises DownloadError and the info has a 'webpage_url', the URL is
    downloaded from scratch instead; otherwise the error is re-raised.
    Returns the result of download() in the fallback case, else
    self._download_retcode.
    """
    with io.open(info_filename, 'r', encoding='utf-8') as info_file:
        info = json.load(info_file)

    try:
        self.process_ie_result(info, download=True)
    except DownloadError:
        fallback_url = info.get('webpage_url')
        if fallback_url is None:
            raise
        self.report_warning('The info failed to download, trying with "%s"' % fallback_url)
        return self.download([fallback_url])

    return self._download_retcode
1dcc4c0c 1374
8222d8de
JMF
def post_process(self, filename, ie_info):
    """Run all the postprocessors on the given file.

    Postprocessors listed under ie_info['__postprocessors'] run first,
    followed by those in self._pps.  Each one receives the info dict
    returned by the previous one.
    """
    info = dict(ie_info)
    info['filepath'] = filename

    per_video_pps = ie_info.get('__postprocessors')
    pps_chain = [] if per_video_pps is None else list(per_video_pps)
    pps_chain.extend(self._pps)

    for pp in pps_chain:
        # The decision is made afresh for every postprocessor.
        keep_video = None
        old_filename = info['filepath']
        try:
            keep_video_wish, info = pp.run(info)
        except PostProcessingError as e:
            self.report_error(e.msg)
        else:
            if keep_video_wish is not None:
                keep_video = keep_video_wish

        # Delete only on an explicit False, and never when the user asked
        # to keep the video.
        if keep_video is False and not self.params.get('keepvideo', False):
            try:
                self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
                os.remove(encodeFilename(old_filename))
            except (IOError, OSError):
                self.report_warning('Unable to remove downloaded video file')
c1c9a79c 1402
5db07df6
PH
1403 def _make_archive_id(self, info_dict):
1404 # Future-proof against any change in case
1405 # and backwards compatibility with prior versions
d31209a1 1406 extractor = info_dict.get('extractor_key')
7012b23c
PH
1407 if extractor is None:
1408 if 'id' in info_dict:
1409 extractor = info_dict.get('ie_key') # key in a playlist
1410 if extractor is None:
5db07df6 1411 return None # Incomplete video information
6febd1c1 1412 return extractor.lower() + ' ' + info_dict['id']
5db07df6
PH
1413
def in_download_archive(self, info_dict):
    """Tell whether info_dict is already listed in the download archive.

    Returns False when no 'download_archive' param is set, when no
    archive id can be built for info_dict, or when the archive file does
    not exist.  Other IOErrors are propagated.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return False

    wanted_id = self._make_archive_id(info_dict)
    if wanted_id is None:
        return False  # Incomplete video information

    try:
        with locked_file(archive_path, 'r', encoding='utf-8') as archive_file:
            if any(entry.strip() == wanted_id for entry in archive_file):
                return True
    except IOError as ioe:
        # A missing archive file just means nothing was recorded yet.
        if ioe.errno != errno.ENOENT:
            raise
    return False
1432
def record_download_archive(self, info_dict):
    """Append the archive id of info_dict to the download archive file.

    Does nothing when no 'download_archive' param is set.  Asserts that an
    archive id can be built for info_dict.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is not None:
        archive_id = self._make_archive_id(info_dict)
        assert archive_id
        with locked_file(archive_path, 'a', encoding='utf-8') as archive_file:
            archive_file.write(archive_id + '\n')
dd82ffea 1441
@staticmethod
def format_resolution(format, default='unknown'):
    """Return a human-readable resolution string for a format dict.

    Precedence: 'audio only' when vcodec is 'none'; the explicit
    'resolution' field; 'WIDTHxHEIGHT' when both are known; 'HEIGHTp'
    when only the height is known; 'WIDTHx?' when only the width is
    known; otherwise default.
    """
    if format.get('vcodec') == 'none':
        return 'audio only'
    if format.get('resolution') is not None:
        return format['resolution']

    width = format.get('width')
    height = format.get('height')
    if height is not None:
        if width is not None:
            return '%sx%s' % (width, height)
        return '%sp' % height
    if width is not None:
        # Width goes on the left of the 'x', as in the WIDTHxHEIGHT form;
        # the unknown height is shown as '?'.
        return '%dx?' % width
    return default
1458
c57f7757
PH
1459 def _format_note(self, fdict):
1460 res = ''
1461 if fdict.get('ext') in ['f4f', 'f4m']:
1462 res += '(unsupported) '
1463 if fdict.get('format_note') is not None:
1464 res += fdict['format_note'] + ' '
1465 if fdict.get('tbr') is not None:
1466 res += '%4dk ' % fdict['tbr']
1467 if fdict.get('container') is not None:
1468 if res:
1469 res += ', '
1470 res += '%s container' % fdict['container']
1471 if (fdict.get('vcodec') is not None and
1472 fdict.get('vcodec') != 'none'):
1473 if res:
1474 res += ', '
1475 res += fdict['vcodec']
91c7271a 1476 if fdict.get('vbr') is not None:
c57f7757
PH
1477 res += '@'
1478 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1479 res += 'video@'
1480 if fdict.get('vbr') is not None:
1481 res += '%4dk' % fdict['vbr']
fbb21cf5
PH
1482 if fdict.get('fps') is not None:
1483 res += ', %sfps' % fdict['fps']
c57f7757
PH
1484 if fdict.get('acodec') is not None:
1485 if res:
1486 res += ', '
1487 if fdict['acodec'] == 'none':
1488 res += 'video only'
1489 else:
1490 res += '%-5s' % fdict['acodec']
1491 elif fdict.get('abr') is not None:
1492 if res:
1493 res += ', '
1494 res += 'audio'
1495 if fdict.get('abr') is not None:
1496 res += '@%3dk' % fdict['abr']
1497 if fdict.get('asr') is not None:
1498 res += ' (%5dHz)' % fdict['asr']
1499 if fdict.get('filesize') is not None:
1500 if res:
1501 res += ', '
1502 res += format_bytes(fdict['filesize'])
9732d77e
PH
1503 elif fdict.get('filesize_approx') is not None:
1504 if res:
1505 res += ', '
1506 res += '~' + format_bytes(fdict['filesize_approx'])
c57f7757 1507 return res
91c7271a 1508
def list_formats(self, info_dict):
    """Print a table of the available formats of info_dict via to_screen.

    Formats whose 'preference' is below -1000 are not listed.  When more
    than one format is listed, the first row is tagged '(worst)' and the
    last one '(best)'.
    """
    def line(format, idlen=20):
        # '*' takes the id column width from the argument tuple.
        return '%-*s%-10s%-12s%s' % (
            idlen + 1,
            format['format_id'],
            format['ext'],
            self.format_resolution(format),
            self._format_note(format),
        )

    formats = info_dict.get('formats', [info_dict])
    # Work on the rows that are actually displayed, so that the
    # worst/best tags and their spacing refer to the right formats.
    visible = [
        f for f in formats
        if f.get('preference') is None or f['preference'] >= -1000]

    # The id column is sized over all formats; seeding the list with the
    # header width also keeps max() defined when there are no formats.
    idlen = max([len('format code')] + [len(f['format_id']) for f in formats])

    formats_s = [line(f, idlen) for f in visible]
    if len(visible) > 1:
        formats_s[0] += (' ' if self._format_note(visible[0]) else '') + '(worst)'
        formats_s[-1] += (' ' if self._format_note(visible[-1]) else '') + '(best)'

    header_line = line({
        'format_id': 'format code', 'ext': 'extension',
        'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
    self.to_screen(
        '[info] Available formats for %s:\n%s\n%s' %
        (info_dict['id'], header_line, '\n'.join(formats_s)))
1534
def list_thumbnails(self, info_dict):
    """Print a table of the thumbnails of info_dict via to_screen.

    Uses info_dict['thumbnails'], falling back to the single
    info_dict['thumbnail'] URL.  Prints a notice instead when neither is
    available.
    """
    thumbnails = info_dict.get('thumbnails')
    if not thumbnails:
        tn_url = info_dict.get('thumbnail')
        if tn_url:
            thumbnails = [{'id': '0', 'url': tn_url}]
        else:
            self.to_screen(
                '[info] No thumbnails present for %s' % info_dict['id'])
            return

    def _known(value):
        # Missing and None dimensions are both shown as 'unknown'.
        return 'unknown' if value is None else value

    rows = []
    for index, t in enumerate(thumbnails):
        # Thumbnails are not guaranteed to carry an 'id'; fall back to
        # the position in the list instead of raising KeyError.
        thumbnail_id = t.get('id')
        if thumbnail_id is None:
            thumbnail_id = '%d' % index
        rows.append(
            [thumbnail_id, _known(t.get('width')), _known(t.get('height')), t['url']])

    self.to_screen(
        '[info] Thumbnails for %s:' % info_dict['id'])
    self.to_screen(render_table(
        ['ID', 'width', 'height', 'URL'], rows))
dca08720
PH
1551
def urlopen(self, req):
    """ Start an HTTP download """

    # RFC 3986 does not allow non-ASCII characters in URLs, but websites
    # do not always respect that: some hand out URLs containing non-ASCII
    # characters that are not percent-encoded (see telemb.py, ard.py
    # [#3412]), and urllib chokes on those
    # (see http://bugs.python.org/issue3991).
    # As a workaround the request's original URL is replaced with its
    # percent-encoded form.
    string_types = basestring if sys.version_info < (3, 0) else compat_str
    req_is_string = isinstance(req, string_types)
    if req_is_string:
        url = req
    else:
        url = req.get_full_url()
    url_escaped = escape_url(url)

    # Only substitute when escaping actually changed the URL
    if url != url_escaped:
        if not req_is_string:
            req = compat_urllib_request.Request(
                url_escaped, data=req.data, headers=req.headers,
                origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
        else:
            req = url_escaped

    return self._opener.open(req, timeout=self._socket_timeout)
dca08720
PH
1575
def print_debug_header(self):
    """Write the '[debug]' diagnostic header.

    Does nothing unless params['verbose'] is set. Otherwise it reports,
    in this order: the encodings in use, the youtube-dl version, the
    short git HEAD hash (best effort), the Python version and platform,
    the versions of external executables, and the proxy map collected
    from the opener's handlers. When params['call_home'] is set it also
    fetches and prints the public IP address and warns if a newer
    version is published.
    """
    if not self.params.get('verbose'):
        return

    if type('') is not compat_str:
        # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
        self.report_warning(
            'Your Python is broken! Update to a newer and supported version')

    # sys.stdout may have been replaced by an object without `encoding`
    stdout_encoding = getattr(
        sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
    encoding_str = (
        '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
            locale.getpreferredencoding(),
            sys.getfilesystemencoding(),
            stdout_encoding,
            self.get_encoding()))
    write_string(encoding_str, encoding=None)

    self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
    try:
        sp = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out, _ = sp.communicate()
        out = out.decode().strip()
        # Require the whole output to be a hex hash, not just its first
        # character
        if re.match(r'[0-9a-f]+$', out):
            self._write_string('[debug] Git HEAD: ' + out + '\n')
    except Exception:
        # Reporting the git revision is best effort only (git may be
        # missing, or this may not be a checkout). KeyboardInterrupt and
        # SystemExit are deliberately not swallowed.
        try:
            sys.exc_clear()
        except AttributeError:
            # sys.exc_clear() does not exist on Python 3
            pass
    self._write_string('[debug] Python version %s - %s\n' % (
        platform.python_version(), platform_name()))

    exe_versions = FFmpegPostProcessor.get_versions()
    exe_versions['rtmpdump'] = rtmpdump_version()
    # Only executables with a truthy version value are listed
    exe_str = ', '.join(
        '%s %s' % (exe, v)
        for exe, v in sorted(exe_versions.items())
        if v
    )
    if not exe_str:
        exe_str = 'none'
    self._write_string('[debug] exe versions: %s\n' % exe_str)

    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

    if self.params.get('call_home', False):
        ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
        self._write_string('[debug] Public IP address: %s\n' % ipaddr)
        latest_version = self.urlopen(
            'https://yt-dl.org/latest/version').read().decode('utf-8')
        if version_tuple(latest_version) > version_tuple(__version__):
            self.report_warning(
                'You are using an outdated version (newest version: %s)! '
                'See https://yt-dl.org/update if you need help updating.' %
                latest_version)
1640
def _setup_opener(self):
    """Build the urllib opener and store it as self._opener.

    Reads 'socket_timeout', 'cookiefile', 'proxy' and
    'debug_printtraffic' from self.params and sets self._socket_timeout,
    self.cookiejar and self._opener.
    """
    configured_timeout = self.params.get('socket_timeout')
    if configured_timeout is None:
        self._socket_timeout = 600
    else:
        self._socket_timeout = float(configured_timeout)

    cookie_path = self.params.get('cookiefile')
    proxy_opt = self.params.get('proxy')

    if cookie_path is not None:
        self.cookiejar = compat_cookiejar.MozillaCookieJar(cookie_path)
        # Only load the file when it is readable
        if os.access(cookie_path, os.R_OK):
            self.cookiejar.load()
    else:
        self.cookiejar = compat_cookiejar.CookieJar()

    cookie_processor = compat_urllib_request.HTTPCookieProcessor(
        self.cookiejar)

    if proxy_opt is None:
        proxy_map = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
        if 'http' in proxy_map and 'https' not in proxy_map:
            proxy_map['https'] = proxy_map['http']
    elif proxy_opt == '':
        # An empty proxy option means an empty proxy map
        proxy_map = {}
    else:
        proxy_map = {'http': proxy_opt, 'https': proxy_opt}
    proxy_handler = compat_urllib_request.ProxyHandler(proxy_map)

    if self.params.get('debug_printtraffic'):
        debuglevel = 1
    else:
        debuglevel = 0
    https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
    handlers = (https_handler, proxy_handler, cookie_processor, ydlh)
    opener = compat_urllib_request.build_opener(*handlers)
    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener
62fec3b2
PH
1680
def encode(self, s):
    """Return s as bytes, encoded with self.get_encoding().

    A bytes argument is returned unchanged. If encoding fails, the
    UnicodeEncodeError is re-raised with a hint about the --encoding
    option appended to its reason.
    """
    if not isinstance(s, bytes):
        try:
            s = s.encode(self.get_encoding())
        except UnicodeEncodeError as exc:
            exc.reason = exc.reason + '. Check your system encoding configuration or use the --encoding option.'
            raise
    return s
1690
def get_encoding(self):
    """Return params['encoding'], or preferredencoding() when it is None."""
    configured = self.params.get('encoding')
    return preferredencoding() if configured is None else configured
ec82d85a
PH
1696
def _write_thumbnails(self, info_dict, filename):
    """Download thumbnail image(s) of a video to files next to filename.

    With params['writethumbnail'] only the last entry of
    info_dict['thumbnails'] is written; otherwise, with
    params['write_all_thumbnails'], every entry is written. If neither
    option is set, or there are no thumbnails, nothing happens.

    Each target name is filename without its extension, plus '_<id>'
    when several thumbnails are written, plus the extension determined
    from the thumbnail URL ('jpg' by default). Existing files are kept
    when params['nooverwrites'] is set. Network errors are reported as
    warnings and do not abort the remaining downloads.
    """
    if self.params.get('writethumbnail', False):
        thumbnails = info_dict.get('thumbnails')
        if thumbnails:
            thumbnails = [thumbnails[-1]]
    elif self.params.get('write_all_thumbnails', False):
        thumbnails = info_dict.get('thumbnails')
    else:
        return

    if not thumbnails:
        # No thumbnails present, so return immediately
        return

    for t in thumbnails:
        thumb_ext = determine_ext(t['url'], 'jpg')
        suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
        thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
        thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext

        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
            self.to_screen('[%s] %s: Thumbnail %sis already present' %
                           (info_dict['extractor'], info_dict['id'], thumb_display_id))
        else:
            self.to_screen('[%s] %s: Downloading thumbnail %s...' %
                           (info_dict['extractor'], info_dict['id'], thumb_display_id))
            try:
                uf = self.urlopen(t['url'])
                try:
                    # Open the file under the same encoded name that the
                    # existence check above uses
                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                finally:
                    # Release the HTTP response even if writing fails
                    uf.close()
                self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
                               (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_warning('Unable to download thumbnail "%s": %s' %
                                    (t['url'], compat_str(err)))