]> jfr.im git - yt-dlp.git/blame - youtube_dlc/YoutubeDL.py
Added `--force-overwrites` option (https://github.com/ytdl-org/youtube-dl/pull/20405)
[yt-dlp.git] / youtube_dlc / YoutubeDL.py
CommitLineData
8222d8de 1#!/usr/bin/env python
dcdb292f 2# coding: utf-8
8222d8de 3
6febd1c1 4from __future__ import absolute_import, unicode_literals
8222d8de 5
26e63931 6import collections
31bd3925 7import contextlib
317f7ab6 8import copy
9d2ecdbc 9import datetime
c1c9a79c 10import errno
31bd3925 11import fileinput
8222d8de 12import io
b82f815f 13import itertools
8694c600 14import json
62fec3b2 15import locale
083c9df9 16import operator
8222d8de 17import os
dca08720 18import platform
8222d8de
JMF
19import re
20import shutil
dca08720 21import subprocess
8222d8de
JMF
22import socket
23import sys
24import time
67134eab 25import tokenize
8222d8de 26import traceback
75822ca7 27import random
8222d8de 28
961ea474
S
29from string import ascii_letters
30
8c25f81b 31from .compat import (
82d8a8b6 32 compat_basestring,
dca08720 33 compat_cookiejar,
003c69a8 34 compat_get_terminal_size,
ce02ed60 35 compat_http_client,
4f026faf 36 compat_kwargs,
d0d9ade4 37 compat_numeric_types,
e9c0cdd3 38 compat_os_name,
ce02ed60 39 compat_str,
67134eab 40 compat_tokenize_tokenize,
ce02ed60
PH
41 compat_urllib_error,
42 compat_urllib_request,
8b172c2e 43 compat_urllib_request_DataHandler,
8c25f81b
PH
44)
45from .utils import (
eedb7ba5
S
46 age_restricted,
47 args_to_str,
ce02ed60
PH
48 ContentTooShortError,
49 date_from_str,
50 DateRange,
acd69589 51 DEFAULT_OUTTMPL,
ce02ed60 52 determine_ext,
b5559424 53 determine_protocol,
732044af 54 DOT_DESKTOP_LINK_TEMPLATE,
55 DOT_URL_LINK_TEMPLATE,
56 DOT_WEBLOC_LINK_TEMPLATE,
ce02ed60 57 DownloadError,
c0384f22 58 encode_compat_str,
ce02ed60 59 encodeFilename,
9b9c5355 60 error_to_compat_str,
590bc6f6 61 expand_path,
ce02ed60 62 ExtractorError,
02dbf93f 63 format_bytes,
76d321f6 64 format_field,
525ef922 65 formatSeconds,
773f291d 66 GeoRestrictedError,
c9969434 67 int_or_none,
732044af 68 iri_to_uri,
773f291d 69 ISO3166Utils,
ce02ed60 70 locked_file,
dca08720 71 make_HTTPS_handler,
ce02ed60 72 MaxDownloadsReached,
cd6fc19e 73 orderedSet,
b7ab0590 74 PagedList,
083c9df9 75 parse_filesize,
91410c9b 76 PerRequestProxyHandler,
dca08720 77 platform_name,
eedb7ba5 78 PostProcessingError,
ce02ed60 79 preferredencoding,
eedb7ba5 80 prepend_extension,
51fb4995 81 register_socks_protocols,
cfb56d1a 82 render_table,
eedb7ba5 83 replace_extension,
ce02ed60
PH
84 SameFileError,
85 sanitize_filename,
1bb5c511 86 sanitize_path,
dcf77cf1 87 sanitize_url,
67dda517 88 sanitized_Request,
e5660ee6 89 std_headers,
1211bb6d 90 str_or_none,
ce02ed60 91 subtitles_filename,
732044af 92 to_high_limit_path,
ce02ed60 93 UnavailableVideoError,
29eb5174 94 url_basename,
58b1f00d 95 version_tuple,
ce02ed60
PH
96 write_json_file,
97 write_string,
1bab3437 98 YoutubeDLCookieJar,
6a3f4c3f 99 YoutubeDLCookieProcessor,
dca08720 100 YoutubeDLHandler,
fca6dba8 101 YoutubeDLRedirectHandler,
f5b1bca9 102 process_communicate_or_kill,
ce02ed60 103)
a0e07d31 104from .cache import Cache
e0986e31 105from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
4c54b89e 106from .extractor.openload import PhantomJSwrapper
3bc2ddcc 107from .downloader import get_suitable_downloader
4c83c967 108from .downloader.rtmp import rtmpdump_version
4f026faf 109from .postprocessor import (
f17f8651 110 FFmpegFixupM3u8PP,
62cd676c 111 FFmpegFixupM4aPP,
6271f1ca 112 FFmpegFixupStretchedPP,
4f026faf
PH
113 FFmpegMergerPP,
114 FFmpegPostProcessor,
57df9f53 115 FFmpegSubtitlesConvertorPP,
4f026faf
PH
116 get_postprocessor,
117)
dca08720 118from .version import __version__
8222d8de 119
e9c0cdd3
YCH
120if compat_os_name == 'nt':
121 import ctypes
122
2459b6e1 123
8222d8de
JMF
124class YoutubeDL(object):
125 """YoutubeDL class.
126
127 YoutubeDL objects are the ones responsible for downloading the
128 actual video file and writing it to disk if the user has requested
129 it, among some other tasks. In most cases there should be one per
130 program. As, given a video URL, the downloader doesn't know how to
131 extract all the needed information, task that InfoExtractors do, it
132 has to pass the URL to one of them.
133
134 For this, YoutubeDL objects have a method that allows
135 InfoExtractors to be registered in a given order. When it is passed
136 a URL, the YoutubeDL object hands it to the first InfoExtractor it
137 finds that reports being able to handle it. The InfoExtractor extracts
138 all the information about the video or videos the URL refers to, and
139 YoutubeDL process the extracted information, possibly using a File
140 Downloader to download the video.
141
142 YoutubeDL objects accept a lot of parameters. In order not to saturate
143 the object constructor with arguments, it receives a dictionary of
144 options instead. These options are available through the params
145 attribute for the InfoExtractors to use. The YoutubeDL also
146 registers itself as the downloader in charge for the InfoExtractors
147 that are added to it, so this is a "mutual registration".
148
149 Available options:
150
151 username: Username for authentication purposes.
152 password: Password for authentication purposes.
180940e0 153 videopassword: Password for accessing a video.
1da50aa3
S
154 ap_mso: Adobe Pass multiple-system operator identifier.
155 ap_username: Multiple-system operator account username.
156 ap_password: Multiple-system operator account password.
8222d8de
JMF
157 usenetrc: Use netrc for authentication instead.
158 verbose: Print additional info to stdout.
159 quiet: Do not print messages to stdout.
ad8915b7 160 no_warnings: Do not print out anything for warnings.
8222d8de
JMF
161 forceurl: Force printing final URL.
162 forcetitle: Force printing title.
163 forceid: Force printing ID.
164 forcethumbnail: Force printing thumbnail URL.
165 forcedescription: Force printing description.
166 forcefilename: Force printing final filename.
525ef922 167 forceduration: Force printing duration.
8694c600 168 forcejson: Force printing info_dict as JSON.
63e0be34
PH
169 dump_single_json: Force printing the info_dict of the whole playlist
170 (or video) as a single JSON line.
2d30509f 171 force_write_download_archive: Force writing download archive regardless of
172 'skip_download' or 'simulate'.
8222d8de 173 simulate: Do not download the video files.
eb8a4433 174 format: Video format code. see "FORMAT SELECTION" for more details.
175 format_sort: How to sort the video formats. see "Sorting Formats" for more details.
176 format_sort_force: Force the given format_sort. see "Sorting Formats" for more details.
909d24dd 177 allow_multiple_video_streams: Allow multiple video streams to be merged into a single file
178 allow_multiple_audio_streams: Allow multiple audio streams to be merged into a single file
8222d8de 179 outtmpl: Template for output names.
bdc3fd2f
U
180 restrictfilenames: Do not allow "&" and spaces in file names.
181 trim_file_name: Limit length of filename (extension excluded).
f5546c0b 182 ignoreerrors: Do not stop on download errors. (Default True when running youtube-dlc, but False when directly accessing YoutubeDL class)
d22dec74 183 force_generic_extractor: Force downloader to use the generic extractor
0c3d0f51 184 overwrites: Overwrite all video and metadata files if True,
185 overwrite only non-video files if None
186 and don't overwrite any file if False
8222d8de
JMF
187 playliststart: Playlist item to start at.
188 playlistend: Playlist item to end at.
c14e88f0 189 playlist_items: Specific indices of playlist to download.
ff815fe6 190 playlistreverse: Download playlist items in reverse order.
75822ca7 191 playlistrandom: Download playlist items in random order.
8222d8de
JMF
192 matchtitle: Download only matching titles.
193 rejecttitle: Reject downloads for matching titles.
8bf9319e 194 logger: Log messages to a logging.Logger instance.
8222d8de
JMF
195 logtostderr: Log messages to stderr instead of stdout.
196 writedescription: Write the video description to a .description file
197 writeinfojson: Write the video metadata to a .info.json file
1fb07d10 198 writeannotations: Write the video annotations to a .annotations.xml file
8222d8de 199 writethumbnail: Write the thumbnail image to a file
ec82d85a 200 write_all_thumbnails: Write all thumbnail formats to files
732044af 201 writelink: Write an internet shortcut file, depending on the
202 current platform (.url/.webloc/.desktop)
203 writeurllink: Write a Windows internet shortcut file (.url)
204 writewebloclink: Write a macOS internet shortcut file (.webloc)
205 writedesktoplink: Write a Linux internet shortcut file (.desktop)
8222d8de 206 writesubtitles: Write the video subtitles to a file
741dd8ea 207 writeautomaticsub: Write the automatically generated subtitles to a file
8222d8de 208 allsubtitles: Downloads all the subtitles of the video
0b7f3118 209 (requires writesubtitles or writeautomaticsub)
8222d8de 210 listsubtitles: Lists all available subtitles for the video
a504ced0 211 subtitlesformat: The format code for subtitles
aa6a10c4 212 subtitleslangs: List of languages of the subtitles to download
8222d8de
JMF
213 keepvideo: Keep the video file after post-processing
214 daterange: A DateRange object, download only if the upload_date is in the range.
215 skip_download: Skip the actual download of the video file
c35f9e72 216 cachedir: Location of the cache files in the filesystem.
a0e07d31 217 False to disable filesystem cache.
47192f92 218 noplaylist: Download single video instead of a playlist if in doubt.
8dbe9899
PH
219 age_limit: An integer representing the user's age in years.
220 Unsuitable videos for the given age are skipped.
5fe18bdb
PH
221 min_views: An integer representing the minimum view count the video
222 must have in order to not be skipped.
223 Videos without view count information are always
224 downloaded. None for no limit.
225 max_views: An integer representing the maximum view count.
226 Videos that are more popular than that are not
227 downloaded.
228 Videos without view count information are always
229 downloaded. None for no limit.
230 download_archive: File name of a file where all downloads are recorded.
c1c9a79c
PH
231 Videos already present in the file are not downloaded
232 again.
ea6e0c2b 233 break_on_existing: Stop the download process after attempting to download a file that's
234 in the archive.
dca08720 235 cookiefile: File name where cookies should be read from and dumped to.
a1ee09e8 236 nocheckcertificate:Do not verify SSL certificates
7e8c0af0
PH
237 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
238 At the moment, this is only supported by YouTube.
a1ee09e8 239 proxy: URL of the proxy server to use
38cce791 240 geo_verification_proxy: URL of the proxy to use for IP address verification
504f20dd 241 on geo-restricted sites.
e344693b 242 socket_timeout: Time to wait for unresponsive hosts, in seconds
0783b09b
PH
243 bidi_workaround: Work around buggy terminals without bidirectional text
244 support, using fribidi
a0ddb8a2 245 debug_printtraffic:Print out sent and received HTTP traffic
7b0817e8 246 include_ads: Download ads as well
04b4d394
PH
247 default_search: Prepend this string if an input url is not valid.
248 'auto' for elaborate guessing
62fec3b2 249 encoding: Use this encoding instead of the system-specified.
e8ee972c 250 extract_flat: Do not resolve URLs, return the immediate result.
057a5206
PH
251 Pass in 'in_playlist' to only show this behavior for
252 playlist items.
4f026faf 253 postprocessors: A list of dictionaries, each with an entry
71b640cc 254 * key: The name of the postprocessor. See
cefecac1 255 youtube_dlc/postprocessor/__init__.py for a list.
4f026faf
PH
256 as well as any further keyword arguments for the
257 postprocessor.
ab8e5e51
AM
258 post_hooks: A list of functions that get called as the final step
259 for each video file, after all postprocessors have been
260 called. The filename will be passed as the only argument.
71b640cc
PH
261 progress_hooks: A list of functions that get called on download
262 progress, with a dictionary with the entries
5cda4eda 263 * status: One of "downloading", "error", or "finished".
ee69b99a 264 Check this first and ignore unknown values.
71b640cc 265
5cda4eda 266 If status is one of "downloading", or "finished", the
ee69b99a
PH
267 following properties may also be present:
268 * filename: The final filename (always present)
5cda4eda 269 * tmpfilename: The filename we're currently writing to
71b640cc
PH
270 * downloaded_bytes: Bytes on disk
271 * total_bytes: Size of the whole file, None if unknown
5cda4eda
PH
272 * total_bytes_estimate: Guess of the eventual file size,
273 None if unavailable.
274 * elapsed: The number of seconds since download started.
71b640cc
PH
275 * eta: The estimated time in seconds, None if unknown
276 * speed: The download speed in bytes/second, None if
277 unknown
5cda4eda
PH
278 * fragment_index: The counter of the currently
279 downloaded video fragment.
280 * fragment_count: The number of fragments (= individual
281 files that will be merged)
71b640cc
PH
282
283 Progress hooks are guaranteed to be called at least once
284 (with status "finished") if the download is successful.
45598f15 285 merge_output_format: Extension to use when merging formats.
6271f1ca
PH
286 fixup: Automatically correct known faults of the file.
287 One of:
288 - "never": do nothing
289 - "warn": only emit a warning
290 - "detect_or_warn": check whether we can do anything
62cd676c 291 about it, warn otherwise (default)
504f20dd 292 source_address: Client-side IP address to bind to.
6ec6cb4e 293 call_home: Boolean, true iff we are allowed to contact the
cefecac1 294 youtube-dlc servers for debugging.
7aa589a5
S
295 sleep_interval: Number of seconds to sleep before each download when
296 used alone or a lower bound of a range for randomized
297 sleep before each download (minimum possible number
298 of seconds to sleep) when used along with
299 max_sleep_interval.
300 max_sleep_interval:Upper bound of a range for randomized sleep before each
301 download (maximum possible number of seconds to sleep).
302 Must only be used along with sleep_interval.
303 Actual sleep time will be a random float from range
304 [sleep_interval; max_sleep_interval].
cfb56d1a
PH
305 listformats: Print an overview of available video formats and exit.
306 list_thumbnails: Print a table of all thumbnails and exit.
347de493
PH
307 match_filter: A function that gets called with the info_dict of
308 every video.
309 If it returns a message, the video is ignored.
310 If it returns None, the video is downloaded.
311 match_filter_func in utils.py is one example for this.
7e5db8c9 312 no_color: Do not emit color codes in output.
0a840f58 313 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
504f20dd 314 HTTP header
0a840f58 315 geo_bypass_country:
773f291d
S
316 Two-letter ISO 3166-2 country code that will be used for
317 explicit geographic restriction bypassing via faking
504f20dd 318 X-Forwarded-For HTTP header
5f95927a
S
319 geo_bypass_ip_block:
320 IP range in CIDR notation that will be used similarly to
504f20dd 321 geo_bypass_country
71b640cc 322
85729c51
PH
323 The following options determine which downloader is picked:
324 external_downloader: Executable of the external downloader to call.
325 None or unset for standard (built-in) downloader.
bf09af3a
S
326 hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
327 if True, otherwise use ffmpeg/avconv if False, otherwise
328 use downloader suggested by extractor if None.
fe7e0c98 329
8222d8de 330 The following parameters are not used by YoutubeDL itself, they are used by
cefecac1 331 the downloader (see youtube_dlc/downloader/common.py):
8222d8de 332 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
881e6a1f 333 noresizebuffer, retries, continuedl, noprogress, consoletitle,
b54d4a5c
S
334 xattr_set_filesize, external_downloader_args, hls_use_mpegts,
335 http_chunk_size.
76b1bd67
JMF
336
337 The following options are used by the post processors:
d4a24f40
S
338 prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
339 otherwise prefer ffmpeg.
c0b7d117
S
340 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
341 to the binary or its containing directory.
1b77b347 342 postprocessor_args: A dictionary of postprocessor names (in lower case) and a list
343 of additional command-line arguments for the postprocessor.
344 Use 'default' as the name for arguments to be passed to all PP.
3836b02c 345
3600fd59
S
346 The following options are used by the Youtube extractor:
347 youtube_include_dash_manifest: If True (default), DASH manifests and related
348 data will be downloaded and processed by extractor.
349 You can reduce network I/O by disabling it if you don't
350 care about DASH.
8222d8de
JMF
351 """
352
c9969434
S
# Names of info-dict fields that hold numbers. prepare_filename walks this
# set to patch the output template for numeric fields that are missing.
_NUMERIC_FIELDS = set([
    # stream properties
    'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps',
    'filesize', 'filesize_approx',
    # dates
    'timestamp', 'upload_year', 'upload_month', 'upload_day',
    # statistics
    'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
    'average_rating', 'comment_count', 'age_limit',
    # sections
    'start_time', 'end_time',
    # numbering
    'chapter_number', 'season_number', 'episode_number',
    'track_number', 'disc_number', 'release_year',
    'playlist_index',
])

# Class-level placeholders; __init__ rebinds every one of them per instance.
params = None
_ies = []
_pps = []
_download_retcode = None
_num_downloads = None
_screen_file = None
370
def __init__(self, params=None, auto_init=True):
    """Create a FileDownloader object with the given options.

    params    -- dictionary of options (see the class docstring); None is
                 treated as an empty dictionary.
    auto_init -- when true, print the debug header and register the
                 default info extractors.
    """
    if params is None:
        params = {}

    self._ies = []
    self._ies_instances = {}
    self._pps = []
    self._post_hooks = []
    self._progress_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
    self._err_file = sys.stderr
    self.params = {
        # Default parameters
        'nocheckcertificate': False,
    }
    self.params.update(params)
    self.cache = Cache(self)
    self.archive = set()

    def preload_download_archive(fn):
        """Preload the archive, if any is specified.

        Every stripped line of the file fn is added to self.archive.
        Return True if the file was read, False if no archive is
        configured or the file does not exist.
        """
        if fn is None:
            return False
        # Only announce the load when there actually is an archive to load
        if self.params.get('verbose'):
            self.to_stdout('[debug] Loading archive file %r' % fn)
        try:
            with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                for line in archive_file:
                    self.archive.add(line.strip())
        except IOError as ioe:
            # A missing archive file is not an error; anything else is
            if ioe.errno != errno.ENOENT:
                raise
            return False
        return True

    def check_deprecated(param, option, suggestion):
        """Warn and return True if the deprecated param is set."""
        if self.params.get(param) is not None:
            self.report_warning(
                '%s is deprecated. Use %s instead.' % (option, suggestion))
            return True
        return False

    preload_download_archive(self.params.get('download_archive'))

    if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
        # Carry the deprecated value over unless the new option is set
        if self.params.get('geo_verification_proxy') is None:
            self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

    check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N in the number of digits')
    check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
    check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')

    if params.get('bidi_workaround', False):
        try:
            import pty
            master, slave = pty.openpty()
            width = compat_get_terminal_size().columns
            width_args = [] if width is None else ['-w', str(width)]
            sp_kwargs = {
                'stdin': subprocess.PIPE,
                'stdout': slave,
                'stderr': self._err_file,
            }
            # Prefer bidiv; fall back to fribidi if it cannot be started
            try:
                self._output_process = subprocess.Popen(
                    ['bidiv'] + width_args, **sp_kwargs)
            except OSError:
                self._output_process = subprocess.Popen(
                    ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
            self._output_channel = os.fdopen(master, 'rb')
        except OSError as ose:
            if ose.errno != errno.ENOENT:
                raise
            self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')

    if (sys.platform != 'win32'
            and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
            and not params.get('restrictfilenames', False)):
        # Unicode filesystem API will throw errors (#1474, #13027)
        self.report_warning(
            'Assuming --restrict-filenames since file system encoding '
            'cannot encode all characters. '
            'Set the LC_ALL environment variable to fix this.')
        self.params['restrictfilenames'] = True

    if isinstance(params.get('outtmpl'), bytes):
        self.report_warning(
            'Parameter outtmpl is bytes, but should be a unicode string. '
            'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')

    self._setup_opener()

    if auto_init:
        self.print_debug_header()
        self.add_default_info_extractors()

    for pp_def_raw in self.params.get('postprocessors', []):
        pp_class = get_postprocessor(pp_def_raw['key'])
        # Work on a copy so the caller's definition keeps its 'key' entry
        pp_def = dict(pp_def_raw)
        del pp_def['key']
        self.add_post_processor(pp_class(self, **compat_kwargs(pp_def)))

    for ph in self.params.get('post_hooks', []):
        self.add_post_hook(ph)

    for ph in self.params.get('progress_hooks', []):
        self.add_progress_hook(ph)

    register_socks_protocols()
489
7d4111ed
PH
def warn_if_short_id(self, argv):
    """Warn when an argument looks like a short YouTube ID starting with a dash.

    argv is the list of command-line arguments. If any of them matches,
    a warning is reported that shows the command line with those
    arguments moved behind a '--' separator.
    """
    # short YouTube ID starting with dash?
    looks_like_id = re.compile(r'^-[0-9A-Za-z_-]{10}$').match
    suspects, others = [], []
    for arg in argv:
        (suspects if looks_like_id(arg) else others).append(arg)
    if not suspects:
        return
    correct_argv = ['youtube-dlc'] + others + ['--'] + suspects
    self.report_warning(
        'Long argument string detected. '
        'Use -- to separate parameters and URLs, like this:\n%s\n' %
        args_to_str(correct_argv))
505
8222d8de
JMF
def add_info_extractor(self, ie):
    """Add an InfoExtractor object to the end of the list.

    ie may be an extractor class or an instance. Only instances are also
    indexed by their ie_key() and told that this object is their
    downloader.
    """
    self._ies.append(ie)
    if isinstance(ie, type):
        # A bare class: nothing to index or bind yet
        return
    self._ies_instances[ie.ie_key()] = ie
    ie.set_downloader(self)
8222d8de 512
56c73665
JMF
def get_info_extractor(self, ie_key):
    """
    Return an IE instance registered under the name ie_key.

    An instance already known to this object is reused; otherwise a new
    one is created, added to the extractor list and returned.
    """
    known = self._ies_instances.get(ie_key)
    if known is not None:
        return known
    created = get_info_extractor(ie_key)()
    self.add_info_extractor(created)
    return created
524
023fa8c4
JMF
def add_default_info_extractors(self):
    """
    Append every class produced by gen_extractor_classes to the list
    """
    for extractor_class in gen_extractor_classes():
        self.add_info_extractor(extractor_class)
531
8222d8de
JMF
def add_post_processor(self, pp):
    """Append the PostProcessor pp to the chain and make this object its downloader."""
    chain = self._pps
    chain.append(pp)
    pp.set_downloader(self)
536
ab8e5e51
AM
def add_post_hook(self, ph):
    """Register ph at the end of the list of post hooks."""
    hooks = self._post_hooks
    hooks.append(ph)
540
933605d7
JMF
def add_progress_hook(self, ph):
    """Register ph at the end of the list of progress hooks (currently only for the file downloader)"""
    hooks = self._progress_hooks
    hooks.append(ph)
8ab470f1 544
def _bidi_workaround(self, message):
    """Pass message through the bidi helper process, if one was started.

    Without an _output_channel attribute the message is returned
    unchanged. Otherwise it is written, UTF-8 encoded, to the helper's
    stdin and as many lines as were sent are read back from the output
    channel; the final newline is dropped from the result.
    """
    if not hasattr(self, '_output_channel'):
        return message

    assert hasattr(self, '_output_process')
    assert isinstance(message, compat_str)

    expected_lines = message.count('\n') + 1
    helper_stdin = self._output_process.stdin
    helper_stdin.write((message + '\n').encode('utf-8'))
    helper_stdin.flush()

    pieces = []
    for _ in range(expected_lines):
        pieces.append(self._output_channel.readline().decode('utf-8'))
    res = ''.join(pieces)
    return res[:-len('\n')]
1c088fa8 557
def to_screen(self, message, skip_eol=False):
    """Print message via to_stdout, honouring quiet mode.

    skip_eol is handed on unchanged; the result of to_stdout is returned.
    """
    result = self.to_stdout(message, skip_eol, check_quiet=True)
    return result
561
def _write_string(self, s, out=None):
    """Write s to out through write_string, using the 'encoding' option."""
    chosen_encoding = self.params.get('encoding')
    write_string(s, out=out, encoding=chosen_encoding)
734f90bb 564
def to_stdout(self, message, skip_eol=False, check_quiet=False):
    """Print message to the screen file, or send it to the logger.

    When a 'logger' option is set the message goes to its debug() method
    and nothing is written. Otherwise the message is written, followed
    by a newline unless skip_eol is true. With check_quiet set, nothing
    is written in quiet mode.
    """
    logger = self.params.get('logger')
    if logger:
        logger.debug(message)
        return
    if check_quiet and self.params.get('quiet', False):
        return

    message = self._bidi_workaround(message)
    terminator = '' if skip_eol else '\n'
    self._write_string(message + terminator, self._screen_file)
8222d8de
JMF
575
def to_stderr(self, message):
    """Print message to the error file, or send it to the logger.

    When a 'logger' option is set the message goes to its error()
    method; otherwise it is written with a trailing newline.
    """
    assert isinstance(message, compat_str)
    logger = self.params.get('logger')
    if logger:
        logger.error(message)
        return
    message = self._bidi_workaround(message)
    self._write_string(message + '\n', self._err_file)
8222d8de 585
1e5b9a95
PH
def to_console_title(self, message):
    """Set the console window title to message.

    Does nothing unless the 'consoletitle' option is enabled. On
    Windows the title is set through the kernel32 console API, and only
    if a console window exists. Elsewhere, when TERM is set, an escape
    sequence is written to the screen file.
    """
    if not self.params.get('consoletitle', False):
        return
    if compat_os_name == 'nt':
        if ctypes.windll.kernel32.GetConsoleWindow():
            # c_wchar_p() might not be necessary if `message` is
            # already of type unicode()
            ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
    elif 'TERM' in os.environ:
        # Setting the title is an OSC sequence (ESC ] 0 ; text BEL).
        # ESC [ would start a CSI sequence and garble the output.
        self._write_string('\033]0;%s\007' % message, self._screen_file)
1e5b9a95 596
bdde425c
PH
def save_console_title(self):
    """Ask the terminal to push its current title on the title stack.

    Nothing is written unless 'consoletitle' is enabled, 'simulate' is
    off, the platform is not Windows and TERM is set.
    """
    options = self.params
    if not options.get('consoletitle', False) or options.get('simulate', False):
        return
    if compat_os_name != 'nt' and 'TERM' in os.environ:
        # Save the title on stack
        self._write_string('\033[22;0t', self._screen_file)
bdde425c
PH
605
def restore_console_title(self):
    """Ask the terminal to pop the title saved by save_console_title.

    Nothing is written unless 'consoletitle' is enabled, 'simulate' is
    off, the platform is not Windows and TERM is set.
    """
    options = self.params
    if not options.get('consoletitle', False) or options.get('simulate', False):
        return
    if compat_os_name != 'nt' and 'TERM' in os.environ:
        # Restore the title from stack
        self._write_string('\033[23;0t', self._screen_file)
bdde425c
PH
614
def __enter__(self):
    """Enter the context: save the console title and return this object."""
    self.save_console_title()
    return self
618
def __exit__(self, *args):
    """Leave the context: restore the console title and save the cookies.

    The cookie jar is only saved when a 'cookiefile' option is set.
    """
    self.restore_console_title()

    if self.params.get('cookiefile') is None:
        return
    self.cookiejar.save(ignore_discard=True, ignore_expires=True)
bdde425c 624
8222d8de
JMF
def trouble(self, message=None, tb=None):
    """Determine action to take when a download problem appears.

    The message, if any, is printed to stderr first. In verbose mode a
    traceback is printed as well: tb if given, otherwise one built from
    the exception being handled or, outside an except block, from the
    current call stack.

    Unless the 'ignoreerrors' option is set, DownloadError is raised
    with the message and the exception info. Otherwise the download
    return code is set to 1.
    """
    if message is not None:
        self.to_stderr(message)

    if self.params.get('verbose'):
        if tb is None:
            exc_type, exc_value = sys.exc_info()[:2]
            if not exc_type:
                # Not called from an except block: show how we got here
                tb = ''.join(traceback.format_list(traceback.extract_stack()))
            else:  # .trouble has been called from an except block
                chunks = []
                if hasattr(exc_value, 'exc_info') and exc_value.exc_info[0]:
                    # The exception wraps another one; show that one first
                    chunks.extend(traceback.format_exception(*exc_value.exc_info))
                chunks.append(encode_compat_str(traceback.format_exc()))
                tb = ''.join(chunks)
        self.to_stderr(tb)

    if self.params.get('ignoreerrors', False):
        self._download_retcode = 1
        return

    exc_info = sys.exc_info()
    if exc_info[0] and hasattr(exc_info[1], 'exc_info') and exc_info[1].exc_info[0]:
        # Prefer the info of the wrapped exception
        exc_info = exc_info[1].exc_info
    raise DownloadError(message, exc_info)
654
def report_warning(self, message):
    '''
    Report message as a warning.

    With a 'logger' option set, the message goes to its warning() method.
    Otherwise it is printed to stderr prefixed with 'WARNING:', unless
    'no_warnings' is set. The prefix is colored when colors are allowed,
    stderr is a tty and the platform is not Windows.
    '''
    logger = self.params.get('logger')
    if logger is not None:
        logger.warning(message)
        return
    if self.params.get('no_warnings'):
        return

    colored = (
        not self.params.get('no_color')
        and self._err_file.isatty()
        and compat_os_name != 'nt')
    _msg_header = '\033[0;33mWARNING:\033[0m' if colored else 'WARNING:'
    self.to_stderr('%s %s' % (_msg_header, message))
8222d8de
JMF
671
def report_error(self, message, tb=None):
    '''
    Hand message, prefixed with 'ERROR:', and tb over to trouble.

    The prefix is colored in red when colors are allowed, stderr is a
    tty and the platform is not Windows.
    '''
    colored = (
        not self.params.get('no_color')
        and self._err_file.isatty()
        and compat_os_name != 'nt')
    _msg_header = '\033[0;31mERROR:\033[0m' if colored else 'ERROR:'
    self.trouble('%s %s' % (_msg_header, message), tb)
683
8222d8de
JMF
def report_file_already_downloaded(self, file_name):
    """Tell the user that file_name has already been downloaded.

    If printing the name raises UnicodeEncodeError, a message without
    the file name is printed instead.
    """
    try:
        notice = '[download] %s has already been downloaded' % file_name
        self.to_screen(notice)
    except UnicodeEncodeError:
        self.to_screen('[download] The file has already been downloaded')
8222d8de 690
def report_file_delete(self, file_name):
    """Tell the user that the existing file file_name will be deleted.

    If printing the name raises UnicodeEncodeError, a message without
    the file name is printed instead.
    """
    try:
        notice = 'Deleting already existent file %s' % file_name
        self.to_screen(notice)
    except UnicodeEncodeError:
        self.to_screen('Deleting already existent file')
697
8222d8de
JMF
def prepare_filename(self, info_dict):
    """Generate the output filename.

    The 'outtmpl' parameter is expanded against a sanitized copy of
    info_dict (missing fields are rendered as 'NA') and the result is
    passed through sanitize_path.

    Returns the filename, or None if the output template raised a
    ValueError (the error is reported through report_error).
    """
    try:
        template_dict = dict(info_dict)

        template_dict['epoch'] = int(time.time())
        autonumber_size = self.params.get('autonumber_size')
        if autonumber_size is None:
            autonumber_size = 5
        template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
        if template_dict.get('resolution') is None:
            if template_dict.get('width') and template_dict.get('height'):
                template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
            elif template_dict.get('height'):
                template_dict['resolution'] = '%sp' % template_dict['height']
            elif template_dict.get('width'):
                template_dict['resolution'] = '%dx?' % template_dict['width']

        sanitize = lambda k, v: sanitize_filename(
            compat_str(v),
            restricted=self.params.get('restrictfilenames'),
            is_id=(k == 'id' or k.endswith('_id')))
        # Numeric values are kept as is so that numeric presentation types
        # keep working; None and container values are dropped altogether
        template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
                             for k, v in template_dict.items()
                             if v is not None and not isinstance(v, (list, tuple, dict)))
        template_dict = collections.defaultdict(lambda: 'NA', template_dict)

        outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)

        # For fields playlist_index and autonumber convert all occurrences
        # of %(field)s to %(field)0Nd for backward compatibility
        field_size_compat_map = {
            'playlist_index': len(str(template_dict['n_entries'])),
            'autonumber': autonumber_size,
        }
        FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
        # Each occurrence is padded with the width of its own field; using
        # the width of the first matched field for every occurrence would
        # mis-pad templates that contain both fields
        outtmpl = re.sub(
            FIELD_SIZE_COMPAT_RE,
            lambda mobj: '%%(%s)0%dd' % (
                mobj.group('field'), field_size_compat_map[mobj.group('field')]),
            outtmpl)

        # Missing numeric fields used together with integer presentation types
        # in format specification will break the argument substitution since
        # string 'NA' is returned for missing fields. We will patch output
        # template for missing fields to meet string presentation type.
        for numeric_field in self._NUMERIC_FIELDS:
            if numeric_field not in template_dict:
                # As of [1] format syntax is:
                #  %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
                # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
                FORMAT_RE = r'''(?x)
                    (?<!%)
                    %
                    \({0}\)  # mapping key
                    (?:[#0\-+ ]+)?  # conversion flags (optional)
                    (?:\d+)?  # minimum field width (optional)
                    (?:\.\d+)?  # precision (optional)
                    [hlL]?  # length modifier (optional)
                    [diouxXeEfFgGcrs%]  # conversion type
                '''
                outtmpl = re.sub(
                    FORMAT_RE.format(numeric_field),
                    r'%({0})s'.format(numeric_field), outtmpl)

        # expand_path translates '%%' into '%' and '$$' into '$'
        # correspondingly that is not what we want since we need to keep
        # '%%' intact for template dict substitution step. Working around
        # with boundary-alike separator hack.
        sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
        outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))

        # outtmpl should be expand_path'ed before template dict substitution
        # because meta fields may contain env variables we don't want to
        # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
        # title "Hello $PATH", we don't want `$PATH` to be expanded.
        filename = expand_path(outtmpl).replace(sep, '') % template_dict

        # https://github.com/blackjack4494/youtube-dlc/issues/85
        trim_file_name = self.params.get('trim_file_name', False)
        if trim_file_name:
            # Split off at most two trailing dot-separated parts (sub
            # extension and extension) and truncate everything in front of
            # them. An unlimited split would discard every part between the
            # first and the last two dots, and would duplicate a name that
            # contains no dot at all.
            fn_groups = filename.rsplit('.', 2)
            filename = '.'.join(filter(
                None, [fn_groups[0][:trim_file_name]] + fn_groups[1:]))

        # Temporary fix for #4787
        # 'Treat' all problem characters by passing filename through preferredencoding
        # to workaround encoding issues with subprocess on python2 @ Windows
        if sys.version_info < (3, 0) and sys.platform == 'win32':
            filename = encodeFilename(filename, True).decode(preferredencoding())
        return sanitize_path(filename)
    except ValueError as err:
        self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
        return None
796
def _match_entry(self, info_dict, incomplete):
    """Return the reason for skipping the entry, or None to download it.

    The title patterns, upload date range, view count limits, age limit
    and the download archive are always consulted; the 'match_filter'
    callable is only consulted when incomplete is false.
    """
    params = self.params
    video_title = info_dict.get('title', info_dict.get('id', 'video'))

    if 'title' in info_dict:
        # The title may be absent when we are just evaluating the playlist
        title = info_dict['title']
        wanted_pattern = params.get('matchtitle', False)
        if wanted_pattern and not re.search(wanted_pattern, title, re.IGNORECASE):
            return '"' + title + '" title did not match pattern "' + wanted_pattern + '"'
        unwanted_pattern = params.get('rejecttitle', False)
        if unwanted_pattern and re.search(unwanted_pattern, title, re.IGNORECASE):
            return '"' + title + '" title matched reject pattern "' + unwanted_pattern + '"'

    upload_date = info_dict.get('upload_date')
    if upload_date is not None:
        allowed_dates = params.get('daterange', DateRange())
        if upload_date not in allowed_dates:
            return '%s upload date is not in range %s' % (
                date_from_str(upload_date).isoformat(), allowed_dates)

    views = info_dict.get('view_count')
    if views is not None:
        fewest = params.get('min_views')
        if fewest is not None and views < fewest:
            return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (
                video_title, views, fewest)
        most = params.get('max_views')
        if most is not None and views > most:
            return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (
                video_title, views, most)

    if age_restricted(info_dict.get('age_limit'), params.get('age_limit')):
        return 'Skipping "%s" because it is age restricted' % video_title
    if self.in_download_archive(info_dict):
        return '%s has already been recorded in archive' % video_title

    if incomplete:
        return None
    custom_filter = params.get('match_filter')
    if custom_filter is None:
        return None
    # The filter itself returns None for entries that should be downloaded
    return custom_filter(info_dict)
fe7e0c98 838
b6c45014
JMF
839 @staticmethod
840 def add_extra_info(info_dict, extra_info):
841 '''Set the keys from extra_info in info dict if they are missing'''
842 for key, value in extra_info.items():
843 info_dict.setdefault(key, value)
844
def extract_info(self, url, download=True, ie_key=None, info_dict=None, extra_info={},
                 process=True, force_generic_extractor=False):
    """
    Extract the information for url with the first suitable extractor.

    If 'download', also downloads the videos.
    extra_info is a dict containing the extra values to add to each result.
    Nothing is returned when the video id is already recorded in the
    download archive or when no extractor is suitable for the URL (the
    latter is reported as an error).
    """
    if force_generic_extractor and not ie_key:
        ie_key = 'Generic'

    candidates = [self.get_info_extractor(ie_key)] if ie_key else self._ies

    for candidate in candidates:
        if not candidate.suitable(url):
            continue

        key = candidate.ie_key()
        extractor = self.get_info_extractor(key)
        if not extractor.working():
            self.report_warning('The program functionality for this site has been marked as broken, '
                                'and will probably not work.')

        # Try to learn the video id up front so that archived videos can
        # be skipped without running the extraction at all
        try:
            if callable(getattr(extractor, 'extract_id', None)):
                temp_id = extractor.extract_id(url)
            else:
                temp_id = extractor._match_id(url)
        except (AssertionError, IndexError, AttributeError):
            temp_id = None

        if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': key}):
            self.to_screen("[%s] %s: has already been recorded in archive" % (
                key, temp_id))
            return None

        return self.__extract_info(url, extractor, download, extra_info, process, info_dict)

    self.report_error('no suitable InfoExtractor for URL %s' % url)
884
def __handle_extraction_exceptions(func):
    """Decorator that reports extraction errors instead of propagating them.

    GeoRestrictedError and ExtractorError are always reported through
    report_error. MaxDownloadsReached always propagates. Any other
    exception is reported only when the 'ignoreerrors' parameter is set
    and is re-raised otherwise. The wrapper returns None whenever an error
    has been reported.
    """
    def wrapper(self, *args, **kwargs):
        try:
            return func(self, *args, **kwargs)
        except GeoRestrictedError as geo_error:
            details = geo_error.msg
            if geo_error.countries:
                details += '\nThis video is available in %s.' % ', '.join(
                    map(ISO3166Utils.short2full, geo_error.countries))
            details += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
            self.report_error(details)
        except ExtractorError as extractor_error:  # An error we somewhat expected
            self.report_error(compat_str(extractor_error), extractor_error.format_traceback())
        except MaxDownloadsReached:
            raise
        except Exception as unexpected:
            if not self.params.get('ignoreerrors', False):
                raise
            self.report_error(
                error_to_compat_str(unexpected),
                tb=encode_compat_str(traceback.format_exc()))
    return wrapper
906
@__handle_extraction_exceptions
def __extract_info(self, url, ie, download, extra_info, process, info_dict):
    """Run the extractor ie on url and optionally process its result.

    A non-empty 'id' or 'title' in info_dict overrides the extracted one.
    Returns the processed result when process is true, the raw extractor
    result otherwise, and None when the extractor returned nothing.
    """
    extracted = ie.extract(url)
    if extracted is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
        return
    if isinstance(extracted, list):
        # Backwards compatibility: old IE result format
        extracted = {
            '_type': 'compat_list',
            'entries': extracted,
        }
    if info_dict:
        for field in ('id', 'title'):
            if info_dict.get(field):
                extracted[field] = info_dict[field]
    self.add_default_extra_info(extracted, ie, url)
    if not process:
        return extracted
    return self.process_ie_result(extracted, download, extra_info)
fe7e0c98 928
ea38e55f
PH
def add_default_extra_info(self, ie_result, ie, url):
    """Fill ie_result with the default extractor and URL related fields.

    Fields that are already present in ie_result are left untouched.
    """
    defaults = {
        'extractor': ie.IE_NAME,
        'webpage_url': url,
    }
    if ie_result.get('duration', None) is None:
        defaults['duration_string'] = None
    else:
        defaults['duration_string'] = formatSeconds(ie_result['duration'], '-')
    defaults['webpage_url_basename'] = url_basename(url)
    defaults['extractor_key'] = ie.ie_key()
    self.add_extra_info(ie_result, defaults)
940
8222d8de
JMF
def process_ie_result(self, ie_result, download=True, extra_info={}):
    """
    Take the result of the ie(may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    extra_info is only read here, never modified.
    Returns the resolved ie_result (None may be returned for a
    'url_transparent' result whose inner extraction yielded nothing).
    Raises Exception for an unknown '_type'.
    """
    result_type = ie_result.get('_type', 'video')

    if result_type in ('url', 'url_transparent'):
        ie_result['url'] = sanitize_url(ie_result['url'])
        extract_flat = self.params.get('extract_flat', False)
        if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
                or extract_flat is True):
            self.__forced_printings(
                ie_result, self.prepare_filename(ie_result),
                incomplete=True)
            return ie_result

    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        return self.process_video_result(ie_result, download=download)
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(ie_result['url'],
                                 download, info_dict=ie_result,
                                 ie_key=ie_result.get('ie_key'),
                                 extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        # extract_info may return None when ignoreerrors is enabled and
        # extraction failed with an error, don't crash and return early
        # in this case
        if not info:
            return info

        force_properties = dict(
            (k, v) for k, v in ie_result.items() if v is not None)
        for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
            if f in force_properties:
                del force_properties[f]
        new_result = info.copy()
        new_result.update(force_properties)

        # Extracted info may not be a video result (i.e.
        # info.get('_type', 'video') != video) but rather an url or
        # url_transparent. In such cases outer metadata (from ie_result)
        # should be propagated to inner one (info). For this to happen
        # _type of info should be overridden with url_transparent. This
        # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
        if new_result.get('_type') == 'url':
            new_result['_type'] = 'url_transparent'

        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    elif result_type in ('playlist', 'multi_video'):
        # We process each entry in the playlist
        playlist = ie_result.get('title') or ie_result.get('id')
        self.to_screen('[download] Downloading playlist: %s' % playlist)

        playlist_results = []

        playliststart = self.params.get('playliststart', 1) - 1
        playlistend = self.params.get('playlistend')
        # For backwards compatibility, interpret -1 as whole list
        if playlistend == -1:
            playlistend = None

        playlistitems_str = self.params.get('playlist_items')
        playlistitems = None
        if playlistitems_str is not None:
            def iter_playlistitems(items_spec):
                # Expand a spec such as '1-3,7' into 1, 2, 3, 7
                for string_segment in items_spec.split(','):
                    if '-' in string_segment:
                        start, end = string_segment.split('-')
                        for item in range(int(start), int(end) + 1):
                            yield item
                    else:
                        yield int(string_segment)
            playlistitems = orderedSet(iter_playlistitems(playlistitems_str))

        ie_entries = ie_result['entries']

        def make_playlistitems_entries(list_ie_entries):
            # Requested (1-based) items that fall outside the list are dropped
            num_entries = len(list_ie_entries)
            return [
                list_ie_entries[i - 1] for i in playlistitems
                if -num_entries <= i - 1 < num_entries]

        def report_download(num_entries):
            self.to_screen(
                '[%s] playlist %s: Downloading %d videos' %
                (ie_result['extractor'], playlist, num_entries))

        if isinstance(ie_entries, list):
            n_all_entries = len(ie_entries)
            if playlistitems:
                entries = make_playlistitems_entries(ie_entries)
            else:
                entries = ie_entries[playliststart:playlistend]
            n_entries = len(entries)
            self.to_screen(
                '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
                (ie_result['extractor'], playlist, n_all_entries, n_entries))
        elif isinstance(ie_entries, PagedList):
            if playlistitems:
                entries = []
                for item in playlistitems:
                    entries.extend(ie_entries.getslice(
                        item - 1, item
                    ))
            else:
                entries = ie_entries.getslice(
                    playliststart, playlistend)
            n_entries = len(entries)
            report_download(n_entries)
        else:  # iterable
            if playlistitems:
                entries = make_playlistitems_entries(list(itertools.islice(
                    ie_entries, 0, max(playlistitems))))
            else:
                entries = list(itertools.islice(
                    ie_entries, playliststart, playlistend))
            n_entries = len(entries)
            report_download(n_entries)

        if self.params.get('playlistreverse', False):
            entries = entries[::-1]

        if self.params.get('playlistrandom', False):
            random.shuffle(entries)

        x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

        # _match_entry reports archived videos with the first wording; the
        # second one is accepted as well so that either phrasing of the
        # reason triggers --break-on-existing
        archived_suffixes = (
            'has already been recorded in archive',
            'has already been recorded in the archive')

        for i, entry in enumerate(entries, 1):
            self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
            # This __x_forwarded_for_ip thing is a bit ugly but requires
            # minimal changes
            if x_forwarded_for:
                entry['__x_forwarded_for_ip'] = x_forwarded_for
            extra = {
                'n_entries': n_entries,
                'playlist': playlist,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_uploader': ie_result.get('uploader'),
                'playlist_uploader_id': ie_result.get('uploader_id'),
                'playlist_index': playlistitems[i - 1] if playlistitems else i + playliststart,
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            }

            reason = self._match_entry(entry, incomplete=True)
            if reason is not None:
                if reason.endswith(archived_suffixes) and self.params.get('break_on_existing'):
                    # Routed through to_screen (not print) like every other
                    # status message of this method
                    self.to_screen('[download] tried downloading a file that\'s already in the archive, stopping since --break-on-existing is set.')
                    break
                else:
                    self.to_screen('[download] ' + reason)
                    continue

            entry_result = self.__process_iterable_entry(entry, download, extra)
            # TODO: skip failed (empty) entries?
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results
        self.to_screen('[download] Finished downloading playlist: %s' % playlist)
        return ie_result
    elif result_type == 'compat_list':
        self.report_warning(
            'Extractor %s returned a compat_list result. '
            'It needs to be updated.' % ie_result.get('extractor'))

        def _fixup(r):
            # Give every legacy entry the extractor fields of its parent
            self.add_extra_info(
                r,
                {
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }
            )
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)
1139
@__handle_extraction_exceptions
def __process_iterable_entry(self, entry, download, extra_info):
    """Process a single playlist entry under the extraction error handling
    of the decorator, so that one failing entry can be reported without
    necessarily aborting the whole playlist."""
    processed = self.process_ie_result(
        entry, download=download, extra_info=extra_info)
    return processed
1144
67134eab
JMF
def _build_format_filter(self, filter_spec):
    """Return a function to filter the formats according to the filter_spec.

    Two kinds of specifications are understood: numeric comparisons on a
    fixed set of keys (e.g. 'height<=720', 'filesize>10M') and string
    comparisons on any key (e.g. 'ext=mp4', 'format_id!^=hls'). A '?'
    after the operator makes formats lacking the key pass the filter.
    Raises ValueError when filter_spec matches neither form.
    """
    NUMERIC_OPERATORS = {
        '<': operator.lt,
        '<=': operator.le,
        '>': operator.gt,
        '>=': operator.ge,
        '=': operator.eq,
        '!=': operator.ne,
    }
    numeric_rex = re.compile(r'''(?x)\s*
        (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
        \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
        $
        ''' % '|'.join(map(re.escape, NUMERIC_OPERATORS.keys())))
    match = numeric_rex.search(filter_spec)

    if match:
        raw_value = match.group('value')
        try:
            wanted = int(raw_value)
        except ValueError:
            # Not a plain integer: try it as a file size, then once more
            # with the unit letter 'B' appended
            wanted = parse_filesize(raw_value)
            if wanted is None:
                wanted = parse_filesize(raw_value + 'B')
            if wanted is None:
                raise ValueError(
                    'Invalid value %r in format specification %r' % (
                        raw_value, filter_spec))
        compare = NUMERIC_OPERATORS[match.group('op')]
    else:
        STRING_OPERATORS = {
            '=': operator.eq,
            '^=': lambda attr, value: attr.startswith(value),
            '$=': lambda attr, value: attr.endswith(value),
            '*=': lambda attr, value: value in attr,
        }
        string_rex = re.compile(r'''(?x)
            \s*(?P<key>[a-zA-Z0-9._-]+)
            \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
            \s*(?P<value>[a-zA-Z0-9._-]+)
            \s*$
            ''' % '|'.join(map(re.escape, STRING_OPERATORS.keys())))
        match = string_rex.search(filter_spec)
        if not match:
            raise ValueError('Invalid filter specification %r' % filter_spec)
        wanted = match.group('value')
        plain_compare = STRING_OPERATORS[match.group('op')]
        if match.group('negation'):
            compare = lambda attr, value: not plain_compare(attr, value)
        else:
            compare = plain_compare

    field = match.group('key')
    accept_missing = match.group('none_inclusive')

    def _filter(f):
        actual = f.get(field)
        if actual is None:
            # Truthy only when the spec carried the '?' marker
            return accept_missing
        return compare(actual, wanted)
    return _filter
1207
def _default_format_spec(self, info_dict, download=True):
    """Return the format specification used when the user gave none.

    A single pre-merged file ('best') is preferred when a real download is
    going to happen and merging is not an option: the merger is not
    usable, the video is live, or the output goes to stdout.
    """

    def merging_possible():
        merger = FFmpegMergerPP(self)
        return merger.available and merger.can_merge()

    if self.params.get('simulate', False) or not download:
        # Nothing is downloaded, so the merger does not need to be probed
        prefer_best = False
    else:
        prefer_best = (
            not merging_possible()
            or info_dict.get('is_live', False)
            or self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-')

    if prefer_best:
        return 'best/bestvideo+bestaudio'
    if not self.params.get('allow_multiple_audio_streams', False):
        return 'bestvideo*+bestaudio/best'
    return 'bestvideo+bestaudio/best'
0017d9ad 1228
67134eab
JMF
def build_format_selector(self, format_spec):
    """Compile format_spec into a format selector function.

    format_spec is a format selection expression built from format names
    (e.g. 'best', 'bv*', 'mp4', a format id), the operators ',' (all of),
    '/' (first available), '+' (merge), parentheses for grouping and
    '[...]' filters.

    The returned function takes a context dict with the keys 'formats'
    and 'incomplete_formats' and returns an iterable of the selected
    format dicts.

    Raises SyntaxError for a malformed specification.
    """
    def syntax_error(note, start):
        message = (
            'Invalid format specification: '
            '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
        return SyntaxError(message)

    PICKFIRST = 'PICKFIRST'
    MERGE = 'MERGE'
    SINGLE = 'SINGLE'
    GROUP = 'GROUP'
    FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

    allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
                              'video': self.params.get('allow_multiple_video_streams', False)}

    def _parse_filter(tokens):
        # Collect the raw text of a filter up to its closing bracket
        filter_parts = []
        for type, string, start, _, _ in tokens:
            if type == tokenize.OP and string == ']':
                return ''.join(filter_parts)
            else:
                filter_parts.append(string)

    def _remove_unused_ops(tokens):
        # Remove operators that we don't use and join them with the surrounding strings
        # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
        ALLOWED_OPS = ('/', '+', ',', '(', ')')
        last_string, last_start, last_end, last_line = None, None, None, None
        for type, string, start, end, line in tokens:
            if type == tokenize.OP and string == '[':
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield type, string, start, end, line
                # everything inside brackets will be handled by _parse_filter
                for type, string, start, end, line in tokens:
                    yield type, string, start, end, line
                    if type == tokenize.OP and string == ']':
                        break
            elif type == tokenize.OP and string in ALLOWED_OPS:
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield type, string, start, end, line
            elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                if not last_string:
                    last_string = string
                    last_start = start
                    last_end = end
                else:
                    last_string += string
        if last_string:
            yield tokenize.NAME, last_string, last_start, last_end, last_line

    def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
        selectors = []
        current_selector = None
        for type, string, start, _, _ in tokens:
            # ENCODING is only defined in python 3.x
            if type == getattr(tokenize, 'ENCODING', None):
                continue
            elif type in [tokenize.NAME, tokenize.NUMBER]:
                current_selector = FormatSelector(SINGLE, string, [])
            elif type == tokenize.OP:
                if string == ')':
                    if not inside_group:
                        # ')' will be handled by the parentheses group
                        tokens.restore_last_token()
                    break
                elif inside_merge and string in ['/', ',']:
                    tokens.restore_last_token()
                    break
                elif inside_choice and string == ',':
                    tokens.restore_last_token()
                    break
                elif string == ',':
                    if not current_selector:
                        raise syntax_error('"," must follow a format selector', start)
                    selectors.append(current_selector)
                    current_selector = None
                elif string == '/':
                    if not current_selector:
                        raise syntax_error('"/" must follow a format selector', start)
                    first_choice = current_selector
                    second_choice = _parse_format_selection(tokens, inside_choice=True)
                    current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                elif string == '[':
                    if not current_selector:
                        current_selector = FormatSelector(SINGLE, 'best', [])
                    format_filter = _parse_filter(tokens)
                    current_selector.filters.append(format_filter)
                elif string == '(':
                    if current_selector:
                        raise syntax_error('Unexpected "("', start)
                    group = _parse_format_selection(tokens, inside_group=True)
                    current_selector = FormatSelector(GROUP, group, [])
                elif string == '+':
                    if not current_selector:
                        raise syntax_error('Unexpected "+"', start)
                    selector_1 = current_selector
                    selector_2 = _parse_format_selection(tokens, inside_merge=True)
                    if not selector_2:
                        raise syntax_error('Expected a selector', start)
                    current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                else:
                    raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
            elif type == tokenize.ENDMARKER:
                break
        if current_selector:
            selectors.append(current_selector)
        return selectors

    def _build_selector_function(selector):
        if isinstance(selector, list):  # ,
            fs = [_build_selector_function(s) for s in selector]

            def selector_function(ctx):
                for f in fs:
                    for format in f(ctx):
                        yield format
            return selector_function

        elif selector.type == GROUP:  # ()
            selector_function = _build_selector_function(selector.selector)

        elif selector.type == PICKFIRST:  # /
            fs = [_build_selector_function(s) for s in selector.selector]

            def selector_function(ctx):
                for f in fs:
                    picked_formats = list(f(ctx))
                    if picked_formats:
                        return picked_formats
                return []

        elif selector.type == SINGLE:  # atom
            single_spec = selector.selector if selector.selector is not None else 'best'

            if single_spec == 'all':
                def selector_function(ctx):
                    formats = list(ctx['formats'])
                    if formats:
                        for f in formats:
                            yield f

            else:
                format_fallback = False
                format_spec_obj = re.match(r'(best|worst|b|w)(video|audio|v|a)?(\*)?$', single_spec)
                if format_spec_obj is not None:
                    format_idx = 0 if format_spec_obj.group(1)[0] == 'w' else -1
                    format_type = format_spec_obj.group(2)[0] if format_spec_obj.group(2) else False
                    not_format_type = 'v' if format_type == 'a' else 'a'
                    format_modified = format_spec_obj.group(3) is not None

                    format_fallback = not format_type and not format_modified  # for b, w
                    filter_f = ((lambda f: f.get(format_type + 'codec') != 'none')
                                if format_type and format_modified  # bv*, ba*, wv*, wa*
                                else (lambda f: f.get(not_format_type + 'codec') == 'none')
                                if format_type  # bv, ba, wv, wa
                                else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                                if not format_modified  # b, w
                                else None)  # b*, w*
                else:
                    format_idx = -1
                    filter_f = ((lambda f: f.get('ext') == single_spec)
                                if single_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']  # extension
                                else (lambda f: f.get('format_id') == single_spec))  # id

                def selector_function(ctx):
                    formats = list(ctx['formats'])
                    if not formats:
                        return
                    matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                    if matches:
                        yield matches[format_idx]
                    elif format_fallback == 'force' or (format_fallback and ctx['incomplete_formats']):
                        # for extractors with incomplete formats (audio only (soundcloud)
                        # or video only (imgur)) best/worst will fallback to
                        # best/worst {video,audio}-only format
                        yield formats[format_idx]

        elif selector.type == MERGE:  # +
            def _merge(formats_pair):
                format_1, format_2 = formats_pair

                formats_info = []
                formats_info.extend(format_1.get('requested_formats', (format_1,)))
                formats_info.extend(format_2.get('requested_formats', (format_2,)))

                if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
                    # Keep the formats in order and drop any format that
                    # would add a second stream of a type that may occur
                    # only once. The survivors go into a new list: popping
                    # from the list being enumerated skipped the following
                    # element and could pop twice for a single format,
                    # raising IndexError (e.g. for 'best+best').
                    stream_taken = {'video': False, 'audio': False}
                    kept_formats = []
                    for fmt_info in formats_info:
                        stream_types = [
                            aud_vid for aud_vid in ('audio', 'video')
                            if fmt_info.get(aud_vid[0] + 'codec') != 'none']
                        if any(stream_taken[aud_vid] and not allow_multiple_streams[aud_vid]
                               for aud_vid in stream_types):
                            continue
                        for aud_vid in stream_types:
                            stream_taken[aud_vid] = True
                        kept_formats.append(fmt_info)
                    formats_info = kept_formats

                if len(formats_info) == 1:
                    return formats_info[0]

                video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
                audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']

                the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
                the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None

                output_ext = self.params.get('merge_output_format')
                if not output_ext:
                    if the_only_video:
                        output_ext = the_only_video['ext']
                    elif the_only_audio and not video_fmts:
                        output_ext = the_only_audio['ext']
                    else:
                        output_ext = 'mkv'

                new_dict = {
                    'requested_formats': formats_info,
                    'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
                    'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
                    'ext': output_ext,
                }

                if the_only_video:
                    new_dict.update({
                        'width': the_only_video.get('width'),
                        'height': the_only_video.get('height'),
                        'resolution': the_only_video.get('resolution'),
                        'fps': the_only_video.get('fps'),
                        'vcodec': the_only_video.get('vcodec'),
                        'vbr': the_only_video.get('vbr'),
                        'stretched_ratio': the_only_video.get('stretched_ratio'),
                    })

                if the_only_audio:
                    new_dict.update({
                        'acodec': the_only_audio.get('acodec'),
                        'abr': the_only_audio.get('abr'),
                    })

                return new_dict

            selector_1, selector_2 = map(_build_selector_function, selector.selector)

            def selector_function(ctx):
                for pair in itertools.product(
                        selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
                    yield _merge(pair)

        filters = [self._build_format_filter(f) for f in selector.filters]

        def final_selector(ctx):
            # Filters are applied to a copy so that the caller's context
            # keeps its complete list of formats
            ctx_copy = copy.deepcopy(ctx)
            for _filter in filters:
                ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
            return selector_function(ctx_copy)
        return final_selector

    stream = io.BytesIO(format_spec.encode('utf-8'))
    try:
        tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
    except tokenize.TokenError:
        raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

    class TokenIterator(object):
        # Iterator over the token list that can step back by one token
        def __init__(self, tokens):
            self.tokens = tokens
            self.counter = 0

        def __iter__(self):
            return self

        def __next__(self):
            if self.counter >= len(self.tokens):
                raise StopIteration()
            value = self.tokens[self.counter]
            self.counter += 1
            return value

        next = __next__

        def restore_last_token(self):
            self.counter -= 1

    parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
    return _build_selector_function(parsed_selector)
a9c58ad9 1516
e5660ee6
JMF
def _calc_headers(self, info_dict):
    """Build the HTTP headers to use for ``info_dict``.

    Starts from a copy of ``std_headers``, overlays the optional
    ``http_headers`` entry of ``info_dict``, then sets ``Cookie`` from
    ``_calc_cookies()`` when that returns something truthy.  If no
    ``X-Forwarded-For`` header is present by then, a truthy
    ``__x_forwarded_for_ip`` entry of ``info_dict`` is used for it.

    Returns the resulting header dict.
    """
    headers = std_headers.copy()
    headers.update(info_dict.get('http_headers') or {})

    cookie_header = self._calc_cookies(info_dict)
    if cookie_header:
        headers['Cookie'] = cookie_header

    # An explicitly supplied X-Forwarded-For header takes precedence
    forwarded_ip = info_dict.get('__x_forwarded_for_ip')
    if forwarded_ip and 'X-Forwarded-For' not in headers:
        headers['X-Forwarded-For'] = forwarded_ip

    return headers
1534
def _calc_cookies(self, info_dict):
    """Return the ``Cookie`` header the cookie jar adds for ``info_dict['url']``.

    A throw-away request for the URL is created, ``self.cookiejar`` is asked
    to add its cookie header to it, and that header is read back.
    """
    probe_request = sanitized_Request(info_dict['url'])
    self.cookiejar.add_cookie_header(probe_request)
    return probe_request.get_header('Cookie')
e5660ee6 1539
dd82ffea
JMF
def process_video_result(self, info_dict, download=True):
    """Sanitize a single-video result, select its formats and optionally download.

    ``info_dict`` is mutated in place: field types are normalized,
    thumbnails, subtitles and formats are sanitized and completed, and the
    last selected format is merged into it.

    Returns ``info_dict``, or ``None`` when only a listing was requested
    through the ``list_thumbnails``, ``listsubtitles`` or ``listformats``
    params.

    Raises ``ExtractorError`` when ``id`` or ``title`` is missing, when no
    usable format is left, or when the requested format is not available.
    """
    assert info_dict.get('_type', 'video') == 'video'

    if 'id' not in info_dict:
        raise ExtractorError('Missing "id" field in extractor result')
    if 'title' not in info_dict:
        raise ExtractorError('Missing "title" field in extractor result')

    def report_force_conversion(field, field_not, conversion):
        self.report_warning(
            '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
            % (field, field_not, conversion))

    def sanitize_string_field(info, string_field):
        # Coerce a non-string value to a string, warning about the extractor bug
        field = info.get(string_field)
        if field is None or isinstance(field, compat_str):
            return
        report_force_conversion(string_field, 'a string', 'string')
        info[string_field] = compat_str(field)

    def sanitize_numeric_fields(info):
        # Coerce every known numeric field that holds a non-numeric value
        for numeric_field in self._NUMERIC_FIELDS:
            field = info.get(numeric_field)
            if field is None or isinstance(field, compat_numeric_types):
                continue
            report_force_conversion(numeric_field, 'numeric', 'int')
            info[numeric_field] = int_or_none(field)

    sanitize_string_field(info_dict, 'id')
    sanitize_numeric_fields(info_dict)

    if 'playlist' not in info_dict:
        # It isn't part of a playlist
        info_dict['playlist'] = None
        info_dict['playlist_index'] = None

    thumbnails = info_dict.get('thumbnails')
    if thumbnails is None:
        thumbnail = info_dict.get('thumbnail')
        if thumbnail:
            info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
    if thumbnails:
        def thumbnail_sort_key(t):
            # Missing values sort first, so the last thumbnail is the preferred one
            def value_or(key, default):
                value = t.get(key)
                return default if value is None else value
            return (
                value_or('preference', -1),
                value_or('width', -1),
                value_or('height', -1),
                value_or('id', ''), t.get('url'))

        thumbnails.sort(key=thumbnail_sort_key)
        for i, t in enumerate(thumbnails):
            t['url'] = sanitize_url(t['url'])
            if t.get('width') and t.get('height'):
                t['resolution'] = '%dx%d' % (t['width'], t['height'])
            if t.get('id') is None:
                t['id'] = '%d' % i

    if self.params.get('list_thumbnails'):
        self.list_thumbnails(info_dict)
        return

    thumbnail = info_dict.get('thumbnail')
    if thumbnail:
        info_dict['thumbnail'] = sanitize_url(thumbnail)
    elif thumbnails:
        info_dict['thumbnail'] = thumbnails[-1]['url']

    if 'display_id' not in info_dict and 'id' in info_dict:
        info_dict['display_id'] = info_dict['id']

    if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
        # Working around out-of-range timestamp values (e.g. negative ones on Windows,
        # see http://bugs.python.org/issue1646728)
        try:
            upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
            info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
        except (ValueError, OverflowError, OSError):
            pass

    # Auto generate title fields corresponding to the *_number fields when missing
    # in order to always have clean titles. This is very common for TV series.
    for field in ('chapter', 'season', 'episode'):
        if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
            info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])

    for cc_kind in ('subtitles', 'automatic_captions'):
        cc = info_dict.get(cc_kind)
        if cc:
            for _, subtitle in cc.items():
                for subtitle_format in subtitle:
                    if subtitle_format.get('url'):
                        subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                    if subtitle_format.get('ext') is None:
                        subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

    automatic_captions = info_dict.get('automatic_captions')
    subtitles = info_dict.get('subtitles')

    if self.params.get('listsubtitles', False):
        if 'automatic_captions' in info_dict:
            self.list_subtitles(
                info_dict['id'], automatic_captions, 'automatic captions')
        self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
        return

    info_dict['requested_subtitles'] = self.process_subtitles(
        info_dict['id'], subtitles, automatic_captions)

    # We now pick which formats have to be downloaded
    if info_dict.get('formats') is None:
        # There's only one format available
        formats = [info_dict]
    else:
        formats = info_dict['formats']

    if not formats:
        raise ExtractorError('No video formats found!')

    def is_wellformed(f):
        url = f.get('url')
        if not url:
            self.report_warning(
                '"url" field is missing or empty - skipping format, '
                'there is an error in extractor')
            return False
        if isinstance(url, bytes):
            sanitize_string_field(f, 'url')
        return True

    # Filter out malformed formats for better extraction robustness
    formats = list(filter(is_wellformed, formats))

    # Every format may have been dropped above; report that the same way as
    # an empty format list instead of failing on formats[0] further down.
    if not formats:
        raise ExtractorError('No video formats found!')

    formats_dict = {}

    # We check that all the formats have the format and format_id fields
    for i, fmt in enumerate(formats):
        sanitize_string_field(fmt, 'format_id')
        sanitize_numeric_fields(fmt)
        fmt['url'] = sanitize_url(fmt['url'])
        if not fmt.get('format_id'):
            fmt['format_id'] = compat_str(i)
        else:
            # Sanitize format_id from characters used in format selector expression
            fmt['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', fmt['format_id'])
        formats_dict.setdefault(fmt['format_id'], []).append(fmt)

    # Make sure all formats have unique format_id
    for format_id, ambiguous_formats in formats_dict.items():
        if len(ambiguous_formats) > 1:
            for i, fmt in enumerate(ambiguous_formats):
                fmt['format_id'] = '%s-%d' % (format_id, i)

    for fmt in formats:
        if fmt.get('format') is None:
            fmt['format'] = '{id} - {res}{note}'.format(
                id=fmt['format_id'],
                res=self.format_resolution(fmt),
                note=' ({0})'.format(fmt['format_note']) if fmt.get('format_note') is not None else '',
            )
        # Automatically determine file extension if missing
        if fmt.get('ext') is None:
            fmt['ext'] = determine_ext(fmt['url']).lower()
        # Automatically determine protocol if missing (useful for format
        # selection purposes)
        if fmt.get('protocol') is None:
            fmt['protocol'] = determine_protocol(fmt)
        # Add HTTP headers, so that external programs can use them from the
        # json output
        full_format_info = info_dict.copy()
        full_format_info.update(fmt)
        fmt['http_headers'] = self._calc_headers(full_format_info)
    # Remove private housekeeping stuff
    if '__x_forwarded_for_ip' in info_dict:
        del info_dict['__x_forwarded_for_ip']

    # TODO Central sorting goes here

    if formats[0] is not info_dict:
        # only set the 'formats' fields if the original info_dict list them
        # otherwise we end up with a circular reference, the first (and unique)
        # element in the 'formats' field in info_dict is info_dict itself,
        # which can't be exported to json
        info_dict['formats'] = formats
    if self.params.get('listformats'):
        self.list_formats(info_dict)
        return

    req_format = self.params.get('format')
    if req_format is None:
        req_format = self._default_format_spec(info_dict, download=download)
        if self.params.get('verbose'):
            self._write_string('[debug] Default format spec: %s\n' % req_format)

    format_selector = self.build_format_selector(req_format)

    # While in format selection we may need to have an access to the original
    # format set in order to calculate some metrics or do some processing.
    # For now we need to be able to guess whether original formats provided
    # by extractor are incomplete or not (i.e. whether extractor provides only
    # video-only or audio-only formats) for proper formats selection for
    # extractors with such incomplete formats (see
    # https://github.com/ytdl-org/youtube-dl/pull/5556).
    # Since formats may be filtered during format selection and may not match
    # the original formats the results may be incorrect. Thus original formats
    # or pre-calculated metrics should be passed to format selection routines
    # as well.
    # We will pass a context object containing all necessary additional data
    # instead of just formats.
    # This fixes incorrect format selection issue (see
    # https://github.com/ytdl-org/youtube-dl/issues/10083).
    incomplete_formats = (
        # All formats are video-only or
        all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
        # all formats are audio-only
        or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))

    ctx = {
        'formats': formats,
        'incomplete_formats': incomplete_formats,
    }

    formats_to_download = list(format_selector(ctx))
    if not formats_to_download:
        raise ExtractorError('requested format not available',
                             expected=True)

    if download:
        self.to_screen('[info] Downloading format(s) %s' % ", ".join([f['format_id'] for f in formats_to_download]))
        if len(formats_to_download) > 1:
            self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
        for fmt in formats_to_download:
            new_info = dict(info_dict)
            new_info.update(fmt)
            self.process_info(new_info)
    # We update the info dict with the best quality format (backwards compatibility)
    info_dict.update(formats_to_download[-1])
    return info_dict
1777
def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
    """Select the requested subtitles and their format.

    Regular subtitles are considered when the ``writesubtitles`` param is
    set, automatic captions when ``writeautomaticsub`` is set; for a language
    present in both, the regular subtitles win.

    Returns a dict mapping each selected language to one chosen format
    entry, or ``None`` when neither param is set or nothing is available.
    A warning is reported for every requested language that is unavailable
    and whenever none of the preferred formats exists for a language.
    """
    want_subs = self.params.get('writesubtitles')
    want_auto = self.params.get('writeautomaticsub')

    candidates = {}
    if normal_subtitles and want_subs:
        candidates.update(normal_subtitles)
    if automatic_captions and want_auto:
        for lang, captions in automatic_captions.items():
            # never replace a regular subtitle track with an automatic one
            candidates.setdefault(lang, captions)

    if not (want_subs or want_auto) or not candidates:
        return None

    if self.params.get('allsubtitles', False):
        requested_langs = candidates.keys()
    elif self.params.get('subtitleslangs', False):
        requested_langs = self.params.get('subtitleslangs')
    elif 'en' in candidates:
        requested_langs = ['en']
    else:
        # fall back to the first available language
        requested_langs = [next(iter(candidates))]

    formats_query = self.params.get('subtitlesformat', 'best')
    preferred_exts = formats_query.split('/') if formats_query else []

    selected = {}
    for lang in requested_langs:
        lang_formats = candidates.get(lang)
        if lang_formats is None:
            self.report_warning('%s subtitles not available for %s' % (lang, video_id))
            continue
        for wanted_ext in preferred_exts:
            if wanted_ext == 'best':
                chosen = lang_formats[-1]
                break
            same_ext = [entry for entry in lang_formats if entry['ext'] == wanted_ext]
            if same_ext:
                chosen = same_ext[-1]
                break
        else:
            # no preference matched: take the last entry and tell the user
            chosen = lang_formats[-1]
            self.report_warning(
                'No subtitle format found matching "%s" for language %s, '
                'using %s' % (formats_query, lang, chosen['ext']))
        selected[lang] = chosen
    return selected
1826
d06daf23
S
def __forced_printings(self, info_dict, filename, incomplete):
    """Write the fields requested through the ``force*`` params to stdout.

    The order is: title, id, URL(s), thumbnail, description, filename,
    duration, format and finally the JSON dump of ``info_dict``.  When
    ``incomplete`` is true, title/id/format are only printed if they have a
    value and URLs are not printed at all.
    """
    params = self.params

    def forced(name):
        return params.get('force%s' % name, False)

    def print_field(field, mandatory):
        if not forced(field):
            return
        has_value = info_dict.get(field) is not None
        # a mandatory field of a complete result is looked up unconditionally
        if has_value or (mandatory and not incomplete):
            self.to_stdout(info_dict[field])

    print_field('title', mandatory=True)
    print_field('id', mandatory=True)

    if forced('url') and not incomplete:
        requested = info_dict.get('requested_formats')
        sources = [info_dict] if requested is None else requested
        for source in sources:
            # For RTMP URLs, also include the playpath
            self.to_stdout(source['url'] + source.get('play_path', ''))

    print_field('thumbnail', mandatory=False)
    print_field('description', mandatory=False)

    if forced('filename') and filename is not None:
        self.to_stdout(filename)
    if forced('duration') and info_dict.get('duration') is not None:
        self.to_stdout(formatSeconds(info_dict['duration']))

    print_field('format', mandatory=True)

    if forced('json'):
        self.to_stdout(json.dumps(info_dict))
1856
8222d8de
JMF
def process_info(self, info_dict):
    """Process a single resolved IE result.

    In order: enforces ``max_downloads``, applies the match filter, computes
    the output filename, performs the forced printings, writes the requested
    side files (description, annotations, subtitles, info JSON, thumbnails,
    internet shortcuts), downloads the media (merging several requested
    formats when needed), schedules fixup postprocessors, runs the
    postprocessors and post hooks, and records the download archive.

    Returns ``None`` in every case; most failures are reported through
    ``report_error``/``report_warning`` followed by an early return.

    Raises ``MaxDownloadsReached`` when the download limit is hit and
    ``UnavailableVideoError`` when the download fails with an
    ``OSError``/``IOError``.
    """

    assert info_dict.get('_type', 'video') == 'video'

    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None:
        if self._num_downloads >= int(max_downloads):
            raise MaxDownloadsReached()

    # TODO: backward compatibility, to be removed
    info_dict['fulltitle'] = info_dict['title']

    if 'format' not in info_dict:
        info_dict['format'] = info_dict['ext']

    reason = self._match_entry(info_dict, incomplete=False)
    if reason is not None:
        self.to_screen('[download] ' + reason)
        return

    self._num_downloads += 1

    info_dict['_filename'] = filename = self.prepare_filename(info_dict)

    # Forced printings
    self.__forced_printings(info_dict, filename, incomplete=False)

    if self.params.get('simulate', False):
        if self.params.get('force_write_download_archive', False):
            self.record_download_archive(info_dict)

        # Do nothing else if in simulate mode
        return

    if filename is None:
        return

    def ensure_dir_exists(path):
        # Create the parent directory of path; report and return False on failure
        try:
            dn = os.path.dirname(path)
            if dn and not os.path.exists(dn):
                os.makedirs(dn)
            return True
        except (OSError, IOError) as err:
            self.report_error('unable to create directory ' + error_to_compat_str(err))
            return False

    if not ensure_dir_exists(sanitize_path(encodeFilename(filename))):
        return

    if self.params.get('writedescription', False):
        descfn = replace_extension(filename, 'description', info_dict.get('ext'))
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
            self.to_screen('[info] Video description is already present')
        elif info_dict.get('description') is None:
            self.report_warning('There\'s no description to write.')
        else:
            try:
                self.to_screen('[info] Writing video description to: ' + descfn)
                with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                    descfile.write(info_dict['description'])
            except (OSError, IOError):
                self.report_error('Cannot write description file ' + descfn)
                return

    if self.params.get('writeannotations', False):
        annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
            self.to_screen('[info] Video annotations are already present')
        elif not info_dict.get('annotations'):
            self.report_warning('There are no annotations to write.')
        else:
            try:
                self.to_screen('[info] Writing video annotations to: ' + annofn)
                with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                    annofile.write(info_dict['annotations'])
            except (KeyError, TypeError):
                self.report_warning('There are no annotations to write.')
            except (OSError, IOError):
                self.report_error('Cannot write annotations file: ' + annofn)
                return

    def dl(name, info, subtitle=False):
        # Run the downloader chosen for info and return whatever download() returns
        fd = get_suitable_downloader(info, self.params)(self, self.params)
        for ph in self._progress_hooks:
            fd.add_progress_hook(ph)
        if self.params.get('verbose'):
            self.to_screen('[debug] Invoking downloader on %r' % info.get('url'))
        return fd.download(name, info, subtitle)

    subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                   self.params.get('writeautomaticsub')])

    if subtitles_are_requested and info_dict.get('requested_subtitles'):
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        subtitles = info_dict['requested_subtitles']
        for sub_lang, sub_info in subtitles.items():
            sub_format = sub_info['ext']
            sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(sub_filename)):
                self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
            else:
                self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                if sub_info.get('data') is not None:
                    try:
                        # Use newline='' to prevent conversion of newline characters
                        # See https://github.com/ytdl-org/youtube-dl/issues/10268
                        with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
                            subfile.write(sub_info['data'])
                    except (OSError, IOError):
                        self.report_error('Cannot write subtitles file ' + sub_filename)
                        return
                else:
                    # A failed subtitle download is only a warning; carry on with the rest
                    try:
                        dl(sub_filename, sub_info, subtitle=True)
                    except (ExtractorError, IOError, OSError, ValueError, compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                        self.report_warning('Unable to download subtitle for "%s": %s' %
                                            (sub_lang, error_to_compat_str(err)))
                        continue

    if self.params.get('skip_download', False):
        if self.params.get('convertsubtitles', False):
            subconv = FFmpegSubtitlesConvertorPP(self, format=self.params.get('convertsubtitles'))
            filename_real_ext = os.path.splitext(filename)[1][1:]
            filename_wo_ext = (
                os.path.splitext(filename)[0]
                if filename_real_ext == info_dict['ext']
                else filename)
            afilename = '%s.%s' % (filename_wo_ext, self.params.get('convertsubtitles'))
            if subconv.available:
                info_dict.setdefault('__postprocessors', [])
                # info_dict['__postprocessors'].append(subconv)
            if os.path.exists(encodeFilename(afilename)):
                self.to_screen(
                    '[download] %s has already been downloaded and '
                    'converted' % afilename)
            else:
                try:
                    self.post_process(filename, info_dict)
                except (PostProcessingError) as err:
                    self.report_error('postprocessing: %s' % str(err))
                    return

    if self.params.get('writeinfojson', False):
        infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
            self.to_screen('[info] Video description metadata is already present')
        else:
            self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
            try:
                write_json_file(self.filter_requested_info(info_dict), infofn)
            except (OSError, IOError):
                self.report_error('Cannot write metadata to JSON file ' + infofn)
                return

    self._write_thumbnails(info_dict, filename)

    # Write internet shortcut files
    url_link = webloc_link = desktop_link = False
    if self.params.get('writelink', False):
        if sys.platform == "darwin":  # macOS.
            webloc_link = True
        elif sys.platform.startswith("linux"):
            desktop_link = True
        else:  # if sys.platform in ['win32', 'cygwin']:
            url_link = True
    if self.params.get('writeurllink', False):
        url_link = True
    if self.params.get('writewebloclink', False):
        webloc_link = True
    if self.params.get('writedesktoplink', False):
        desktop_link = True

    if url_link or webloc_link or desktop_link:
        if 'webpage_url' not in info_dict:
            self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
            return
        ascii_url = iri_to_uri(info_dict['webpage_url'])

    def _write_link_file(extension, template, newline, embed_filename):
        # Write one shortcut file; return False only when writing it failed
        linkfn = replace_extension(filename, extension, info_dict.get('ext'))
        # Same overwrite check as the other side files written by this method
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
            self.to_screen('[info] Internet shortcut is already present')
        else:
            try:
                self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
                with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
                    template_vars = {'url': ascii_url}
                    if embed_filename:
                        template_vars['filename'] = linkfn[:-(len(extension) + 1)]
                    linkfile.write(template % template_vars)
            except (OSError, IOError):
                self.report_error('Cannot write internet shortcut ' + linkfn)
                return False
        return True

    if url_link:
        if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
            return
    if webloc_link:
        if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
            return
    if desktop_link:
        if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
            return

    # Download
    must_record_download_archive = False
    if not self.params.get('skip_download', False):
        try:
            if info_dict.get('requested_formats') is not None:
                downloaded = []
                success = True
                merger = FFmpegMergerPP(self)
                if not merger.available:
                    postprocessors = []
                    self.report_warning('You have requested multiple '
                                        'formats but ffmpeg or avconv are not installed.'
                                        ' The formats won\'t be merged.')
                else:
                    postprocessors = [merger]

                def compatible_formats(formats):
                    # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
                    video_formats = [fmt for fmt in formats if fmt.get('vcodec') != 'none']
                    audio_formats = [fmt for fmt in formats if fmt.get('acodec') != 'none']
                    if len(video_formats) > 2 or len(audio_formats) > 2:
                        return False

                    # Check extension
                    exts = set(fmt.get('ext') for fmt in formats)
                    COMPATIBLE_EXTS = (
                        set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
                        set(('webm',)),
                    )
                    for ext_sets in COMPATIBLE_EXTS:
                        if ext_sets.issuperset(exts):
                            return True
                    # TODO: Check acodec/vcodec
                    return False

                filename_real_ext = os.path.splitext(filename)[1][1:]
                filename_wo_ext = (
                    os.path.splitext(filename)[0]
                    if filename_real_ext == info_dict['ext']
                    else filename)
                requested_formats = info_dict['requested_formats']
                if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
                    info_dict['ext'] = 'mkv'
                    self.report_warning(
                        'Requested formats are incompatible for merge and will be merged into mkv.')
                # Ensure filename always has a correct extension for successful merge
                filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
                file_exists = os.path.exists(encodeFilename(filename))
                if not self.params.get('overwrites', False) and file_exists:
                    self.to_screen(
                        '[download] %s has already been downloaded and '
                        'merged' % filename)
                else:
                    if file_exists:
                        self.report_file_delete(filename)
                        os.remove(encodeFilename(filename))
                    for f in requested_formats:
                        new_info = dict(info_dict)
                        new_info.update(f)
                        fname = prepend_extension(
                            self.prepare_filename(new_info),
                            'f%s' % f['format_id'], new_info['ext'])
                        if not ensure_dir_exists(fname):
                            return
                        downloaded.append(fname)
                        partial_success, real_download = dl(fname, new_info)
                        success = success and partial_success
                    info_dict['__postprocessors'] = postprocessors
                    info_dict['__files_to_merge'] = downloaded
                    # Even if there were no downloads, it is being merged only now
                    info_dict['__real_download'] = True
            else:
                # Delete existing file with --yes-overwrites
                if self.params.get('overwrites', False):
                    if os.path.exists(encodeFilename(filename)):
                        self.report_file_delete(filename)
                        os.remove(encodeFilename(filename))
                # Just a single file
                success, real_download = dl(filename, info_dict)
                info_dict['__real_download'] = real_download
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self.report_error('unable to download video data: %s' % error_to_compat_str(err))
            return
        except (OSError, IOError) as err:
            raise UnavailableVideoError(err)
        except (ContentTooShortError, ) as err:
            self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
            return

        if success and filename != '-':
            # Fixup content
            fixup_policy = self.params.get('fixup')
            if fixup_policy is None:
                fixup_policy = 'detect_or_warn'

            INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.'

            stretched_ratio = info_dict.get('stretched_ratio')
            if stretched_ratio is not None and stretched_ratio != 1:
                if fixup_policy == 'warn':
                    self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
                        info_dict['id'], stretched_ratio))
                elif fixup_policy == 'detect_or_warn':
                    stretched_pp = FFmpegFixupStretchedPP(self)
                    if stretched_pp.available:
                        info_dict.setdefault('__postprocessors', [])
                        info_dict['__postprocessors'].append(stretched_pp)
                    else:
                        self.report_warning(
                            '%s: Non-uniform pixel ratio (%s). %s'
                            % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
                else:
                    assert fixup_policy in ('ignore', 'never')

            if (info_dict.get('requested_formats') is None
                    and info_dict.get('container') == 'm4a_dash'):
                if fixup_policy == 'warn':
                    self.report_warning(
                        '%s: writing DASH m4a. '
                        'Only some players support this container.'
                        % info_dict['id'])
                elif fixup_policy == 'detect_or_warn':
                    fixup_pp = FFmpegFixupM4aPP(self)
                    if fixup_pp.available:
                        info_dict.setdefault('__postprocessors', [])
                        info_dict['__postprocessors'].append(fixup_pp)
                    else:
                        self.report_warning(
                            '%s: writing DASH m4a. '
                            'Only some players support this container. %s'
                            % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
                else:
                    assert fixup_policy in ('ignore', 'never')

            # NB: "and" binds tighter than "or" in this condition
            if (info_dict.get('protocol') == 'm3u8_native'
                    or info_dict.get('protocol') == 'm3u8'
                    and self.params.get('hls_prefer_native')):
                if fixup_policy == 'warn':
                    self.report_warning('%s: malformed AAC bitstream detected.' % (
                        info_dict['id']))
                elif fixup_policy == 'detect_or_warn':
                    fixup_pp = FFmpegFixupM3u8PP(self)
                    if fixup_pp.available:
                        info_dict.setdefault('__postprocessors', [])
                        info_dict['__postprocessors'].append(fixup_pp)
                    else:
                        self.report_warning(
                            '%s: malformed AAC bitstream detected. %s'
                            % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
                else:
                    assert fixup_policy in ('ignore', 'never')

            try:
                self.post_process(filename, info_dict)
            except (PostProcessingError) as err:
                self.report_error('postprocessing: %s' % str(err))
                return
            try:
                for ph in self._post_hooks:
                    ph(filename)
            except Exception as err:
                self.report_error('post hooks: %s' % str(err))
                return
            must_record_download_archive = True

    if must_record_download_archive or self.params.get('force_write_download_archive', False):
        self.record_download_archive(info_dict)
    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None and self._num_downloads >= int(max_downloads):
        raise MaxDownloadsReached()
8222d8de
JMF
2245
def download(self, url_list):
    """Download a given list of URLs.

    Raises SameFileError when more than one URL would be written to a
    fixed output name (a template that is not '-' and has no '%' field)
    unless 'max_downloads' is 1. MaxDownloadsReached is announced on
    screen and re-raised. Returns self._download_retcode.
    """
    template = self.params.get('outtmpl', DEFAULT_OUTTMPL)
    several_urls = len(url_list) > 1
    if several_urls and template != '-' and '%' not in template:
        if self.params.get('max_downloads') != 1:
            raise SameFileError(template)

    for url in url_list:
        try:
            # extract_info also downloads the videos
            result = self.extract_info(
                url,
                force_generic_extractor=self.params.get('force_generic_extractor', False))
        except UnavailableVideoError:
            self.report_error('unable to download video')
            continue
        except MaxDownloadsReached:
            self.to_screen('[info] Maximum number of downloaded files reached.')
            raise
        # Only reached when extraction succeeded
        if self.params.get('dump_single_json', False):
            self.to_stdout(json.dumps(result))

    return self._download_retcode
2270
def download_with_info_file(self, info_filename):
    """Process a previously saved info JSON file with downloading enabled.

    The file is read as UTF-8, parsed as JSON and passed through
    filter_requested_info before process_ie_result is called. If that
    raises DownloadError and the info carries a 'webpage_url', a warning
    is reported and the URL is downloaded instead; without a
    'webpage_url' the error is re-raised. Returns the download return code.
    """
    reader = fileinput.FileInput(
        [info_filename], mode='r',
        openhook=fileinput.hook_encoded('utf-8'))
    with contextlib.closing(reader) as lines:
        # FileInput doesn't have a read method, so json.load can't be used
        raw_json = '\n'.join(lines)
        info = self.filter_requested_info(json.loads(raw_json))
    try:
        self.process_ie_result(info, download=True)
    except DownloadError:
        fallback_url = info.get('webpage_url')
        if fallback_url is None:
            raise
        self.report_warning('The info failed to download, trying with "%s"' % fallback_url)
        return self.download([fallback_url])
    return self._download_retcode
1dcc4c0c 2287
cb202fd2
S
@staticmethod
def filter_requested_info(info_dict):
    """Return a shallow copy of info_dict without the
    'requested_formats' and 'requested_subtitles' keys."""
    excluded = ('requested_formats', 'requested_subtitles')
    cleaned = {}
    for key, value in info_dict.items():
        if key in excluded:
            continue
        cleaned[key] = value
    return cleaned
2293
8222d8de
JMF
def post_process(self, filename, ie_info):
    """Run all the postprocessors on the given file.

    Postprocessors listed under ie_info['__postprocessors'] run first,
    followed by those in self._pps. Each one receives a copy of ie_info
    with 'filepath' set to filename (as updated by the previous
    postprocessor). Files a postprocessor reports as obsolete are removed
    unless the 'keepvideo' param is set.
    """
    info = dict(ie_info)
    info['filepath'] = filename

    per_video_pps = ie_info.get('__postprocessors')
    chain = [] if per_video_pps is None else list(per_video_pps)
    chain += self._pps

    for pp in chain:
        obsolete = []
        try:
            obsolete, info = pp.run(info)
        except PostProcessingError as err:
            self.report_error(err.msg)
        if not obsolete or self.params.get('keepvideo', False):
            continue
        # set() so that a file reported twice is only removed once
        for stale_path in set(obsolete):
            self.to_screen('Deleting original file %s (pass -k to keep)' % stale_path)
            try:
                os.remove(encodeFilename(stale_path))
            except (IOError, OSError):
                self.report_warning('Unable to remove downloaded original file')
c1c9a79c 2315
5db07df6 2316 def _make_archive_id(self, info_dict):
e9fef7ee
S
2317 video_id = info_dict.get('id')
2318 if not video_id:
2319 return
5db07df6
PH
2320 # Future-proof against any change in case
2321 # and backwards compatibility with prior versions
e9fef7ee 2322 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
7012b23c 2323 if extractor is None:
1211bb6d
S
2324 url = str_or_none(info_dict.get('url'))
2325 if not url:
2326 return
e9fef7ee
S
2327 # Try to find matching extractor for the URL and take its ie_key
2328 for ie in self._ies:
1211bb6d 2329 if ie.suitable(url):
e9fef7ee
S
2330 extractor = ie.ie_key()
2331 break
2332 else:
2333 return
2334 return extractor.lower() + ' ' + video_id
5db07df6
PH
2335
def in_download_archive(self, info_dict):
    """Tell whether the video described by info_dict is in the archive.

    Returns False when the 'download_archive' param is unset or when no
    archive id can be built from info_dict.
    """
    if self.params.get('download_archive') is None:
        return False

    archive_id = self._make_archive_id(info_dict)
    # A missing id means the video information is incomplete
    return bool(archive_id) and archive_id in self.archive
c1c9a79c
PH
2346
def record_download_archive(self, info_dict):
    """Append the archive id of info_dict to the download archive.

    Does nothing when the 'download_archive' param is unset. Otherwise
    the id is written as one line to that file and added to self.archive.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return
    archive_id = self._make_archive_id(info_dict)
    # Callers are expected to pass info for which an id can be built
    assert archive_id
    with locked_file(archive_path, 'a', encoding='utf-8') as archive_file:
        archive_file.write(archive_id + '\n')
    self.archive.add(archive_id)
dd82ffea 2356
@staticmethod
def format_resolution(format, default='unknown'):
    """Return a short human-readable resolution string for a format dict.

    'audio only' when vcodec is 'none'; otherwise the explicit
    'resolution' if given, else 'WxH', 'Hp' or 'Wx?' depending on which
    of 'width'/'height' are known, else `default`.
    """
    if format.get('vcodec') == 'none':
        return 'audio only'

    explicit = format.get('resolution')
    if explicit is not None:
        return explicit

    width = format.get('width')
    height = format.get('height')
    if height is not None:
        if width is None:
            return '%sp' % height
        return '%sx%s' % (width, height)
    if width is not None:
        return '%dx?' % width
    return default
2373
c57f7757
PH
2374 def _format_note(self, fdict):
2375 res = ''
2376 if fdict.get('ext') in ['f4f', 'f4m']:
2377 res += '(unsupported) '
32f90364
PH
2378 if fdict.get('language'):
2379 if res:
2380 res += ' '
9016d76f 2381 res += '[%s] ' % fdict['language']
c57f7757
PH
2382 if fdict.get('format_note') is not None:
2383 res += fdict['format_note'] + ' '
2384 if fdict.get('tbr') is not None:
2385 res += '%4dk ' % fdict['tbr']
2386 if fdict.get('container') is not None:
2387 if res:
2388 res += ', '
2389 res += '%s container' % fdict['container']
3089bc74
S
2390 if (fdict.get('vcodec') is not None
2391 and fdict.get('vcodec') != 'none'):
c57f7757
PH
2392 if res:
2393 res += ', '
2394 res += fdict['vcodec']
91c7271a 2395 if fdict.get('vbr') is not None:
c57f7757
PH
2396 res += '@'
2397 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
2398 res += 'video@'
2399 if fdict.get('vbr') is not None:
2400 res += '%4dk' % fdict['vbr']
fbb21cf5 2401 if fdict.get('fps') is not None:
5d583bdf
S
2402 if res:
2403 res += ', '
2404 res += '%sfps' % fdict['fps']
c57f7757
PH
2405 if fdict.get('acodec') is not None:
2406 if res:
2407 res += ', '
2408 if fdict['acodec'] == 'none':
2409 res += 'video only'
2410 else:
2411 res += '%-5s' % fdict['acodec']
2412 elif fdict.get('abr') is not None:
2413 if res:
2414 res += ', '
2415 res += 'audio'
2416 if fdict.get('abr') is not None:
2417 res += '@%3dk' % fdict['abr']
2418 if fdict.get('asr') is not None:
2419 res += ' (%5dHz)' % fdict['asr']
2420 if fdict.get('filesize') is not None:
2421 if res:
2422 res += ', '
2423 res += format_bytes(fdict['filesize'])
9732d77e
PH
2424 elif fdict.get('filesize_approx') is not None:
2425 if res:
2426 res += ', '
2427 res += '~' + format_bytes(fdict['filesize_approx'])
c57f7757 2428 return res
91c7271a 2429
def _format_note_table(self, f):
    """Build the NOTE cell of the detailed format table for format f.

    Joins the non-empty pieces (unsupported marker, language, format
    note, container, sample rate) with ', '. The container is passed to
    format_field with the format's own 'ext' in its ignore list.
    """
    cells = [
        'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
        format_field(f, 'language', '[%s]'),
        format_field(f, 'format_note'),
        format_field(f, 'container', ignore=(None, f.get('ext'))),
        format_field(f, 'asr', '%5dHz'),
    ]
    return ', '.join([cell for cell in cells if cell != ''])
2440
def list_formats(self, info_dict):
    """Print a table of the formats available for info_dict.

    info_dict['formats'] is used when present; otherwise info_dict itself
    is treated as the only format. Formats whose 'preference' is below
    -1000 are left out. The 'listformats_table' param selects the
    detailed multi-column layout instead of the classic four columns.
    """
    formats = info_dict.get('formats', [info_dict])
    # Same visibility rule for both layouts, so evaluate it once
    listed = [
        f for f in formats
        if f.get('preference') is None or f['preference'] >= -1000]

    new_format = self.params.get('listformats_table', False)
    if new_format:
        table = [
            [
                format_field(f, 'format_id'),
                format_field(f, 'ext'),
                self.format_resolution(f),
                format_field(f, 'fps', '%d'),
                '|',
                format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
                format_field(f, 'tbr', '%4dk'),
                # 'protocol' may be absent or None; show an empty cell
                # rather than failing with AttributeError on None
                (f.get('protocol') or '').replace('http_dash_segments', 'dash').replace('native', 'n'),
                '|',
                format_field(f, 'vcodec', default='unknown').replace('none', ''),
                format_field(f, 'vbr', '%4dk'),
                format_field(f, 'acodec', default='unknown').replace('none', ''),
                format_field(f, 'abr', '%3dk'),
                format_field(f, 'asr', '%5dHz'),
                self._format_note_table(f)]
            for f in listed]
        header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', '  TBR', 'PROTO',
                       '|', 'VCODEC', '  VBR', 'ACODEC', ' ABR', ' ASR', 'NOTE']
    else:
        table = [
            [
                format_field(f, 'format_id'),
                format_field(f, 'ext'),
                self.format_resolution(f),
                self._format_note(f)]
            for f in listed]
        header_line = ['format code', 'extension', 'resolution', 'note']

    self.to_screen(
        '[info] Available formats for %s:\n%s' % (info_dict['id'], render_table(
            header_line,
            table,
            delim=new_format,
            extraGap=(0 if new_format else 1),
            hideEmpty=new_format)))
cfb56d1a
PH
2486
def list_thumbnails(self, info_dict):
    """Print a table (ID, width, height, URL) of the video's thumbnails.

    Prints a one-line notice instead when info_dict has no thumbnails.
    Unknown widths/heights are shown as 'unknown'.
    """
    video_id = info_dict['id']
    thumbnails = info_dict.get('thumbnails')
    if not thumbnails:
        self.to_screen('[info] No thumbnails present for %s' % video_id)
        return

    self.to_screen('[info] Thumbnails for %s:' % video_id)
    rows = []
    for thumb in thumbnails:
        rows.append([
            thumb['id'],
            thumb.get('width', 'unknown'),
            thumb.get('height', 'unknown'),
            thumb['url'],
        ])
    self.to_screen(render_table(['ID', 'width', 'height', 'URL'], rows))
dca08720 2498
def list_subtitles(self, video_id, subtitles, name='subtitles'):
    """Print the languages and formats available in `subtitles`.

    `subtitles` maps a language to a list of format dicts, each with an
    'ext' key; the extensions are listed in reverse order. `name` is the
    label used in the messages. Prints a notice when there are none.
    """
    if not subtitles:
        self.to_screen('%s has no %s' % (video_id, name))
        return

    self.to_screen('Available %s for %s:' % (name, video_id))
    rows = []
    for lang, formats in subtitles.items():
        extensions = [f['ext'] for f in reversed(formats)]
        rows.append([lang, ', '.join(extensions)])
    self.to_screen(render_table(['Language', 'formats'], rows))
a504ced0 2509
dca08720
PH
def urlopen(self, req):
    """ Start an HTTP download

    `req` may be a URL string, which is wrapped with sanitized_Request,
    or an already built request object. It is opened through
    self._opener using self._socket_timeout.
    """
    request = sanitized_Request(req) if isinstance(req, compat_basestring) else req
    return self._opener.open(request, timeout=self._socket_timeout)
dca08720
PH
2515
def print_debug_header(self):
    """Write the '[debug]' banner used in verbose mode.

    Does nothing unless the 'verbose' param is set. Otherwise it reports
    the encodings in use, the program version, the git HEAD of the source
    directory (best effort), the Python version and platform, versions of
    external executables and the proxy map. With the 'call_home' param it
    additionally fetches and prints the public IP address.
    """
    if not self.params.get('verbose'):
        return

    if type('') is not compat_str:
        # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
        self.report_warning(
            'Your Python is broken! Update to a newer and supported version')

    stdout_encoding = getattr(
        sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
    encoding_str = (
        '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
            locale.getpreferredencoding(),
            sys.getfilesystemencoding(),
            stdout_encoding,
            self.get_encoding()))
    write_string(encoding_str, encoding=None)

    self._write_string('[debug] youtube-dlc version ' + __version__ + '\n')
    if _LAZY_LOADER:
        self._write_string('[debug] Lazy loading extractors enabled' + '\n')
    try:
        sp = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out, _ = process_communicate_or_kill(sp)
        out = out.decode().strip()
        if re.match('[0-9a-f]+', out):
            self._write_string('[debug] Git HEAD: ' + out + '\n')
    except Exception:
        # The git lookup is purely informational: any failure (no git, not
        # a checkout, ...) is deliberately ignored.
        try:
            # Only exists on Python 2; fails harmlessly elsewhere
            sys.exc_clear()
        except Exception:
            pass

    def python_implementation():
        impl_name = platform.python_implementation()
        if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
            return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
        return impl_name

    self._write_string('[debug] Python version %s (%s) - %s\n' % (
        platform.python_version(), python_implementation(),
        platform_name()))

    exe_versions = FFmpegPostProcessor.get_versions(self)
    exe_versions['rtmpdump'] = rtmpdump_version()
    exe_versions['phantomjs'] = PhantomJSwrapper._version()
    # Only executables for which a version was found are listed
    exe_str = ', '.join(
        '%s %s' % (exe, v)
        for exe, v in sorted(exe_versions.items())
        if v
    )
    if not exe_str:
        exe_str = 'none'
    self._write_string('[debug] exe versions: %s\n' % exe_str)

    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

    if self.params.get('call_home', False):
        ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
        self._write_string('[debug] Public IP address: %s\n' % ipaddr)
        # The "latest version" check that used to follow here sat behind an
        # unconditional return and could never run, so it has been dropped.
        return
2592
def _setup_opener(self):
    """Build the URL opener used for all requests and store it on self.

    Sets self._socket_timeout (600 unless the 'socket_timeout' param is
    given), self.cookiejar (loaded from the 'cookiefile' param when that
    file is readable) and self._opener. Proxies come from the 'proxy'
    param, where '' disables proxying, or from the environment otherwise.
    """
    requested_timeout = self.params.get('socket_timeout')
    if requested_timeout is None:
        self._socket_timeout = 600
    else:
        self._socket_timeout = float(requested_timeout)

    cookie_path = self.params.get('cookiefile')
    proxy_url = self.params.get('proxy')

    if cookie_path is not None:
        cookie_path = expand_path(cookie_path)
        self.cookiejar = YoutubeDLCookieJar(cookie_path)
        if os.access(cookie_path, os.R_OK):
            self.cookiejar.load(ignore_discard=True, ignore_expires=True)
    else:
        self.cookiejar = compat_cookiejar.CookieJar()

    cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)

    if proxy_url is None:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    elif proxy_url == '':
        proxies = {}
    else:
        proxies = {'http': proxy_url, 'https': proxy_url}
    proxy_handler = PerRequestProxyHandler(proxies)

    traffic_debug = 1 if self.params.get('debug_printtraffic') else 0
    https_handler = make_HTTPS_handler(self.params, debuglevel=traffic_debug)
    ydlh = YoutubeDLHandler(self.params, debuglevel=traffic_debug)
    redirect_handler = YoutubeDLRedirectHandler()
    data_handler = compat_urllib_request_DataHandler()

    # When passing our own FileHandler instance, build_opener won't add the
    # default FileHandler and allows us to disable the file protocol, which
    # can be used for malicious purposes (see
    # https://github.com/ytdl-org/youtube-dl/issues/8227)
    file_handler = compat_urllib_request.FileHandler()

    def refuse_file_scheme(*args, **kwargs):
        raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dlc for security reasons')
    file_handler.file_open = refuse_file_scheme

    handlers = (
        proxy_handler, https_handler, cookie_processor, ydlh,
        redirect_handler, data_handler, file_handler)
    opener = compat_urllib_request.build_opener(*handlers)

    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener
62fec3b2
PH
2645
def encode(self, s):
    """Encode text `s` with the encoding from get_encoding().

    Bytes are returned unchanged. On UnicodeEncodeError a hint about the
    system encoding / --encoding option is appended to the error's
    reason before it is re-raised.
    """
    if not isinstance(s, bytes):
        try:
            return s.encode(self.get_encoding())
        except UnicodeEncodeError as err:
            err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
            raise
    # Already encoded
    return s
2655
def get_encoding(self):
    """Return the 'encoding' param, or preferredencoding() when it is None."""
    configured = self.params.get('encoding')
    return preferredencoding() if configured is None else configured
ec82d85a
PH
2661
2662 def _write_thumbnails(self, info_dict, filename):
2663 if self.params.get('writethumbnail', False):
2664 thumbnails = info_dict.get('thumbnails')
2665 if thumbnails:
2666 thumbnails = [thumbnails[-1]]
2667 elif self.params.get('write_all_thumbnails', False):
2668 thumbnails = info_dict.get('thumbnails')
2669 else:
2670 return
2671
2672 if not thumbnails:
2673 # No thumbnails present, so return immediately
2674 return
2675
2676 for t in thumbnails:
2677 thumb_ext = determine_ext(t['url'], 'jpg')
2678 suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
2679 thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
29f7c58a 2680 t['filename'] = thumb_filename = replace_extension(filename + suffix, thumb_ext, info_dict.get('ext'))
ec82d85a 2681
0c3d0f51 2682 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(thumb_filename)):
ec82d85a
PH
2683 self.to_screen('[%s] %s: Thumbnail %sis already present' %
2684 (info_dict['extractor'], info_dict['id'], thumb_display_id))
2685 else:
2686 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
2687 (info_dict['extractor'], info_dict['id'], thumb_display_id))
2688 try:
2689 uf = self.urlopen(t['url'])
d3d89c32 2690 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
ec82d85a
PH
2691 shutil.copyfileobj(uf, thumbf)
2692 self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2693 (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
2694 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2695 self.report_warning('Unable to download thumbnail "%s": %s' %
9b9c5355 2696 (t['url'], error_to_compat_str(err)))