#!/usr/bin/env python
# coding: utf-8

from __future__ import absolute_import, unicode_literals

import collections
import contextlib
import copy
import datetime
import errno
import fileinput
import io
import itertools
import json
import locale
import operator
import os
import platform
import re
import shutil
import subprocess
import socket
import sys
import time
import tokenize
import traceback
import random

from string import ascii_letters

from .compat import (
    compat_basestring,
    compat_cookiejar,
    compat_get_terminal_size,
    compat_http_client,
    compat_kwargs,
    compat_numeric_types,
    compat_os_name,
    compat_str,
    compat_tokenize_tokenize,
    compat_urllib_error,
    compat_urllib_request,
    compat_urllib_request_DataHandler,
)
from .utils import (
    age_restricted,
    args_to_str,
    ContentTooShortError,
    date_from_str,
    DateRange,
    DEFAULT_OUTTMPL,
    determine_ext,
    determine_protocol,
    DOT_DESKTOP_LINK_TEMPLATE,
    DOT_URL_LINK_TEMPLATE,
    DOT_WEBLOC_LINK_TEMPLATE,
    DownloadError,
    encode_compat_str,
    encodeFilename,
    error_to_compat_str,
    ExistingVideoReached,
    expand_path,
    ExtractorError,
    format_bytes,
    format_field,
    formatSeconds,
    GeoRestrictedError,
    int_or_none,
    iri_to_uri,
    ISO3166Utils,
    locked_file,
    make_HTTPS_handler,
    MaxDownloadsReached,
    orderedSet,
    PagedList,
    parse_filesize,
    PerRequestProxyHandler,
    platform_name,
    PostProcessingError,
    preferredencoding,
    prepend_extension,
    register_socks_protocols,
    render_table,
    replace_extension,
    RejectedVideoReached,
    SameFileError,
    sanitize_filename,
    sanitize_path,
    sanitize_url,
    sanitized_Request,
    std_headers,
    str_or_none,
    subtitles_filename,
    to_high_limit_path,
    UnavailableVideoError,
    url_basename,
    version_tuple,
    write_json_file,
    write_string,
    YoutubeDLCookieJar,
    YoutubeDLCookieProcessor,
    YoutubeDLHandler,
    YoutubeDLRedirectHandler,
    process_communicate_or_kill,
)
from .cache import Cache
from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
from .extractor.openload import PhantomJSwrapper
from .downloader import get_suitable_downloader
from .downloader.rtmp import rtmpdump_version
from .postprocessor import (
    FFmpegFixupM3u8PP,
    FFmpegFixupM4aPP,
    FFmpegFixupStretchedPP,
    FFmpegMergerPP,
    FFmpegPostProcessor,
    FFmpegSubtitlesConvertorPP,
    get_postprocessor,
)
from .version import __version__

if compat_os_name == 'nt':
    import ctypes


class YoutubeDL(object):
    """YoutubeDL class.

    YoutubeDL objects are responsible for downloading the actual video file
    and writing it to disk if the user has requested it, among some other
    tasks. In most cases there should be one per program. Given a video URL,
    the downloader doesn't know how to extract all the needed information
    (that is the task of the InfoExtractors), so it has to pass the URL to
    one of them.

    For this, YoutubeDL objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    YoutubeDL processes the extracted information, possibly using a File
    Downloader to download the video.

    YoutubeDL objects accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The YoutubeDL also
    registers itself as the downloader in charge of the InfoExtractors
    that are added to it, so this is a "mutual registration".

    Available options:

    username:          Username for authentication purposes.
    password:          Password for authentication purposes.
    videopassword:     Password for accessing a video.
    ap_mso:            Adobe Pass multiple-system operator identifier.
    ap_username:       Multiple-system operator account username.
    ap_password:       Multiple-system operator account password.
    usenetrc:          Use netrc for authentication instead.
    verbose:           Print additional info to stdout.
    quiet:             Do not print messages to stdout.
    no_warnings:       Do not print out anything for warnings.
    forceurl:          Force printing final URL.
    forcetitle:        Force printing title.
    forceid:           Force printing ID.
    forcethumbnail:    Force printing thumbnail URL.
    forcedescription:  Force printing description.
    forcefilename:     Force printing final filename.
    forceduration:     Force printing duration.
    forcejson:         Force printing info_dict as JSON.
    dump_single_json:  Force printing the info_dict of the whole playlist
                       (or video) as a single JSON line.
    force_write_download_archive: Force writing download archive regardless of
                       'skip_download' or 'simulate'.
    simulate:          Do not download the video files.
    format:            Video format code. See "FORMAT SELECTION" for more details.
    format_sort:       How to sort the video formats. See "Sorting Formats"
                       for more details.
    format_sort_force: Force the given format_sort. See "Sorting Formats"
                       for more details.
    allow_multiple_video_streams:   Allow multiple video streams to be merged
                       into a single file
    allow_multiple_audio_streams:   Allow multiple audio streams to be merged
                       into a single file
    outtmpl:           Template for output names.
    restrictfilenames: Do not allow "&" and spaces in file names.
    trim_file_name:    Limit length of filename (extension excluded).
    ignoreerrors:      Do not stop on download errors.
                       (Default True when running youtube-dlc,
                       but False when directly accessing the YoutubeDL class)
    force_generic_extractor: Force downloader to use the generic extractor
    overwrites:        Overwrite all video and metadata files if True,
                       overwrite only non-video files if None
                       and don't overwrite any file if False
    playliststart:     Playlist item to start at.
    playlistend:       Playlist item to end at.
    playlist_items:    Specific indices of playlist to download.
    playlistreverse:   Download playlist items in reverse order.
    playlistrandom:    Download playlist items in random order.
    matchtitle:        Download only matching titles.
    rejecttitle:       Reject downloads for matching titles.
    logger:            Log messages to a logging.Logger instance.
    logtostderr:       Log messages to stderr instead of stdout.
    writedescription:  Write the video description to a .description file
    writeinfojson:     Write the video metadata to a .info.json file
    writeannotations:  Write the video annotations to a .annotations.xml file
    writethumbnail:    Write the thumbnail image to a file
    write_all_thumbnails:  Write all thumbnail formats to files
    writelink:         Write an internet shortcut file, depending on the
                       current platform (.url/.webloc/.desktop)
    writeurllink:      Write a Windows internet shortcut file (.url)
    writewebloclink:   Write a macOS internet shortcut file (.webloc)
    writedesktoplink:  Write a Linux internet shortcut file (.desktop)
    writesubtitles:    Write the video subtitles to a file
    writeautomaticsub: Write the automatically generated subtitles to a file
    allsubtitles:      Downloads all the subtitles of the video
                       (requires writesubtitles or writeautomaticsub)
    listsubtitles:     Lists all available subtitles for the video
    subtitlesformat:   The format code for subtitles
    subtitleslangs:    List of languages of the subtitles to download
    keepvideo:         Keep the video file after post-processing
    daterange:         A DateRange object, download only if the upload_date is in the range.
    skip_download:     Skip the actual download of the video file
    cachedir:          Location of the cache files in the filesystem.
                       False to disable filesystem cache.
    noplaylist:        Download single video instead of a playlist if in doubt.
    age_limit:         An integer representing the user's age in years.
                       Unsuitable videos for the given age are skipped.
    min_views:         An integer representing the minimum view count the video
                       must have in order to not be skipped.
                       Videos without view count information are always
                       downloaded. None for no limit.
    max_views:         An integer representing the maximum view count.
                       Videos that are more popular than that are not
                       downloaded.
                       Videos without view count information are always
                       downloaded. None for no limit.
    download_archive:  File name of a file where all downloads are recorded.
                       Videos already present in the file are not downloaded
                       again.
    break_on_existing: Stop the download process after attempting to download a
                       file that is in the archive.
    break_on_reject:   Stop the download process when encountering a video that
                       has been filtered out.
    cookiefile:        File name where cookies should be read from and dumped to
    nocheckcertificate:Do not verify SSL certificates
    prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
                       At the moment, this is only supported by YouTube.
    proxy:             URL of the proxy server to use
    geo_verification_proxy:  URL of the proxy to use for IP address verification
                       on geo-restricted sites.
    socket_timeout:    Time to wait for unresponsive hosts, in seconds
    bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
    debug_printtraffic:Print out sent and received HTTP traffic
    include_ads:       Download ads as well
    default_search:    Prepend this string if an input url is not valid.
                       'auto' for elaborate guessing
    encoding:          Use this encoding instead of the system-specified.
    extract_flat:      Do not resolve URLs, return the immediate result.
                       Pass in 'in_playlist' to only show this behavior for
                       playlist items.
    postprocessors:    A list of dictionaries, each with an entry
                       * key:  The name of the postprocessor. See
                               youtube_dlc/postprocessor/__init__.py for a list.
                       as well as any further keyword arguments for the
                       postprocessor.
    post_hooks:        A list of functions that get called as the final step
                       for each video file, after all postprocessors have been
                       called. The filename will be passed as the only argument.
    progress_hooks:    A list of functions that get called on download
                       progress, with a dictionary with the entries
                       * status: One of "downloading", "error", or "finished".
                                 Check this first and ignore unknown values.

                       If status is one of "downloading", or "finished", the
                       following properties may also be present:
                       * filename: The final filename (always present)
                       * tmpfilename: The filename we're currently writing to
                       * downloaded_bytes: Bytes on disk
                       * total_bytes: Size of the whole file, None if unknown
                       * total_bytes_estimate: Guess of the eventual file size,
                                               None if unavailable.
                       * elapsed: The number of seconds since download started.
                       * eta: The estimated time in seconds, None if unknown
                       * speed: The download speed in bytes/second, None if
                                unknown
                       * fragment_index: The counter of the currently
                                         downloaded video fragment.
                       * fragment_count: The number of fragments (= individual
                                         files that will be merged)

                       Progress hooks are guaranteed to be called at least once
                       (with status "finished") if the download is successful.
    merge_output_format: Extension to use when merging formats.
    fixup:             Automatically correct known faults of the file.
                       One of:
                       - "never": do nothing
                       - "warn": only emit a warning
                       - "detect_or_warn": check whether we can do anything
                                           about it, warn otherwise (default)
    source_address:    Client-side IP address to bind to.
    call_home:         Boolean, true iff we are allowed to contact the
                       youtube-dlc servers for debugging.
    sleep_interval:    Number of seconds to sleep before each download when
                       used alone or a lower bound of a range for randomized
                       sleep before each download (minimum possible number
                       of seconds to sleep) when used along with
                       max_sleep_interval.
    max_sleep_interval:Upper bound of a range for randomized sleep before each
                       download (maximum possible number of seconds to sleep).
                       Must only be used along with sleep_interval.
                       Actual sleep time will be a random float from range
                       [sleep_interval; max_sleep_interval].
    listformats:       Print an overview of available video formats and exit.
    list_thumbnails:   Print a table of all thumbnails and exit.
    match_filter:      A function that gets called with the info_dict of
                       every video.
                       If it returns a message, the video is ignored.
                       If it returns None, the video is downloaded.
                       match_filter_func in utils.py is one example for this.
    no_color:          Do not emit color codes in output.
    geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
                       HTTP header
    geo_bypass_country:
                       Two-letter ISO 3166-2 country code that will be used for
                       explicit geographic restriction bypassing via faking
                       X-Forwarded-For HTTP header
    geo_bypass_ip_block:
                       IP range in CIDR notation that will be used similarly to
                       geo_bypass_country

    The following options determine which downloader is picked:
    external_downloader: Executable of the external downloader to call.
                       None or unset for standard (built-in) downloader.
    hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
                       if True, otherwise use ffmpeg/avconv if False, otherwise
                       use downloader suggested by extractor if None.

    The following parameters are not used by YoutubeDL itself, they are used by
    the downloader (see youtube_dlc/downloader/common.py):
    nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
    noresizebuffer, retries, continuedl, noprogress, consoletitle,
    xattr_set_filesize, external_downloader_args, hls_use_mpegts,
    http_chunk_size.

    The following options are used by the post processors:
    prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
                       otherwise prefer ffmpeg.
    ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
                       to the binary or its containing directory.
    postprocessor_args: A dictionary of postprocessor names (in lower case) and a list
                       of additional command-line arguments for the postprocessor.
                       Use 'default' as the name for arguments to be passed to all PP.

    The following options are used by the Youtube extractor:
    youtube_include_dash_manifest: If True (default), DASH manifests and related
                       data will be downloaded and processed by extractor.
                       You can reduce network I/O by disabling it if you don't
                       care about DASH.
    """

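    # A minimal usage sketch (illustrative only; it assumes the usual
    # download(url_list) entry point defined further down in this class, and
    # uses youtube-dl's customary test clip as the example URL):
    #
    #     with YoutubeDL({'format': 'bestvideo+bestaudio/best', 'quiet': True}) as ydl:
    #         ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
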
    _NUMERIC_FIELDS = set((
        'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
        'timestamp', 'upload_year', 'upload_month', 'upload_day',
        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
        'average_rating', 'comment_count', 'age_limit',
        'start_time', 'end_time',
        'chapter_number', 'season_number', 'episode_number',
        'track_number', 'disc_number', 'release_year',
        'playlist_index',
    ))

    params = None
    _ies = []
    _pps = []
    _download_retcode = None
    _num_downloads = None
    _playlist_level = 0
    _playlist_urls = set()
    _screen_file = None

    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options."""
        if params is None:
            params = {}
        self._ies = []
        self._ies_instances = {}
        self._pps = []
        self._post_hooks = []
        self._progress_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr
        self.params = {
            # Default parameters
            'nocheckcertificate': False,
        }
        self.params.update(params)
        self.cache = Cache(self)
        self.archive = set()

        def preload_download_archive(self):
            """Preload the archive, if any is specified"""
            fn = self.params.get('download_archive')
            if fn is None:
                return False
            try:
                with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                    for line in archive_file:
                        self.archive.add(line.strip())
            except IOError as ioe:
                if ioe.errno != errno.ENOENT:
                    raise
                return False
            return True

        def check_deprecated(param, option, suggestion):
            if self.params.get(param) is not None:
                self.report_warning(
                    '%s is deprecated. Use %s instead.' % (option, suggestion))
                return True
            return False

        if self.params.get('verbose'):
            self.to_stdout('[debug] Loading archive file %r' % self.params.get('download_archive'))

        preload_download_archive(self)

        if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N is the number of digits')
        check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
        check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')

        if params.get('bidi_workaround', False):
            try:
                import pty
                master, slave = pty.openpty()
                width = compat_get_terminal_size().columns
                if width is None:
                    width_args = []
                else:
                    width_args = ['-w', str(width)]
                sp_kwargs = dict(
                    stdin=subprocess.PIPE,
                    stdout=slave,
                    stderr=self._err_file)
                try:
                    self._output_process = subprocess.Popen(
                        ['bidiv'] + width_args, **sp_kwargs
                    )
                except OSError:
                    self._output_process = subprocess.Popen(
                        ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == errno.ENOENT:
                    self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround. Make sure that fribidi is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        if (sys.platform != 'win32'
                and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not params.get('restrictfilenames', False)):
            # Unicode filesystem API will throw errors (#1474, #13027)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        if isinstance(params.get('outtmpl'), bytes):
            self.report_warning(
                'Parameter outtmpl is bytes, but should be a unicode string. '
                'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')

        self._setup_opener()

        if auto_init:
            self.print_debug_header()
            self.add_default_info_extractors()

        for pp_def_raw in self.params.get('postprocessors', []):
            pp_class = get_postprocessor(pp_def_raw['key'])
            pp_def = dict(pp_def_raw)
            del pp_def['key']
            pp = pp_class(self, **compat_kwargs(pp_def))
            self.add_post_processor(pp)

        for ph in self.params.get('post_hooks', []):
            self.add_post_hook(ph)

        for ph in self.params.get('progress_hooks', []):
            self.add_progress_hook(ph)

        register_socks_protocols()

    def warn_if_short_id(self, argv):
        # short YouTube ID starting with dash?
        idxs = [
            i for i, a in enumerate(argv)
            if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
        if idxs:
            correct_argv = (
                ['youtube-dlc']
                + [a for i, a in enumerate(argv) if i not in idxs]
                + ['--'] + [argv[i] for i in idxs]
            )
            self.report_warning(
                'Long argument string detected. '
                'Use -- to separate parameters and URLs, like this:\n%s\n' %
                args_to_str(correct_argv))

    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        self._ies.append(ie)
        if not isinstance(ie, type):
            self._ies_instances[ie.ie_key()] = ie
            ie.set_downloader(self)

    def get_info_extractor(self, ie_key):
        """
        Get an instance of an IE with name ie_key, it will try to get one from
        the _ies list, if there's no instance it will create a new one and add
        it to the extractor list.
        """
        ie = self._ies_instances.get(ie_key)
        if ie is None:
            ie = get_info_extractor(ie_key)()
            self.add_info_extractor(ie)
        return ie

    def add_default_info_extractors(self):
        """
        Add the InfoExtractors returned by gen_extractors to the end of the list
        """
        for ie in gen_extractor_classes():
            self.add_info_extractor(ie)

    def add_post_processor(self, pp):
        """Add a PostProcessor object to the end of the chain."""
        self._pps.append(pp)
        pp.set_downloader(self)

    def add_post_hook(self, ph):
        """Add the post hook"""
        self._post_hooks.append(ph)

    def add_progress_hook(self, ph):
        """Add the progress hook (currently only for the file downloader)"""
        self._progress_hooks.append(ph)

    def _bidi_workaround(self, message):
        if not hasattr(self, '_output_channel'):
            return message

        assert hasattr(self, '_output_process')
        assert isinstance(message, compat_str)
        line_count = message.count('\n') + 1
        self._output_process.stdin.write((message + '\n').encode('utf-8'))
        self._output_process.stdin.flush()
        res = ''.join(self._output_channel.readline().decode('utf-8')
                      for _ in range(line_count))
        return res[:-len('\n')]

    def to_screen(self, message, skip_eol=False):
        """Print message to stdout if not in quiet mode."""
        return self.to_stdout(message, skip_eol, check_quiet=True)

    def _write_string(self, s, out=None):
        write_string(s, out=out, encoding=self.params.get('encoding'))

    def to_stdout(self, message, skip_eol=False, check_quiet=False):
        """Print message to stdout if not in quiet mode."""
        if self.params.get('logger'):
            self.params['logger'].debug(message)
        elif not check_quiet or not self.params.get('quiet', False):
            message = self._bidi_workaround(message)
            terminator = ['\n', ''][skip_eol]
            output = message + terminator

            self._write_string(output, self._screen_file)

    def to_stderr(self, message):
        """Print message to stderr."""
        assert isinstance(message, compat_str)
        if self.params.get('logger'):
            self.params['logger'].error(message)
        else:
            message = self._bidi_workaround(message)
            output = message + '\n'
            self._write_string(output, self._err_file)

    def to_console_title(self, message):
        if not self.params.get('consoletitle', False):
            return
        if compat_os_name == 'nt':
            if ctypes.windll.kernel32.GetConsoleWindow():
                # c_wchar_p() might not be necessary if `message` is
                # already of type unicode()
                ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        elif 'TERM' in os.environ:
            self._write_string('\033[0;%s\007' % message, self._screen_file)

    def save_console_title(self):
        if not self.params.get('consoletitle', False):
            return
        if self.params.get('simulate', False):
            return
        if compat_os_name != 'nt' and 'TERM' in os.environ:
            # Save the title on stack
            self._write_string('\033[22;0t', self._screen_file)

    def restore_console_title(self):
        if not self.params.get('consoletitle', False):
            return
        if self.params.get('simulate', False):
            return
        if compat_os_name != 'nt' and 'TERM' in os.environ:
            # Restore the title from stack
            self._write_string('\033[23;0t', self._screen_file)

    def __enter__(self):
        self.save_console_title()
        return self

    def __exit__(self, *args):
        self.restore_console_title()

        if self.params.get('cookiefile') is not None:
            self.cookiejar.save(ignore_discard=True, ignore_expires=True)

    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += encode_compat_str(traceback.format_exc())
                else:
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            self.to_stderr(tb)
        if not self.params.get('ignoreerrors', False):
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1

    def report_warning(self, message):
        '''
        Print the message to stderr, it will be prefixed with 'WARNING:'
        If stderr is a tty file the 'WARNING:' will be colored
        '''
        if self.params.get('logger') is not None:
            self.params['logger'].warning(message)
        else:
            if self.params.get('no_warnings'):
                return
            if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
                _msg_header = '\033[0;33mWARNING:\033[0m'
            else:
                _msg_header = 'WARNING:'
            warning_message = '%s %s' % (_msg_header, message)
            self.to_stderr(warning_message)

    def report_error(self, message, tb=None):
        '''
        Do the same as trouble, but prefixes the message with 'ERROR:', colored
        in red if stderr is a tty file.
        '''
        if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
            _msg_header = '\033[0;31mERROR:\033[0m'
        else:
            _msg_header = 'ERROR:'
        error_message = '%s %s' % (_msg_header, message)
        self.trouble(error_message, tb)

    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        try:
            self.to_screen('[download] %s has already been downloaded' % file_name)
        except UnicodeEncodeError:
            self.to_screen('[download] The file has already been downloaded')

    def report_file_delete(self, file_name):
        """Report that existing file will be deleted."""
        try:
            self.to_screen('Deleting already existent file %s' % file_name)
        except UnicodeEncodeError:
            self.to_screen('Deleting already existent file')

    def prepare_filename(self, info_dict):
        """Generate the output filename."""
        try:
            template_dict = dict(info_dict)

            template_dict['epoch'] = int(time.time())
            autonumber_size = self.params.get('autonumber_size')
            if autonumber_size is None:
                autonumber_size = 5
            template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
            if template_dict.get('resolution') is None:
                if template_dict.get('width') and template_dict.get('height'):
                    template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
                elif template_dict.get('height'):
                    template_dict['resolution'] = '%sp' % template_dict['height']
                elif template_dict.get('width'):
                    template_dict['resolution'] = '%dx?' % template_dict['width']

            sanitize = lambda k, v: sanitize_filename(
                compat_str(v),
                restricted=self.params.get('restrictfilenames'),
                is_id=(k == 'id' or k.endswith('_id')))
            template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
                                 for k, v in template_dict.items()
                                 if v is not None and not isinstance(v, (list, tuple, dict)))
            template_dict = collections.defaultdict(lambda: 'NA', template_dict)

            outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)

            # For fields playlist_index and autonumber convert all occurrences
            # of %(field)s to %(field)0Nd for backward compatibility
            field_size_compat_map = {
                'playlist_index': len(str(template_dict['n_entries'])),
                'autonumber': autonumber_size,
            }
            FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
            mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl)
            if mobj:
                outtmpl = re.sub(
                    FIELD_SIZE_COMPAT_RE,
                    r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
                    outtmpl)

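            # Illustrative effect of the rewrite above: with, say, 120 playlist
            # entries, '%(playlist_index)s' in the template becomes
            # '%(playlist_index)03d', padding the index to the playlist's width.
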
            # Missing numeric fields used together with integer presentation types
            # in format specification will break the argument substitution since
            # string 'NA' is returned for missing fields. We will patch output
            # template for missing fields to meet string presentation type.
            for numeric_field in self._NUMERIC_FIELDS:
                if numeric_field not in template_dict:
                    # As of [1] format syntax is:
                    #  %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
                    # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
                    FORMAT_RE = r'''(?x)
                        (?<!%)
                        %
                        \({0}\)  # mapping key
                        (?:[#0\-+ ]+)?  # conversion flags (optional)
                        (?:\d+)?  # minimum field width (optional)
                        (?:\.\d+)?  # precision (optional)
                        [hlL]?  # length modifier (optional)
                        [diouxXeEfFgGcrs%]  # conversion type
                    '''
                    outtmpl = re.sub(
                        FORMAT_RE.format(numeric_field),
                        r'%({0})s'.format(numeric_field), outtmpl)

            # expand_path translates '%%' into '%' and '$$' into '$'
            # correspondingly that is not what we want since we need to keep
            # '%%' intact for template dict substitution step. Working around
            # with boundary-alike separator hack.
            sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
            outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))

            # outtmpl should be expand_path'ed before template dict substitution
            # because meta fields may contain env variables we don't want to
            # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
            # title "Hello $PATH", we don't want `$PATH` to be expanded.
            filename = expand_path(outtmpl).replace(sep, '') % template_dict

            # https://github.com/blackjack4494/youtube-dlc/issues/85
            trim_file_name = self.params.get('trim_file_name', False)
            if trim_file_name:
                fn_groups = filename.rsplit('.')
                ext = fn_groups[-1]
                sub_ext = ''
                if len(fn_groups) > 2:
                    sub_ext = fn_groups[-2]
                filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))

            # Temporary fix for #4787
            # 'Treat' all problem characters by passing filename through preferredencoding
            # to workaround encoding issues with subprocess on python2 @ Windows
            if sys.version_info < (3, 0) and sys.platform == 'win32':
                filename = encodeFilename(filename, True).decode(preferredencoding())
            return sanitize_path(filename)
        except ValueError as err:
            self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
            return None

    def _match_entry(self, info_dict, incomplete):
        """ Returns None if the file should be downloaded """

        def check_filter():
            video_title = info_dict.get('title', info_dict.get('id', 'video'))
            if 'title' in info_dict:
                # This can happen when we're just evaluating the playlist
                title = info_dict['title']
                matchtitle = self.params.get('matchtitle', False)
                if matchtitle:
                    if not re.search(matchtitle, title, re.IGNORECASE):
                        return '"' + title + '" title did not match pattern "' + matchtitle + '"'
                rejecttitle = self.params.get('rejecttitle', False)
                if rejecttitle:
                    if re.search(rejecttitle, title, re.IGNORECASE):
                        return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
            date = info_dict.get('upload_date')
            if date is not None:
                dateRange = self.params.get('daterange', DateRange())
                if date not in dateRange:
                    return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
            view_count = info_dict.get('view_count')
            if view_count is not None:
                min_views = self.params.get('min_views')
                if min_views is not None and view_count < min_views:
                    return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
                max_views = self.params.get('max_views')
                if max_views is not None and view_count > max_views:
                    return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
            if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
                return 'Skipping "%s" because it is age restricted' % video_title
            if self.in_download_archive(info_dict):
                return '%s has already been recorded in archive' % video_title

            if not incomplete:
                match_filter = self.params.get('match_filter')
                if match_filter is not None:
                    ret = match_filter(info_dict)
                    if ret is not None:
                        return ret
            return None

        reason = check_filter()
        if reason is not None:
            self.to_screen('[download] ' + reason)
            if reason.endswith('has already been recorded in archive') and self.params.get('break_on_existing', False):
                raise ExistingVideoReached()
            elif self.params.get('break_on_reject', False):
                raise RejectedVideoReached()
        return reason

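    # Illustrative match_filter (see the 'match_filter' option in the class
    # docstring): a callable such as
    #     lambda info: 'too long' if (info.get('duration') or 0) > 3600 else None
    # would make _match_entry skip videos longer than an hour.
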
    @staticmethod
    def add_extra_info(info_dict, extra_info):
        '''Set the keys from extra_info in info dict if they are missing'''
        for key, value in extra_info.items():
            info_dict.setdefault(key, value)

    def extract_info(self, url, download=True, ie_key=None, info_dict=None, extra_info={},
                     process=True, force_generic_extractor=False):
        '''
        Returns a list with a dictionary for each video we find.
        If 'download', also downloads the videos.
        extra_info is a dict containing the extra values to add to each result
        '''

        if not ie_key and force_generic_extractor:
            ie_key = 'Generic'

        if ie_key:
            ies = [self.get_info_extractor(ie_key)]
        else:
            ies = self._ies

        for ie in ies:
            if not ie.suitable(url):
                continue

            ie_key = ie.ie_key()
            ie = self.get_info_extractor(ie_key)
            if not ie.working():
                self.report_warning('The program functionality for this site has been marked as broken, '
                                    'and will probably not work.')

            try:
                temp_id = ie.extract_id(url) if callable(getattr(ie, 'extract_id', None)) else ie._match_id(url)
            except (AssertionError, IndexError, AttributeError):
                temp_id = None
            if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
                self.to_screen("[%s] %s: has already been recorded in archive" % (
                    ie_key, temp_id))
                break

            return self.__extract_info(url, ie, download, extra_info, process, info_dict)

        else:
            self.report_error('no suitable InfoExtractor for URL %s' % url)

    def __handle_extraction_exceptions(func):
        def wrapper(self, *args, **kwargs):
            try:
                return func(self, *args, **kwargs)
            except GeoRestrictedError as e:
                msg = e.msg
                if e.countries:
                    msg += '\nThis video is available in %s.' % ', '.join(
                        map(ISO3166Utils.short2full, e.countries))
                msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
                self.report_error(msg)
            except ExtractorError as e:  # An error we somewhat expected
                self.report_error(compat_str(e), e.format_traceback())
            except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached):
                raise
            except Exception as e:
                if self.params.get('ignoreerrors', False):
                    self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
                else:
                    raise
        return wrapper

    @__handle_extraction_exceptions
    def __extract_info(self, url, ie, download, extra_info, process, info_dict):
        ie_result = ie.extract(url)
        if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
            return
        if isinstance(ie_result, list):
            # Backwards compatibility: old IE result format
            ie_result = {
                '_type': 'compat_list',
                'entries': ie_result,
            }
        if info_dict:
            if info_dict.get('id'):
                ie_result['id'] = info_dict['id']
            if info_dict.get('title'):
                ie_result['title'] = info_dict['title']
        self.add_default_extra_info(ie_result, ie, url)
        if process:
            return self.process_ie_result(ie_result, download, extra_info)
        else:
            return ie_result

    def add_default_extra_info(self, ie_result, ie, url):
        self.add_extra_info(ie_result, {
            'extractor': ie.IE_NAME,
            'webpage_url': url,
            'duration_string': (
                formatSeconds(ie_result['duration'], '-')
                if ie_result.get('duration', None) is not None
                else None),
            'webpage_url_basename': url_basename(url),
            'extractor_key': ie.ie_key(),
        })

    def process_ie_result(self, ie_result, download=True, extra_info={}):
        """
        Take the result of the ie (may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download'.
        Returns the resolved ie_result.
        """
        result_type = ie_result.get('_type', 'video')

        if result_type in ('url', 'url_transparent'):
            ie_result['url'] = sanitize_url(ie_result['url'])
            extract_flat = self.params.get('extract_flat', False)
            if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
                    or extract_flat is True):
                self.__forced_printings(
                    ie_result, self.prepare_filename(ie_result),
                    incomplete=True)
                return ie_result

        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            return self.process_video_result(ie_result, download=download)
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            return self.extract_info(ie_result['url'],
                                     download, info_dict=ie_result,
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
        elif result_type == 'url_transparent':
            # Use the information from the embedding page
            info = self.extract_info(
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)

            # extract_info may return None when ignoreerrors is enabled and
            # extraction failed with an error, don't crash and return early
            # in this case
            if not info:
                return info

            force_properties = dict(
                (k, v) for k, v in ie_result.items() if v is not None)
            for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
                if f in force_properties:
                    del force_properties[f]
            new_result = info.copy()
            new_result.update(force_properties)

            # Extracted info may not be a video result (i.e.
            # info.get('_type', 'video') != video) but rather an url or
            # url_transparent. In such cases outer metadata (from ie_result)
            # should be propagated to inner one (info). For this to happen
            # _type of info should be overridden with url_transparent. This
            # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
            if new_result.get('_type') == 'url':
                new_result['_type'] = 'url_transparent'

            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type in ('playlist', 'multi_video'):
            # Protect from infinite recursion due to recursively nested playlists
            # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
            webpage_url = ie_result['webpage_url']
            if webpage_url in self._playlist_urls:
                self.to_screen(
                    '[download] Skipping already downloaded playlist: %s'
                    % (ie_result.get('title') or ie_result.get('id')))
                return

            self._playlist_level += 1
            self._playlist_urls.add(webpage_url)
            try:
                return self.__process_playlist(ie_result, download)
            finally:
                self._playlist_level -= 1
                if not self._playlist_level:
                    self._playlist_urls.clear()
        elif result_type == 'compat_list':
            self.report_warning(
                'Extractor %s returned a compat_list result. '
                'It needs to be updated.' % ie_result.get('extractor'))

            def _fixup(r):
                self.add_extra_info(
                    r,
                    {
                        'extractor': ie_result['extractor'],
                        'webpage_url': ie_result['webpage_url'],
                        'webpage_url_basename': url_basename(ie_result['webpage_url']),
                        'extractor_key': ie_result['extractor_key'],
                    }
                )
                return r
            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
            ]
            return ie_result
        else:
            raise Exception('Invalid result type: %s' % result_type)

    def __process_playlist(self, ie_result, download):
        # We process each entry in the playlist
        playlist = ie_result.get('title') or ie_result.get('id')
        self.to_screen('[download] Downloading playlist: %s' % playlist)

        playlist_results = []

        playliststart = self.params.get('playliststart', 1) - 1
        playlistend = self.params.get('playlistend')
        # For backwards compatibility, interpret -1 as whole list
        if playlistend == -1:
            playlistend = None

        playlistitems_str = self.params.get('playlist_items')
        playlistitems = None
        if playlistitems_str is not None:
            def iter_playlistitems(format):
                for string_segment in format.split(','):
                    if '-' in string_segment:
                        start, end = string_segment.split('-')
                        for item in range(int(start), int(end) + 1):
                            yield int(item)
                    else:
                        yield int(string_segment)
            playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
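            # Illustrative: a playlist_items value of '1-3,7' yields [1, 2, 3, 7]
            # here (ranges are inclusive; orderedSet drops duplicates).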

        ie_entries = ie_result['entries']

        def make_playlistitems_entries(list_ie_entries):
            num_entries = len(list_ie_entries)
            return [
                list_ie_entries[i - 1] for i in playlistitems
                if -num_entries <= i - 1 < num_entries]

        def report_download(num_entries):
            self.to_screen(
                '[%s] playlist %s: Downloading %d videos' %
                (ie_result['extractor'], playlist, num_entries))

        if isinstance(ie_entries, list):
            n_all_entries = len(ie_entries)
            if playlistitems:
                entries = make_playlistitems_entries(ie_entries)
            else:
                entries = ie_entries[playliststart:playlistend]
            n_entries = len(entries)
            self.to_screen(
                '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
                (ie_result['extractor'], playlist, n_all_entries, n_entries))
        elif isinstance(ie_entries, PagedList):
            if playlistitems:
                entries = []
                for item in playlistitems:
                    entries.extend(ie_entries.getslice(
                        item - 1, item
                    ))
            else:
                entries = ie_entries.getslice(
                    playliststart, playlistend)
            n_entries = len(entries)
            report_download(n_entries)
        else:  # iterable
            if playlistitems:
                entries = make_playlistitems_entries(list(itertools.islice(
                    ie_entries, 0, max(playlistitems))))
            else:
                entries = list(itertools.islice(
                    ie_entries, playliststart, playlistend))
            n_entries = len(entries)
            report_download(n_entries)

        if self.params.get('playlistreverse', False):
            entries = entries[::-1]

        if self.params.get('playlistrandom', False):
            random.shuffle(entries)

        x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

        for i, entry in enumerate(entries, 1):
            self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
            # This __x_forwarded_for_ip thing is a bit ugly but requires
            # minimal changes
            if x_forwarded_for:
                entry['__x_forwarded_for_ip'] = x_forwarded_for
            extra = {
                'n_entries': n_entries,
                'playlist': playlist,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_uploader': ie_result.get('uploader'),
                'playlist_uploader_id': ie_result.get('uploader_id'),
                'playlist_index': playlistitems[i - 1] if playlistitems else i + playliststart,
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            }

            if self._match_entry(entry, incomplete=True) is not None:
                continue

            entry_result = self.__process_iterable_entry(entry, download, extra)
            # TODO: skip failed (empty) entries?
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results
        self.to_screen('[download] Finished downloading playlist: %s' % playlist)
        return ie_result

    @__handle_extraction_exceptions
    def __process_iterable_entry(self, entry, download, extra_info):
        return self.process_ie_result(
            entry, download=download, extra_info=extra_info)

    def _build_format_filter(self, filter_spec):
        " Returns a function to filter the formats according to the filter_spec "

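        # Illustrative filter_spec values accepted below: numeric comparisons
        # such as 'height<=720' or 'filesize>100M', and string comparisons such
        # as 'ext=mp4' or 'format_id!*=dash' (negation via '!'; a trailing '?'
        # also keeps formats where the field is missing).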
        OPERATORS = {
            '<': operator.lt,
            '<=': operator.le,
            '>': operator.gt,
            '>=': operator.ge,
            '=': operator.eq,
            '!=': operator.ne,
        }
        operator_rex = re.compile(r'''(?x)\s*
            (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
            \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
            (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
            $
            ''' % '|'.join(map(re.escape, OPERATORS.keys())))
        m = operator_rex.search(filter_spec)
        if m:
            try:
                comparison_value = int(m.group('value'))
            except ValueError:
                comparison_value = parse_filesize(m.group('value'))
                if comparison_value is None:
                    comparison_value = parse_filesize(m.group('value') + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid value %r in format specification %r' % (
                            m.group('value'), filter_spec))
            op = OPERATORS[m.group('op')]

        if not m:
            STR_OPERATORS = {
                '=': operator.eq,
                '^=': lambda attr, value: attr.startswith(value),
                '$=': lambda attr, value: attr.endswith(value),
                '*=': lambda attr, value: value in attr,
            }
            str_operator_rex = re.compile(r'''(?x)
                \s*(?P<key>[a-zA-Z0-9._-]+)
                \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
                \s*(?P<value>[a-zA-Z0-9._-]+)
                \s*$
                ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
            m = str_operator_rex.search(filter_spec)
            if m:
                comparison_value = m.group('value')
                str_op = STR_OPERATORS[m.group('op')]
                if m.group('negation'):
                    op = lambda attr, value: not str_op(attr, value)
                else:
                    op = str_op

        if not m:
            raise ValueError('Invalid filter specification %r' % filter_spec)

        def _filter(f):
            actual_value = f.get(m.group('key'))
            if actual_value is None:
                return m.group('none_inclusive')
            return op(actual_value, comparison_value)
        return _filter

    def _default_format_spec(self, info_dict, download=True):

        def can_merge():
            merger = FFmpegMergerPP(self)
            return merger.available and merger.can_merge()

        prefer_best = (
            not self.params.get('simulate', False)
            and download
            and (
                not can_merge()
                or info_dict.get('is_live', False)
                or self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-'))

        return (
            'best/bestvideo+bestaudio'
            if prefer_best
            else 'bestvideo*+bestaudio/best'
            if not self.params.get('allow_multiple_audio_streams', False)
            else 'bestvideo+bestaudio/best')

    def build_format_selector(self, format_spec):
        def syntax_error(note, start):
            message = (
                'Invalid format specification: '
                '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
            return SyntaxError(message)

        PICKFIRST = 'PICKFIRST'
        MERGE = 'MERGE'
        SINGLE = 'SINGLE'
        GROUP = 'GROUP'
        FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

        allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
                                  'video': self.params.get('allow_multiple_video_streams', False)}

        def _parse_filter(tokens):
            filter_parts = []
            for type, string, start, _, _ in tokens:
                if type == tokenize.OP and string == ']':
                    return ''.join(filter_parts)
                else:
                    filter_parts.append(string)

        def _remove_unused_ops(tokens):
            # Remove operators that we don't use and join them with the surrounding strings
            # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
            ALLOWED_OPS = ('/', '+', ',', '(', ')')
            last_string, last_start, last_end, last_line = None, None, None, None
            for type, string, start, end, line in tokens:
                if type == tokenize.OP and string == '[':
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                    # everything inside brackets will be handled by _parse_filter
                    for type, string, start, end, line in tokens:
                        yield type, string, start, end, line
                        if type == tokenize.OP and string == ']':
                            break
                elif type == tokenize.OP and string in ALLOWED_OPS:
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                    if not last_string:
                        last_string = string
                        last_start = start
                        last_end = end
                    else:
                        last_string += string
            if last_string:
                yield tokenize.NAME, last_string, last_start, last_end, last_line

        def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
            selectors = []
            current_selector = None
            for type, string, start, _, _ in tokens:
                # ENCODING is only defined in python 3.x
                if type == getattr(tokenize, 'ENCODING', None):
                    continue
                elif type in [tokenize.NAME, tokenize.NUMBER]:
                    current_selector = FormatSelector(SINGLE, string, [])
                elif type == tokenize.OP:
                    if string == ')':
                        if not inside_group:
                            # ')' will be handled by the parentheses group
                            tokens.restore_last_token()
                        break
                    elif inside_merge and string in ['/', ',']:
                        tokens.restore_last_token()
                        break
                    elif inside_choice and string == ',':
                        tokens.restore_last_token()
                        break
                    elif string == ',':
                        if not current_selector:
                            raise syntax_error('"," must follow a format selector', start)
                        selectors.append(current_selector)
                        current_selector = None
                    elif string == '/':
                        if not current_selector:
                            raise syntax_error('"/" must follow a format selector', start)
                        first_choice = current_selector
                        second_choice = _parse_format_selection(tokens, inside_choice=True)
                        current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                    elif string == '[':
                        if not current_selector:
                            current_selector = FormatSelector(SINGLE, 'best', [])
                        format_filter = _parse_filter(tokens)
                        current_selector.filters.append(format_filter)
                    elif string == '(':
                        if current_selector:
                            raise syntax_error('Unexpected "("', start)
                        group = _parse_format_selection(tokens, inside_group=True)
                        current_selector = FormatSelector(GROUP, group, [])
                    elif string == '+':
                        if not current_selector:
                            raise syntax_error('Unexpected "+"', start)
                        selector_1 = current_selector
                        selector_2 = _parse_format_selection(tokens, inside_merge=True)
                        if not selector_2:
                            raise syntax_error('Expected a selector', start)
                        current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                    else:
                        raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
                elif type == tokenize.ENDMARKER:
                    break
            if current_selector:
                selectors.append(current_selector)
            return selectors

        def _build_selector_function(selector):
            if isinstance(selector, list):  # ,
                fs = [_build_selector_function(s) for s in selector]

                def selector_function(ctx):
                    for f in fs:
                        for format in f(ctx):
                            yield format
                return selector_function

            elif selector.type == GROUP:  # ()
                selector_function = _build_selector_function(selector.selector)

            elif selector.type == PICKFIRST:  # /
                fs = [_build_selector_function(s) for s in selector.selector]

                def selector_function(ctx):
                    for f in fs:
                        picked_formats = list(f(ctx))
                        if picked_formats:
                            return picked_formats
                    return []

            elif selector.type == SINGLE:  # atom
                format_spec = selector.selector if selector.selector is not None else 'best'

                if format_spec == 'all':
                    def selector_function(ctx):
                        formats = list(ctx['formats'])
                        if formats:
                            for f in formats:
                                yield f

                else:
                    format_fallback = False
                    format_spec_obj = re.match(r'(best|worst|b|w)(video|audio|v|a)?(\*)?$', format_spec)
                    if format_spec_obj is not None:
                        format_idx = 0 if format_spec_obj.group(1)[0] == 'w' else -1
                        format_type = format_spec_obj.group(2)[0] if format_spec_obj.group(2) else False
                        not_format_type = 'v' if format_type == 'a' else 'a'
                        format_modified = format_spec_obj.group(3) is not None

                        format_fallback = not format_type and not format_modified  # for b, w
                        filter_f = ((lambda f: f.get(format_type + 'codec') != 'none')
                                    if format_type and format_modified  # bv*, ba*, wv*, wa*
                                    else (lambda f: f.get(not_format_type + 'codec') == 'none')
                                    if format_type  # bv, ba, wv, wa
                                    else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                                    if not format_modified  # b, w
                                    else None)  # b*, w*
                    else:
                        format_idx = -1
                        filter_f = ((lambda f: f.get('ext') == format_spec)
                                    if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']  # extension
                                    else (lambda f: f.get('format_id') == format_spec))  # id
1425
1426 def selector_function(ctx):
1427 formats = list(ctx['formats'])
1428 if not formats:
1429 return
1430 matches = list(filter(filter_f, formats)) if filter_f is not None else formats
67134eab 1431 if matches:
909d24dd 1432 yield matches[format_idx]
1433 elif format_fallback == 'force' or (format_fallback and ctx['incomplete_formats']):
1434 # for extractors with incomplete formats (audio only (soundcloud)
1435 # or video only (imgur)) best/worst will fall back to
1436 # best/worst {video,audio}-only format
1437 yield formats[format_idx]
1438
1439 elif selector.type == MERGE: # +
d03cfdce 1440 def _merge(formats_pair):
1441 format_1, format_2 = formats_pair
1442
1443 formats_info = []
1444 formats_info.extend(format_1.get('requested_formats', (format_1,)))
1445 formats_info.extend(format_2.get('requested_formats', (format_2,)))
1446
909d24dd 1447 if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
1448 get_no_more = {"video": False, "audio": False}
1449 for (i, fmt_info) in enumerate(formats_info):
1450 for aud_vid in ["audio", "video"]:
1451 if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
1452 if get_no_more[aud_vid]:
1453 formats_info.pop(i)
1454 get_no_more[aud_vid] = True
1455
1456 if len(formats_info) == 1:
1457 return formats_info[0]
1458
d03cfdce 1459 video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
1460 audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
1461
1462 the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
1463 the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
1464
1465 output_ext = self.params.get('merge_output_format')
1466 if not output_ext:
1467 if the_only_video:
1468 output_ext = the_only_video['ext']
1469 elif the_only_audio and not video_fmts:
1470 output_ext = the_only_audio['ext']
1471 else:
1472 output_ext = 'mkv'
1473
1474 new_dict = {
67134eab 1475 'requested_formats': formats_info,
d03cfdce 1476 'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
1477 'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
67134eab
JMF
1478 'ext': output_ext,
1479 }
d03cfdce 1480
1481 if the_only_video:
1482 new_dict.update({
1483 'width': the_only_video.get('width'),
1484 'height': the_only_video.get('height'),
1485 'resolution': the_only_video.get('resolution'),
1486 'fps': the_only_video.get('fps'),
1487 'vcodec': the_only_video.get('vcodec'),
1488 'vbr': the_only_video.get('vbr'),
1489 'stretched_ratio': the_only_video.get('stretched_ratio'),
1490 })
1491
1492 if the_only_audio:
1493 new_dict.update({
1494 'acodec': the_only_audio.get('acodec'),
1495 'abr': the_only_audio.get('abr'),
1496 })
1497
1498 return new_dict
1499
1500 selector_1, selector_2 = map(_build_selector_function, selector.selector)
083c9df9 1501
317f7ab6
S
1502 def selector_function(ctx):
1503 for pair in itertools.product(
d03cfdce 1504 selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
67134eab 1505 yield _merge(pair)
083c9df9 1506
67134eab 1507 filters = [self._build_format_filter(f) for f in selector.filters]
083c9df9 1508
317f7ab6
S
1509 def final_selector(ctx):
1510 ctx_copy = copy.deepcopy(ctx)
67134eab 1511 for _filter in filters:
317f7ab6
S
1512 ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
1513 return selector_function(ctx_copy)
67134eab 1514 return final_selector
083c9df9 1515
67134eab 1516 stream = io.BytesIO(format_spec.encode('utf-8'))
0130afb7 1517 try:
232541df 1518 tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
0130afb7
JMF
1519 except tokenize.TokenError:
1520 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
1521
1522 class TokenIterator(object):
1523 def __init__(self, tokens):
1524 self.tokens = tokens
1525 self.counter = 0
1526
1527 def __iter__(self):
1528 return self
1529
1530 def __next__(self):
1531 if self.counter >= len(self.tokens):
1532 raise StopIteration()
1533 value = self.tokens[self.counter]
1534 self.counter += 1
1535 return value
1536
1537 next = __next__
1538
1539 def restore_last_token(self):
1540 self.counter -= 1
1541
1542 parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
67134eab 1543 return _build_selector_function(parsed_selector)
a9c58ad9 1544
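# Illustrative standalone usage sketch (not part of this file). The instance,
# the spec and the two format dicts are hypothetical; in real runs
# process_video_result() fills in fields such as 'format' before selection.
from youtube_dlc import YoutubeDL
ydl = YoutubeDL({})
selector = ydl.build_format_selector('bestvideo+bestaudio/best')
ctx = {
    'formats': [
        {'format_id': '137', 'format': '137 - 1080p', 'ext': 'mp4',
         'vcodec': 'avc1.640028', 'acodec': 'none'},
        {'format_id': '140', 'format': '140 - audio only', 'ext': 'm4a',
         'vcodec': 'none', 'acodec': 'mp4a.40.2'},
    ],
    'incomplete_formats': False,
}
for f in selector(ctx):
    print(f['format_id'], f['ext'])  # 137+140 mp4 (a merged video+audio pair)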
e5660ee6
JMF
1545 def _calc_headers(self, info_dict):
1546 res = std_headers.copy()
1547
1548 add_headers = info_dict.get('http_headers')
1549 if add_headers:
1550 res.update(add_headers)
1551
1552 cookies = self._calc_cookies(info_dict)
1553 if cookies:
1554 res['Cookie'] = cookies
1555
0016b84e
S
1556 if 'X-Forwarded-For' not in res:
1557 x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
1558 if x_forwarded_for_ip:
1559 res['X-Forwarded-For'] = x_forwarded_for_ip
1560
e5660ee6
JMF
1561 return res
1562
1563 def _calc_cookies(self, info_dict):
5c2266df 1564 pr = sanitized_Request(info_dict['url'])
e5660ee6 1565 self.cookiejar.add_cookie_header(pr)
662435f7 1566 return pr.get_header('Cookie')
e5660ee6 1567
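# Minimal standalone sketch of the header precedence above (hypothetical
# values): global std_headers first, then the extractor-supplied http_headers,
# then Cookie (from the cookiejar via _calc_cookies) and X-Forwarded-For only
# when not already present.
from youtube_dlc.utils import std_headers
res = dict(std_headers)                               # global defaults
res.update({'Referer': 'https://example.com/watch'})  # per-format http_headers win
if 'X-Forwarded-For' not in res:                      # only added when missing
    res['X-Forwarded-For'] = '203.0.113.7'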
dd82ffea
JMF
1568 def process_video_result(self, info_dict, download=True):
1569 assert info_dict.get('_type', 'video') == 'video'
1570
bec1fad2
PH
1571 if 'id' not in info_dict:
1572 raise ExtractorError('Missing "id" field in extractor result')
1573 if 'title' not in info_dict:
1574 raise ExtractorError('Missing "title" field in extractor result')
1575
c9969434
S
1576 def report_force_conversion(field, field_not, conversion):
1577 self.report_warning(
1578 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
1579 % (field, field_not, conversion))
1580
1581 def sanitize_string_field(info, string_field):
1582 field = info.get(string_field)
1583 if field is None or isinstance(field, compat_str):
1584 return
1585 report_force_conversion(string_field, 'a string', 'string')
1586 info[string_field] = compat_str(field)
1587
1588 def sanitize_numeric_fields(info):
1589 for numeric_field in self._NUMERIC_FIELDS:
1590 field = info.get(numeric_field)
1591 if field is None or isinstance(field, compat_numeric_types):
1592 continue
1593 report_force_conversion(numeric_field, 'numeric', 'int')
1594 info[numeric_field] = int_or_none(field)
1595
1596 sanitize_string_field(info_dict, 'id')
1597 sanitize_numeric_fields(info_dict)
be6217b2 1598
dd82ffea
JMF
1599 if 'playlist' not in info_dict:
1600 # It isn't part of a playlist
1601 info_dict['playlist'] = None
1602 info_dict['playlist_index'] = None
1603
d5519808 1604 thumbnails = info_dict.get('thumbnails')
cfb56d1a
PH
1605 if thumbnails is None:
1606 thumbnail = info_dict.get('thumbnail')
1607 if thumbnail:
a7a14d95 1608 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
d5519808 1609 if thumbnails:
be6d7229 1610 thumbnails.sort(key=lambda t: (
d37708fc
RA
1611 t.get('preference') if t.get('preference') is not None else -1,
1612 t.get('width') if t.get('width') is not None else -1,
1613 t.get('height') if t.get('height') is not None else -1,
1614 t.get('id') if t.get('id') is not None else '', t.get('url')))
f6c24009 1615 for i, t in enumerate(thumbnails):
dcf77cf1 1616 t['url'] = sanitize_url(t['url'])
9603e8a7 1617 if t.get('width') and t.get('height'):
d5519808 1618 t['resolution'] = '%dx%d' % (t['width'], t['height'])
f6c24009
PH
1619 if t.get('id') is None:
1620 t['id'] = '%d' % i
d5519808 1621
b7b72db9 1622 if self.params.get('list_thumbnails'):
1623 self.list_thumbnails(info_dict)
1624 return
1625
536a55da
S
1626 thumbnail = info_dict.get('thumbnail')
1627 if thumbnail:
1628 info_dict['thumbnail'] = sanitize_url(thumbnail)
1629 elif thumbnails:
d5519808
PH
1630 info_dict['thumbnail'] = thumbnails[-1]['url']
1631
c9ae7b95 1632 if 'display_id' not in info_dict and 'id' in info_dict:
0afef30b
PH
1633 info_dict['display_id'] = info_dict['id']
1634
955c4514 1635 if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
a55e36f4
S
1636 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
1637 # see http://bugs.python.org/issue1646728)
1638 try:
1639 upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
1640 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
1641 except (ValueError, OverflowError, OSError):
1642 pass
9d2ecdbc 1643
33d2fc2f
S
1644 # Auto-generate title fields corresponding to the *_number fields when missing
1645 # in order to always have clean titles. This is very common for TV series.
1646 for field in ('chapter', 'season', 'episode'):
1647 if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
1648 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
1649
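# Standalone rerun of the loop above with a hypothetical dict, to show the
# generated titles:
info = {'season_number': 2, 'episode_number': 5}
for field in ('chapter', 'season', 'episode'):
    if info.get('%s_number' % field) is not None and not info.get(field):
        info[field] = '%s %d' % (field.capitalize(), info['%s_number' % field])
print(info['season'], '/', info['episode'])  # Season 2 / Episode 5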
05108a49
S
1650 for cc_kind in ('subtitles', 'automatic_captions'):
1651 cc = info_dict.get(cc_kind)
1652 if cc:
1653 for _, subtitle in cc.items():
1654 for subtitle_format in subtitle:
1655 if subtitle_format.get('url'):
1656 subtitle_format['url'] = sanitize_url(subtitle_format['url'])
1657 if subtitle_format.get('ext') is None:
1658 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
1659
1660 automatic_captions = info_dict.get('automatic_captions')
4bba3716 1661 subtitles = info_dict.get('subtitles')
4bba3716 1662
a504ced0 1663 if self.params.get('listsubtitles', False):
360e1ca5 1664 if 'automatic_captions' in info_dict:
05108a49
S
1665 self.list_subtitles(
1666 info_dict['id'], automatic_captions, 'automatic captions')
4bba3716 1667 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
a504ced0 1668 return
05108a49 1669
360e1ca5 1670 info_dict['requested_subtitles'] = self.process_subtitles(
05108a49 1671 info_dict['id'], subtitles, automatic_captions)
a504ced0 1672
dd82ffea
JMF
1673 # We now pick which formats have to be downloaded
1674 if info_dict.get('formats') is None:
1675 # There's only one format available
1676 formats = [info_dict]
1677 else:
1678 formats = info_dict['formats']
1679
db95dc13
PH
1680 if not formats:
1681 raise ExtractorError('No video formats found!')
1682
73af5cc8
S
1683 def is_wellformed(f):
1684 url = f.get('url')
a5ac0c47 1685 if not url:
73af5cc8
S
1686 self.report_warning(
1687 '"url" field is missing or empty - skipping format, '
1688 'there is an error in extractor')
a5ac0c47
S
1689 return False
1690 if isinstance(url, bytes):
1691 sanitize_string_field(f, 'url')
1692 return True
73af5cc8
S
1693
1694 # Filter out malformed formats for better extraction robustness
1695 formats = list(filter(is_wellformed, formats))
1696
181c7053
S
1697 formats_dict = {}
1698
dd82ffea 1699 # We check that all the formats have the format and format_id fields
db95dc13 1700 for i, format in enumerate(formats):
c9969434
S
1701 sanitize_string_field(format, 'format_id')
1702 sanitize_numeric_fields(format)
dcf77cf1 1703 format['url'] = sanitize_url(format['url'])
e74e3b63 1704 if not format.get('format_id'):
8016c922 1705 format['format_id'] = compat_str(i)
e2effb08
S
1706 else:
1707 # Sanitize format_id from characters used in format selector expression
ec85ded8 1708 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
181c7053
S
1709 format_id = format['format_id']
1710 if format_id not in formats_dict:
1711 formats_dict[format_id] = []
1712 formats_dict[format_id].append(format)
1713
1714 # Make sure all formats have unique format_id
1715 for format_id, ambiguous_formats in formats_dict.items():
1716 if len(ambiguous_formats) > 1:
1717 for i, format in enumerate(ambiguous_formats):
1718 format['format_id'] = '%s-%d' % (format_id, i)
1719
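# Standalone sketch of the disambiguation above (hypothetical ids): two
# formats that both report format_id 'hls' leave the loop as 'hls-0' and
# 'hls-1', so format selector expressions can address each one.
fmts = [{'format_id': 'hls'}, {'format_id': 'hls'}]
by_id = {}
for f in fmts:
    by_id.setdefault(f['format_id'], []).append(f)
for fid, dupes in by_id.items():
    if len(dupes) > 1:
        for i, f in enumerate(dupes):
            f['format_id'] = '%s-%d' % (fid, i)
print([f['format_id'] for f in fmts])  # ['hls-0', 'hls-1']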
1720 for i, format in enumerate(formats):
8c51aa65 1721 if format.get('format') is None:
6febd1c1 1722 format['format'] = '{id} - {res}{note}'.format(
8c51aa65
JMF
1723 id=format['format_id'],
1724 res=self.format_resolution(format),
6febd1c1 1725 note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
8c51aa65 1726 )
c1002e96 1727 # Automatically determine file extension if missing
5b1d8575 1728 if format.get('ext') is None:
cce929ea 1729 format['ext'] = determine_ext(format['url']).lower()
b5559424
S
1730 # Automatically determine protocol if missing (useful for format
1731 # selection purposes)
6f0be937 1732 if format.get('protocol') is None:
b5559424 1733 format['protocol'] = determine_protocol(format)
e5660ee6
JMF
1734 # Add HTTP headers, so that external programs can use them from the
1735 # json output
1736 full_format_info = info_dict.copy()
1737 full_format_info.update(format)
1738 format['http_headers'] = self._calc_headers(full_format_info)
0016b84e
S
1739 # Remove private housekeeping stuff
1740 if '__x_forwarded_for_ip' in info_dict:
1741 del info_dict['__x_forwarded_for_ip']
dd82ffea 1742
4bcc7bd1 1743 # TODO Central sorting goes here
99e206d5 1744
f89197d7 1745 if formats[0] is not info_dict:
b3d9ef88
JMF
1746 # only set the 'formats' field if the original info_dict lists it;
1747 # otherwise we end up with a circular reference: the first (and only)
f89197d7 1748 # element in the 'formats' field in info_dict is info_dict itself,
dfb1b146 1749 # which can't be exported to json
b3d9ef88 1750 info_dict['formats'] = formats
cfb56d1a 1751 if self.params.get('listformats'):
bfaae0a7 1752 self.list_formats(info_dict)
1753 return
1754
de3ef3ed 1755 req_format = self.params.get('format')
a9c58ad9 1756 if req_format is None:
0017d9ad
S
1757 req_format = self._default_format_spec(info_dict, download=download)
1758 if self.params.get('verbose'):
29f7c58a 1759 self._write_string('[debug] Default format spec: %s\n' % req_format)
0017d9ad 1760
5acfa126 1761 format_selector = self.build_format_selector(req_format)
317f7ab6
S
1762
1763 # While in format selection we may need to have access to the original
1764 # format set in order to calculate some metrics or do some processing.
1765 # For now we need to be able to guess whether original formats provided
1766 # by the extractor are incomplete or not (i.e. whether the extractor provides only
1767 # video-only or audio-only formats) for proper format selection for
1768 # extractors with such incomplete formats (see
067aa17e 1769 # https://github.com/ytdl-org/youtube-dl/pull/5556).
317f7ab6
S
1770 # Since formats may be filtered during format selection and may not match
1771 # the original formats, the results may be incorrect. Thus original formats
1772 # or pre-calculated metrics should be passed to format selection routines
1773 # as well.
1774 # We will pass a context object containing all necessary additional data
1775 # instead of just formats.
1776 # This fixes an incorrect format selection issue (see
067aa17e 1777 # https://github.com/ytdl-org/youtube-dl/issues/10083).
2e221ca3 1778 incomplete_formats = (
317f7ab6 1779 # All formats are video-only or
3089bc74 1780 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
317f7ab6 1781 # all formats are audio-only
3089bc74 1782 or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
317f7ab6
S
1783
1784 ctx = {
1785 'formats': formats,
1786 'incomplete_formats': incomplete_formats,
1787 }
1788
1789 formats_to_download = list(format_selector(ctx))
dd82ffea 1790 if not formats_to_download:
6febd1c1 1791 raise ExtractorError('requested format not available',
78a3a9f8 1792 expected=True)
dd82ffea
JMF
1793
1794 if download:
909d24dd 1795 self.to_screen('[info] Downloading format(s) %s' % ", ".join([f['format_id'] for f in formats_to_download]))
dd82ffea 1796 if len(formats_to_download) > 1:
6febd1c1 1797 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
dd82ffea
JMF
1798 for format in formats_to_download:
1799 new_info = dict(info_dict)
1800 new_info.update(format)
1801 self.process_info(new_info)
1802 # We update the info dict with the best quality format (backwards compatibility)
1803 info_dict.update(formats_to_download[-1])
1804 return info_dict
1805
98c70d6f 1806 def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
a504ced0 1807 """Select the requested subtitles and their format"""
98c70d6f
JMF
1808 available_subs = {}
1809 if normal_subtitles and self.params.get('writesubtitles'):
1810 available_subs.update(normal_subtitles)
1811 if automatic_captions and self.params.get('writeautomaticsub'):
1812 for lang, cap_info in automatic_captions.items():
360e1ca5
JMF
1813 if lang not in available_subs:
1814 available_subs[lang] = cap_info
1815
4d171848
JMF
1816 if (not self.params.get('writesubtitles') and not
1817 self.params.get('writeautomaticsub') or not
1818 available_subs):
1819 return None
a504ced0
JMF
1820
1821 if self.params.get('allsubtitles', False):
1822 requested_langs = available_subs.keys()
1823 else:
1824 if self.params.get('subtitleslangs', False):
1825 requested_langs = self.params.get('subtitleslangs')
1826 elif 'en' in available_subs:
1827 requested_langs = ['en']
1828 else:
1829 requested_langs = [list(available_subs.keys())[0]]
1830
1831 formats_query = self.params.get('subtitlesformat', 'best')
1832 formats_preference = formats_query.split('/') if formats_query else []
1833 subs = {}
1834 for lang in requested_langs:
1835 formats = available_subs.get(lang)
1836 if formats is None:
1837 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1838 continue
a504ced0
JMF
1839 for ext in formats_preference:
1840 if ext == 'best':
1841 f = formats[-1]
1842 break
1843 matches = list(filter(lambda f: f['ext'] == ext, formats))
1844 if matches:
1845 f = matches[-1]
1846 break
1847 else:
1848 f = formats[-1]
1849 self.report_warning(
1850 'No subtitle format found matching "%s" for language %s, '
1851 'using %s' % (formats_query, lang, f['ext']))
1852 subs[lang] = f
1853 return subs
1854
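# Illustrative standalone sketch (not part of this file; the video id, URLs
# and subtitle dicts are hypothetical): with subtitles requested in 'vtt/best',
# the 'vtt' entry wins over 'srv1'.
from youtube_dlc import YoutubeDL
ydl = YoutubeDL({'writesubtitles': True, 'subtitleslangs': ['en'],
                 'subtitlesformat': 'vtt/best'})
subs = ydl.process_subtitles(
    'example-video-id',
    {'en': [{'ext': 'srv1', 'url': 'https://example.com/en.srv1'},
            {'ext': 'vtt', 'url': 'https://example.com/en.vtt'}]},
    None)
print(subs['en']['ext'])  # vtt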
d06daf23
S
1855 def __forced_printings(self, info_dict, filename, incomplete):
1856 def print_mandatory(field):
1857 if (self.params.get('force%s' % field, False)
1858 and (not incomplete or info_dict.get(field) is not None)):
1859 self.to_stdout(info_dict[field])
1860
1861 def print_optional(field):
1862 if (self.params.get('force%s' % field, False)
1863 and info_dict.get(field) is not None):
1864 self.to_stdout(info_dict[field])
1865
1866 print_mandatory('title')
1867 print_mandatory('id')
1868 if self.params.get('forceurl', False) and not incomplete:
1869 if info_dict.get('requested_formats') is not None:
1870 for f in info_dict['requested_formats']:
1871 self.to_stdout(f['url'] + f.get('play_path', ''))
1872 else:
1873 # For RTMP URLs, also include the playpath
1874 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1875 print_optional('thumbnail')
1876 print_optional('description')
1877 if self.params.get('forcefilename', False) and filename is not None:
1878 self.to_stdout(filename)
1879 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1880 self.to_stdout(formatSeconds(info_dict['duration']))
1881 print_mandatory('format')
1882 if self.params.get('forcejson', False):
1883 self.to_stdout(json.dumps(info_dict))
1884
8222d8de
JMF
1885 def process_info(self, info_dict):
1886 """Process a single resolved IE result."""
1887
1888 assert info_dict.get('_type', 'video') == 'video'
fd288278
PH
1889
1890 max_downloads = self.params.get('max_downloads')
1891 if max_downloads is not None:
1892 if self._num_downloads >= int(max_downloads):
1893 raise MaxDownloadsReached()
8222d8de 1894
d06daf23 1895 # TODO: backward compatibility, to be removed
8222d8de 1896 info_dict['fulltitle'] = info_dict['title']
8222d8de 1897
11b85ce6 1898 if 'format' not in info_dict:
8222d8de
JMF
1899 info_dict['format'] = info_dict['ext']
1900
8b0d7497 1901 if self._match_entry(info_dict, incomplete=False) is not None:
8222d8de
JMF
1902 return
1903
fd288278 1904 self._num_downloads += 1
8222d8de 1905
e72c7e41 1906 info_dict['_filename'] = filename = self.prepare_filename(info_dict)
8222d8de
JMF
1907
1908 # Forced printings
d06daf23 1909 self.__forced_printings(info_dict, filename, incomplete=False)
8222d8de 1910
8222d8de 1911 if self.params.get('simulate', False):
2d30509f 1912 if self.params.get('force_write_download_archive', False):
1913 self.record_download_archive(info_dict)
1914
1915 # Do nothing else if in simulate mode
8222d8de
JMF
1916 return
1917
1918 if filename is None:
1919 return
1920
c5c9bf0c
S
1921 def ensure_dir_exists(path):
1922 try:
1923 dn = os.path.dirname(path)
1924 if dn and not os.path.exists(dn):
1925 os.makedirs(dn)
1926 return True
1927 except (OSError, IOError) as err:
1928 self.report_error('unable to create directory ' + error_to_compat_str(err))
1929 return False
1930
1931 if not ensure_dir_exists(sanitize_path(encodeFilename(filename))):
8222d8de
JMF
1932 return
1933
1934 if self.params.get('writedescription', False):
2699da80 1935 descfn = replace_extension(filename, 'description', info_dict.get('ext'))
0c3d0f51 1936 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
6febd1c1 1937 self.to_screen('[info] Video description is already present')
f00fd51d
JMF
1938 elif info_dict.get('description') is None:
1939 self.report_warning('There\'s no description to write.')
7b6fefc9
PH
1940 else:
1941 try:
6febd1c1 1942 self.to_screen('[info] Writing video description to: ' + descfn)
7b6fefc9
PH
1943 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1944 descfile.write(info_dict['description'])
7b6fefc9 1945 except (OSError, IOError):
6febd1c1 1946 self.report_error('Cannot write description file ' + descfn)
7b6fefc9 1947 return
8222d8de 1948
1fb07d10 1949 if self.params.get('writeannotations', False):
98727e12 1950 annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
0c3d0f51 1951 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
6febd1c1 1952 self.to_screen('[info] Video annotations are already present')
ffddb112
RA
1953 elif not info_dict.get('annotations'):
1954 self.report_warning('There are no annotations to write.')
7b6fefc9
PH
1955 else:
1956 try:
6febd1c1 1957 self.to_screen('[info] Writing video annotations to: ' + annofn)
7b6fefc9
PH
1958 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1959 annofile.write(info_dict['annotations'])
1960 except (KeyError, TypeError):
6febd1c1 1961 self.report_warning('There are no annotations to write.')
7b6fefc9 1962 except (OSError, IOError):
6febd1c1 1963 self.report_error('Cannot write annotations file: ' + annofn)
7b6fefc9 1964 return
1fb07d10 1965
9f448fcb 1966 def dl(name, info, subtitle=False):
98b69821 1967 fd = get_suitable_downloader(info, self.params)(self, self.params)
1968 for ph in self._progress_hooks:
1969 fd.add_progress_hook(ph)
1970 if self.params.get('verbose'):
29f7c58a 1971 self.to_screen('[debug] Invoking downloader on %r' % info.get('url'))
9f448fcb 1972 return fd.download(name, info, subtitle)
98b69821 1973
c4a91be7 1974 subtitles_are_requested = any([self.params.get('writesubtitles', False),
0b7f3118 1975 self.params.get('writeautomaticsub')])
c4a91be7 1976
c84dd8a9 1977 if subtitles_are_requested and info_dict.get('requested_subtitles'):
8222d8de
JMF
1978 # subtitle download errors are already handled as warnings in the relevant IE;
1979 # that way processing silently goes on when used with an IE that does not support them
c84dd8a9 1980 subtitles = info_dict['requested_subtitles']
fa57af1e 1981 # ie = self.get_info_extractor(info_dict['extractor_key'])
a504ced0
JMF
1982 for sub_lang, sub_info in subtitles.items():
1983 sub_format = sub_info['ext']
824fa511 1984 sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
0c3d0f51 1985 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(sub_filename)):
5ff1bc0c 1986 self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
a504ced0 1987 else:
0c9df79e 1988 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
5ff1bc0c
RA
1989 if sub_info.get('data') is not None:
1990 try:
1991 # Use newline='' to prevent conversion of newline characters
067aa17e 1992 # See https://github.com/ytdl-org/youtube-dl/issues/10268
5ff1bc0c
RA
1993 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
1994 subfile.write(sub_info['data'])
1995 except (OSError, IOError):
1996 self.report_error('Cannot write subtitles file ' + sub_filename)
1997 return
7b6fefc9 1998 else:
5ff1bc0c 1999 try:
9f448fcb
U
2000 dl(sub_filename, sub_info, subtitle=True)
2001 '''
0c9df79e
U
2002 if self.params.get('sleep_interval_subtitles', False):
2003 dl(sub_filename, sub_info)
2004 else:
2005 sub_data = ie._request_webpage(
2006 sub_info['url'], info_dict['id'], note=False).read()
2007 with io.open(encodeFilename(sub_filename), 'wb') as subfile:
2008 subfile.write(sub_data)
9f448fcb 2009 '''
0c9df79e 2010 except (ExtractorError, IOError, OSError, ValueError, compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
5ff1bc0c
RA
2011 self.report_warning('Unable to download subtitle for "%s": %s' %
2012 (sub_lang, error_to_compat_str(err)))
2013 continue
8222d8de 2014
57df9f53
U
2015 if self.params.get('skip_download', False):
2016 if self.params.get('convertsubtitles', False):
2017 subconv = FFmpegSubtitlesConvertorPP(self, format=self.params.get('convertsubtitles'))
2018 filename_real_ext = os.path.splitext(filename)[1][1:]
2019 filename_wo_ext = (
2020 os.path.splitext(filename)[0]
2021 if filename_real_ext == info_dict['ext']
2022 else filename)
2023 afilename = '%s.%s' % (filename_wo_ext, self.params.get('convertsubtitles'))
2024 if subconv.available:
2025 info_dict.setdefault('__postprocessors', [])
2026 # info_dict['__postprocessors'].append(subconv)
2027 if os.path.exists(encodeFilename(afilename)):
f791b419
U
2028 self.to_screen(
2029 '[download] %s has already been downloaded and '
2030 'converted' % afilename)
57df9f53
U
2031 else:
2032 try:
2033 self.post_process(filename, info_dict)
2034 except (PostProcessingError) as err:
2035 self.report_error('postprocessing: %s' % str(err))
2036 return
2037
8222d8de 2038 if self.params.get('writeinfojson', False):
b29e0000 2039 infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
0c3d0f51 2040 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
6febd1c1 2041 self.to_screen('[info] Video description metadata is already present')
7b6fefc9 2042 else:
6febd1c1 2043 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
7b6fefc9 2044 try:
cb202fd2 2045 write_json_file(self.filter_requested_info(info_dict), infofn)
7b6fefc9 2046 except (OSError, IOError):
6febd1c1 2047 self.report_error('Cannot write metadata to JSON file ' + infofn)
7b6fefc9 2048 return
8222d8de 2049
ec82d85a 2050 self._write_thumbnails(info_dict, filename)
8222d8de 2051
732044af 2052 # Write internet shortcut files
2053 url_link = webloc_link = desktop_link = False
2054 if self.params.get('writelink', False):
2055 if sys.platform == "darwin": # macOS.
2056 webloc_link = True
2057 elif sys.platform.startswith("linux"):
2058 desktop_link = True
2059 else: # if sys.platform in ['win32', 'cygwin']:
2060 url_link = True
2061 if self.params.get('writeurllink', False):
2062 url_link = True
2063 if self.params.get('writewebloclink', False):
2064 webloc_link = True
2065 if self.params.get('writedesktoplink', False):
2066 desktop_link = True
2067
2068 if url_link or webloc_link or desktop_link:
2069 if 'webpage_url' not in info_dict:
2070 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2071 return
2072 ascii_url = iri_to_uri(info_dict['webpage_url'])
2073
2074 def _write_link_file(extension, template, newline, embed_filename):
2075 linkfn = replace_extension(filename, extension, info_dict.get('ext'))
2076 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(linkfn)):
2077 self.to_screen('[info] Internet shortcut is already present')
2078 else:
2079 try:
2080 self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
2081 with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
2082 template_vars = {'url': ascii_url}
2083 if embed_filename:
2084 template_vars['filename'] = linkfn[:-(len(extension) + 1)]
2085 linkfile.write(template % template_vars)
2086 except (OSError, IOError):
2087 self.report_error('Cannot write internet shortcut ' + linkfn)
2088 return False
2089 return True
2090
2091 if url_link:
2092 if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
2093 return
2094 if webloc_link:
2095 if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
2096 return
2097 if desktop_link:
2098 if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
2099 return
2100
2101 # Download
2102 must_record_download_archive = False
8222d8de 2103 if not self.params.get('skip_download', False):
4340deca 2104 try:
4340deca
P
2105 if info_dict.get('requested_formats') is not None:
2106 downloaded = []
2107 success = True
d47aeb22 2108 merger = FFmpegMergerPP(self)
f740fae2 2109 if not merger.available:
4340deca
P
2110 postprocessors = []
2111 self.report_warning('You have requested multiple '
2112 'formats but ffmpeg or avconv are not installed.'
4a5a898a 2113 ' The formats won\'t be merged.')
6350728b 2114 else:
4340deca 2115 postprocessors = [merger]
81cd954a
S
2116
2117 def compatible_formats(formats):
d03cfdce 2118 # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2119 video_formats = [format for format in formats if format.get('vcodec') != 'none']
2120 audio_formats = [format for format in formats if format.get('acodec') != 'none']
2121 if len(video_formats) > 2 or len(audio_formats) > 2:
2122 return False
2123
81cd954a 2124 # Check extension
d03cfdce 2125 exts = set(format.get('ext') for format in formats)
2126 COMPATIBLE_EXTS = (
2127 set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2128 set(('webm',)),
2129 )
2130 for ext_sets in COMPATIBLE_EXTS:
2131 if ext_sets.issuperset(exts):
2132 return True
81cd954a
S
2133 # TODO: Check acodec/vcodec
2134 return False
2135
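# e.g. (hypothetical format dicts) compatible_formats(
#     [{'ext': 'mp4', 'vcodec': 'avc1', 'acodec': 'none'},
#      {'ext': 'm4a', 'vcodec': 'none', 'acodec': 'mp4a'}])
# returns True, because both extensions sit in the mp4 family set above,
# while an mp4 video paired with a webm (opus) audio returns False and
# triggers the mkv fallback below.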
38c6902b
S
2136 filename_real_ext = os.path.splitext(filename)[1][1:]
2137 filename_wo_ext = (
2138 os.path.splitext(filename)[0]
2139 if filename_real_ext == info_dict['ext']
2140 else filename)
81cd954a 2141 requested_formats = info_dict['requested_formats']
c0dea0a7 2142 if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
38c6902b 2143 info_dict['ext'] = 'mkv'
4a5a898a
S
2144 self.report_warning(
2145 'Requested formats are incompatible for merge and will be merged into mkv.')
38c6902b
S
2146 # Ensure filename always has a correct extension for successful merge
2147 filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
0c3d0f51 2148 file_exists = os.path.exists(encodeFilename(filename))
2149 if not self.params.get('overwrites', False) and file_exists:
5b5fbc08
JMF
2150 self.to_screen(
2151 '[download] %s has already been downloaded and '
2152 'merged' % filename)
2153 else:
0c3d0f51 2154 if file_exists:
2155 self.report_file_delete(filename)
2156 os.remove(encodeFilename(filename))
81cd954a 2157 for f in requested_formats:
5b5fbc08
JMF
2158 new_info = dict(info_dict)
2159 new_info.update(f)
c5c9bf0c
S
2160 fname = prepend_extension(
2161 self.prepare_filename(new_info),
2162 'f%s' % f['format_id'], new_info['ext'])
2163 if not ensure_dir_exists(fname):
2164 return
5b5fbc08 2165 downloaded.append(fname)
a9e7f546 2166 partial_success, real_download = dl(fname, new_info)
5b5fbc08
JMF
2167 success = success and partial_success
2168 info_dict['__postprocessors'] = postprocessors
2169 info_dict['__files_to_merge'] = downloaded
a9e7f546 2170 # Even if no separate downloads were needed, the merge itself only happens now
2171 info_dict['__real_download'] = True
4340deca 2172 else:
0c3d0f51 2173 # Delete existing file with --yes-overwrites
2174 if self.params.get('overwrites', False):
2175 if os.path.exists(encodeFilename(filename)):
2176 self.report_file_delete(filename)
2177 os.remove(encodeFilename(filename))
4340deca 2178 # Just a single file
a9e7f546 2179 success, real_download = dl(filename, info_dict)
2180 info_dict['__real_download'] = real_download
4340deca 2181 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
7960b056 2182 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
4340deca
P
2183 return
2184 except (OSError, IOError) as err:
2185 raise UnavailableVideoError(err)
2186 except (ContentTooShortError, ) as err:
2187 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
2188 return
8222d8de 2189
e38cafe9 2190 if success and filename != '-':
6271f1ca 2191 # Fixup content
62cd676c
PH
2192 fixup_policy = self.params.get('fixup')
2193 if fixup_policy is None:
2194 fixup_policy = 'detect_or_warn'
2195
d1e4a464
S
2196 INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.'
2197
6271f1ca
PH
2198 stretched_ratio = info_dict.get('stretched_ratio')
2199 if stretched_ratio is not None and stretched_ratio != 1:
6271f1ca
PH
2200 if fixup_policy == 'warn':
2201 self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
2202 info_dict['id'], stretched_ratio))
2203 elif fixup_policy == 'detect_or_warn':
2204 stretched_pp = FFmpegFixupStretchedPP(self)
2205 if stretched_pp.available:
2206 info_dict.setdefault('__postprocessors', [])
2207 info_dict['__postprocessors'].append(stretched_pp)
2208 else:
2209 self.report_warning(
d1e4a464
S
2210 '%s: Non-uniform pixel ratio (%s). %s'
2211 % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
6271f1ca 2212 else:
62cd676c
PH
2213 assert fixup_policy in ('ignore', 'never')
2214
3089bc74
S
2215 if (info_dict.get('requested_formats') is None
2216 and info_dict.get('container') == 'm4a_dash'):
62cd676c 2217 if fixup_policy == 'warn':
d1e4a464
S
2218 self.report_warning(
2219 '%s: writing DASH m4a. '
2220 'Only some players support this container.'
2221 % info_dict['id'])
62cd676c
PH
2222 elif fixup_policy == 'detect_or_warn':
2223 fixup_pp = FFmpegFixupM4aPP(self)
2224 if fixup_pp.available:
2225 info_dict.setdefault('__postprocessors', [])
2226 info_dict['__postprocessors'].append(fixup_pp)
2227 else:
2228 self.report_warning(
d1e4a464
S
2229 '%s: writing DASH m4a. '
2230 'Only some players support this container. %s'
2231 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
62cd676c
PH
2232 else:
2233 assert fixup_policy in ('ignore', 'never')
6271f1ca 2234
3089bc74
S
2235 if (info_dict.get('protocol') == 'm3u8_native'
2236 or info_dict.get('protocol') == 'm3u8'
2237 and self.params.get('hls_prefer_native')):
f17f8651 2238 if fixup_policy == 'warn':
a02682fd 2239 self.report_warning('%s: malformed AAC bitstream detected.' % (
f17f8651 2240 info_dict['id']))
2241 elif fixup_policy == 'detect_or_warn':
2242 fixup_pp = FFmpegFixupM3u8PP(self)
2243 if fixup_pp.available:
2244 info_dict.setdefault('__postprocessors', [])
2245 info_dict['__postprocessors'].append(fixup_pp)
2246 else:
2247 self.report_warning(
a02682fd 2248 '%s: malformed AAC bitstream detected. %s'
d1e4a464 2249 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
f17f8651 2250 else:
2251 assert fixup_policy in ('ignore', 'never')
2252
8222d8de
JMF
2253 try:
2254 self.post_process(filename, info_dict)
2255 except (PostProcessingError) as err:
6febd1c1 2256 self.report_error('postprocessing: %s' % str(err))
8222d8de 2257 return
ab8e5e51
AM
2258 try:
2259 for ph in self._post_hooks:
2260 ph(filename)
2261 except Exception as err:
2262 self.report_error('post hooks: %s' % str(err))
2263 return
2d30509f 2264 must_record_download_archive = True
2265
2266 if must_record_download_archive or self.params.get('force_write_download_archive', False):
2267 self.record_download_archive(info_dict)
c3e6ffba 2268 max_downloads = self.params.get('max_downloads')
2269 if max_downloads is not None and self._num_downloads >= int(max_downloads):
2270 raise MaxDownloadsReached()
8222d8de
JMF
2271
2272 def download(self, url_list):
2273 """Download a given list of URLs."""
acd69589 2274 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
3089bc74
S
2275 if (len(url_list) > 1
2276 and outtmpl != '-'
2277 and '%' not in outtmpl
2278 and self.params.get('max_downloads') != 1):
acd69589 2279 raise SameFileError(outtmpl)
8222d8de
JMF
2280
2281 for url in url_list:
2282 try:
5f6a1245 2283 # It also downloads the videos
61aa5ba3
S
2284 res = self.extract_info(
2285 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
8222d8de 2286 except UnavailableVideoError:
6febd1c1 2287 self.report_error('unable to download video')
8222d8de 2288 except MaxDownloadsReached:
8b0d7497 2289 self.to_screen('[info] Maximum number of downloaded files reached')
2290 raise
2291 except ExistingVideoReached:
d83cb531 2292 self.to_screen('[info] Encountered a file that is already in the archive, stopping due to --break-on-existing')
8b0d7497 2293 raise
2294 except RejectedVideoReached:
d83cb531 2295 self.to_screen('[info] Encountered a file that did not match filter, stopping due to --break-on-reject')
8222d8de 2296 raise
63e0be34
PH
2297 else:
2298 if self.params.get('dump_single_json', False):
2299 self.to_stdout(json.dumps(res))
8222d8de
JMF
2300
2301 return self._download_retcode
2302
1dcc4c0c 2303 def download_with_info_file(self, info_filename):
31bd3925
JMF
2304 with contextlib.closing(fileinput.FileInput(
2305 [info_filename], mode='r',
2306 openhook=fileinput.hook_encoded('utf-8'))) as f:
2307 # FileInput doesn't have a read method, so we can't call json.load
cb202fd2 2308 info = self.filter_requested_info(json.loads('\n'.join(f)))
d4943898
JMF
2309 try:
2310 self.process_ie_result(info, download=True)
2311 except DownloadError:
2312 webpage_url = info.get('webpage_url')
2313 if webpage_url is not None:
6febd1c1 2314 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
d4943898
JMF
2315 return self.download([webpage_url])
2316 else:
2317 raise
2318 return self._download_retcode
1dcc4c0c 2319
cb202fd2
S
2320 @staticmethod
2321 def filter_requested_info(info_dict):
2322 return dict(
2323 (k, v) for k, v in info_dict.items()
2324 if k not in ['requested_formats', 'requested_subtitles'])
2325
8222d8de
JMF
2326 def post_process(self, filename, ie_info):
2327 """Run all the postprocessors on the given file."""
2328 info = dict(ie_info)
2329 info['filepath'] = filename
6350728b
JMF
2330 pps_chain = []
2331 if ie_info.get('__postprocessors') is not None:
2332 pps_chain.extend(ie_info['__postprocessors'])
2333 pps_chain.extend(self._pps)
2334 for pp in pps_chain:
71646e46 2335 files_to_delete = []
8222d8de 2336 try:
592e97e8 2337 files_to_delete, info = pp.run(info)
8222d8de 2338 except PostProcessingError as e:
bbcbf4d4 2339 self.report_error(e.msg)
592e97e8 2340 if files_to_delete and not self.params.get('keepvideo', False):
d03cfdce 2341 for old_filename in set(files_to_delete):
f3ff1a36 2342 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
592e97e8
JMF
2343 try:
2344 os.remove(encodeFilename(old_filename))
2345 except (IOError, OSError):
2346 self.report_warning('Unable to remove downloaded original file')
c1c9a79c 2347
5db07df6 2348 def _make_archive_id(self, info_dict):
e9fef7ee
S
2349 video_id = info_dict.get('id')
2350 if not video_id:
2351 return
5db07df6
PH
2352 # Future-proof against any change in case
2353 # and for backwards compatibility with prior versions
e9fef7ee 2354 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
7012b23c 2355 if extractor is None:
1211bb6d
S
2356 url = str_or_none(info_dict.get('url'))
2357 if not url:
2358 return
e9fef7ee
S
2359 # Try to find matching extractor for the URL and take its ie_key
2360 for ie in self._ies:
1211bb6d 2361 if ie.suitable(url):
e9fef7ee
S
2362 extractor = ie.ie_key()
2363 break
2364 else:
2365 return
2366 return extractor.lower() + ' ' + video_id
5db07df6
PH
2367
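# Illustrative standalone sketch (not part of this file; the id and extractor
# key are hypothetical): archive entries are '<extractor> <video id>' lines.
from youtube_dlc import YoutubeDL
print(YoutubeDL({})._make_archive_id(
    {'id': 'dQw4w9WgXcQ', 'extractor_key': 'Youtube'}))  # youtube dQw4w9WgXcQ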
2368 def in_download_archive(self, info_dict):
2369 fn = self.params.get('download_archive')
2370 if fn is None:
2371 return False
2372
2373 vid_id = self._make_archive_id(info_dict)
e9fef7ee 2374 if not vid_id:
7012b23c 2375 return False # Incomplete video information
5db07df6 2376
a45e8619 2377 return vid_id in self.archive
c1c9a79c
PH
2378
2379 def record_download_archive(self, info_dict):
2380 fn = self.params.get('download_archive')
2381 if fn is None:
2382 return
5db07df6
PH
2383 vid_id = self._make_archive_id(info_dict)
2384 assert vid_id
c1c9a79c 2385 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
6febd1c1 2386 archive_file.write(vid_id + '\n')
a45e8619 2387 self.archive.add(vid_id)
dd82ffea 2388
8c51aa65 2389 @staticmethod
8abeeb94 2390 def format_resolution(format, default='unknown'):
fb04e403
PH
2391 if format.get('vcodec') == 'none':
2392 return 'audio only'
f49d89ee
PH
2393 if format.get('resolution') is not None:
2394 return format['resolution']
8c51aa65
JMF
2395 if format.get('height') is not None:
2396 if format.get('width') is not None:
6febd1c1 2397 res = '%sx%s' % (format['width'], format['height'])
8c51aa65 2398 else:
6febd1c1 2399 res = '%sp' % format['height']
f49d89ee 2400 elif format.get('width') is not None:
388ae76b 2401 res = '%dx?' % format['width']
8c51aa65 2402 else:
8abeeb94 2403 res = default
8c51aa65
JMF
2404 return res
2405
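# Illustrative standalone examples (hypothetical format dicts):
from youtube_dlc import YoutubeDL
print(YoutubeDL.format_resolution({'width': 1280, 'height': 720}))  # 1280x720
print(YoutubeDL.format_resolution({'height': 720}))                 # 720p
print(YoutubeDL.format_resolution({'vcodec': 'none'}))              # audio only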
c57f7757
PH
2406 def _format_note(self, fdict):
2407 res = ''
2408 if fdict.get('ext') in ['f4f', 'f4m']:
2409 res += '(unsupported) '
32f90364
PH
2410 if fdict.get('language'):
2411 if res:
2412 res += ' '
9016d76f 2413 res += '[%s] ' % fdict['language']
c57f7757
PH
2414 if fdict.get('format_note') is not None:
2415 res += fdict['format_note'] + ' '
2416 if fdict.get('tbr') is not None:
2417 res += '%4dk ' % fdict['tbr']
2418 if fdict.get('container') is not None:
2419 if res:
2420 res += ', '
2421 res += '%s container' % fdict['container']
3089bc74
S
2422 if (fdict.get('vcodec') is not None
2423 and fdict.get('vcodec') != 'none'):
c57f7757
PH
2424 if res:
2425 res += ', '
2426 res += fdict['vcodec']
91c7271a 2427 if fdict.get('vbr') is not None:
c57f7757
PH
2428 res += '@'
2429 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
2430 res += 'video@'
2431 if fdict.get('vbr') is not None:
2432 res += '%4dk' % fdict['vbr']
fbb21cf5 2433 if fdict.get('fps') is not None:
5d583bdf
S
2434 if res:
2435 res += ', '
2436 res += '%sfps' % fdict['fps']
c57f7757
PH
2437 if fdict.get('acodec') is not None:
2438 if res:
2439 res += ', '
2440 if fdict['acodec'] == 'none':
2441 res += 'video only'
2442 else:
2443 res += '%-5s' % fdict['acodec']
2444 elif fdict.get('abr') is not None:
2445 if res:
2446 res += ', '
2447 res += 'audio'
2448 if fdict.get('abr') is not None:
2449 res += '@%3dk' % fdict['abr']
2450 if fdict.get('asr') is not None:
2451 res += ' (%5dHz)' % fdict['asr']
2452 if fdict.get('filesize') is not None:
2453 if res:
2454 res += ', '
2455 res += format_bytes(fdict['filesize'])
9732d77e
PH
2456 elif fdict.get('filesize_approx') is not None:
2457 if res:
2458 res += ', '
2459 res += '~' + format_bytes(fdict['filesize_approx'])
c57f7757 2460 return res
91c7271a 2461
76d321f6 2462 def _format_note_table(self, f):
2463 def join_fields(*vargs):
2464 return ', '.join((val for val in vargs if val != ''))
2465
2466 return join_fields(
2467 'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
2468 format_field(f, 'language', '[%s]'),
2469 format_field(f, 'format_note'),
2470 format_field(f, 'container', ignore=(None, f.get('ext'))),
2471 format_field(f, 'asr', '%5dHz'))
2472
c57f7757 2473 def list_formats(self, info_dict):
94badb25 2474 formats = info_dict.get('formats', [info_dict])
76d321f6 2475 new_format = self.params.get('listformats_table', False)
2476 if new_format:
2477 table = [
2478 [
2479 format_field(f, 'format_id'),
2480 format_field(f, 'ext'),
2481 self.format_resolution(f),
2482 format_field(f, 'fps', '%d'),
2483 '|',
2484 format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
2485 format_field(f, 'tbr', '%4dk'),
2486 f.get('protocol').replace('http_dash_segments', 'dash').replace("native", "n"),
2487 '|',
2488 format_field(f, 'vcodec', default='unknown').replace('none', ''),
2489 format_field(f, 'vbr', '%4dk'),
2490 format_field(f, 'acodec', default='unknown').replace('none', ''),
2491 format_field(f, 'abr', '%3dk'),
2492 format_field(f, 'asr', '%5dHz'),
2493 self._format_note_table(f)]
2494 for f in formats
2495 if f.get('preference') is None or f['preference'] >= -1000]
2496 header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', ' TBR', 'PROTO',
2497 '|', 'VCODEC', ' VBR', 'ACODEC', ' ABR', ' ASR', 'NOTE']
2498 else:
2499 table = [
2500 [
2501 format_field(f, 'format_id'),
2502 format_field(f, 'ext'),
2503 self.format_resolution(f),
2504 self._format_note(f)]
2505 for f in formats
2506 if f.get('preference') is None or f['preference'] >= -1000]
2507 header_line = ['format code', 'extension', 'resolution', 'note']
57dd9a8f 2508
76d321f6 2509 # if len(formats) > 1:
2510 # table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
cfb56d1a 2511 self.to_screen(
76d321f6 2512 '[info] Available formats for %s:\n%s' % (info_dict['id'], render_table(
2513 header_line,
2514 table,
2515 delim=new_format,
2516 extraGap=(0 if new_format else 1),
2517 hideEmpty=new_format)))
cfb56d1a
PH
2518
2519 def list_thumbnails(self, info_dict):
2520 thumbnails = info_dict.get('thumbnails')
2521 if not thumbnails:
b7b72db9 2522 self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
2523 return
cfb56d1a
PH
2524
2525 self.to_screen(
2526 '[info] Thumbnails for %s:' % info_dict['id'])
2527 self.to_screen(render_table(
2528 ['ID', 'width', 'height', 'URL'],
2529 [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
dca08720 2530
360e1ca5 2531 def list_subtitles(self, video_id, subtitles, name='subtitles'):
a504ced0 2532 if not subtitles:
360e1ca5 2533 self.to_screen('%s has no %s' % (video_id, name))
a504ced0 2534 return
a504ced0 2535 self.to_screen(
edab9dbf
JMF
2536 'Available %s for %s:' % (name, video_id))
2537 self.to_screen(render_table(
2538 ['Language', 'formats'],
2539 [[lang, ', '.join(f['ext'] for f in reversed(formats))]
2540 for lang, formats in subtitles.items()]))
a504ced0 2541
dca08720
PH
2542 def urlopen(self, req):
2543 """ Start an HTTP download """
82d8a8b6 2544 if isinstance(req, compat_basestring):
67dda517 2545 req = sanitized_Request(req)
19a41fc6 2546 return self._opener.open(req, timeout=self._socket_timeout)
dca08720
PH
2547
2548 def print_debug_header(self):
2549 if not self.params.get('verbose'):
2550 return
62fec3b2 2551
4192b51c 2552 if type('') is not compat_str:
067aa17e 2553 # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
4192b51c
PH
2554 self.report_warning(
2555 'Your Python is broken! Update to a newer and supported version')
2556
c6afed48
PH
2557 stdout_encoding = getattr(
2558 sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
b0472057 2559 encoding_str = (
734f90bb
PH
2560 '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
2561 locale.getpreferredencoding(),
2562 sys.getfilesystemencoding(),
c6afed48 2563 stdout_encoding,
b0472057 2564 self.get_encoding()))
4192b51c 2565 write_string(encoding_str, encoding=None)
734f90bb 2566
d9d045e2 2567 self._write_string('[debug] yt-dlp version ' + __version__ + '\n')
e0986e31
JMF
2568 if _LAZY_LOADER:
2569 self._write_string('[debug] Lazy loading extractors enabled' + '\n')
dca08720
PH
2570 try:
2571 sp = subprocess.Popen(
2572 ['git', 'rev-parse', '--short', 'HEAD'],
2573 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
2574 cwd=os.path.dirname(os.path.abspath(__file__)))
f5b1bca9 2575 out, err = process_communicate_or_kill(sp)
dca08720
PH
2576 out = out.decode().strip()
2577 if re.match('[0-9a-f]+', out):
734f90bb 2578 self._write_string('[debug] Git HEAD: ' + out + '\n')
70a1165b 2579 except Exception:
dca08720
PH
2580 try:
2581 sys.exc_clear()
70a1165b 2582 except Exception:
dca08720 2583 pass
b300cda4
S
2584
2585 def python_implementation():
2586 impl_name = platform.python_implementation()
2587 if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
2588 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
2589 return impl_name
2590
2591 self._write_string('[debug] Python version %s (%s) - %s\n' % (
2592 platform.python_version(), python_implementation(),
2593 platform_name()))
d28b5171 2594
73fac4e9 2595 exe_versions = FFmpegPostProcessor.get_versions(self)
4c83c967 2596 exe_versions['rtmpdump'] = rtmpdump_version()
feee8d32 2597 exe_versions['phantomjs'] = PhantomJSwrapper._version()
d28b5171
PH
2598 exe_str = ', '.join(
2599 '%s %s' % (exe, v)
2600 for exe, v in sorted(exe_versions.items())
2601 if v
2602 )
2603 if not exe_str:
2604 exe_str = 'none'
2605 self._write_string('[debug] exe versions: %s\n' % exe_str)
dca08720
PH
2606
2607 proxy_map = {}
2608 for handler in self._opener.handlers:
2609 if hasattr(handler, 'proxies'):
2610 proxy_map.update(handler.proxies)
734f90bb 2611 self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
dca08720 2612
58b1f00d
PH
2613 if self.params.get('call_home', False):
2614 ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
2615 self._write_string('[debug] Public IP address: %s\n' % ipaddr)
f5546c0b 2616 return
58b1f00d
PH
2617 latest_version = self.urlopen(
2618 'https://yt-dl.org/latest/version').read().decode('utf-8')
2619 if version_tuple(latest_version) > version_tuple(__version__):
2620 self.report_warning(
2621 'You are using an outdated version (newest version: %s)! '
2622 'See https://yt-dl.org/update if you need help updating.' %
2623 latest_version)
2624
e344693b 2625 def _setup_opener(self):
6ad14cab 2626 timeout_val = self.params.get('socket_timeout')
19a41fc6 2627 self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
6ad14cab 2628
dca08720
PH
2629 opts_cookiefile = self.params.get('cookiefile')
2630 opts_proxy = self.params.get('proxy')
2631
2632 if opts_cookiefile is None:
2633 self.cookiejar = compat_cookiejar.CookieJar()
2634 else:
590bc6f6 2635 opts_cookiefile = expand_path(opts_cookiefile)
1bab3437 2636 self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
dca08720 2637 if os.access(opts_cookiefile, os.R_OK):
1d88b3e6 2638 self.cookiejar.load(ignore_discard=True, ignore_expires=True)
dca08720 2639
6a3f4c3f 2640 cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
dca08720
PH
2641 if opts_proxy is not None:
2642 if opts_proxy == '':
2643 proxies = {}
2644 else:
2645 proxies = {'http': opts_proxy, 'https': opts_proxy}
2646 else:
2647 proxies = compat_urllib_request.getproxies()
067aa17e 2648 # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
dca08720
PH
2649 if 'http' in proxies and 'https' not in proxies:
2650 proxies['https'] = proxies['http']
91410c9b 2651 proxy_handler = PerRequestProxyHandler(proxies)
a0ddb8a2
PH
2652
2653 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
be4a824d
PH
2654 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
2655 ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
fca6dba8 2656 redirect_handler = YoutubeDLRedirectHandler()
8b172c2e 2657 data_handler = compat_urllib_request_DataHandler()
6240b0a2
JMF
2658
2659 # When passing our own FileHandler instance, build_opener won't add the
2660 # default FileHandler, which allows us to disable the file protocol as it
2661 # can be used for malicious purposes (see
067aa17e 2662 # https://github.com/ytdl-org/youtube-dl/issues/8227)
6240b0a2
JMF
2663 file_handler = compat_urllib_request.FileHandler()
2664
2665 def file_open(*args, **kwargs):
cefecac1 2666 raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dlc for security reasons')
6240b0a2
JMF
2667 file_handler.file_open = file_open
2668
2669 opener = compat_urllib_request.build_opener(
fca6dba8 2670 proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
2461f79d 2671
dca08720
PH
2672 # Delete the default user-agent header, which would otherwise apply in
2673 # cases where our custom HTTP handler doesn't come into play
067aa17e 2674 # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
dca08720
PH
2675 opener.addheaders = []
2676 self._opener = opener
62fec3b2
PH
2677
2678 def encode(self, s):
2679 if isinstance(s, bytes):
2680 return s # Already encoded
2681
2682 try:
2683 return s.encode(self.get_encoding())
2684 except UnicodeEncodeError as err:
2685 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
2686 raise
2687
2688 def get_encoding(self):
2689 encoding = self.params.get('encoding')
2690 if encoding is None:
2691 encoding = preferredencoding()
2692 return encoding
ec82d85a
PH
2693
2694 def _write_thumbnails(self, info_dict, filename):
2695 if self.params.get('writethumbnail', False):
2696 thumbnails = info_dict.get('thumbnails')
2697 if thumbnails:
2698 thumbnails = [thumbnails[-1]]
2699 elif self.params.get('write_all_thumbnails', False):
2700 thumbnails = info_dict.get('thumbnails')
2701 else:
2702 return
2703
2704 if not thumbnails:
2705 # No thumbnails present, so return immediately
2706 return
2707
2708 for t in thumbnails:
2709 thumb_ext = determine_ext(t['url'], 'jpg')
2710 suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
2711 thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
29f7c58a 2712 t['filename'] = thumb_filename = replace_extension(filename + suffix, thumb_ext, info_dict.get('ext'))
ec82d85a 2713
0c3d0f51 2714 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(thumb_filename)):
ec82d85a
PH
2715 self.to_screen('[%s] %s: Thumbnail %sis already present' %
2716 (info_dict['extractor'], info_dict['id'], thumb_display_id))
2717 else:
2718 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
2719 (info_dict['extractor'], info_dict['id'], thumb_display_id))
2720 try:
2721 uf = self.urlopen(t['url'])
d3d89c32 2722 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
ec82d85a
PH
2723 shutil.copyfileobj(uf, thumbf)
2724 self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2725 (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
2726 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2727 self.report_warning('Unable to download thumbnail "%s": %s' %
9b9c5355 2728 (t['url'], error_to_compat_str(err)))