]> jfr.im git - yt-dlp.git/blame - yt_dlp/YoutubeDL.py
[cleanup] Misc
[yt-dlp.git] / yt_dlp / YoutubeDL.py
CommitLineData
26e63931 1import collections
31bd3925 2import contextlib
9d2ecdbc 3import datetime
c1c9a79c 4import errno
31bd3925 5import fileinput
b5ae35ee 6import functools
8222d8de 7import io
b82f815f 8import itertools
8694c600 9import json
62fec3b2 10import locale
083c9df9 11import operator
8222d8de 12import os
f8271158 13import random
8222d8de
JMF
14import re
15import shutil
dca08720 16import subprocess
8222d8de 17import sys
21cd8fae 18import tempfile
8222d8de 19import time
67134eab 20import tokenize
8222d8de 21import traceback
524e2e4f 22import unicodedata
f9934b96 23import urllib.request
961ea474
S
24from string import ascii_letters
25
f8271158 26from .cache import Cache
14f25df2 27from .compat import compat_os_name, compat_shlex_quote
982ee69a 28from .cookies import load_cookies
f8271158 29from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
30from .downloader.rtmp import rtmpdump_version
f8271158 31from .extractor import gen_extractor_classes, get_info_extractor
fe7866d0 32from .extractor.common import UnsupportedURLIE
f8271158 33from .extractor.openload import PhantomJSwrapper
34from .minicurses import format_text
8e40b9d1 35from .plugins import directories as plugin_directories
e756f45b 36from .postprocessor import _PLUGIN_CLASSES as plugin_pps
f8271158 37from .postprocessor import (
38 EmbedThumbnailPP,
39 FFmpegFixupDuplicateMoovPP,
40 FFmpegFixupDurationPP,
41 FFmpegFixupM3u8PP,
42 FFmpegFixupM4aPP,
43 FFmpegFixupStretchedPP,
44 FFmpegFixupTimestampPP,
45 FFmpegMergerPP,
46 FFmpegPostProcessor,
ca9def71 47 FFmpegVideoConvertorPP,
f8271158 48 MoveFilesAfterDownloadPP,
49 get_postprocessor,
50)
ca9def71 51from .postprocessor.ffmpeg import resolve_mapping as resolve_recode_mapping
b5e7a2e6 52from .update import REPOSITORY, current_git_head, detect_variant
8c25f81b 53from .utils import (
f8271158 54 DEFAULT_OUTTMPL,
7b2c3f47 55 IDENTITY,
f8271158 56 LINK_TEMPLATES,
8dc59305 57 MEDIA_EXTENSIONS,
f8271158 58 NO_DEFAULT,
1d485a1a 59 NUMBER_RE,
f8271158 60 OUTTMPL_TYPES,
61 POSTPROCESS_WHEN,
62 STR_FORMAT_RE_TMPL,
63 STR_FORMAT_TYPES,
64 ContentTooShortError,
65 DateRange,
66 DownloadCancelled,
67 DownloadError,
68 EntryNotInPlaylist,
69 ExistingVideoReached,
70 ExtractorError,
784320c9 71 FormatSorter,
f8271158 72 GeoRestrictedError,
73 HEADRequest,
f8271158 74 ISO3166Utils,
75 LazyList,
76 MaxDownloadsReached,
19a03940 77 Namespace,
f8271158 78 PagedList,
79 PerRequestProxyHandler,
7e88d7d7 80 PlaylistEntries,
f8271158 81 Popen,
82 PostProcessingError,
83 ReExtractInfo,
84 RejectedVideoReached,
85 SameFileError,
86 UnavailableVideoError,
693f0600 87 UserNotLive,
f8271158 88 YoutubeDLCookieProcessor,
89 YoutubeDLHandler,
90 YoutubeDLRedirectHandler,
eedb7ba5
S
91 age_restricted,
92 args_to_str,
cb794ee0 93 bug_reports_message,
ce02ed60 94 date_from_str,
da4db748 95 deprecation_warning,
ce02ed60 96 determine_ext,
b5559424 97 determine_protocol,
c0384f22 98 encode_compat_str,
ce02ed60 99 encodeFilename,
a06916d9 100 error_to_compat_str,
47cdc68e 101 escapeHTML,
590bc6f6 102 expand_path,
90137ca4 103 filter_dict,
e29663c6 104 float_or_none,
02dbf93f 105 format_bytes,
e0fd9573 106 format_decimal_suffix,
f8271158 107 format_field,
525ef922 108 formatSeconds,
fc61aff4 109 get_compatible_ext,
0bb322b9 110 get_domain,
c9969434 111 int_or_none,
732044af 112 iri_to_uri,
941e881e 113 is_path_like,
34921b43 114 join_nonempty,
ce02ed60 115 locked_file,
0647d925 116 make_archive_id,
0202b52a 117 make_dir,
dca08720 118 make_HTTPS_handler,
8b7539d2 119 merge_headers,
3158150c 120 network_exceptions,
ec11a9f4 121 number_of_digits,
cd6fc19e 122 orderedSet,
5314b521 123 orderedSet_from_options,
083c9df9 124 parse_filesize,
ce02ed60 125 preferredencoding,
eedb7ba5 126 prepend_extension,
51fb4995 127 register_socks_protocols,
3efb96a6 128 remove_terminal_sequences,
cfb56d1a 129 render_table,
eedb7ba5 130 replace_extension,
ce02ed60 131 sanitize_filename,
1bb5c511 132 sanitize_path,
dcf77cf1 133 sanitize_url,
67dda517 134 sanitized_Request,
e5660ee6 135 std_headers,
1211bb6d 136 str_or_none,
e29663c6 137 strftime_or_none,
ce02ed60 138 subtitles_filename,
819e0531 139 supports_terminal_sequences,
b1f94422 140 system_identifier,
f2ebc5c7 141 timetuple_from_msec,
732044af 142 to_high_limit_path,
324ad820 143 traverse_obj,
fc61aff4 144 try_call,
6033d980 145 try_get,
29eb5174 146 url_basename,
7d1eb38a 147 variadic,
58b1f00d 148 version_tuple,
53973b4d 149 windows_enable_vt_mode,
ce02ed60
PH
150 write_json_file,
151 write_string,
4f026faf 152)
29cb20bd 153from .version import CHANNEL, RELEASE_GIT_HEAD, VARIANT, __version__
8222d8de 154
e9c0cdd3
YCH
155if compat_os_name == 'nt':
156 import ctypes
157
2459b6e1 158
86e5f3ed 159class YoutubeDL:
8222d8de
JMF
160 """YoutubeDL class.
161
162 YoutubeDL objects are the ones responsible for downloading the
163 actual video file and writing it to disk if the user has requested
164 it, among some other tasks. In most cases there should be one per
165 program. Given a video URL, the downloader does not itself know how to
166 extract all the needed information (that is the InfoExtractors' task), so it
167 has to pass the URL to one of them.
168
169 For this, YoutubeDL objects have a method that allows
170 InfoExtractors to be registered in a given order. When it is passed
171 a URL, the YoutubeDL object hands it to the first InfoExtractor it
172 finds that reports being able to handle it. The InfoExtractor extracts
173 all the information about the video or videos the URL refers to, and
174 YoutubeDL processes the extracted information, possibly using a File
175 Downloader to download the video.
176
177 YoutubeDL objects accept a lot of parameters. In order not to saturate
178 the object constructor with arguments, it receives a dictionary of
179 options instead. These options are available through the params
180 attribute for the InfoExtractors to use. The YoutubeDL also
181 registers itself as the downloader in charge for the InfoExtractors
182 that are added to it, so this is a "mutual registration".
183
184 Available options:
185
186 username: Username for authentication purposes.
187 password: Password for authentication purposes.
180940e0 188 videopassword: Password for accessing a video.
1da50aa3
S
189 ap_mso: Adobe Pass multiple-system operator identifier.
190 ap_username: Multiple-system operator account username.
191 ap_password: Multiple-system operator account password.
8222d8de
JMF
192 usenetrc: Use netrc for authentication instead.
193 verbose: Print additional info to stdout.
194 quiet: Do not print messages to stdout.
ad8915b7 195 no_warnings: Do not print out anything for warnings.
bb66c247 196 forceprint: A dict with keys WHEN mapped to a list of templates to
197 print to stdout. The allowed keys are video or any of the
198 items in utils.POSTPROCESS_WHEN.
ca30f449 199 For compatibility, a single list is also accepted
bb66c247 200 print_to_file: A dict with keys WHEN (same as forceprint) mapped to
201 a list of tuples with (template, filename)
8694c600 202 forcejson: Force printing info_dict as JSON.
63e0be34
PH
203 dump_single_json: Force printing the info_dict of the whole playlist
204 (or video) as a single JSON line.
c25228e5 205 force_write_download_archive: Force writing download archive regardless
206 of 'skip_download' or 'simulate'.
b7b04c78 207 simulate: Do not download the video files. If unset (or None),
208 simulate only if listsubtitles, listformats or list_thumbnails is used
eb8a4433 209 format: Video format code. See "FORMAT SELECTION" for more details.
093a1710 210 You can also pass a function. The function takes 'ctx' as
211 argument and returns the formats to download.
212 See "build_format_selector" for an implementation
63ad4d43 213 allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
b7da73eb 214 ignore_no_formats_error: Ignore "No video formats" error. Useful for
215 extracting metadata even if the video is not actually
216 available for download (experimental)
0930b11f 217 format_sort: A list of fields by which to sort the video formats.
218 See "Sorting Formats" for more details.
c25228e5 219 format_sort_force: Force the given format_sort. See "Sorting Formats"
220 for more details.
08d30158 221 prefer_free_formats: Whether to prefer video formats with free containers
222 over non-free ones of same quality.
c25228e5 223 allow_multiple_video_streams: Allow multiple video streams to be merged
224 into a single file
225 allow_multiple_audio_streams: Allow multiple audio streams to be merged
226 into a single file
0ba692ac 227 check_formats: Whether to test if the formats are downloadable.
9f1a1c36 228 Can be True (check all), False (check none),
229 'selected' (check selected formats),
0ba692ac 230 or None (check only if requested by extractor)
4524baf0 231 paths: Dictionary of output paths. The allowed keys are 'home',
232 'temp' and the keys of OUTTMPL_TYPES (in utils.py)
de6000d9 233 outtmpl: Dictionary of templates for output names. Allowed keys
4524baf0 234 are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
34488702 235 For compatibility with youtube-dl, a single string can also be used
a820dc72
RA
236 outtmpl_na_placeholder: Placeholder for unavailable meta fields.
237 restrictfilenames: Do not allow "&" and spaces in file names
238 trim_file_name: Limit length of filename (extension excluded)
4524baf0 239 windowsfilenames: Force the filenames to be windows compatible
b1940459 240 ignoreerrors: Do not stop on download/postprocessing errors.
241 Can be 'only_download' to ignore only download errors.
242 Default is 'only_download' for CLI, but False for API
26e2805c 243 skip_playlist_after_errors: Number of allowed failures until the rest of
244 the playlist is skipped
fe7866d0 245 allowed_extractors: List of regexes to match against extractor names that are allowed
0c3d0f51 246 overwrites: Overwrite all video and metadata files if True,
247 overwrite only non-video files if None
248 and don't overwrite any file if False
34488702 249 For compatibility with youtube-dl,
250 "nooverwrites" may also be used instead
c14e88f0 251 playlist_items: Specific indices of playlist to download.
75822ca7 252 playlistrandom: Download playlist items in random order.
7e9a6125 253 lazy_playlist: Process playlist entries as they are received.
8222d8de
JMF
254 matchtitle: Download only matching titles.
255 rejecttitle: Reject downloads for matching titles.
8bf9319e 256 logger: Log messages to a logging.Logger instance.
17ffed18 257 logtostderr: Print everything to stderr instead of stdout.
258 consoletitle: Display progress in console window's titlebar.
8222d8de
JMF
259 writedescription: Write the video description to a .description file
260 writeinfojson: Write the video metadata to a .info.json file
75d43ca0 261 clean_infojson: Remove private fields from the infojson
34488702 262 getcomments: Extract video comments. This will not be written to disk
06167fbb 263 unless writeinfojson is also given
1fb07d10 264 writeannotations: Write the video annotations to a .annotations.xml file
8222d8de 265 writethumbnail: Write the thumbnail image to a file
c25228e5 266 allow_playlist_files: Whether to write playlists' description, infojson etc
267 also to disk when using the 'write*' options
ec82d85a 268 write_all_thumbnails: Write all thumbnail formats to files
732044af 269 writelink: Write an internet shortcut file, depending on the
270 current platform (.url/.webloc/.desktop)
271 writeurllink: Write a Windows internet shortcut file (.url)
272 writewebloclink: Write a macOS internet shortcut file (.webloc)
273 writedesktoplink: Write a Linux internet shortcut file (.desktop)
8222d8de 274 writesubtitles: Write the video subtitles to a file
741dd8ea 275 writeautomaticsub: Write the automatically generated subtitles to a file
8222d8de 276 listsubtitles: Lists all available subtitles for the video
a504ced0 277 subtitlesformat: The format code for subtitles
c32b0aab 278 subtitleslangs: List of languages of the subtitles to download (can be regex).
279 The list may contain "all" to refer to all the available
280 subtitles. The language can be prefixed with a "-" to
62b58c09 281 exclude it from the requested languages, e.g. ['all', '-live_chat']
8222d8de
JMF
282 keepvideo: Keep the video file after post-processing
283 daterange: A DateRange object, download only if the upload_date is in the range.
284 skip_download: Skip the actual download of the video file
c35f9e72 285 cachedir: Location of the cache files in the filesystem.
a0e07d31 286 False to disable filesystem cache.
47192f92 287 noplaylist: Download single video instead of a playlist if in doubt.
8dbe9899
PH
288 age_limit: An integer representing the user's age in years.
289 Unsuitable videos for the given age are skipped.
5fe18bdb
PH
290 min_views: An integer representing the minimum view count the video
291 must have in order to not be skipped.
292 Videos without view count information are always
293 downloaded. None for no limit.
294 max_views: An integer representing the maximum view count.
295 Videos that are more popular than that are not
296 downloaded.
297 Videos without view count information are always
298 downloaded. None for no limit.
ae103564 299 download_archive: A set, or the name of a file where all downloads are recorded.
300 Videos already present in the file are not downloaded again.
8a51f564 301 break_on_existing: Stop the download process after attempting to download a
302 file that is in the archive.
b222c271 303 break_per_url: Whether break_on_reject and break_on_existing
304 should act on each input URL as opposed to for the entire queue
d76fa1f3 305 cookiefile: File name or text stream from where cookies should be read and dumped to
f59f5ef8 306 cookiesfrombrowser: A tuple containing the name of the browser, the profile
9bd13fe5 307 name/path from where cookies are loaded, the name of the keyring,
308 and the container name, e.g. ('chrome', ) or
309 ('vivaldi', 'default', 'BASICTEXT') or ('firefox', 'default', None, 'Meta')
f81c62a6 310 legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
311 support RFC 5746 secure renegotiation
f59f5ef8 312 nocheckcertificate: Do not verify SSL certificates
bb58c9ed 313 client_certificate: Path to client certificate file in PEM format. May include the private key
314 client_certificate_key: Path to private key file for client certificate
315 client_certificate_password: Password for client certificate private key, if encrypted.
316 If not provided and the key is encrypted, yt-dlp will ask interactively
7e8c0af0 317 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
c6e07cf1 318 (Only supported by some extractors)
8300774c 319 enable_file_urls: Enable file:// URLs. This is disabled by default for security reasons.
8b7539d2 320 http_headers: A dictionary of custom headers to be used for all requests
a1ee09e8 321 proxy: URL of the proxy server to use
38cce791 322 geo_verification_proxy: URL of the proxy to use for IP address verification
504f20dd 323 on geo-restricted sites.
e344693b 324 socket_timeout: Time to wait for unresponsive hosts, in seconds
0783b09b
PH
325 bidi_workaround: Work around buggy terminals without bidirectional text
326 support, using fribidi
a0ddb8a2 327 debug_printtraffic:Print out sent and received HTTP traffic
04b4d394
PH
328 default_search: Prepend this string if an input url is not valid.
329 'auto' for elaborate guessing
62fec3b2 330 encoding: Use this encoding instead of the system-specified.
134c913c 331 extract_flat: Whether to resolve and process url_results further
332 * False: Always process (default)
333 * True: Never process
334 * 'in_playlist': Do not process inside playlist/multi_video
335 * 'discard': Always process, but don't return the result
336 from inside playlist/multi_video
337 * 'discard_in_playlist': Same as "discard", but only for
338 playlists (not multi_video)
f2ebc5c7 339 wait_for_video: If given, wait for scheduled streams to become available.
340 The value should be a tuple containing the range
341 (min_secs, max_secs) to wait between retries
4f026faf 342 postprocessors: A list of dictionaries, each with an entry
71b640cc 343 * key: The name of the postprocessor. See
7a5c1cfe 344 yt_dlp/postprocessor/__init__.py for a list.
bb66c247 345 * when: When to run the postprocessor. Allowed values are
346 the entries of utils.POSTPROCESS_WHEN
56d868db 347 Assumed to be 'post_process' if not given
71b640cc
PH
348 progress_hooks: A list of functions that get called on download
349 progress, with a dictionary with the entries
5cda4eda 350 * status: One of "downloading", "error", or "finished".
ee69b99a 351 Check this first and ignore unknown values.
3ba7740d 352 * info_dict: The extracted info_dict
71b640cc 353
5cda4eda 354 If status is one of "downloading", or "finished", the
ee69b99a
PH
355 following properties may also be present:
356 * filename: The final filename (always present)
5cda4eda 357 * tmpfilename: The filename we're currently writing to
71b640cc
PH
358 * downloaded_bytes: Bytes on disk
359 * total_bytes: Size of the whole file, None if unknown
5cda4eda
PH
360 * total_bytes_estimate: Guess of the eventual file size,
361 None if unavailable.
362 * elapsed: The number of seconds since download started.
71b640cc
PH
363 * eta: The estimated time in seconds, None if unknown
364 * speed: The download speed in bytes/second, None if
365 unknown
5cda4eda
PH
366 * fragment_index: The counter of the currently
367 downloaded video fragment.
368 * fragment_count: The number of fragments (= individual
369 files that will be merged)
71b640cc
PH
370
371 Progress hooks are guaranteed to be called at least once
372 (with status "finished") if the download is successful.
819e0531 373 postprocessor_hooks: A list of functions that get called on postprocessing
374 progress, with a dictionary with the entries
375 * status: One of "started", "processing", or "finished".
376 Check this first and ignore unknown values.
377 * postprocessor: Name of the postprocessor
378 * info_dict: The extracted info_dict
379
380 Postprocessor hooks are guaranteed to be called at least twice
381 (with status "started" and "finished") if the processing is successful.
fc61aff4 382 merge_output_format: "/" separated list of extensions to use when merging formats.
6b591b29 383 final_ext: Expected final extension; used to detect when the file was
59a7a13e 384 already downloaded and converted
6271f1ca
PH
385 fixup: Automatically correct known faults of the file.
386 One of:
387 - "never": do nothing
388 - "warn": only emit a warning
389 - "detect_or_warn": check whether we can do anything
62cd676c 390 about it, warn otherwise (default)
504f20dd 391 source_address: Client-side IP address to bind to.
1cf376f5 392 sleep_interval_requests: Number of seconds to sleep between requests
393 during extraction
7aa589a5
S
394 sleep_interval: Number of seconds to sleep before each download when
395 used alone or a lower bound of a range for randomized
396 sleep before each download (minimum possible number
397 of seconds to sleep) when used along with
398 max_sleep_interval.
399 max_sleep_interval:Upper bound of a range for randomized sleep before each
400 download (maximum possible number of seconds to sleep).
401 Must only be used along with sleep_interval.
402 Actual sleep time will be a random float from range
403 [sleep_interval; max_sleep_interval].
1cf376f5 404 sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
cfb56d1a
PH
405 listformats: Print an overview of available video formats and exit.
406 list_thumbnails: Print a table of all thumbnails and exit.
0a41f331 407 match_filter: A function that gets called for every video with the signature
408 (info_dict, *, incomplete: bool) -> Optional[str]
409 For backward compatibility with youtube-dl, the signature
410 (info_dict) -> Optional[str] is also allowed.
411 - If it returns a message, the video is ignored.
412 - If it returns None, the video is downloaded.
413 - If it returns utils.NO_DEFAULT, the user is interactively
414 asked whether to download the video.
fe2ce85a 415 - Raise utils.DownloadCancelled(msg) to abort remaining
416 downloads when a video is rejected.
347de493 417 match_filter_func in utils.py is one example for this.
7e5db8c9 418 no_color: Do not emit color codes in output.
0a840f58 419 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
504f20dd 420 HTTP header
0a840f58 421 geo_bypass_country:
773f291d
S
422 Two-letter ISO 3166-1 alpha-2 country code that will be used for
423 explicit geographic restriction bypassing via faking
504f20dd 424 X-Forwarded-For HTTP header
5f95927a
S
425 geo_bypass_ip_block:
426 IP range in CIDR notation that will be used similarly to
504f20dd 427 geo_bypass_country
52a8a1e1 428 external_downloader: A dictionary of protocol keys and the executable of the
429 external downloader to use for it. The allowed protocols
430 are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
431 Set the value to 'native' to use the native downloader
53ed7066 432 compat_opts: Compatibility options. See "Differences in default behavior".
3acf6d38 433 The following options do not work when used through the API:
b5ae35ee 434 filename, abort-on-error, multistreams, no-live-chat, format-sort,
dac5df5a 435 no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
e4f02757 436 Refer to __init__.py for their implementation
819e0531 437 progress_template: Dictionary of templates for progress outputs.
438 Allowed keys are 'download', 'postprocess',
439 'download-title' (console title) and 'postprocess-title'.
440 The template is mapped on a dictionary with keys 'progress' and 'info'
23326151 441 retry_sleep_functions: Dictionary of functions that take the number of attempts
442 as argument and returns the time to sleep in seconds.
443 Allowed keys are 'http', 'fragment', 'file_access'
0f446365
SW
444 download_ranges: A callback function that gets called for every video with
445 the signature (info_dict, ydl) -> Iterable[Section].
446 Only the returned sections will be downloaded.
447 Each Section is a dict with the following keys:
5ec1b6b7 448 * start_time: Start time of the section in seconds
449 * end_time: End time of the section in seconds
450 * title: Section title (Optional)
451 * index: Section number (Optional)
0f446365 452 force_keyframes_at_cuts: Re-encode the video when downloading ranges to get precise cuts
a7dc6a89 453 noprogress: Do not print the progress bar
a831c2ea 454 live_from_start: Whether to download livestream videos from the start
fe7e0c98 455
8222d8de 456 The following parameters are not used by YoutubeDL itself, they are used by
7a5c1cfe 457 the downloader (see yt_dlp/downloader/common.py):
51d9739f 458 nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
205a0654 459 max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
a7dc6a89 460 continuedl, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
59a7a13e 461 external_downloader_args, concurrent_fragment_downloads.
76b1bd67
JMF
462
463 The following options are used by the post processors:
c0b7d117
S
464 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
465 to the binary or its containing directory.
43820c03 466 postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
34488702 467 and a list of additional command-line arguments for the
468 postprocessor/executable. The dict can also have "PP+EXE" keys
469 which are used when the given exe is used by the given PP.
470 Use 'default' as the name for arguments to be passed to all PPs
471 For compatibility with youtube-dl, a single list of args
472 can also be used
e409895f 473
474 The following options are used by the extractors:
62bff2c1 475 extractor_retries: Number of times to retry for known errors
476 dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
e409895f 477 hls_split_discontinuity: Split HLS playlists to different formats at
62bff2c1 478 discontinuities such as ad breaks (default: False)
5d3a0e79 479 extractor_args: A dictionary of arguments to be passed to the extractors.
480 See "EXTRACTOR ARGUMENTS" for details.
62b58c09 481 E.g. {'youtube': {'skip': ['dash', 'hls']}}
88f23a18 482 mark_watched: Mark videos watched (even with --simulate). Only for YouTube
1890fc63 483
484 The following options are deprecated and may be removed in the future:
485
fe2ce85a 486 break_on_reject: Stop the download process when encountering a video that
487 has been filtered out.
488 - `raise DownloadCancelled(msg)` in match_filter instead
fe7866d0 489 force_generic_extractor: Force downloader to use the generic extractor
490 - Use allowed_extractors = ['generic', 'default']
7e9a6125 491 playliststart: - Use playlist_items
492 Playlist item to start at.
493 playlistend: - Use playlist_items
494 Playlist item to end at.
495 playlistreverse: - Use playlist_items
496 Download playlist items in reverse order.
1890fc63 497 forceurl: - Use forceprint
498 Force printing final URL.
499 forcetitle: - Use forceprint
500 Force printing title.
501 forceid: - Use forceprint
502 Force printing ID.
503 forcethumbnail: - Use forceprint
504 Force printing thumbnail URL.
505 forcedescription: - Use forceprint
506 Force printing description.
507 forcefilename: - Use forceprint
508 Force printing final filename.
509 forceduration: - Use forceprint
510 Force printing duration.
511 allsubtitles: - Use subtitleslangs = ['all']
512 Downloads all the subtitles of the video
513 (requires writesubtitles or writeautomaticsub)
514 include_ads: - Doesn't work
515 Download ads as well
516 call_home: - Not implemented
517 Boolean, true iff we are allowed to contact the
518 yt-dlp servers for debugging.
519 post_hooks: - Register a custom postprocessor
520 A list of functions that get called as the final step
521 for each video file, after all postprocessors have been
522 called. The filename will be passed as the only argument.
523 hls_prefer_native: - Use external_downloader = {'m3u8': 'native'} or {'m3u8': 'ffmpeg'}.
524 Use the native HLS downloader instead of ffmpeg/avconv
525 if True, otherwise use ffmpeg/avconv if False, otherwise
526 use downloader suggested by extractor if None.
527 prefer_ffmpeg: - avconv support is deprecated
528 If False, use avconv instead of ffmpeg if both are available,
529 otherwise prefer ffmpeg.
530 youtube_include_dash_manifest: - Use extractor_args
5d3a0e79 531 If True (default), DASH manifests and related
62bff2c1 532 data will be downloaded and processed by extractor.
533 You can reduce network I/O by disabling it if you don't
534 care about DASH. (only for youtube)
1890fc63 535 youtube_include_hls_manifest: - Use extractor_args
5d3a0e79 536 If True (default), HLS manifests and related
62bff2c1 537 data will be downloaded and processed by extractor.
538 You can reduce network I/O by disabling it if you don't
539 care about HLS. (only for youtube)
8222d8de
JMF
540 """
541
86e5f3ed 542 _NUMERIC_FIELDS = {
b8ed0f15 543 'width', 'height', 'asr', 'audio_channels', 'fps',
544 'tbr', 'abr', 'vbr', 'filesize', 'filesize_approx',
e6f21b3d 545 'timestamp', 'release_timestamp',
c9969434
S
546 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
547 'average_rating', 'comment_count', 'age_limit',
548 'start_time', 'end_time',
549 'chapter_number', 'season_number', 'episode_number',
550 'track_number', 'disc_number', 'release_year',
86e5f3ed 551 }
c9969434 552
6db9c4d5 553 _format_fields = {
554 # NB: Keep in sync with the docstring of extractor/common.py
a44ca5a4 555 'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note',
105bfd90 556 'width', 'height', 'aspect_ratio', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr', 'audio_channels',
d5d1df8a 557 'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx', 'rows', 'columns',
6db9c4d5 558 'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start',
559 'preference', 'language', 'language_preference', 'quality', 'source_preference',
7e68567e 560 'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'hls_aes', 'downloader_options',
6db9c4d5 561 'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'
562 }
48ee10ee 563 _format_selection_exts = {
8dc59305 564 'audio': set(MEDIA_EXTENSIONS.common_audio),
565 'video': set(MEDIA_EXTENSIONS.common_video + ('3gp', )),
566 'storyboards': set(MEDIA_EXTENSIONS.storyboards),
48ee10ee 567 }
568
3511266b 569 def __init__(self, params=None, auto_init=True):
883d4b1e 570 """Create a YoutubeDL object with the given options.
571 @param auto_init Whether to load the default extractors and print header (if verbose).
49a57e70 572 Set to 'no_verbose_header' to not print the header
883d4b1e 573 """
e9f9a10f
JMF
574 if params is None:
575 params = {}
592b7485 576 self.params = params
8b7491c8 577 self._ies = {}
56c73665 578 self._ies_instances = {}
1e43a6f7 579 self._pps = {k: [] for k in POSTPROCESS_WHEN}
b35496d8 580 self._printed_messages = set()
1cf376f5 581 self._first_webpage_request = True
ab8e5e51 582 self._post_hooks = []
933605d7 583 self._progress_hooks = []
819e0531 584 self._postprocessor_hooks = []
8222d8de
JMF
585 self._download_retcode = 0
586 self._num_downloads = 0
9c906919 587 self._num_videos = 0
592b7485 588 self._playlist_level = 0
589 self._playlist_urls = set()
a0e07d31 590 self.cache = Cache(self)
34308b30 591
591bb9d3 592 stdout = sys.stderr if self.params.get('logtostderr') else sys.stdout
593 self._out_files = Namespace(
594 out=stdout,
595 error=sys.stderr,
596 screen=sys.stderr if self.params.get('quiet') else stdout,
597 console=None if compat_os_name == 'nt' else next(
cf4f42cb 598 filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None)
591bb9d3 599 )
f0795149 600
601 try:
602 windows_enable_vt_mode()
603 except Exception as e:
604 self.write_debug(f'Failed to enable VT mode: {e}')
605
591bb9d3 606 self._allow_colors = Namespace(**{
607 type_: not self.params.get('no_color') and supports_terminal_sequences(stream)
64fa820c 608 for type_, stream in self._out_files.items_ if type_ != 'console'
591bb9d3 609 })
819e0531 610
6929b41a 611 # The code is left like this to be reused for future deprecations
612 MIN_SUPPORTED, MIN_RECOMMENDED = (3, 7), (3, 7)
eff42759 613 current_version = sys.version_info[:2]
614 if current_version < MIN_RECOMMENDED:
9d339c41 615 msg = ('Support for Python version %d.%d has been deprecated. '
24093d52 616 'See https://github.com/yt-dlp/yt-dlp/issues/3764 for more details.'
c6e07cf1 617 '\n You will no longer receive updates on this version')
eff42759 618 if current_version < MIN_SUPPORTED:
619 msg = 'Python version %d.%d is no longer supported'
5b28cef7 620 self.deprecated_feature(
eff42759 621 f'{msg}! Please update to Python %d.%d or above' % (*current_version, *MIN_RECOMMENDED))
a61f4b28 622
88acdbc2 623 if self.params.get('allow_unplayable_formats'):
624 self.report_warning(
ec11a9f4 625 f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
819e0531 626 'This is a developer option intended for debugging. \n'
627 ' If you experience any issues while using this option, '
ec11a9f4 628 f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')
88acdbc2 629
497074f0 630 if self.params.get('bidi_workaround', False):
631 try:
632 import pty
633 master, slave = pty.openpty()
634 width = shutil.get_terminal_size().columns
635 width_args = [] if width is None else ['-w', str(width)]
636 sp_kwargs = {'stdin': subprocess.PIPE, 'stdout': slave, 'stderr': self._out_files.error}
637 try:
638 self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
639 except OSError:
640 self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
641 self._output_channel = os.fdopen(master, 'rb')
642 except OSError as ose:
643 if ose.errno == errno.ENOENT:
644 self.report_warning(
645 'Could not find fribidi executable, ignoring --bidi-workaround. '
646 'Make sure that fribidi is an executable file in one of the directories in your $PATH.')
647 else:
648 raise
649
650 self.params['compat_opts'] = set(self.params.get('compat_opts', ()))
651 if auto_init and auto_init != 'no_verbose_header':
652 self.print_debug_header()
653
be5df5ee
S
654 def check_deprecated(param, option, suggestion):
655 if self.params.get(param) is not None:
86e5f3ed 656 self.report_warning(f'{option} is deprecated. Use {suggestion} instead')
be5df5ee
S
657 return True
658 return False
659
660 if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
38cce791
YCH
661 if self.params.get('geo_verification_proxy') is None:
662 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
663
0d1bb027 664 check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
665 check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
53ed7066 666 check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
0d1bb027 667
49a57e70 668 for msg in self.params.get('_warnings', []):
0d1bb027 669 self.report_warning(msg)
ee8dd27a 670 for msg in self.params.get('_deprecation_warnings', []):
da4db748 671 self.deprecated_feature(msg)
0d1bb027 672
8a82af35 673 if 'list-formats' in self.params['compat_opts']:
ec11a9f4 674 self.params['listformats_table'] = False
675
b5ae35ee 676 if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
b868936c 677 # nooverwrites was unnecessarily changed to overwrites
678 # in 0c3d0f51778b153f65c21906031c2e091fcfb641
679 # This ensures compatibility with both keys
680 self.params['overwrites'] = not self.params['nooverwrites']
b5ae35ee 681 elif self.params.get('overwrites') is None:
682 self.params.pop('overwrites', None)
b868936c 683 else:
684 self.params['nooverwrites'] = not self.params['overwrites']
b9d973be 685
e4221b70 686 if self.params.get('simulate') is None and any((
687 self.params.get('list_thumbnails'),
688 self.params.get('listformats'),
689 self.params.get('listsubtitles'),
690 )):
691 self.params['simulate'] = 'list_only'
692
455a15e2 693 self.params.setdefault('forceprint', {})
694 self.params.setdefault('print_to_file', {})
bb66c247 695
696 # Compatibility with older syntax
ca30f449 697 if not isinstance(params['forceprint'], dict):
455a15e2 698 self.params['forceprint'] = {'video': params['forceprint']}
ca30f449 699
97ec5bc5 700 if auto_init:
97ec5bc5 701 self.add_default_info_extractors()
702
3089bc74
S
703 if (sys.platform != 'win32'
704 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
455a15e2 705 and not self.params.get('restrictfilenames', False)):
e9137224 706 # Unicode filesystem API will throw errors (#1474, #13027)
34308b30 707 self.report_warning(
6febd1c1 708 'Assuming --restrict-filenames since file system encoding '
1b725173 709 'cannot encode all characters. '
6febd1c1 710 'Set the LC_ALL environment variable to fix this.')
4a98cdbf 711 self.params['restrictfilenames'] = True
34308b30 712
bf1824b3 713 self._parse_outtmpl()
486dd09e 714
187986a8 715 # Creating format selector here allows us to catch syntax errors before the extraction
716 self.format_selector = (
fa9f30b8 717 self.params.get('format') if self.params.get('format') in (None, '-')
093a1710 718 else self.params['format'] if callable(self.params['format'])
187986a8 719 else self.build_format_selector(self.params['format']))
720
8b7539d2 721 # Set http_headers defaults according to std_headers
722 self.params['http_headers'] = merge_headers(std_headers, self.params.get('http_headers', {}))
723
013b50b7 724 hooks = {
725 'post_hooks': self.add_post_hook,
726 'progress_hooks': self.add_progress_hook,
727 'postprocessor_hooks': self.add_postprocessor_hook,
728 }
729 for opt, fn in hooks.items():
730 for ph in self.params.get(opt, []):
731 fn(ph)
71b640cc 732
5bfc8bee 733 for pp_def_raw in self.params.get('postprocessors', []):
734 pp_def = dict(pp_def_raw)
735 when = pp_def.pop('when', 'post_process')
736 self.add_post_processor(
f9934b96 737 get_postprocessor(pp_def.pop('key'))(self, **pp_def),
5bfc8bee 738 when=when)
739
97ec5bc5 740 self._setup_opener()
51fb4995
YCH
741 register_socks_protocols()
742
ed39cac5 743 def preload_download_archive(fn):
744 """Preload the archive, if any is specified"""
ae103564 745 archive = set()
ed39cac5 746 if fn is None:
ae103564 747 return archive
941e881e 748 elif not is_path_like(fn):
ae103564 749 return fn
750
49a57e70 751 self.write_debug(f'Loading archive file {fn!r}')
ed39cac5 752 try:
753 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
754 for line in archive_file:
ae103564 755 archive.add(line.strip())
86e5f3ed 756 except OSError as ioe:
ed39cac5 757 if ioe.errno != errno.ENOENT:
758 raise
ae103564 759 return archive
ed39cac5 760
ae103564 761 self.archive = preload_download_archive(self.params.get('download_archive'))
ed39cac5 762
7d4111ed
PH
def warn_if_short_id(self, argv):
    """Warn when an argument looks like a short YouTube ID starting with a dash.

    Such an argument (a dash followed by exactly 10 ID characters) would be
    parsed as an option, so a corrected command line that passes it after
    "--" is suggested in the warning.
    """
    suspicious = {
        pos for pos, arg in enumerate(argv)
        if re.match(r'^-[0-9A-Za-z_-]{10}$', arg)}
    if not suspicious:
        return

    kept_args = [arg for pos, arg in enumerate(argv) if pos not in suspicious]
    moved_ids = [argv[pos] for pos in sorted(suspicious)]
    suggestion = ['yt-dlp', *kept_args, '--', *moved_ids]
    self.report_warning(
        'Long argument string detected. '
        'Use -- to separate parameters and URLs, like this:\n%s' %
        args_to_str(suggestion))
778
8222d8de
JMF
def add_info_extractor(self, ie):
    """Add an InfoExtractor (class or instance) to the end of the list."""
    key = ie.ie_key()
    self._ies[key] = ie
    if isinstance(ie, type):
        # A bare class is only registered; nothing to cache or bind yet
        return
    self._ies_instances[key] = ie
    ie.set_downloader(self)
8222d8de 786
56c73665
JMF
def get_info_extractor(self, ie_key):
    """
    Return the IE instance registered under ie_key.

    If no instance has been cached yet, a new one is created from the
    extractor class, added to the extractor list and returned.
    """
    cached = self._ies_instances.get(ie_key)
    if cached is not None:
        return cached
    # Inside the method body this name resolves to the module-level helper
    # imported from .extractor, not to this method
    created = get_info_extractor(ie_key)()
    self.add_info_extractor(created)
    return created
798
023fa8c4
JMF
def add_default_info_extractors(self):
    """
    Add the InfoExtractors selected by the 'allowed_extractors' parameter
    (by default, every enabled extractor) to the end of the list
    """
    all_ies = {}
    for ie_cls in gen_extractor_classes():
        all_ies[ie_cls.IE_NAME.lower()] = ie_cls
    all_ies['end'] = UnsupportedURLIE()

    requested = self.params.get('allowed_extractors', ['default'])
    aliases = {
        'all': list(all_ies),
        'default': [name for name, ie in all_ies.items() if ie._ENABLED],
    }
    try:
        ie_names = orderedSet_from_options(requested, aliases, use_regex=True)
    except re.error as e:
        raise ValueError(f'Wrong regex for allowed_extractors: {e.pattern}')

    for ie_name in ie_names:
        self.add_info_extractor(all_ies[ie_name])
    self.write_debug(f'Loaded {len(ie_names)} extractors')
023fa8c4 816
def add_post_processor(self, pp, when='post_process'):
    """Append a PostProcessor to the chain that runs at stage `when`."""
    assert when in POSTPROCESS_WHEN, f'Invalid when={when}'
    stage_chain = self._pps[when]
    stage_chain.append(pp)
    pp.set_downloader(self)
822
ab8e5e51
AM
def add_post_hook(self, ph):
    """Register `ph` at the end of the list of post hooks"""
    hooks = self._post_hooks
    hooks.append(ph)
826
def add_progress_hook(self, ph):
    """Register `ph` at the end of the list of download progress hooks"""
    hooks = self._progress_hooks
    hooks.append(ph)
8ab470f1 830
def add_postprocessor_hook(self, ph):
    """Register a postprocessing progress hook.

    The hook is stored and also attached to every postprocessor that has
    already been added, whatever stage it belongs to.
    """
    self._postprocessor_hooks.append(ph)
    for registered_pp in itertools.chain.from_iterable(self._pps.values()):
        registered_pp.add_progress_hook(ph)
819e0531 837
1c088fa8 838 def _bidi_workaround(self, message):
5d681e96 839 if not hasattr(self, '_output_channel'):
1c088fa8
PH
840 return message
841
5d681e96 842 assert hasattr(self, '_output_process')
14f25df2 843 assert isinstance(message, str)
6febd1c1 844 line_count = message.count('\n') + 1
0f06bcd7 845 self._output_process.stdin.write((message + '\n').encode())
5d681e96 846 self._output_process.stdin.flush()
0f06bcd7 847 res = ''.join(self._output_channel.readline().decode()
9e1a5b84 848 for _ in range(line_count))
6febd1c1 849 return res[:-len('\n')]
1c088fa8 850
b35496d8 851 def _write_string(self, message, out=None, only_once=False):
852 if only_once:
853 if message in self._printed_messages:
854 return
855 self._printed_messages.add(message)
856 write_string(message, out=out, encoding=self.params.get('encoding'))
734f90bb 857
def to_stdout(self, message, skip_eol=False, quiet=None):
    """Print message to stdout"""
    # Both extra arguments are no longer honoured; passing them only warns
    legacy_arguments = (
        ('quiet', quiet is not None),
        ('skip_eol', skip_eol is not False),
    )
    for arg_name, was_passed in legacy_arguments:
        if was_passed:
            self.deprecation_warning(
                f'"YoutubeDL.to_stdout" no longer accepts the argument {arg_name}. '
                'Use "YoutubeDL.to_screen" instead')
    line = f'{self._bidi_workaround(message)}\n'
    self._write_string(line, self._out_files.out)
cf4f42cb 867
def to_screen(self, message, skip_eol=False, quiet=None, only_once=False):
    """Print message to screen if not in quiet mode"""
    logger = self.params.get('logger')
    if logger:
        logger.debug(message)
        return

    # An explicit `quiet` argument overrides the 'quiet' parameter
    suppressed = self.params.get('quiet') if quiet is None else quiet
    if suppressed and not self.params.get('verbose'):
        return

    line_end = '' if skip_eol else '\n'
    self._write_string(
        '%s%s' % (self._bidi_workaround(message), line_end),
        self._out_files.screen, only_once=only_once)
8222d8de 878
def to_stderr(self, message, only_once=False):
    """Print message to stderr, or hand it to the logger as an error if one is set"""
    assert isinstance(message, str)
    logger = self.params.get('logger')
    if logger:
        logger.error(message)
        return
    line = f'{self._bidi_workaround(message)}\n'
    self._write_string(line, self._out_files.error, only_once=only_once)
cf4f42cb 886
def _send_console_code(self, code):
    """Write a terminal control sequence to the console stream.

    Nothing is sent on 'nt' or when no console stream is available.
    """
    if compat_os_name != 'nt' and self._out_files.console:
        self._write_string(code, self._out_files.console)
8222d8de 891
1e5b9a95
PH
def to_console_title(self, message):
    """Set the console window title to `message` if 'consoletitle' is enabled"""
    if not self.params.get('consoletitle', False):
        return
    message = remove_terminal_sequences(message)
    if compat_os_name != 'nt':
        self._send_console_code(f'\033]0;{message}\007')
        return
    if ctypes.windll.kernel32.GetConsoleWindow():
        # c_wchar_p() might not be necessary if `message` is
        # already of type unicode()
        ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
1e5b9a95 903
def save_console_title(self):
    """Ask the terminal to remember its title; skipped unless 'consoletitle' is set or when simulating"""
    if self.params.get('consoletitle') and not self.params.get('simulate'):
        self._send_console_code('\033[22;0t')  # Save the title on stack
bdde425c
PH
908
def restore_console_title(self):
    """Ask the terminal to restore the saved title; skipped unless 'consoletitle' is set or when simulating"""
    if self.params.get('consoletitle') and not self.params.get('simulate'):
        self._send_console_code('\033[23;0t')  # Restore the title from stack
bdde425c
PH
913
914 def __enter__(self):
915 self.save_console_title()
916 return self
917
918 def __exit__(self, *args):
919 self.restore_console_title()
f89197d7 920
dca08720 921 if self.params.get('cookiefile') is not None:
1bab3437 922 self.cookiejar.save(ignore_discard=True, ignore_expires=True)
bdde425c 923
def trouble(self, message=None, tb=None, is_error=True):
    """Determine action to take when a download problem appears.

    The message (and, in verbose mode, a traceback) is printed first.
    Then, depending on whether the downloader has been configured to
    ignore download errors, either a DownloadError is raised or the
    failure is only recorded in the return code.

    @param tb          If given, is additional traceback information
    @param is_error    Whether to raise error according to ignorerrors
    """
    if message is not None:
        self.to_stderr(message)

    if self.params.get('verbose'):
        if tb is None:
            exc_type, exc_value, _ = sys.exc_info()
            if exc_type:  # if .trouble has been called from an except block
                pieces = []
                # Include the traceback of the wrapped exception, when there is one
                if hasattr(exc_value, 'exc_info') and exc_value.exc_info[0]:
                    pieces.extend(traceback.format_exception(*exc_value.exc_info))
                pieces.append(encode_compat_str(traceback.format_exc()))
                tb = ''.join(pieces)
            else:
                tb = ''.join(traceback.format_list(traceback.extract_stack()))
        if tb:
            self.to_stderr(tb)

    if not is_error:
        return
    if self.params.get('ignoreerrors'):
        self._download_retcode = 1
        return

    exc_type, exc_value, _ = sys.exc_info()
    if exc_type and hasattr(exc_value, 'exc_info') and exc_value.exc_info[0]:
        exc_info = exc_value.exc_info
    else:
        exc_info = sys.exc_info()
    raise DownloadError(message, exc_info)
957
# Style names used for each kind of highlighted text; callers in this class
# pass these values as the format argument of _format_text and its wrappers
Styles = Namespace(
    HEADERS='yellow',
    EMPHASIS='light blue',
    FILENAME='green',
    ID='green',
    DELIM='blue',
    ERROR='red',
    WARNING='yellow',
    SUPPRESS='light black',
)
ec11a9f4 968
7578d77d 969 def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
e5a998f3 970 text = str(text)
ec11a9f4 971 if test_encoding:
972 original_text = text
5c104538 973 # handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711
974 encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii'
ec11a9f4 975 text = text.encode(encoding, 'ignore').decode(encoding)
976 if fallback is not None and text != original_text:
977 text = fallback
7578d77d 978 return format_text(text, f) if allow_colors else text if fallback is None else fallback
ec11a9f4 979
591bb9d3 980 def _format_out(self, *args, **kwargs):
981 return self._format_text(self._out_files.out, self._allow_colors.out, *args, **kwargs)
982
ec11a9f4 983 def _format_screen(self, *args, **kwargs):
591bb9d3 984 return self._format_text(self._out_files.screen, self._allow_colors.screen, *args, **kwargs)
ec11a9f4 985
986 def _format_err(self, *args, **kwargs):
591bb9d3 987 return self._format_text(self._out_files.error, self._allow_colors.error, *args, **kwargs)
819e0531 988
def report_warning(self, message, only_once=False):
    '''
    Print the message to stderr, it will be prefixed with 'WARNING:'
    If stderr is a tty file the 'WARNING:' will be colored

    When a logger is configured the bare message is sent to it instead;
    otherwise the 'no_warnings' parameter suppresses the output.
    '''
    logger = self.params.get('logger')
    if logger is not None:
        logger.warning(message)
        return
    if self.params.get('no_warnings'):
        return
    prefix = self._format_err('WARNING:', self.Styles.WARNING)
    self.to_stderr(f'{prefix} {message}', only_once)
8222d8de 1000
def deprecation_warning(self, message, *, stacklevel=0):
    """Issue a deprecation warning through the module-level helper.

    The helper is given report_error as its printer with is_error=False,
    and the stack level is raised by one to account for this wrapper.
    """
    options = {
        'stacklevel': stacklevel + 1,
        'printer': self.report_error,
        'is_error': False,
    }
    deprecation_warning(message, **options)
1004
def deprecated_feature(self, message):
    """Report the use of a deprecated feature.

    When a logger is configured the message is sent to it as a warning
    prefixed with 'Deprecated Feature:'. Otherwise it is printed once to
    stderr with the prefix formatted in the error style.

    Previously the logger branch fell through to to_stderr as well, so the
    same notice was logged a second time at error level.
    """
    logger = self.params.get('logger')
    if logger is not None:
        logger.warning(f'Deprecated Feature: {message}')
        return
    self.to_stderr(f'{self._format_err("Deprecated Feature:", self.Styles.ERROR)} {message}', True)
ee8dd27a 1009
def report_error(self, message, *args, **kwargs):
    '''
    Do the same as trouble, but prefixes the message with 'ERROR:', colored
    in red if stderr is a tty file. Extra arguments are passed on to trouble.
    '''
    prefix = self._format_err('ERROR:', self.Styles.ERROR)
    self.trouble(f'{prefix} {message}', *args, **kwargs)
8222d8de 1016
def write_debug(self, message, only_once=False):
    '''Log debug message or Print message to stderr (verbose mode only)'''
    if not self.params.get('verbose', False):
        return
    line = f'[debug] {message}'
    logger = self.params.get('logger')
    if logger:
        logger.debug(line)
        return
    self.to_stderr(line, only_once)
0760b0a7 1026
8222d8de
JMF
def report_file_already_downloaded(self, file_name):
    """Report file has already been fully downloaded.

    If printing the name raises UnicodeEncodeError, a message without the
    file name is printed instead.
    """
    generic_notice = '[download] The file has already been downloaded'
    try:
        self.to_screen('[download] %s has already been downloaded' % file_name)
    except UnicodeEncodeError:
        self.to_screen(generic_notice)
8222d8de 1033
def report_file_delete(self, file_name):
    """Report that existing file will be deleted.

    If printing the name raises UnicodeEncodeError, a message without the
    file name is printed instead.
    """
    generic_notice = 'Deleting existing file'
    try:
        self.to_screen('Deleting existing file %s' % file_name)
    except UnicodeEncodeError:
        self.to_screen(generic_notice)
0c3d0f51 1040
def raise_no_formats(self, info, forced=False, *, msg=None):
    """Raise an ExtractorError, or only warn, about a video without formats.

    A warning is issued instead of the error when the
    'ignore_no_formats_error' parameter is set and `forced` is false.
    Without an explicit `msg`, the message depends on info['_has_drm'].
    """
    has_drm = info.get('_has_drm')
    ignored = self.params.get('ignore_no_formats_error')
    expected = bool(msg)
    if not msg:
        msg = 'This video is DRM protected' if has_drm else 'No video formats found!'

    if ignored and not forced:
        self.report_warning(msg)
        return
    raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
                         expected=has_drm or ignored or expected)
1050
def parse_outtmpl(self):
    """Deprecated: normalize the 'outtmpl' parameter and return it"""
    self.deprecation_warning('"YoutubeDL.parse_outtmpl" is deprecated and may be removed in a future version')
    self._parse_outtmpl()
    templates = self.params['outtmpl']
    return templates
1055
def _parse_outtmpl(self):
    """Normalize the 'outtmpl' parameter into a dict with all default templates filled in.

    A non-dict value becomes the 'default' template. Every key of
    DEFAULT_OUTTMPL that is missing or None receives the default template.
    """
    if self.params.get('restrictfilenames'):  # Remove spaces in the default template
        def sanitize(template):
            return template.replace(' - ', ' ').replace(' ', '-')
    else:
        sanitize = IDENTITY

    outtmpl = self.params.setdefault('outtmpl', {})
    if not isinstance(outtmpl, dict):
        outtmpl = {'default': outtmpl}
        self.params['outtmpl'] = outtmpl
    for tmpl_key, default_template in DEFAULT_OUTTMPL.items():
        if outtmpl.get(tmpl_key) is None:
            outtmpl[tmpl_key] = sanitize(default_template)
de6000d9 1065
def get_output_path(self, dir_type='', filename=None):
    """Join the 'home' path, the path configured for dir_type and filename, and sanitize the result"""
    paths = self.params.get('paths', {})
    assert isinstance(paths, dict), '"paths" parameter must be a dictionary'
    home_dir = expand_path(paths.get('home', '').strip())
    type_dir = expand_path(paths.get(dir_type, '').strip()) if dir_type else ''
    joined = os.path.join(home_dir, type_dir, filename or '')
    return sanitize_path(joined, force=self.params.get('windowsfilenames'))
1074
@staticmethod
def _outtmpl_expandpath(outtmpl):
    """Run expand_path on the template while keeping '%%' and '$$' intact."""
    # expand_path translates '%%' into '%' and '$$' into '$'
    # correspondingly that is not what we want since we need to keep
    # '%%' intact for template dict substitution step. Working around
    # with boundary-alike separator hack.
    marker = ''.join(random.choices(ascii_letters, k=32))
    for char in ('%', '$'):
        outtmpl = outtmpl.replace(char * 2, f'{char}{marker}{char}')

    # outtmpl should be expand_path'ed before template dict substitution
    # because meta fields may contain env variables we don't want to
    # be expanded. E.g. for outtmpl "%(title)s.%(ext)s" and
    # title "Hello $PATH", we don't want `$PATH` to be expanded.
    expanded = expand_path(outtmpl)
    return expanded.replace(marker, '')
1089
@staticmethod
def escape_outtmpl(outtmpl):
    ''' Escape any remaining strings like %s, %abc% etc. '''
    def add_percent(mobj):
        # Matches that carry a key are kept as they are; the rest get an extra '%'
        lead = '' if mobj.group('has_key') else '%'
        return lead + mobj.group(0)

    pattern = STR_FORMAT_RE_TMPL.format('', '(?![%(\0])')
    return re.sub(pattern, add_percent, outtmpl)
1097
@classmethod
def validate_outtmpl(cls, outtmpl):
    ''' @return None or Exception object '''
    def as_string_conversion(mobj):
        # Swap the custom conversion character for a plain 's'
        return f'{mobj.group(0)[:-1]}s'

    custom_types_re = STR_FORMAT_RE_TMPL.format('[^)]*', '[ljhqBUDS]')
    expanded = cls._outtmpl_expandpath(outtmpl)
    normalized = re.sub(custom_types_re, as_string_conversion, expanded)
    try:
        cls.escape_outtmpl(normalized) % collections.defaultdict(int)
    except ValueError as err:
        return err
    return None
1110
03b4de72 1111 @staticmethod
1112 def _copy_infodict(info_dict):
1113 info_dict = dict(info_dict)
09b49e1f 1114 info_dict.pop('__postprocessors', None)
415f8d51 1115 info_dict.pop('__pending_error', None)
03b4de72 1116 return info_dict
1117
def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
    """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
    @param sanitize    Whether to sanitize the output as a filename.
                       For backward compatibility, a function can also be passed
    @return            A tuple (rewritten template, dict of values) where each
                       keyed field of the template has been given a generated
                       key that maps to its computed value in the dict
    """

    # NOTE: this is done before copying, so 'epoch' is stored in the caller's dict
    info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

    info_dict = self._copy_infodict(info_dict)
    info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
        formatSeconds(info_dict['duration'], '-' if sanitize else ':')
        if info_dict.get('duration', None) is not None
        else None)
    info_dict['autonumber'] = int(self.params.get('autonumber_start', 1) - 1 + self._num_downloads)
    info_dict['video_autonumber'] = self._num_videos
    if info_dict.get('resolution') is None:
        info_dict['resolution'] = self.format_resolution(info_dict, default=None)

    # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
    # of %(field)s to %(field)0Nd for backward compatibility
    field_size_compat_map = {
        'playlist_index': number_of_digits(info_dict.get('__last_playlist_index') or 0),
        'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
        'autonumber': self.params.get('autonumber_size') or 5,
    }

    # Filled by create_key: generated key -> value to substitute
    TMPL_DICT = {}
    EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljhqBUDS]'))
    MATH_FUNCTIONS = {
        '+': float.__add__,
        '-': float.__sub__,
    }
    # Field is of the form key1.key2...
    # where keys (except first) can be string, int, slice or "{field, ...}"
    FIELD_INNER_RE = r'(?:\w+|%(num)s|%(num)s?(?::%(num)s?){1,2})' % {'num': r'(?:-?\d+)'}
    FIELD_RE = r'\w*(?:\.(?:%(inner)s|{%(field)s(?:,%(field)s)*}))*' % {
        'inner': FIELD_INNER_RE,
        'field': rf'\w*(?:\.{FIELD_INNER_RE})*'
    }
    MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})'
    MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
    INTERNAL_FORMAT_RE = re.compile(rf'''(?x)
        (?P<negate>-)?
        (?P<fields>{FIELD_RE})
        (?P<maths>(?:{MATH_OPERATORS_RE}{MATH_FIELD_RE})*)
        (?:>(?P<strf_format>.+?))?
        (?P<remaining>
            (?P<alternate>(?<!\\),[^|&)]+)?
            (?:&(?P<replacement>.*?))?
            (?:\|(?P<default>.*?))?
        )$''')

    def _traverse_infodict(fields):
        # Split the dotted path, keeping each "{...}" group as a single element
        fields = [f for x in re.split(r'\.({.+?})\.?', fields)
                  for f in ([x] if x.startswith('{') else x.split('.'))]
        # Drop an empty first and/or last element left over by the split
        for i in (0, -1):
            if fields and not fields[i]:
                fields.pop(i)

        # Turn each "{a,b.c}" group into a dict of name -> path
        for i, f in enumerate(fields):
            if not f.startswith('{'):
                continue
            assert f.endswith('}'), f'No closing brace for {f} in {fields}'
            fields[i] = {k: k.split('.') for k in f[1:-1].split(',')}

        return traverse_obj(info_dict, fields, is_user_input=True, traverse_string=True)

    def get_value(mdict):
        # Object traversal
        value = _traverse_infodict(mdict['fields'])
        # Negative
        if mdict['negate']:
            value = float_or_none(value)
            if value is not None:
                value *= -1
        # Do maths
        offset_key = mdict['maths']
        if offset_key:
            value = float_or_none(value)
            # NOTE: this local shadows the imported `operator` module inside get_value
            operator = None
            # Consume the expression alternately as operator, operand, operator, ...
            while offset_key:
                item = re.match(
                    MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                    offset_key).group(0)
                offset_key = offset_key[len(item):]
                if operator is None:
                    operator = MATH_FUNCTIONS[item]
                    continue
                item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                offset = float_or_none(item)
                if offset is None:
                    # Not a number literal, so look the operand up as a field
                    offset = float_or_none(_traverse_infodict(item))
                try:
                    value = operator(value, multiplier * offset)
                except (TypeError, ZeroDivisionError):
                    return None
                operator = None
        # Datetime formatting
        if mdict['strf_format']:
            value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

        # XXX: Workaround for https://github.com/yt-dlp/yt-dlp/issues/4485
        if sanitize and value == '':
            value = None
        return value

    # Placeholder used when a field has no value and no default was given
    na = self.params.get('outtmpl_na_placeholder', 'NA')

    def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
        return sanitize_filename(str(value), restricted=restricted, is_id=(
            bool(re.search(r'(^|[_.])id(\.|$)', key))
            if 'filename-sanitization' in self.params['compat_opts']
            else NO_DEFAULT))

    # A callable `sanitize` is used as the sanitizer itself; from here on
    # `sanitize` is only a flag
    sanitizer = sanitize if callable(sanitize) else filename_sanitizer
    sanitize = bool(sanitize)

    def _dumpjson_default(obj):
        if isinstance(obj, (set, LazyList)):
            return list(obj)
        return repr(obj)

    def create_key(outer_mobj):
        if not outer_mobj.group('has_key'):
            return outer_mobj.group(0)
        key = outer_mobj.group('key')
        mobj = re.match(INTERNAL_FORMAT_RE, key)
        initial_field = mobj.group('fields') if mobj else ''
        value, replacement, default = None, None, na
        # Try each comma-separated alternate until one yields a value
        while mobj:
            mobj = mobj.groupdict()
            default = mobj['default'] if mobj['default'] is not None else default
            value = get_value(mobj)
            replacement = mobj['replacement']
            if value is None and mobj['alternate']:
                mobj = re.match(INTERNAL_FORMAT_RE, mobj['remaining'][1:])
            else:
                break

        fmt = outer_mobj.group('format')
        if fmt == 's' and value is not None and key in field_size_compat_map.keys():
            fmt = f'0{field_size_compat_map[key]:d}d'

        value = default if value is None else value if replacement is None else replacement

        flags = outer_mobj.group('conversion') or ''
        str_fmt = f'{fmt[:-1]}s'
        if fmt[-1] == 'l':  # list
            delim = '\n' if '#' in flags else ', '
            value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
        elif fmt[-1] == 'j':  # json
            value, fmt = json.dumps(
                value, default=_dumpjson_default,
                indent=4 if '#' in flags else None, ensure_ascii='+' not in flags), str_fmt
        elif fmt[-1] == 'h':  # html
            value, fmt = escapeHTML(str(value)), str_fmt
        elif fmt[-1] == 'q':  # quoted
            value = map(str, variadic(value) if '#' in flags else [value])
            value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
        elif fmt[-1] == 'B':  # bytes
            value = f'%{str_fmt}'.encode() % str(value).encode()
            value, fmt = value.decode('utf-8', 'ignore'), 's'
        elif fmt[-1] == 'U':  # unicode normalized
            value, fmt = unicodedata.normalize(
                # "+" = compatibility equivalence, "#" = NFD
                'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
                value), str_fmt
        elif fmt[-1] == 'D':  # decimal suffix
            num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
            value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
                                          factor=1024 if '#' in flags else 1000)
        elif fmt[-1] == 'S':  # filename sanitization
            value, fmt = filename_sanitizer(initial_field, value, restricted='#' in flags), str_fmt
        elif fmt[-1] == 'c':
            if value:
                value = str(value)[0]
            else:
                fmt = str_fmt
        elif fmt[-1] not in 'rs':  # numeric
            value = float_or_none(value)
            if value is None:
                value, fmt = default, 's'

        if sanitize:
            if fmt[-1] == 'r':
                # If value is an object, sanitize might convert it to a string
                # So we convert it to repr first
                value, fmt = repr(value), str_fmt
            if fmt[-1] in 'csr':
                value = sanitizer(initial_field, value)

        # Build the generated key from the field expression (with '%' followed
        # by NUL) and the original format, joined by NUL
        key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
        TMPL_DICT[key] = value
        return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

    return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
143db31d 1314
def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
    """Prepare the template with prepare_outtmpl, escape it and substitute the values.

    Extra arguments are forwarded to prepare_outtmpl.
    """
    template, values = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
    escaped = self.escape_outtmpl(template)
    return escaped % values
1318
5127e92a 1319 def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None):
1320 assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive'
1321 if outtmpl is None:
bf1824b3 1322 outtmpl = self.params['outtmpl'].get(tmpl_type or 'default', self.params['outtmpl']['default'])
8222d8de 1323 try:
5127e92a 1324 outtmpl = self._outtmpl_expandpath(outtmpl)
e0fd9573 1325 filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
6a0546e3 1326 if not filename:
1327 return None
15da37c7 1328
5127e92a 1329 if tmpl_type in ('', 'temp'):
6a0546e3 1330 final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')
1331 if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):
1332 filename = replace_extension(filename, ext, final_ext)
5127e92a 1333 elif tmpl_type:
6a0546e3 1334 force_ext = OUTTMPL_TYPES[tmpl_type]
1335 if force_ext:
1336 filename = replace_extension(filename, force_ext, info_dict.get('ext'))
de6000d9 1337
bdc3fd2f
U
1338 # https://github.com/blackjack4494/youtube-dlc/issues/85
1339 trim_file_name = self.params.get('trim_file_name', False)
1340 if trim_file_name:
5c22c63d 1341 no_ext, *ext = filename.rsplit('.', 2)
1342 filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')
bdc3fd2f 1343
0202b52a 1344 return filename
8222d8de 1345 except ValueError as err:
6febd1c1 1346 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
8222d8de
JMF
1347 return None
1348
def prepare_filename(self, info_dict, dir_type='', *, outtmpl=None, warn=False):
    """Generate the output filename

    @param dir_type    Template/path type to use; must be empty when outtmpl is given
    @param outtmpl     Explicit template to evaluate instead of a configured one
    @param warn        Whether to warn when the 'paths' parameter cannot be applied
    @return            The full output path; '-' unchanged; '' when no name
                       could be produced for a type other than ''/'temp';
                       otherwise whatever falsy value _prepare_filename returned
    """
    if outtmpl:
        assert not dir_type, 'outtmpl and dir_type are mutually exclusive'
        dir_type = None
    filename = self._prepare_filename(info_dict, tmpl_type=dir_type, outtmpl=outtmpl)
    if not filename and dir_type not in ('', 'temp'):
        return ''

    if warn:
        if not self.params.get('paths'):
            pass
        elif filename == '-':
            self.report_warning('--paths is ignored when outputting to stdout', only_once=True)
        # filename may be None here (empty or invalid template), and
        # os.path.isabs(None) would raise TypeError
        elif filename and os.path.isabs(filename):
            self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
    if filename == '-' or not filename:
        return filename

    return self.get_output_path(dir_type, filename)
0202b52a 1369
def _match_entry(self, info_dict, incomplete=False, silent=False):
    """Return None if the entry should be downloaded, else the reason for skipping it

    @param info_dict   Info dict of the entry under consideration
    @param incomplete  Forwarded to the "match_filter" param. Must be truthy
                       unless the entry's _type is "video"
    @param silent      If truthy, the skip reason is not printed to screen

    When a reason is found and the param matching it (break_on_existing,
    break_on_reject or match_filter) is set, the associated exception is raised.
    """
    result_type = info_dict.get('_type', 'video')
    assert incomplete or result_type == 'video', 'Only video result can be considered complete'

    fallback_title = info_dict.get('id', 'entry')
    video_title = info_dict.get('title', fallback_title)

    def ask_user(cancelled):
        # Keep prompting until a recognised answer is given
        while True:
            filename = self._format_screen(self.prepare_filename(info_dict), self.Styles.FILENAME)
            reply = input(self._format_screen(
                f'Download "{filename}"? (Y/n): ', self.Styles.EMPHASIS)).lower().strip()
            if reply == 'n':
                if cancelled:
                    raise type(cancelled)(f'Skipping {video_title}')
                return f'Skipping {video_title}'
            if reply in {'y', ''}:
                return None

    def find_rejection():
        if result_type in ('playlist', 'multi_video'):
            return None
        if result_type in ('url', 'url_transparent'):
            is_single_video = try_call(
                lambda: self.get_info_extractor(info_dict['ie_key']).is_single_video(info_dict['url']))
            if not is_single_video:
                return None

        # The title can be missing when we're just evaluating the playlist
        if 'title' in info_dict:
            title = info_dict['title']
            matchtitle = self.params.get('matchtitle', False)
            if matchtitle and not re.search(matchtitle, title, re.IGNORECASE):
                return '"' + title + '" title did not match pattern "' + matchtitle + '"'
            rejecttitle = self.params.get('rejecttitle', False)
            if rejecttitle and re.search(rejecttitle, title, re.IGNORECASE):
                return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'

        date = info_dict.get('upload_date')
        if date is not None:
            date_range = self.params.get('daterange', DateRange())
            if date not in date_range:
                return f'{date_from_str(date).isoformat()} upload date is not in range {date_range}'

        view_count = info_dict.get('view_count')
        if view_count is not None:
            min_views = self.params.get('min_views')
            if min_views is not None and view_count < min_views:
                return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
            max_views = self.params.get('max_views')
            if max_views is not None and view_count > max_views:
                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)

        if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
            return 'Skipping "%s" because it is age restricted' % video_title

        match_filter = self.params.get('match_filter')
        if match_filter is None:
            return None

        cancelled = None
        try:
            try:
                verdict = match_filter(info_dict, incomplete=incomplete)
            except TypeError:
                # For backward compatibility with filters that don't accept "incomplete"
                verdict = None if incomplete else match_filter(info_dict)
        except DownloadCancelled as err:
            if err.msg is not NO_DEFAULT:
                raise
            verdict, cancelled = err.msg, err

        if verdict is NO_DEFAULT:
            return ask_user(cancelled)
        return verdict

    if self.in_download_archive(info_dict):
        reason = '%s has already been recorded in the archive' % video_title
        break_opt, break_err = 'break_on_existing', ExistingVideoReached
    else:
        try:
            reason = find_rejection()
        except DownloadCancelled as exc:
            reason, break_opt, break_err = exc.msg, 'match_filter', type(exc)
        else:
            break_opt, break_err = 'break_on_reject', RejectedVideoReached

    if reason is None:
        return None
    if not silent:
        self.to_screen('[download] ' + reason)
    if self.params.get(break_opt, False):
        raise break_err()
    return reason
fe7e0c98 1457
b6c45014
JMF
1458 @staticmethod
1459 def add_extra_info(info_dict, extra_info):
1460 '''Set the keys from extra_info in info dict if they are missing'''
1461 for key, value in extra_info.items():
1462 info_dict.setdefault(key, value)
1463
def extract_info(self, url, download=True, ie_key=None, extra_info=None,
                 process=True, force_generic_extractor=False):
    """
    Extract the information dictionary of the URL and return it

    Arguments:
    @param url          URL to extract

    Keyword arguments:
    @param download     Whether to download videos
    @param process      Whether to resolve all unresolved references (URLs, playlist items).
                        Must be True for download to work
    @param ie_key       Use only the extractor with this key

    @param extra_info   Dictionary containing the extra values to add to the info (For internal use only)
    @param force_generic_extractor  Force using the generic extractor (Deprecated; use ie_key='Generic')
    """
    extra_info = {} if extra_info is None else extra_info

    if force_generic_extractor and not ie_key:
        ie_key = 'Generic'

    if not ie_key:
        candidates = self._ies
    elif ie_key in self._ies:
        candidates = {ie_key: self._ies[ie_key]}
    else:
        candidates = {}

    for key, ie in candidates.items():
        if not ie.suitable(url):
            continue

        if not ie.working():
            self.report_warning('The program functionality for this site has been marked as broken, '
                                'and will probably not work.')

        temp_id = ie.get_temp_id(url)
        already_archived = temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': key})
        if not already_archived:
            return self.__extract_info(url, self.get_info_extractor(key), download, extra_info, process)

        self.to_screen(f'[{key}] {temp_id}: has already been recorded in the archive')
        if self.params.get('break_on_existing', False):
            raise ExistingVideoReached()
        # Nothing to extract; no error is reported in this case
        return None

    # None of the candidate extractors was suitable for the URL
    extractors_restricted = self.params.get('allowed_extractors') not in (None, ['default'])
    self.report_error(f'No suitable extractor{format_field(ie_key, None, " (%s)")} found for URL {url}',
                      tb=False if extractors_restricted else None)
a0566bbf 1512
7e88d7d7 1513 def _handle_extraction_exceptions(func):
b5ae35ee 1514 @functools.wraps(func)
a0566bbf 1515 def wrapper(self, *args, **kwargs):
6da22e7d 1516 while True:
1517 try:
1518 return func(self, *args, **kwargs)
1519 except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
8222d8de 1520 raise
6da22e7d 1521 except ReExtractInfo as e:
1522 if e.expected:
1523 self.to_screen(f'{e}; Re-extracting data')
1524 else:
1525 self.to_stderr('\r')
1526 self.report_warning(f'{e}; Re-extracting data')
1527 continue
1528 except GeoRestrictedError as e:
1529 msg = e.msg
1530 if e.countries:
1531 msg += '\nThis video is available in %s.' % ', '.join(
1532 map(ISO3166Utils.short2full, e.countries))
1533 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1534 self.report_error(msg)
1535 except ExtractorError as e: # An error we somewhat expected
1536 self.report_error(str(e), e.format_traceback())
1537 except Exception as e:
1538 if self.params.get('ignoreerrors'):
1539 self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
1540 else:
1541 raise
1542 break
a0566bbf 1543 return wrapper
1544
693f0600 1545 def _wait_for_video(self, ie_result={}):
f2ebc5c7 1546 if (not self.params.get('wait_for_video')
1547 or ie_result.get('_type', 'video') != 'video'
1548 or ie_result.get('formats') or ie_result.get('url')):
1549 return
1550
1551 format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
1552 last_msg = ''
1553
1554 def progress(msg):
1555 nonlocal last_msg
a7dc6a89 1556 full_msg = f'{msg}\n'
1557 if not self.params.get('noprogress'):
1558 full_msg = msg + ' ' * (len(last_msg) - len(msg)) + '\r'
1559 elif last_msg:
1560 return
1561 self.to_screen(full_msg, skip_eol=True)
f2ebc5c7 1562 last_msg = msg
1563
1564 min_wait, max_wait = self.params.get('wait_for_video')
1565 diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
1566 if diff is None and ie_result.get('live_status') == 'is_upcoming':
16c620bc 1567 diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0)
f2ebc5c7 1568 self.report_warning('Release time of video is not known')
693f0600 1569 elif ie_result and (diff or 0) <= 0:
f2ebc5c7 1570 self.report_warning('Video should already be available according to extracted info')
38d79fd1 1571 diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
f2ebc5c7 1572 self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1573
1574 wait_till = time.time() + diff
1575 try:
1576 while True:
1577 diff = wait_till - time.time()
1578 if diff <= 0:
1579 progress('')
1580 raise ReExtractInfo('[wait] Wait period ended', expected=True)
1581 progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1582 time.sleep(1)
1583 except KeyboardInterrupt:
1584 progress('')
1585 raise ReExtractInfo('[wait] Interrupted by user', expected=True)
1586 except BaseException as e:
1587 if not isinstance(e, ReExtractInfo):
1588 self.to_screen('')
1589 raise
1590
@_handle_extraction_exceptions
def __extract_info(self, url, ie, download, extra_info, process):
    """Run the extractor on the URL and, if requested, process its result

    Returns None if the extractor returned nothing, the raw extractor result
    if process is falsy and the return value of process_ie_result otherwise.
    """
    try:
        ie_result = ie.extract(url)
    except UserNotLive as err:
        if process and self.params.get('wait_for_video'):
            self.report_warning(err)
            self._wait_for_video()
        raise

    if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
        self.report_warning(f'Extractor {ie.IE_NAME} returned nothing{bug_reports_message()}')
        return None

    if isinstance(ie_result, list):
        # Backwards compatibility: old IE result format
        ie_result = {'_type': 'compat_list', 'entries': ie_result}

    original_url = extra_info.get('original_url')
    if original_url:
        ie_result.setdefault('original_url', original_url)
    self.add_default_extra_info(ie_result, ie, url)

    if not process:
        return ie_result
    self._wait_for_video(ie_result)
    return self.process_ie_result(ie_result, download, extra_info)
fe7e0c98 1618
def add_default_extra_info(self, ie_result, ie, url):
    """Fill in the URL and extractor related fields that ie_result lacks

    Existing values are never overwritten since everything goes through
    add_extra_info. Either of url and ie may be None, in which case the
    corresponding fields are not added.
    """
    if url is not None:
        url_fields = dict.fromkeys(('webpage_url', 'original_url'), url)
        self.add_extra_info(ie_result, url_fields)

    # Read back from ie_result since it may already have had a webpage_url of its own
    webpage_url = ie_result.get('webpage_url')
    if webpage_url:
        derived_fields = {
            'webpage_url_basename': url_basename(webpage_url),
            'webpage_url_domain': get_domain(webpage_url),
        }
        self.add_extra_info(ie_result, derived_fields)

    if ie is not None:
        extractor_fields = {
            'extractor': ie.IE_NAME,
            'extractor_key': ie.ie_key(),
        }
        self.add_extra_info(ie_result, extractor_fields)
ea38e55f 1636
def process_ie_result(self, ie_result, download=True, extra_info=None):
    """
    Take the result of the ie(may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    Returns the resolved ie_result.

    @param ie_result   Info dict returned by an extractor; its "_type"
                       (default "video") selects how it is processed
    @param download    Whether to download videos
    @param extra_info  Dictionary with extra values to add to the info
    Raises Exception if the "_type" is not recognized
    """
    if extra_info is None:
        extra_info = {}
    result_type = ie_result.get('_type', 'video')

    if result_type in ('url', 'url_transparent'):
        ie_result['url'] = sanitize_url(
            ie_result['url'], scheme='http' if self.params.get('prefer_insecure') else 'https')
        if ie_result.get('original_url') and not extra_info.get('original_url'):
            extra_info = {'original_url': ie_result['original_url'], **extra_info}

        extract_flat = self.params.get('extract_flat', False)
        if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
                or extract_flat is True):
            # The URL is not resolved; only a copy of the result is used for
            # printing and archive recording, and the original is returned as-is
            info_copy = ie_result.copy()
            ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
            if ie and not ie_result.get('id'):
                info_copy['id'] = ie.get_temp_id(ie_result['url'])
            self.add_default_extra_info(info_copy, ie, ie_result['url'])
            self.add_extra_info(info_copy, extra_info)
            info_copy, _ = self.pre_process(info_copy)
            self._fill_common_fields(info_copy, False)
            self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
            self._raise_pending_errors(info_copy)
            if self.params.get('force_write_download_archive', False):
                self.record_download_archive(info_copy)
            return ie_result

    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        ie_result = self.process_video_result(ie_result, download=download)
        self._raise_pending_errors(ie_result)
        additional_urls = (ie_result or {}).get('additional_urls')
        if additional_urls:
            # TODO: Improve MetadataParserPP to allow setting a list
            if isinstance(additional_urls, str):
                additional_urls = [additional_urls]
            self.to_screen(
                '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
            self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
            ie_result['additional_entries'] = [
                self.extract_info(
                    url, download, extra_info=extra_info,
                    force_generic_extractor=self.params.get('force_generic_extractor'))
                for url in additional_urls
            ]
        return ie_result
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(
            ie_result['url'], download,
            ie_key=ie_result.get('ie_key'),
            extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        # extract_info may return None when ignoreerrors is enabled and
        # extraction failed with an error, don't crash and return early
        # in this case
        if not info:
            return info

        exempted_fields = {'_type', 'url', 'ie_key'}
        if not ie_result.get('section_end') and ie_result.get('section_start') is None:
            # For video clips, the id etc of the clip extractor should be used
            exempted_fields |= {'id', 'extractor', 'extractor_key'}

        new_result = info.copy()
        new_result.update(filter_dict(ie_result, lambda k, v: v is not None and k not in exempted_fields))

        # Extracted info may not be a video result (i.e.
        # info.get('_type', 'video') != video) but rather an url or
        # url_transparent. In such cases outer metadata (from ie_result)
        # should be propagated to inner one (info). For this to happen
        # _type of info should be overridden with url_transparent. This
        # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
        if new_result.get('_type') == 'url':
            new_result['_type'] = 'url_transparent'

        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    elif result_type in ('playlist', 'multi_video'):
        # Protect from infinite recursion due to recursively nested playlists
        # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
        webpage_url = ie_result.get('webpage_url')  # Playlists may not have webpage_url
        if webpage_url and webpage_url in self._playlist_urls:
            # The parentheses are required: "%" binds tighter than "or", so
            # without them the id fallback is never used
            self.to_screen(
                '[download] Skipping already downloaded playlist: %s'
                % (ie_result.get('title') or ie_result.get('id')))
            return

        self._playlist_level += 1
        self._playlist_urls.add(webpage_url)
        self._fill_common_fields(ie_result, False)
        self._sanitize_thumbnails(ie_result)
        try:
            return self.__process_playlist(ie_result, download)
        finally:
            self._playlist_level -= 1
            # The seen URLs are only relevant while inside a (nested) playlist
            if not self._playlist_level:
                self._playlist_urls.clear()
    elif result_type == 'compat_list':
        self.report_warning(
            'Extractor %s returned a compat_list result. '
            'It needs to be updated.' % ie_result.get('extractor'))

        def _fixup(r):
            self.add_extra_info(r, {
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'webpage_url_domain': get_domain(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            })
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)
1770
def _ensure_dir_exists(self, path):
    """Call make_dir for path with report_error as its second argument and return its result

    NOTE(review): presumably this creates the missing directories leading up to
    path and reports failures through report_error - confirm against make_dir
    """
    error_callback = self.report_error
    return make_dir(path, error_callback)
1773
3b603dbd 1774 @staticmethod
3bec830a 1775 def _playlist_infodict(ie_result, strict=False, **kwargs):
1776 info = {
1777 'playlist_count': ie_result.get('playlist_count'),
3b603dbd 1778 'playlist': ie_result.get('title') or ie_result.get('id'),
1779 'playlist_id': ie_result.get('id'),
1780 'playlist_title': ie_result.get('title'),
1781 'playlist_uploader': ie_result.get('uploader'),
1782 'playlist_uploader_id': ie_result.get('uploader_id'),
3b603dbd 1783 **kwargs,
1784 }
3bec830a 1785 if strict:
1786 return info
0bd5a039 1787 if ie_result.get('webpage_url'):
1788 info.update({
1789 'webpage_url': ie_result['webpage_url'],
1790 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1791 'webpage_url_domain': get_domain(ie_result['webpage_url']),
1792 })
3bec830a 1793 return {
1794 **info,
1795 'playlist_index': 0,
59d7de0d 1796 '__last_playlist_index': max(ie_result.get('requested_entries') or (0, 0)),
3bec830a 1797 'extractor': ie_result['extractor'],
3bec830a 1798 'extractor_key': ie_result['extractor_key'],
1799 }
3b603dbd 1800
def __process_playlist(self, ie_result, download):
    """Process each entry in the playlist

    @param ie_result  Info dict of type "playlist" or "multi_video". It is
                      updated in place with the resolved entries
    @param download   Forwarded to the processing of each entry

    Returns the playlist's info dict after running the "playlist" postprocessors,
    or None if the playlist was rejected by _match_entry or one of the
    playlist files (info json, description) could not be written.
    """
    assert ie_result['_type'] in ('playlist', 'multi_video')

    common_info = self._playlist_infodict(ie_result, strict=True)
    title = common_info.get('playlist') or '<Untitled>'
    # The filters are applied to the playlist itself before touching any entry
    if self._match_entry(common_info, incomplete=True) is not None:
        return
    self.to_screen(f'[download] Downloading {ie_result["_type"]}: {title}')

    all_entries = PlaylistEntries(self, ie_result)
    entries = orderedSet(all_entries.get_requested_items(), lazy=True)

    lazy = self.params.get('lazy_playlist')
    if lazy:
        # Entries are collected into resolved_entries as they get iterated over
        resolved_entries, n_entries = [], 'N/A'
        ie_result['requested_entries'], ie_result['entries'] = None, None
    else:
        # NB: entries and resolved_entries are the same list object from here on
        entries = resolved_entries = list(entries)
        n_entries = len(resolved_entries)
        ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*resolved_entries)) or ([], [])
    if not ie_result.get('playlist_count'):
        # Better to do this after potentially exhausting entries
        ie_result['playlist_count'] = all_entries.get_full_count()

    extra = self._playlist_infodict(ie_result, n_entries=int_or_none(n_entries))
    # Lookups fall back to the playlist fields in "extra" without modifying ie_result
    ie_copy = collections.ChainMap(ie_result, extra)

    _infojson_written = False
    write_playlist_files = self.params.get('allow_playlist_files', True)
    if write_playlist_files and self.params.get('list_thumbnails'):
        self.list_thumbnails(ie_result)
    if write_playlist_files and not self.params.get('simulate'):
        _infojson_written = self._write_info_json(
            'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
        # A return value of None aborts the processing of the playlist
        if _infojson_written is None:
            return
        if self._write_description('playlist', ie_result,
                                   self.prepare_filename(ie_copy, 'pl_description')) is None:
            return
        # TODO: This should be passed to ThumbnailsConvertor if necessary
        self._write_thumbnails('playlist', ie_result, self.prepare_filename(ie_copy, 'pl_thumbnail'))

    if lazy:
        if self.params.get('playlistreverse') or self.params.get('playlistrandom'):
            self.report_warning('playlistreverse and playlistrandom are not supported with lazy_playlist', only_once=True)
    elif self.params.get('playlistreverse'):
        entries.reverse()
    elif self.params.get('playlistrandom'):
        random.shuffle(entries)

    self.to_screen(f'[{ie_result["extractor"]}] Playlist {title}: Downloading {n_entries} items'
                   f'{format_field(ie_result, "playlist_count", " of %s")}')

    keep_resolved_entries = self.params.get('extract_flat') != 'discard'
    if self.params.get('extract_flat') == 'discard_in_playlist':
        keep_resolved_entries = ie_result['_type'] != 'playlist'
    if keep_resolved_entries:
        self.write_debug('The information of all playlist entries will be held in memory')

    failures = 0
    # With no limit configured, the failure count can never reach infinity
    max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
    for i, (playlist_index, entry) in enumerate(entries):
        if lazy:
            resolved_entries.append((playlist_index, entry))
        if not entry:
            continue

        entry['__x_forwarded_for_ip'] = ie_result.get('__x_forwarded_for_ip')
        if not lazy and 'playlist-index' in self.params.get('compat_opts', []):
            playlist_index = ie_result['requested_entries'][i]

        # View of the entry with the playlist fields added, used only for filtering
        entry_copy = collections.ChainMap(entry, {
            **common_info,
            'n_entries': int_or_none(n_entries),
            'playlist_index': playlist_index,
            'playlist_autonumber': i + 1,
        })

        if self._match_entry(entry_copy, incomplete=True) is not None:
            # For compatibility with youtube-dl. See https://github.com/yt-dlp/yt-dlp/issues/4369
            # NO_DEFAULT marks the entry for removal from the final result
            resolved_entries[i] = (playlist_index, NO_DEFAULT)
            continue

        self.to_screen('[download] Downloading item %s of %s' % (
            self._format_screen(i + 1, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))

        entry_result = self.__process_iterable_entry(entry, download, collections.ChainMap({
            'playlist_index': playlist_index,
            'playlist_autonumber': i + 1,
        }, extra))
        # A falsy result is counted as a failed extraction
        if not entry_result:
            failures += 1
        if failures >= max_failures:
            self.report_error(
                f'Skipping the remaining entries in playlist "{title}" since {failures} items failed extraction')
            break
        if keep_resolved_entries:
            resolved_entries[i] = (playlist_index, entry_result)

    # Update with processed data
    ie_result['entries'] = [e for _, e in resolved_entries if e is not NO_DEFAULT]
    ie_result['requested_entries'] = [i for i, e in resolved_entries if e is not NO_DEFAULT]
    if ie_result['requested_entries'] == try_call(lambda: list(range(1, ie_result['playlist_count'] + 1))):
        # Do not set for full playlist
        ie_result.pop('requested_entries')

    # Write the updated info to json
    # (only if the info json was actually written above; "is True" excludes other truthy values)
    if _infojson_written is True and self._write_info_json(
            'updated playlist', ie_result,
            self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
        return

    ie_result = self.run_all_pps('playlist', ie_result)
    self.to_screen(f'[download] Finished downloading playlist: {title}')
    return ie_result
1917
@_handle_extraction_exceptions
def __process_iterable_entry(self, entry, download, extra_info):
    """Process a single playlist entry through process_ie_result

    This exists as a separate method so that the entry is processed under
    the _handle_extraction_exceptions decorator.
    """
    processed = self.process_ie_result(entry, download=download, extra_info=extra_info)
    return processed
1922
67134eab
JMF
1923 def _build_format_filter(self, filter_spec):
1924 " Returns a function to filter the formats according to the filter_spec "
083c9df9
PH
1925
1926 OPERATORS = {
1927 '<': operator.lt,
1928 '<=': operator.le,
1929 '>': operator.gt,
1930 '>=': operator.ge,
1931 '=': operator.eq,
1932 '!=': operator.ne,
1933 }
67134eab 1934 operator_rex = re.compile(r'''(?x)\s*
187986a8 1935 (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1936 (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1937 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
083c9df9 1938 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
187986a8 1939 m = operator_rex.fullmatch(filter_spec)
9ddb6925
S
1940 if m:
1941 try:
1942 comparison_value = int(m.group('value'))
1943 except ValueError:
1944 comparison_value = parse_filesize(m.group('value'))
1945 if comparison_value is None:
1946 comparison_value = parse_filesize(m.group('value') + 'B')
1947 if comparison_value is None:
1948 raise ValueError(
1949 'Invalid value %r in format specification %r' % (
67134eab 1950 m.group('value'), filter_spec))
9ddb6925
S
1951 op = OPERATORS[m.group('op')]
1952
083c9df9 1953 if not m:
9ddb6925
S
1954 STR_OPERATORS = {
1955 '=': operator.eq,
10d33b34
YCH
1956 '^=': lambda attr, value: attr.startswith(value),
1957 '$=': lambda attr, value: attr.endswith(value),
1958 '*=': lambda attr, value: value in attr,
1ce9a3cb 1959 '~=': lambda attr, value: value.search(attr) is not None
9ddb6925 1960 }
187986a8 1961 str_operator_rex = re.compile(r'''(?x)\s*
1962 (?P<key>[a-zA-Z0-9._-]+)\s*
1ce9a3cb
LF
1963 (?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?
1964 (?P<quote>["'])?
1965 (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
1966 (?(quote)(?P=quote))\s*
9ddb6925 1967 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
187986a8 1968 m = str_operator_rex.fullmatch(filter_spec)
9ddb6925 1969 if m:
1ce9a3cb
LF
1970 if m.group('op') == '~=':
1971 comparison_value = re.compile(m.group('value'))
1972 else:
1973 comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value'))
2cc779f4
S
1974 str_op = STR_OPERATORS[m.group('op')]
1975 if m.group('negation'):
e118a879 1976 op = lambda attr, value: not str_op(attr, value)
2cc779f4
S
1977 else:
1978 op = str_op
083c9df9 1979
9ddb6925 1980 if not m:
187986a8 1981 raise SyntaxError('Invalid filter specification %r' % filter_spec)
083c9df9
PH
1982
1983 def _filter(f):
1984 actual_value = f.get(m.group('key'))
1985 if actual_value is None:
1986 return m.group('none_inclusive')
1987 return op(actual_value, comparison_value)
67134eab
JMF
1988 return _filter
1989
9f1a1c36 1990 def _check_formats(self, formats):
1991 for f in formats:
1992 self.to_screen('[info] Testing format %s' % f['format_id'])
75689fe5 1993 path = self.get_output_path('temp')
1994 if not self._ensure_dir_exists(f'{path}/'):
1995 continue
1996 temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
9f1a1c36 1997 temp_file.close()
1998 try:
1999 success, _ = self.dl(temp_file.name, f, test=True)
8a82af35 2000 except (DownloadError, OSError, ValueError) + network_exceptions:
9f1a1c36 2001 success = False
2002 finally:
2003 if os.path.exists(temp_file.name):
2004 try:
2005 os.remove(temp_file.name)
2006 except OSError:
2007 self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
2008 if success:
2009 yield f
2010 else:
2011 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
2012
0017d9ad 2013 def _default_format_spec(self, info_dict, download=True):
0017d9ad 2014
af0f7428
S
2015 def can_merge():
2016 merger = FFmpegMergerPP(self)
2017 return merger.available and merger.can_merge()
2018
91ebc640 2019 prefer_best = (
b7b04c78 2020 not self.params.get('simulate')
91ebc640 2021 and download
2022 and (
2023 not can_merge()
21633673 2024 or info_dict.get('is_live') and not self.params.get('live_from_start')
bf1824b3 2025 or self.params['outtmpl']['default'] == '-'))
53ed7066 2026 compat = (
2027 prefer_best
2028 or self.params.get('allow_multiple_audio_streams', False)
8a82af35 2029 or 'format-spec' in self.params['compat_opts'])
91ebc640 2030
2031 return (
53ed7066 2032 'best/bestvideo+bestaudio' if prefer_best
2033 else 'bestvideo*+bestaudio/best' if not compat
91ebc640 2034 else 'bestvideo+bestaudio/best')
0017d9ad 2035
67134eab
JMF
2036 def build_format_selector(self, format_spec):
2037 def syntax_error(note, start):
2038 message = (
2039 'Invalid format specification: '
86e5f3ed 2040 '{}\n\t{}\n\t{}^'.format(note, format_spec, ' ' * start[1]))
67134eab
JMF
2041 return SyntaxError(message)
2042
2043 PICKFIRST = 'PICKFIRST'
2044 MERGE = 'MERGE'
2045 SINGLE = 'SINGLE'
0130afb7 2046 GROUP = 'GROUP'
67134eab
JMF
2047 FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
2048
91ebc640 2049 allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
2050 'video': self.params.get('allow_multiple_video_streams', False)}
909d24dd 2051
9f1a1c36 2052 check_formats = self.params.get('check_formats') == 'selected'
e8e73840 2053
67134eab
JMF
2054 def _parse_filter(tokens):
2055 filter_parts = []
2056 for type, string, start, _, _ in tokens:
2057 if type == tokenize.OP and string == ']':
2058 return ''.join(filter_parts)
2059 else:
2060 filter_parts.append(string)
2061
232541df 2062 def _remove_unused_ops(tokens):
62b58c09
L
2063 # Remove operators that we don't use and join them with the surrounding strings.
2064 # E.g. 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
232541df
JMF
2065 ALLOWED_OPS = ('/', '+', ',', '(', ')')
2066 last_string, last_start, last_end, last_line = None, None, None, None
2067 for type, string, start, end, line in tokens:
2068 if type == tokenize.OP and string == '[':
2069 if last_string:
2070 yield tokenize.NAME, last_string, last_start, last_end, last_line
2071 last_string = None
2072 yield type, string, start, end, line
2073 # everything inside brackets will be handled by _parse_filter
2074 for type, string, start, end, line in tokens:
2075 yield type, string, start, end, line
2076 if type == tokenize.OP and string == ']':
2077 break
2078 elif type == tokenize.OP and string in ALLOWED_OPS:
2079 if last_string:
2080 yield tokenize.NAME, last_string, last_start, last_end, last_line
2081 last_string = None
2082 yield type, string, start, end, line
2083 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
2084 if not last_string:
2085 last_string = string
2086 last_start = start
2087 last_end = end
2088 else:
2089 last_string += string
2090 if last_string:
2091 yield tokenize.NAME, last_string, last_start, last_end, last_line
2092
def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
    """Parse a token stream into a list of FormatSelector objects.

    ``tokens`` must be an iterator of 5-tuples that also offers
    ``restore_last_token()``; that method is used to hand a terminating
    operator back to the caller that is responsible for it.

    The ``inside_*`` flags tell a recursive call which operators end it:
      * inside_merge  - stop (and hand back) at ``/`` or ``,``
      * inside_choice - stop (and hand back) at ``,``
      * inside_group  - ``)`` is consumed here instead of being handed back

    Returns the list of selectors parsed at this level.
    Raises whatever ``syntax_error`` builds when an operator is misplaced
    or not recognized.
    """
    finished = []
    current = None
    for tok_type, text, position, _, _ in tokens:
        # ENCODING is only defined in python 3.x
        if tok_type == getattr(tokenize, 'ENCODING', None):
            continue
        if tok_type in (tokenize.NAME, tokenize.NUMBER):
            current = FormatSelector(SINGLE, text, [])
            continue
        if tok_type == tokenize.ENDMARKER:
            break
        if tok_type != tokenize.OP:
            continue

        # --- operators that terminate this parsing level ---
        if text == ')':
            if not inside_group:
                # ')' will be handled by the parentheses group
                tokens.restore_last_token()
            break
        ends_merge = inside_merge and text in ('/', ',')
        ends_choice = inside_choice and text == ','
        if ends_merge or ends_choice:
            tokens.restore_last_token()
            break

        # --- operators that combine or refine selectors ---
        if text == ',':
            if not current:
                raise syntax_error('"," must follow a format selector', position)
            finished.append(current)
            current = None
        elif text == '/':
            if not current:
                raise syntax_error('"/" must follow a format selector', position)
            preferred = current
            alternative = _parse_format_selection(tokens, inside_choice=True)
            current = FormatSelector(PICKFIRST, (preferred, alternative), [])
        elif text == '[':
            if not current:
                # A bare filter applies to the default "best" atom
                current = FormatSelector(SINGLE, 'best', [])
            current.filters.append(_parse_filter(tokens))
        elif text == '(':
            if current:
                raise syntax_error('Unexpected "("', position)
            grouped = _parse_format_selection(tokens, inside_group=True)
            current = FormatSelector(GROUP, grouped, [])
        elif text == '+':
            if not current:
                raise syntax_error('Unexpected "+"', position)
            left = current
            right = _parse_format_selection(tokens, inside_merge=True)
            if not right:
                raise syntax_error('Expected a selector', position)
            current = FormatSelector(MERGE, (left, right), [])
        else:
            raise syntax_error(f'Operator not recognized: "{text}"', position)

    if current:
        finished.append(current)
    return finished
2150
def _merge(formats_pair):
    """Combine two format dicts into one "merged" format dict.

    Each member of ``formats_pair`` may itself be a merged format, in which
    case its ``requested_formats`` are used instead of the dict itself.

    When ``allow_multiple_streams`` forbids several video and/or audio
    streams, formats that would add a second stream of a forbidden kind are
    dropped, as are formats whose ``acodec`` and ``vcodec`` are both 'none'.

    Returns the single remaining format unchanged if only one is left,
    otherwise a new dict describing the merged result.
    """
    format_1, format_2 = formats_pair

    formats_info = []
    formats_info.extend(format_1.get('requested_formats', (format_1,)))
    formats_info.extend(format_2.get('requested_formats', (format_2,)))

    if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
        # Build a new list instead of popping while enumerating: removing
        # items during iteration makes the loop skip the following element.
        already_taken = {'video': False, 'audio': False}
        kept = []
        for fmt_info in formats_info:
            if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
                continue
            restricted = [
                kind for kind in ('audio', 'video')
                if not allow_multiple_streams[kind] and fmt_info.get(kind[0] + 'codec') != 'none']
            if any(already_taken[kind] for kind in restricted):
                continue
            # Only a format that is actually kept may occupy a stream slot
            for kind in restricted:
                already_taken[kind] = True
            kept.append(fmt_info)
        formats_info = kept

    if len(formats_info) == 1:
        return formats_info[0]

    video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
    audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']

    the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
    the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None

    output_ext = get_compatible_ext(
        vcodecs=[f.get('vcodec') for f in video_fmts],
        acodecs=[f.get('acodec') for f in audio_fmts],
        vexts=[f['ext'] for f in video_fmts],
        aexts=[f['ext'] for f in audio_fmts],
        preferences=(try_call(lambda: self.params['merge_output_format'].split('/'))
                     or self.params.get('prefer_free_formats') and ('webm', 'mkv')))

    # Truthy values of the given key(s), one per constituent format
    filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))

    new_dict = {
        'requested_formats': formats_info,
        'format': '+'.join(filtered('format')),
        'format_id': '+'.join(filtered('format_id')),
        'ext': output_ext,
        'protocol': '+'.join(map(determine_protocol, formats_info)),
        'language': '+'.join(orderedSet(filtered('language'))) or None,
        'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
        'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
        'tbr': sum(filtered('tbr', 'vbr', 'abr')),
    }

    # Stream-specific fields are only meaningful when exactly one stream
    # of that kind takes part in the merge
    if the_only_video:
        new_dict.update({
            'width': the_only_video.get('width'),
            'height': the_only_video.get('height'),
            'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
            'fps': the_only_video.get('fps'),
            'dynamic_range': the_only_video.get('dynamic_range'),
            'vcodec': the_only_video.get('vcodec'),
            'vbr': the_only_video.get('vbr'),
            'stretched_ratio': the_only_video.get('stretched_ratio'),
            'aspect_ratio': the_only_video.get('aspect_ratio'),
        })

    if the_only_audio:
        new_dict.update({
            'acodec': the_only_audio.get('acodec'),
            'abr': the_only_audio.get('abr'),
            'asr': the_only_audio.get('asr'),
            'audio_channels': the_only_audio.get('audio_channels')
        })

    return new_dict
2224
def _check_formats(formats):
    """Yield ``formats``, routed through ``self._check_formats`` when the
    enclosing ``check_formats`` flag is set and passed through untouched
    otherwise."""
    source = self._check_formats(formats) if check_formats else formats
    yield from source
e8e73840 2230
def _build_selector_function(selector):
    """Turn a parsed selector (or a list of them) into a callable.

    The returned callable takes a context dict and returns an iterable of
    selected formats. It reads ``ctx['formats']`` and, for some atoms,
    ``ctx['incomplete_formats']`` and ``ctx['has_merged_format']``.
    For a single selector, its ``filters`` are applied to a copy of the
    context before the selection itself runs.
    """
    if isinstance(selector, list):  # ,
        branch_functions = [_build_selector_function(s) for s in selector]

        def selector_function(ctx):
            for branch in branch_functions:
                yield from branch(ctx)
        return selector_function

    if selector.type == GROUP:  # ()
        selector_function = _build_selector_function(selector.selector)

    elif selector.type == PICKFIRST:  # /
        alternatives = [_build_selector_function(s) for s in selector.selector]

        def selector_function(ctx):
            # The first alternative that yields anything wins
            for alternative in alternatives:
                picked_formats = list(alternative(ctx))
                if picked_formats:
                    return picked_formats
            return []

    elif selector.type == MERGE:  # +
        left_function, right_function = map(_build_selector_function, selector.selector)

        def selector_function(ctx):
            for pair in itertools.product(left_function(ctx), right_function(ctx)):
                yield _merge(pair)

    elif selector.type == SINGLE:  # atom
        format_spec = selector.selector or 'best'

        # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
        if format_spec == 'all':
            def selector_function(ctx):
                yield from _check_formats(ctx['formats'][::-1])
        elif format_spec == 'mergeall':
            def selector_function(ctx):
                usable = list(_check_formats(
                    f for f in ctx['formats'] if f.get('vcodec') != 'none' or f.get('acodec') != 'none'))
                if not usable:
                    return
                merged_format = usable[-1]
                for f in usable[-2::-1]:
                    merged_format = _merge((merged_format, f))
                yield merged_format

        else:
            format_fallback, seperate_fallback, format_reverse, format_idx = False, None, True, 1
            mobj = re.match(
                r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
                format_spec)
            if mobj is not None:
                format_idx = int_or_none(mobj.group('n'), default=1)
                format_reverse = mobj.group('bw')[0] == 'b'
                format_type = (mobj.group('type') or [None])[0]
                not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
                format_modified = mobj.group('mod') is not None

                format_fallback = not format_type and not format_modified  # for b, w
                if format_type and format_modified:  # bv*, ba*, wv*, wa*
                    _filter_f = lambda f: f.get('%scodec' % format_type) != 'none'
                elif format_type:  # bv, ba, wv, wa
                    _filter_f = lambda f: f.get('%scodec' % not_format_type) == 'none'
                elif not format_modified:  # b, w
                    _filter_f = lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none'
                else:  # b*, w*
                    _filter_f = lambda f: True
                # Formats carrying neither audio nor video never match best/worst
                filter_f = lambda f: _filter_f(f) and (
                    f.get('vcodec') != 'none' or f.get('acodec') != 'none')
            elif format_spec in self._format_selection_exts['audio']:
                filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
            elif format_spec in self._format_selection_exts['video']:
                filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
                seperate_fallback = lambda f: f.get('ext') == format_spec and f.get('vcodec') != 'none'
            elif format_spec in self._format_selection_exts['storyboards']:
                filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
            else:
                filter_f = lambda f: f.get('format_id') == format_spec  # id

            def selector_function(ctx):
                formats = list(ctx['formats'])
                matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                if not matches:
                    if format_fallback and ctx['incomplete_formats']:
                        # for extractors with incomplete formats (audio only (soundcloud)
                        # or video only (imgur)) best/worst will fallback to
                        # best/worst {video,audio}-only format
                        matches = formats
                    elif seperate_fallback and not ctx['has_merged_format']:
                        # for compatibility with youtube-dl when there is no pre-merged format
                        matches = list(filter(seperate_fallback, formats))
                step = -1 if format_reverse else 1
                matches = LazyList(_check_formats(matches[::step]))
                try:
                    yield matches[format_idx - 1]
                except LazyList.IndexError:
                    return

    filters = [self._build_format_filter(f) for f in selector.filters]

    def final_selector(ctx):
        # Work on a shallow copy so the caller's context keeps its full format list
        ctx_copy = dict(ctx)
        for _filter in filters:
            ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
        return selector_function(ctx_copy)
    return final_selector
083c9df9 2338
0f06bcd7 2339 stream = io.BytesIO(format_spec.encode())
0130afb7 2340 try:
f9934b96 2341 tokens = list(_remove_unused_ops(tokenize.tokenize(stream.readline)))
0130afb7
JMF
2342 except tokenize.TokenError:
2343 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2344
class TokenIterator:
    """Iterator over an indexable token sequence that can step back.

    ``counter`` is the index of the token the next call will return;
    ``restore_last_token`` moves it one position back so the most recently
    returned token is produced again.
    """

    def __init__(self, tokens):
        self.tokens = tokens
        self.counter = 0

    def __iter__(self):
        return self

    def __next__(self):
        position = self.counter
        if position >= len(self.tokens):
            raise StopIteration()
        token = self.tokens[position]
        self.counter = position + 1
        return token

    next = __next__

    def restore_last_token(self):
        self.counter -= 1
2364
2365 parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
67134eab 2366 return _build_selector_function(parsed_selector)
a9c58ad9 2367
def _calc_headers(self, info_dict):
    """Compute the HTTP headers for ``info_dict['url']``.

    Starts from ``self.params['http_headers']`` merged with the entry's own
    ``http_headers``, adds a ``Cookie`` header when ``_calc_cookies`` returns
    one, and adds ``X-Forwarded-For`` from ``__x_forwarded_for_ip`` unless
    that header is already present.
    """
    headers = merge_headers(self.params['http_headers'], info_dict.get('http_headers') or {})

    cookie_header = self._calc_cookies(info_dict['url'])
    if cookie_header:
        headers['Cookie'] = cookie_header

    forwarded_ip = info_dict.get('__x_forwarded_for_ip')
    if forwarded_ip and 'X-Forwarded-For' not in headers:
        headers['X-Forwarded-For'] = forwarded_ip

    return headers
2381
def _calc_cookies(self, url):
    """Return the ``Cookie`` header value that ``self.cookiejar`` attaches to
    a request for ``url`` (whatever ``get_header('Cookie')`` gives back)."""
    probe_request = sanitized_Request(url)
    self.cookiejar.add_cookie_header(probe_request)
    return probe_request.get_header('Cookie')
e5660ee6 2386
def _sort_thumbnails(self, thumbnails):
    """Sort ``thumbnails`` in place, ascending by preference, width, height,
    id and finally url. Missing (None) values sort as -1 for the numeric
    fields and as '' for the id."""
    def value_or(thumb, field, fallback):
        value = thumb.get(field)
        return fallback if value is None else value

    def sort_key(thumb):
        return (
            value_or(thumb, 'preference', -1),
            value_or(thumb, 'width', -1),
            value_or(thumb, 'height', -1),
            value_or(thumb, 'id', ''),
            thumb.get('url'),
        )

    thumbnails.sort(key=sort_key)
2394
def _sanitize_thumbnails(self, info_dict):
    """Normalize ``info_dict['thumbnails']`` in place.

    A lone ``thumbnail`` URL is promoted to a one-element ``thumbnails``
    list when no list was given. The list is then sorted, and every entry
    gets an ``id`` (its index when missing), a ``resolution`` (when width and
    height are both truthy) and a sanitized ``url``. When the ``check_formats``
    param is exactly True, the list is replaced by a lazy list that tests
    each URL with a HEAD request and leaves out the unreachable ones.
    Does nothing when there are no thumbnails.
    """
    thumbs = info_dict.get('thumbnails')
    if thumbs is None:
        single = info_dict.get('thumbnail')
        if single:
            thumbs = [{'url': single}]
            info_dict['thumbnails'] = thumbs
    if not thumbs:
        return

    def check_thumbnails(candidates):
        for thumb in candidates:
            self.to_screen(f'[info] Testing thumbnail {thumb["id"]}')
            try:
                self.urlopen(HEADRequest(thumb['url']))
            except network_exceptions as err:
                self.to_screen(f'[info] Unable to connect to thumbnail {thumb["id"]} URL {thumb["url"]!r} - {err}. Skipping...')
            else:
                yield thumb

    self._sort_thumbnails(thumbs)
    for index, thumb in enumerate(thumbs):
        if thumb.get('id') is None:
            thumb['id'] = '%d' % index
        width, height = thumb.get('width'), thumb.get('height')
        if width and height:
            thumb['resolution'] = '%dx%d' % (width, height)
        thumb['url'] = sanitize_url(thumb['url'])

    must_check = self.params.get('check_formats') is True
    # Checked lazily, walking the sorted list from its end, while keeping
    # the resulting list in the original order
    info_dict['thumbnails'] = (
        LazyList(check_thumbnails(thumbs[::-1]), reverse=True) if must_check else thumbs)
bc516a3f 2426
def _fill_common_fields(self, info_dict, final=True):
    """Derive commonly used fields of ``info_dict`` in place.

    * With ``final`` set: copy ``title`` to ``fulltitle`` and generate a
      generic title when the extractor gave none.
    * ``duration_string`` from ``duration``.
    * ``upload_date`` / ``release_date`` / ``modified_date`` (``YYYYMMDD``, UTC)
      from the corresponding ``*timestamp`` field when the date is missing.
    * ``live_status`` and the ``is_live`` / ``was_live`` booleans from one another.
    * With ``final`` set: ``chapter`` / ``season`` / ``episode`` titles from the
      matching ``*_number`` field when the title is missing.
    """
    # TODO: move sanitization here
    if final:
        title = info_dict['fulltitle'] = info_dict.get('title')
        if not title:
            if title == '':
                self.write_debug('Extractor gave empty title. Creating a generic title')
            else:
                self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
            info_dict['title'] = f'{info_dict["extractor"].replace(":", "-")} video #{info_dict["id"]}'

    if info_dict.get('duration') is not None:
        info_dict['duration_string'] = formatSeconds(info_dict['duration'])

    for ts_key, date_key in (
            ('timestamp', 'upload_date'),
            ('release_timestamp', 'release_date'),
            ('modified_timestamp', 'modified_date'),
    ):
        if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
            # Working around out-of-range timestamp values (e.g. negative ones on Windows,
            # see http://bugs.python.org/issue1646728)
            with contextlib.suppress(ValueError, OverflowError, OSError):
                # Timezone-aware replacement for the deprecated utcfromtimestamp();
                # the resulting calendar date is the same
                upload_date = datetime.datetime.fromtimestamp(info_dict[ts_key], datetime.timezone.utc)
                info_dict[date_key] = upload_date.strftime('%Y%m%d')

    live_keys = ('is_live', 'was_live')
    live_status = info_dict.get('live_status')
    if live_status is None:
        # Only the first key that is not explicitly False is considered
        for key in live_keys:
            if info_dict.get(key) is False:
                continue
            if info_dict.get(key):
                live_status = key
            break
        if all(info_dict.get(key) is False for key in live_keys):
            live_status = 'not_live'
    if live_status:
        info_dict['live_status'] = live_status
        for key in live_keys:
            if info_dict.get(key) is None:
                info_dict[key] = (live_status == key)
    if live_status == 'post_live':
        info_dict['was_live'] = True

    # Auto generate title fields corresponding to the *_number fields when missing
    # in order to always have clean titles. This is very common for TV series.
    for field in ('chapter', 'season', 'episode'):
        if final and info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
            info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2477
def _raise_pending_errors(self, info):
    """Remove ``__pending_error`` from ``info`` and, when it holds a truthy
    value, pass it to ``self.report_error`` without a traceback."""
    pending = info.pop('__pending_error', None)
    if not pending:
        return
    self.report_error(pending, tb=False)
2482
def sort_formats(self, info_dict):
    """Sort the formats of ``info_dict`` in place, using a FormatSorter built
    from the entry's ``_format_sort_fields`` (an empty list when unset)."""
    sort_fields = info_dict.get('_format_sort_fields') or []
    formats = self._get_formats(info_dict)
    sorter = FormatSorter(self, sort_fields)
    formats.sort(key=sorter.calculate_preference)
784320c9 2487
dd82ffea
JMF
def process_video_result(self, info_dict, download=True):
    """Sanitize a single video result, select its formats and process them.

    ``info_dict`` must be a video entry (``_type`` missing or 'video') and is
    modified in place. The steps, in order:
      1. validate/sanitize ``id``, numeric fields, duration and chapters;
      2. sanitize thumbnails, subtitles and fill the common derived fields;
      3. filter, sanitize, sort and de-duplicate the formats;
      4. run the pre-processors and the match filter;
      5. handle the listing options;
      6. run the format selector and, when ``download`` is true, call
         ``process_info`` once per selected format and requested time range.

    Returns the (possibly replaced) info dict, updated with the last selected
    format for backwards compatibility.
    Raises ExtractorError when ``id`` is missing/empty or when no requested
    format is available (unless ``ignore_no_formats_error`` is set), and
    MaxDownloadsReached after finishing the bookkeeping if it was hit while
    downloading.
    """
    assert info_dict.get('_type', 'video') == 'video'
    self._num_videos += 1

    if 'id' not in info_dict:
        raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
    elif not info_dict.get('id'):
        raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])

    def report_force_conversion(field, field_not, conversion):
        self.report_warning(
            '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
            % (field, field_not, conversion))

    def sanitize_string_field(info, string_field):
        field = info.get(string_field)
        if field is None or isinstance(field, str):
            return
        report_force_conversion(string_field, 'a string', 'string')
        info[string_field] = str(field)

    def sanitize_numeric_fields(info):
        for numeric_field in self._NUMERIC_FIELDS:
            field = info.get(numeric_field)
            if field is None or isinstance(field, (int, float)):
                continue
            report_force_conversion(numeric_field, 'numeric', 'int')
            info[numeric_field] = int_or_none(field)

    sanitize_string_field(info_dict, 'id')
    sanitize_numeric_fields(info_dict)
    if info_dict.get('section_end') and info_dict.get('section_start') is not None:
        info_dict['duration'] = round(info_dict['section_end'] - info_dict['section_start'], 3)
    # A zero/negative duration is dropped; the warning is only shown for a non-zero one
    if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):
        self.report_warning('"duration" field is negative, there is an error in extractor')

    chapters = info_dict.get('chapters') or []
    if chapters and chapters[0].get('start_time'):
        chapters.insert(0, {'start_time': 0})

    # Sentinel standing in for "before the first" and "after the last" chapter
    dummy_chapter = {'end_time': 0, 'start_time': info_dict.get('duration')}
    for idx, (prev, current, next_) in enumerate(zip(
            (dummy_chapter, *chapters), chapters, (*chapters[1:], dummy_chapter)), 1):
        if current.get('start_time') is None:
            current['start_time'] = prev.get('end_time')
        if not current.get('end_time'):
            current['end_time'] = next_.get('start_time')
        if not current.get('title'):
            current['title'] = f'<Untitled Chapter {idx}>'

    if 'playlist' not in info_dict:
        # It isn't part of a playlist
        info_dict['playlist'] = None
        info_dict['playlist_index'] = None

    self._sanitize_thumbnails(info_dict)

    thumbnail = info_dict.get('thumbnail')
    thumbnails = info_dict.get('thumbnails')
    if thumbnail:
        info_dict['thumbnail'] = sanitize_url(thumbnail)
    elif thumbnails:
        info_dict['thumbnail'] = thumbnails[-1]['url']

    if info_dict.get('display_id') is None and 'id' in info_dict:
        info_dict['display_id'] = info_dict['id']

    self._fill_common_fields(info_dict)

    for cc_kind in ('subtitles', 'automatic_captions'):
        cc = info_dict.get(cc_kind)
        if cc:
            for subtitle in cc.values():
                for subtitle_format in subtitle:
                    if subtitle_format.get('url'):
                        subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                    if subtitle_format.get('ext') is None:
                        subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

    automatic_captions = info_dict.get('automatic_captions')
    subtitles = info_dict.get('subtitles')

    info_dict['requested_subtitles'] = self.process_subtitles(
        info_dict['id'], subtitles, automatic_captions)

    formats = self._get_formats(info_dict)

    # Backward compatibility with InfoExtractor._sort_formats
    field_preference = (formats or [{}])[0].pop('__sort_fields', None)
    if field_preference:
        info_dict['_format_sort_fields'] = field_preference

    # or None ensures --clean-infojson removes it
    info_dict['_has_drm'] = any(f.get('has_drm') for f in formats) or None
    if not self.params.get('allow_unplayable_formats'):
        formats = [f for f in formats if not f.get('has_drm')]

    if formats and all(f.get('acodec') == f.get('vcodec') == 'none' for f in formats):
        self.report_warning(
            f'{"This video is DRM protected and " if info_dict["_has_drm"] else ""}'
            'only images are available for download. Use --list-formats to see them'.capitalize())

    get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
    if not get_from_start:
        info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
    if info_dict.get('is_live') and formats:
        formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
        if get_from_start and not formats:
            self.raise_no_formats(info_dict, msg=(
                '--live-from-start is passed, but there are no formats that can be downloaded from the start. '
                'If you want to download from the current time, use --no-live-from-start'))

    def is_wellformed(f):
        url = f.get('url')
        if not url:
            self.report_warning(
                '"url" field is missing or empty - skipping format, '
                'there is an error in extractor')
            return False
        if isinstance(url, bytes):
            sanitize_string_field(f, 'url')
        return True

    # Filter out malformed formats for better extraction robustness
    formats = list(filter(is_wellformed, formats or []))

    if not formats:
        self.raise_no_formats(info_dict)

    for format in formats:
        sanitize_string_field(format, 'format_id')
        sanitize_numeric_fields(format)
        format['url'] = sanitize_url(format['url'])
        if format.get('ext') is None:
            format['ext'] = determine_ext(format['url']).lower()
        if format.get('protocol') is None:
            format['protocol'] = determine_protocol(format)
        if format.get('resolution') is None:
            format['resolution'] = self.format_resolution(format, default=None)
        if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
            format['dynamic_range'] = 'SDR'
        if format.get('aspect_ratio') is None:
            format['aspect_ratio'] = try_call(lambda: round(format['width'] / format['height'], 2))
        if (info_dict.get('duration') and format.get('tbr')
                and not format.get('filesize') and not format.get('filesize_approx')):
            format['filesize_approx'] = int(info_dict['duration'] * format['tbr'] * (1024 / 8))
        format['http_headers'] = self._calc_headers(collections.ChainMap(format, info_dict))

    # This is copied to http_headers by the above _calc_headers and can now be removed
    if '__x_forwarded_for_ip' in info_dict:
        del info_dict['__x_forwarded_for_ip']

    self.sort_formats({
        'formats': formats,
        '_format_sort_fields': info_dict.get('_format_sort_fields')
    })

    # Sanitize and group by format_id
    formats_dict = {}
    for i, format in enumerate(formats):
        if not format.get('format_id'):
            format['format_id'] = str(i)
        else:
            # Sanitize format_id from characters used in format selector expression
            format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
        formats_dict.setdefault(format['format_id'], []).append(format)

    # Make sure all formats have unique format_id
    common_exts = set(itertools.chain(*self._format_selection_exts.values()))
    for format_id, ambiguous_formats in formats_dict.items():
        ambigious_id = len(ambiguous_formats) > 1
        for i, format in enumerate(ambiguous_formats):
            if ambigious_id:
                format['format_id'] = '%s-%d' % (format_id, i)
            # Ensure there is no conflict between id and ext in format selection
            # See https://github.com/yt-dlp/yt-dlp/issues/1282
            if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
                format['format_id'] = 'f%s' % format['format_id']

            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=format_field(format, 'format_note', ' (%s)'),
                )

    if self.params.get('check_formats') is True:
        formats = LazyList(self._check_formats(formats[::-1]), reverse=True)

    if not formats or formats[0] is not info_dict:
        # only set the 'formats' fields if the original info_dict list them
        # otherwise we end up with a circular reference, the first (and unique)
        # element in the 'formats' field in info_dict is info_dict itself,
        # which can't be exported to json
        info_dict['formats'] = formats

    info_dict, _ = self.pre_process(info_dict)

    if self._match_entry(info_dict, incomplete=self._format_fields) is not None:
        return info_dict

    self.post_extract(info_dict)
    info_dict, _ = self.pre_process(info_dict, 'after_filter')

    # The pre-processors may have modified the formats
    formats = self._get_formats(info_dict)

    list_only = self.params.get('simulate') == 'list_only'
    interactive_format_selection = not list_only and self.format_selector == '-'
    if self.params.get('list_thumbnails'):
        self.list_thumbnails(info_dict)
    if self.params.get('listsubtitles'):
        if 'automatic_captions' in info_dict:
            self.list_subtitles(
                info_dict['id'], automatic_captions, 'automatic captions')
        self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
    if self.params.get('listformats') or interactive_format_selection:
        self.list_formats(info_dict)
    if list_only:
        # Without this printing, -F --print-json will not work
        self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
        return info_dict

    format_selector = self.format_selector
    if format_selector is None:
        req_format = self._default_format_spec(info_dict, download=download)
        self.write_debug('Default format spec: %s' % req_format)
        format_selector = self.build_format_selector(req_format)

    # Loops only in interactive mode, until a valid selector matches something
    while True:
        if interactive_format_selection:
            req_format = input(
                self._format_screen('\nEnter format selector: ', self.Styles.EMPHASIS))
            try:
                format_selector = self.build_format_selector(req_format)
            except SyntaxError as err:
                self.report_error(err, tb=False, is_error=False)
                continue

        formats_to_download = list(format_selector({
            'formats': formats,
            'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
            'incomplete_formats': (
                # All formats are video-only or
                all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
                # all formats are audio-only
                or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats)),
        }))
        if interactive_format_selection and not formats_to_download:
            self.report_error('Requested format is not available', tb=False, is_error=False)
            continue
        break

    if not formats_to_download:
        if not self.params.get('ignore_no_formats_error'):
            raise ExtractorError(
                'Requested format is not available. Use --list-formats for a list of available formats',
                expected=True, video_id=info_dict['id'], ie=info_dict['extractor'])
        self.report_warning('Requested format is not available')
        # Process what we can, even without any available formats.
        formats_to_download = [{}]

    requested_ranges = tuple(self.params.get('download_ranges', lambda *_: [{}])(info_dict, self))
    best_format, downloaded_formats = formats_to_download[-1], []
    if download:
        if best_format and requested_ranges:
            def to_screen(*msg):
                self.to_screen(f'[info] {info_dict["id"]}: {" ".join(", ".join(variadic(m)) for m in msg)}')

            to_screen(f'Downloading {len(formats_to_download)} format(s):',
                      (f['format_id'] for f in formats_to_download))
            if requested_ranges != ({}, ):
                to_screen(f'Downloading {len(requested_ranges)} time ranges:',
                          (f'{c["start_time"]:.1f}-{c["end_time"]:.1f}' for c in requested_ranges))
        max_downloads_reached = False

        for fmt, chapter in itertools.product(formats_to_download, requested_ranges):
            new_info = self._copy_infodict(info_dict)
            new_info.update(fmt)
            offset, duration = info_dict.get('section_start') or 0, info_dict.get('duration') or float('inf')
            end_time = offset + min(chapter.get('end_time', duration), duration)
            # duration may not be accurate. So allow deviations <1sec
            # An unknown duration together with an open-ended range gives an
            # infinite end; report that as "no end" instead of leaking inf
            if end_time == float('inf') or end_time > offset + duration + 1:
                end_time = None
            if chapter or offset:
                new_info.update({
                    'section_start': offset + chapter.get('start_time', 0),
                    'section_end': end_time,
                    'section_title': chapter.get('title'),
                    'section_number': chapter.get('index'),
                })
            downloaded_formats.append(new_info)
            try:
                self.process_info(new_info)
            except MaxDownloadsReached:
                # Finish the bookkeeping below before re-raising
                max_downloads_reached = True
            self._raise_pending_errors(new_info)
            # Remove copied info
            for key, val in tuple(new_info.items()):
                if info_dict.get(key) == val:
                    new_info.pop(key)
            if max_downloads_reached:
                break

        write_archive = {f.get('__write_download_archive', False) for f in downloaded_formats}
        assert write_archive.issubset({True, False, 'ignore'})
        if True in write_archive and False not in write_archive:
            self.record_download_archive(info_dict)

        info_dict['requested_downloads'] = downloaded_formats
        info_dict = self.run_all_pps('after_video', info_dict)
        if max_downloads_reached:
            raise MaxDownloadsReached()

    # We update the info dict with the selected best quality format (backwards compatibility)
    info_dict.update(best_format)
    return info_dict
2803
98c70d6f 2804 def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
a504ced0 2805 """Select the requested subtitles and their format"""
d8a58ddc 2806 available_subs, normal_sub_langs = {}, []
98c70d6f
JMF
2807 if normal_subtitles and self.params.get('writesubtitles'):
2808 available_subs.update(normal_subtitles)
d8a58ddc 2809 normal_sub_langs = tuple(normal_subtitles.keys())
98c70d6f
JMF
2810 if automatic_captions and self.params.get('writeautomaticsub'):
2811 for lang, cap_info in automatic_captions.items():
360e1ca5
JMF
2812 if lang not in available_subs:
2813 available_subs[lang] = cap_info
2814
d2c8aadf 2815 if not available_subs or (
2816 not self.params.get('writesubtitles')
2817 and not self.params.get('writeautomaticsub')):
4d171848 2818 return None
a504ced0 2819
d8a58ddc 2820 all_sub_langs = tuple(available_subs.keys())
a504ced0 2821 if self.params.get('allsubtitles', False):
c32b0aab 2822 requested_langs = all_sub_langs
2823 elif self.params.get('subtitleslangs', False):
5314b521 2824 try:
2825 requested_langs = orderedSet_from_options(
2826 self.params.get('subtitleslangs'), {'all': all_sub_langs}, use_regex=True)
2827 except re.error as e:
2828 raise ValueError(f'Wrong regex for subtitlelangs: {e.pattern}')
a504ced0 2829 else:
376aa24b
SS
2830 requested_langs = LazyList(itertools.chain(
2831 ['en'] if 'en' in normal_sub_langs else [],
2832 filter(lambda f: f.startswith('en'), normal_sub_langs),
2833 ['en'] if 'en' in all_sub_langs else [],
2834 filter(lambda f: f.startswith('en'), all_sub_langs),
2835 normal_sub_langs, all_sub_langs,
2836 ))[:1]
ad3dc496 2837 if requested_langs:
d2c8aadf 2838 self.to_screen(f'[info] {video_id}: Downloading subtitles: {", ".join(requested_langs)}')
a504ced0
JMF
2839
2840 formats_query = self.params.get('subtitlesformat', 'best')
2841 formats_preference = formats_query.split('/') if formats_query else []
2842 subs = {}
2843 for lang in requested_langs:
2844 formats = available_subs.get(lang)
2845 if formats is None:
86e5f3ed 2846 self.report_warning(f'{lang} subtitles not available for {video_id}')
a504ced0 2847 continue
a504ced0
JMF
2848 for ext in formats_preference:
2849 if ext == 'best':
2850 f = formats[-1]
2851 break
2852 matches = list(filter(lambda f: f['ext'] == ext, formats))
2853 if matches:
2854 f = matches[-1]
2855 break
2856 else:
2857 f = formats[-1]
2858 self.report_warning(
2859 'No subtitle format found matching "%s" for language %s, '
2860 'using %s' % (formats_query, lang, f['ext']))
2861 subs[lang] = f
2862 return subs
2863
bb66c247 2864 def _forceprint(self, key, info_dict):
2865 if info_dict is None:
2866 return
2867 info_copy = info_dict.copy()
2868 info_copy['formats_table'] = self.render_formats_table(info_dict)
2869 info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)
2870 info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
2871 info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))
2872
2873 def format_tmpl(tmpl):
48c8424b 2874 mobj = re.fullmatch(r'([\w.:,]|-\d|(?P<dict>{([\w.:,]|-\d)+}))+=?', tmpl)
07a1250e 2875 if not mobj:
2876 return tmpl
48c8424b 2877
2878 fmt = '%({})s'
2879 if tmpl.startswith('{'):
2880 tmpl = f'.{tmpl}'
2881 if tmpl.endswith('='):
2882 tmpl, fmt = tmpl[:-1], '{0} = %({0})#j'
2883 return '\n'.join(map(fmt.format, [tmpl] if mobj.group('dict') else tmpl.split(',')))
8130779d 2884
bb66c247 2885 for tmpl in self.params['forceprint'].get(key, []):
2886 self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))
2887
2888 for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):
5127e92a 2889 filename = self.prepare_filename(info_dict, outtmpl=file_tmpl)
bb66c247 2890 tmpl = format_tmpl(tmpl)
2891 self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
8d93e69d 2892 if self._ensure_dir_exists(filename):
86e5f3ed 2893 with open(filename, 'a', encoding='utf-8') as f:
8d93e69d 2894 f.write(self.evaluate_outtmpl(tmpl, info_copy) + '\n')
ca30f449 2895
d06daf23 2896 def __forced_printings(self, info_dict, filename, incomplete):
53c18592 2897 def print_mandatory(field, actual_field=None):
2898 if actual_field is None:
2899 actual_field = field
d06daf23 2900 if (self.params.get('force%s' % field, False)
53c18592 2901 and (not incomplete or info_dict.get(actual_field) is not None)):
2902 self.to_stdout(info_dict[actual_field])
d06daf23
S
2903
2904 def print_optional(field):
2905 if (self.params.get('force%s' % field, False)
2906 and info_dict.get(field) is not None):
2907 self.to_stdout(info_dict[field])
2908
53c18592 2909 info_dict = info_dict.copy()
2910 if filename is not None:
2911 info_dict['filename'] = filename
2912 if info_dict.get('requested_formats') is not None:
2913 # For RTMP URLs, also include the playpath
2914 info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
10331a26 2915 elif info_dict.get('url'):
53c18592 2916 info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2917
bb66c247 2918 if (self.params.get('forcejson')
2919 or self.params['forceprint'].get('video')
2920 or self.params['print_to_file'].get('video')):
2b8a2973 2921 self.post_extract(info_dict)
bb66c247 2922 self._forceprint('video', info_dict)
53c18592 2923
d06daf23
S
2924 print_mandatory('title')
2925 print_mandatory('id')
53c18592 2926 print_mandatory('url', 'urls')
d06daf23
S
2927 print_optional('thumbnail')
2928 print_optional('description')
53c18592 2929 print_optional('filename')
b868936c 2930 if self.params.get('forceduration') and info_dict.get('duration') is not None:
d06daf23
S
2931 self.to_stdout(formatSeconds(info_dict['duration']))
2932 print_mandatory('format')
53c18592 2933
2b8a2973 2934 if self.params.get('forcejson'):
6e84b215 2935 self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
d06daf23 2936
def dl(self, name, info, subtitle=False, test=False):
    """Download `info` to `name` with a suitable downloader and return its result

    With `test` set, a fixed set of downloader params is used instead of
    self.params and no progress hooks are attached.
    """
    if not info.get('url'):
        self.raise_no_formats(info, True)

    params = self.params
    if test:
        verbose = self.params.get('verbose')
        params = {
            'test': True,
            'quiet': self.params.get('quiet') or not verbose,
            'verbose': verbose,
            'noprogress': not verbose,
            'nopart': True,
            'skip_unavailable_fragments': False,
            'keep_fragments': False,
            'overwrites': True,
            '_no_ytdl_file': True,
        }

    downloader_cls = get_suitable_downloader(info, params, to_stdout=(name == '-'))
    fd = downloader_cls(self, params)
    if not test:
        for hook in self._progress_hooks:
            fd.add_progress_hook(hook)

        def loggable(url):
            # Do not dump the payload of data: URLs into the debug log
            if url.startswith('data:'):
                return url.split(',')[0] + ',<data>'
            return url

        sources = info.get('requested_formats', []) or [info]
        urls = '", "'.join(loggable(fmt['url']) for fmt in sources)
        self.write_debug(f'Invoking {fd.FD_NAME} downloader on "{urls}"')

    # Note: Ideally info should be a deep-copied so that hooks cannot modify it.
    # But it may contain objects that are not deep-copyable
    new_info = self._copy_infodict(info)
    if new_info.get('http_headers') is None:
        new_info['http_headers'] = self._calc_headers(new_info)
    return fd.download(name, new_info, subtitle)
2971
def existing_file(self, filepaths, *, default_overwrite=True):
    """Return the first of `filepaths` that exists, unless overwriting is enabled

    When the "overwrites" param (falling back to `default_overwrite`) is
    truthy, every existing candidate is deleted and None is returned.
    """
    found = [path for path in orderedSet(filepaths) if os.path.exists(path)]
    if found and not self.params.get('overwrites', default_overwrite):
        return found[0]

    for stale in found:
        self.report_file_delete(stale)
        os.remove(stale)
    return None
2981
def process_info(self, info_dict):
    """Process a single resolved IE result. (Modifies it in-place)

    Writes the requested side files (description, subtitles, thumbnails,
    info json, annotations, internet shortcuts), downloads the video unless
    "simulate"/"skip_download" is set and then runs the post-processors.
    The outcome is recorded in info_dict['__write_download_archive'].

    Returns None. Raises MaxDownloadsReached once the "max_downloads"
    limit is hit and UnavailableVideoError on an OSError during download.
    """

    assert info_dict.get('_type', 'video') == 'video'
    original_infodict = info_dict

    if 'format' not in info_dict and 'ext' in info_dict:
        info_dict['format'] = info_dict['ext']

    if self._match_entry(info_dict) is not None:
        info_dict['__write_download_archive'] = 'ignore'
        return

    # Does nothing under normal operation - for backward compatibility of process_info
    self.post_extract(info_dict)

    def replace_info_dict(new_info):
        # Copy the new contents into the original object so that
        # callers holding a reference to it see the changes
        nonlocal info_dict
        if new_info == info_dict:
            return
        info_dict.clear()
        info_dict.update(new_info)

    new_info, _ = self.pre_process(info_dict, 'video')
    replace_info_dict(new_info)
    self._num_downloads += 1

    # info_dict['_filename'] needs to be set for backward compatibility
    info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
    temp_filename = self.prepare_filename(info_dict, 'temp')
    files_to_move = {}

    # Forced printings
    self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))

    def check_max_downloads():
        if self._num_downloads >= float(self.params.get('max_downloads') or 'inf'):
            raise MaxDownloadsReached()

    if self.params.get('simulate'):
        info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
        check_max_downloads()
        return

    if full_filename is None:
        return
    if not self._ensure_dir_exists(encodeFilename(full_filename)):
        return
    if not self._ensure_dir_exists(encodeFilename(temp_filename)):
        return

    if self._write_description('video', info_dict,
                               self.prepare_filename(info_dict, 'description')) is None:
        return

    sub_files = self._write_subtitles(info_dict, temp_filename)
    if sub_files is None:
        return
    files_to_move.update(dict(sub_files))

    thumb_files = self._write_thumbnails(
        'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
    if thumb_files is None:
        return
    files_to_move.update(dict(thumb_files))

    infofn = self.prepare_filename(info_dict, 'infojson')
    _infojson_written = self._write_info_json('video', info_dict, infofn)
    if _infojson_written:
        info_dict['infojson_filename'] = infofn
        # For backward compatibility, even though it was a private field
        info_dict['__infojson_filename'] = infofn
    elif _infojson_written is None:
        return

    # Note: Annotations are deprecated
    annofn = None
    if self.params.get('writeannotations', False):
        annofn = self.prepare_filename(info_dict, 'annotation')
    if annofn:
        if not self._ensure_dir_exists(encodeFilename(annofn)):
            return
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
            self.to_screen('[info] Video annotations are already present')
        elif not info_dict.get('annotations'):
            self.report_warning('There are no annotations to write.')
        else:
            try:
                self.to_screen('[info] Writing video annotations to: ' + annofn)
                with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                    annofile.write(info_dict['annotations'])
            except (KeyError, TypeError):
                self.report_warning('There are no annotations to write.')
            except OSError:
                self.report_error('Cannot write annotations file: ' + annofn)
                return

    # Write internet shortcut files
    def _write_link_file(link_type):
        # Returns False only on a failure that should abort processing
        url = try_get(info_dict['webpage_url'], iri_to_uri)
        if not url:
            self.report_warning(
                f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
            return True
        linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
        if not self._ensure_dir_exists(encodeFilename(linkfn)):
            return False
        # Keep an existing shortcut only when overwriting is disabled,
        # same condition as the annotations check above
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
            self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
            return True
        try:
            self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
            with open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
                      newline='\r\n' if link_type == 'url' else '\n') as linkfile:
                template_vars = {'url': url}
                if link_type == 'desktop':
                    template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
                linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
        except OSError:
            self.report_error(f'Cannot write internet shortcut {linkfn}')
            return False
        return True

    write_links = {
        'url': self.params.get('writeurllink'),
        'webloc': self.params.get('writewebloclink'),
        'desktop': self.params.get('writedesktoplink'),
    }
    if self.params.get('writelink'):
        # Pick the shortcut type from the current platform
        link_type = ('webloc' if sys.platform == 'darwin'
                     else 'desktop' if sys.platform.startswith('linux')
                     else 'url')
        write_links[link_type] = True

    if any(should_write and not _write_link_file(link_type)
           for link_type, should_write in write_links.items()):
        return

    new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
    replace_info_dict(new_info)

    if self.params.get('skip_download'):
        info_dict['filepath'] = temp_filename
        info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
        info_dict['__files_to_move'] = files_to_move
        replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
        info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
    else:
        # Download
        info_dict.setdefault('__postprocessors', [])
        try:

            def existing_video_file(*filepaths):
                # Also look for each candidate with the final (converted) extension
                ext = info_dict.get('ext')
                converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
                file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
                                          default_overwrite=False)
                if file:
                    info_dict['ext'] = os.path.splitext(file)[1][1:]
                return file

            fd, success = None, True
            if info_dict.get('protocol') or info_dict.get('url'):
                fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
                if fd is not FFmpegFD and 'no-direct-merge' not in self.params['compat_opts'] and (
                        info_dict.get('section_start') or info_dict.get('section_end')):
                    msg = ('This format cannot be partially downloaded' if FFmpegFD.available()
                           else 'You have requested downloading the video partially, but ffmpeg is not installed')
                    self.report_error(f'{msg}. Aborting')
                    return

            if info_dict.get('requested_formats') is not None:
                requested_formats = info_dict['requested_formats']
                old_ext = info_dict['ext']
                if self.params.get('merge_output_format') is None:
                    if (info_dict['ext'] == 'webm'
                            and info_dict.get('thumbnails')
                            # check with type instead of pp_key, __name__, or isinstance
                            # since we dont want any custom PPs to trigger this
                            and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):  # noqa: E721
                        info_dict['ext'] = 'mkv'
                        self.report_warning(
                            'webm doesn\'t support embedding a thumbnail, mkv will be used')
                new_ext = info_dict['ext']

                def correct_ext(filename, ext=new_ext):
                    if filename == '-':
                        return filename
                    filename_real_ext = os.path.splitext(filename)[1][1:]
                    filename_wo_ext = (
                        os.path.splitext(filename)[0]
                        if filename_real_ext in (old_ext, new_ext)
                        else filename)
                    return f'{filename_wo_ext}.{ext}'

                # Ensure filename always has a correct extension for successful merge
                full_filename = correct_ext(full_filename)
                temp_filename = correct_ext(temp_filename)
                dl_filename = existing_video_file(full_filename, temp_filename)
                info_dict['__real_download'] = False

                merger = FFmpegMergerPP(self)
                downloaded = []
                if dl_filename is not None:
                    self.report_file_already_downloaded(dl_filename)
                elif fd:
                    # A single downloader handles all the formats at once
                    for f in requested_formats if fd != FFmpegFD else []:
                        f['filepath'] = fname = prepend_extension(
                            correct_ext(temp_filename, info_dict['ext']),
                            'f%s' % f['format_id'], info_dict['ext'])
                        downloaded.append(fname)
                    info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
                    success, real_download = self.dl(temp_filename, info_dict)
                    info_dict['__real_download'] = real_download
                else:
                    if self.params.get('allow_unplayable_formats'):
                        self.report_warning(
                            'You have requested merging of multiple formats '
                            'while also allowing unplayable formats to be downloaded. '
                            'The formats won\'t be merged to prevent data corruption.')
                    elif not merger.available:
                        msg = 'You have requested merging of multiple formats but ffmpeg is not installed'
                        if not self.params.get('ignoreerrors'):
                            self.report_error(f'{msg}. Aborting due to --abort-on-error')
                            return
                        self.report_warning(f'{msg}. The formats won\'t be merged')

                    if temp_filename == '-':
                        reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
                                  else 'but the formats are incompatible for simultaneous download' if merger.available
                                  else 'but ffmpeg is not installed')
                        self.report_warning(
                            f'You have requested downloading multiple formats to stdout {reason}. '
                            'The formats will be streamed one after the other')
                        fname = temp_filename
                    # Download every requested format separately
                    for f in requested_formats:
                        new_info = dict(info_dict)
                        del new_info['requested_formats']
                        new_info.update(f)
                        if temp_filename != '-':
                            fname = prepend_extension(
                                correct_ext(temp_filename, new_info['ext']),
                                'f%s' % f['format_id'], new_info['ext'])
                            if not self._ensure_dir_exists(fname):
                                return
                            f['filepath'] = fname
                            downloaded.append(fname)
                        partial_success, real_download = self.dl(fname, new_info)
                        info_dict['__real_download'] = info_dict['__real_download'] or real_download
                        success = success and partial_success

                if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
                    info_dict['__postprocessors'].append(merger)
                    info_dict['__files_to_merge'] = downloaded
                    # Even if there were no downloads, it is being merged only now
                    info_dict['__real_download'] = True
                else:
                    for file in downloaded:
                        files_to_move[file] = None
            else:
                # Just a single file
                dl_filename = existing_video_file(full_filename, temp_filename)
                if dl_filename is None or dl_filename == temp_filename:
                    # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
                    # So we should try to resume the download
                    success, real_download = self.dl(temp_filename, info_dict)
                    info_dict['__real_download'] = real_download
                else:
                    self.report_file_already_downloaded(dl_filename)

            dl_filename = dl_filename or temp_filename
            info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))

        except network_exceptions as err:
            self.report_error('unable to download video data: %s' % error_to_compat_str(err))
            return
        except OSError as err:
            raise UnavailableVideoError(err)
        except (ContentTooShortError, ) as err:
            self.report_error(f'content too short (expected {err.expected} bytes and served {err.downloaded})')
            return

        self._raise_pending_errors(info_dict)
        if success and full_filename != '-':

            def fixup():
                # do_fixup is True (apply), 'warn' (only report) or False (skip)
                do_fixup = True
                fixup_policy = self.params.get('fixup')
                vid = info_dict['id']

                if fixup_policy in ('ignore', 'never'):
                    return
                elif fixup_policy == 'warn':
                    do_fixup = 'warn'
                elif fixup_policy != 'force':
                    assert fixup_policy in ('detect_or_warn', None)
                    if not info_dict.get('__real_download'):
                        do_fixup = False

                def ffmpeg_fixup(cndn, msg, cls):
                    if not (do_fixup and cndn):
                        return
                    elif do_fixup == 'warn':
                        self.report_warning(f'{vid}: {msg}')
                        return
                    pp = cls(self)
                    if pp.available:
                        info_dict['__postprocessors'].append(pp)
                    else:
                        self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')

                stretched_ratio = info_dict.get('stretched_ratio')
                ffmpeg_fixup(stretched_ratio not in (1, None),
                             f'Non-uniform pixel ratio {stretched_ratio}',
                             FFmpegFixupStretchedPP)

                downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
                downloader = downloader.FD_NAME if downloader else None

                ext = info_dict.get('ext')
                postprocessed_by_ffmpeg = info_dict.get('requested_formats') or any((
                    isinstance(pp, FFmpegVideoConvertorPP)
                    and resolve_recode_mapping(ext, pp.mapping)[0] not in (ext, None)
                ) for pp in self._pps['post_process'])

                if not postprocessed_by_ffmpeg:
                    ffmpeg_fixup(ext == 'm4a' and info_dict.get('container') == 'm4a_dash',
                                 'writing DASH m4a. Only some players support this container',
                                 FFmpegFixupM4aPP)
                    ffmpeg_fixup(downloader == 'hlsnative' and not self.params.get('hls_use_mpegts')
                                 or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None,
                                 'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
                                 FFmpegFixupM3u8PP)
                    ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD',
                                 'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)

                ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
                ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed duration detected', FFmpegFixupDurationPP)

            fixup()
            try:
                replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
            except PostProcessingError as err:
                self.report_error('Postprocessing: %s' % str(err))
                return
            try:
                for ph in self._post_hooks:
                    ph(info_dict['filepath'])
            except Exception as err:
                self.report_error('post hooks: %s' % str(err))
                return
            info_dict['__write_download_archive'] = True

    assert info_dict is original_infodict  # Make sure the info_dict was modified in-place
    if self.params.get('force_write_download_archive'):
        info_dict['__write_download_archive'] = True
    check_max_downloads()
8222d8de 3339
aa9369a2 3340 def __download_wrapper(self, func):
3341 @functools.wraps(func)
3342 def wrapper(*args, **kwargs):
3343 try:
3344 res = func(*args, **kwargs)
3345 except UnavailableVideoError as e:
3346 self.report_error(e)
b222c271 3347 except DownloadCancelled as e:
3348 self.to_screen(f'[info] {e}')
3349 if not self.params.get('break_per_url'):
3350 raise
fd404bec 3351 self._num_downloads = 0
aa9369a2 3352 else:
3353 if self.params.get('dump_single_json', False):
3354 self.post_extract(res)
3355 self.to_stdout(json.dumps(self.sanitize_info(res)))
3356 return wrapper
3357
8222d8de
JMF
def download(self, url_list):
    """Download a given list of URLs."""
    url_list = variadic(url_list)  # Passing a single URL is a common mistake
    outtmpl = self.params['outtmpl']['default']

    # Several URLs cannot all be written to one fixed filename
    if len(url_list) > 1:
        if not (outtmpl == '-' or '%' in outtmpl or self.params.get('max_downloads') == 1):
            raise SameFileError(outtmpl)

    for url in url_list:
        extract = self.__download_wrapper(self.extract_info)
        extract(url, force_generic_extractor=self.params.get('force_generic_extractor', False))

    return self._download_retcode
3373
def download_with_info_file(self, info_filename):
    """Download using the info json stored in `info_filename`

    If processing the stored info fails, the download is retried from
    its "webpage_url" when there is one; otherwise the error is re-raised.
    """
    reader = fileinput.FileInput(
        [info_filename], mode='r',
        openhook=fileinput.hook_encoded('utf-8'))
    with contextlib.closing(reader) as f:
        # FileInput doesn't have a read method, we can't call json.load
        loaded = json.loads('\n'.join(f))
        info = self.sanitize_info(loaded, self.params.get('clean_infojson', True))

    try:
        self.__download_wrapper(self.process_ie_result)(info, download=True)
    except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
        if not isinstance(e, EntryNotInPlaylist):
            self.to_stderr('\r')
        webpage_url = info.get('webpage_url')
        if webpage_url is None:
            raise
        self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
        return self.download([webpage_url])
    return self._download_retcode
1dcc4c0c 3392
@staticmethod
def sanitize_info(info_dict, remove_private_keys=False):
    ''' Sanitize the infodict for converting to json

    Fills in "epoch", "_type" and "_version" on info_dict itself when missing
    and returns a copy in which every value is json-serializable.
    With remove_private_keys, None values, keys starting with "__"
    and a fixed set of internal keys are dropped.
    '''
    if info_dict is None:
        return info_dict

    info_dict.setdefault('epoch', int(time.time()))
    info_dict.setdefault('_type', 'video')
    info_dict.setdefault('_version', {
        'version': __version__,
        'current_git_head': current_git_head(),
        'release_git_head': RELEASE_GIT_HEAD,
        'repository': REPOSITORY,
    })

    internal_keys = {
        'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
        'entries', 'filepath', '_filename', 'infojson_filename', 'original_url', 'playlist_autonumber',
        '_format_sort_fields',
    }

    def is_rejected(key, value):
        if not remove_private_keys:
            return False
        return value is None or key.startswith('__') or key in internal_keys

    def to_jsonable(obj):
        if isinstance(obj, dict):
            return {k: to_jsonable(v) for k, v in obj.items() if not is_rejected(k, v)}
        if isinstance(obj, (list, tuple, set, LazyList)):
            return [to_jsonable(item) for item in obj]
        if obj is None or isinstance(obj, (str, int, float, bool)):
            return obj
        # Anything else is represented by its repr
        return repr(obj)

    return to_jsonable(info_dict)
cb202fd2 3427
@staticmethod
def filter_requested_info(info_dict, actually_filter=True):
    ''' Alias of sanitize_info for backward compatibility '''
    return YoutubeDL.sanitize_info(info_dict, remove_private_keys=actually_filter)
3432
43d7f5a5 3433 def _delete_downloaded_files(self, *files_to_delete, info={}, msg=None):
3434 for filename in set(filter(None, files_to_delete)):
3435 if msg:
3436 self.to_screen(msg % filename)
3437 try:
3438 os.remove(filename)
3439 except OSError:
3440 self.report_warning(f'Unable to delete file {filename}')
3441 if filename in info.get('__files_to_move', []): # NB: Delete even if None
3442 del info['__files_to_move'][filename]
3443
ed5835b4 3444 @staticmethod
3445 def post_extract(info_dict):
3446 def actual_post_extract(info_dict):
3447 if info_dict.get('_type') in ('playlist', 'multi_video'):
3448 for video_dict in info_dict.get('entries', {}):
3449 actual_post_extract(video_dict or {})
3450 return
3451
09b49e1f 3452 post_extractor = info_dict.pop('__post_extractor', None) or (lambda: {})
3453 info_dict.update(post_extractor())
ed5835b4 3454
3455 actual_post_extract(info_dict or {})
3456
dcf64d43 3457 def run_pp(self, pp, infodict):
5bfa4862 3458 files_to_delete = []
dcf64d43 3459 if '__files_to_move' not in infodict:
3460 infodict['__files_to_move'] = {}
b1940459 3461 try:
3462 files_to_delete, infodict = pp.run(infodict)
3463 except PostProcessingError as e:
3464 # Must be True and not 'only_download'
3465 if self.params.get('ignoreerrors') is True:
3466 self.report_error(e)
3467 return infodict
3468 raise
3469
5bfa4862 3470 if not files_to_delete:
dcf64d43 3471 return infodict
5bfa4862 3472 if self.params.get('keepvideo', False):
3473 for f in files_to_delete:
dcf64d43 3474 infodict['__files_to_move'].setdefault(f, '')
5bfa4862 3475 else:
43d7f5a5 3476 self._delete_downloaded_files(
3477 *files_to_delete, info=infodict, msg='Deleting original file %s (pass -k to keep)')
dcf64d43 3478 return infodict
5bfa4862 3479
ed5835b4 3480 def run_all_pps(self, key, info, *, additional_pps=None):
193fb150 3481 if key != 'video':
3482 self._forceprint(key, info)
ed5835b4 3483 for pp in (additional_pps or []) + self._pps[key]:
dc5f409c 3484 info = self.run_pp(pp, info)
ed5835b4 3485 return info
277d6ff5 3486
56d868db 3487 def pre_process(self, ie_info, key='pre_process', files_to_move=None):
5bfa4862 3488 info = dict(ie_info)
56d868db 3489 info['__files_to_move'] = files_to_move or {}
415f8d51 3490 try:
3491 info = self.run_all_pps(key, info)
3492 except PostProcessingError as err:
3493 msg = f'Preprocessing: {err}'
3494 info.setdefault('__pending_error', msg)
3495 self.report_error(msg, is_error=False)
56d868db 3496 return info, info.pop('__files_to_move', None)
5bfa4862 3497
def post_process(self, filename, info, files_to_move=None):
    """Run all the postprocessors on the given file."""
    info.update({
        'filepath': filename,
        '__files_to_move': files_to_move or {},
    })
    # Postprocessors attached to this particular download run first
    extra_pps = info.get('__postprocessors')
    info = self.run_all_pps('post_process', info, additional_pps=extra_pps)
    mover = MoveFilesAfterDownloadPP(self)
    info = self.run_pp(mover, info)
    del info['__files_to_move']
    return self.run_all_pps('after_move', info)
c1c9a79c 3506
5db07df6 3507 def _make_archive_id(self, info_dict):
e9fef7ee
S
3508 video_id = info_dict.get('id')
3509 if not video_id:
3510 return
5db07df6
PH
3511 # Future-proof against any change in case
3512 # and backwards compatibility with prior versions
e9fef7ee 3513 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
7012b23c 3514 if extractor is None:
1211bb6d
S
3515 url = str_or_none(info_dict.get('url'))
3516 if not url:
3517 return
e9fef7ee 3518 # Try to find matching extractor for the URL and take its ie_key
8b7491c8 3519 for ie_key, ie in self._ies.items():
1211bb6d 3520 if ie.suitable(url):
8b7491c8 3521 extractor = ie_key
e9fef7ee
S
3522 break
3523 else:
3524 return
0647d925 3525 return make_archive_id(extractor, video_id)
5db07df6
PH
3526
3527 def in_download_archive(self, info_dict):
ae103564 3528 if not self.archive:
5db07df6
PH
3529 return False
3530
1e8fe57e 3531 vid_ids = [self._make_archive_id(info_dict)]
c200096c 3532 vid_ids.extend(info_dict.get('_old_archive_ids') or [])
1e8fe57e 3533 return any(id_ in self.archive for id_ in vid_ids)
c1c9a79c
PH
3534
def record_download_archive(self, info_dict):
    """Record the entry's archive id in self.archive and, when the archive is path-like, append it to that file."""
    archive_target = self.params.get('download_archive')
    if archive_target is None:
        return

    archive_id = self._make_archive_id(info_dict)
    assert archive_id

    self.write_debug(f'Adding to archive: {archive_id}')
    if is_path_like(archive_target):
        with locked_file(archive_target, 'a', encoding='utf-8') as archive_file:
            archive_file.write(archive_id + '\n')
    self.archive.add(archive_id)
dd82ffea 3547
8c51aa65 3548 @staticmethod
8abeeb94 3549 def format_resolution(format, default='unknown'):
9359f3d4 3550 if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
fb04e403 3551 return 'audio only'
f49d89ee
PH
3552 if format.get('resolution') is not None:
3553 return format['resolution']
35615307 3554 if format.get('width') and format.get('height'):
ff51ed58 3555 return '%dx%d' % (format['width'], format['height'])
35615307 3556 elif format.get('height'):
ff51ed58 3557 return '%sp' % format['height']
35615307 3558 elif format.get('width'):
ff51ed58 3559 return '%dx?' % format['width']
3560 return default
8c51aa65 3561
8130779d 3562 def _list_format_headers(self, *headers):
3563 if self.params.get('listformats_table', True) is not False:
591bb9d3 3564 return [self._format_out(header, self.Styles.HEADERS) for header in headers]
8130779d 3565 return headers
3566
c57f7757
PH
3567 def _format_note(self, fdict):
3568 res = ''
3569 if fdict.get('ext') in ['f4f', 'f4m']:
f304da8a 3570 res += '(unsupported)'
32f90364
PH
3571 if fdict.get('language'):
3572 if res:
3573 res += ' '
f304da8a 3574 res += '[%s]' % fdict['language']
c57f7757 3575 if fdict.get('format_note') is not None:
f304da8a 3576 if res:
3577 res += ' '
3578 res += fdict['format_note']
c57f7757 3579 if fdict.get('tbr') is not None:
f304da8a 3580 if res:
3581 res += ', '
3582 res += '%4dk' % fdict['tbr']
c57f7757
PH
3583 if fdict.get('container') is not None:
3584 if res:
3585 res += ', '
3586 res += '%s container' % fdict['container']
3089bc74
S
3587 if (fdict.get('vcodec') is not None
3588 and fdict.get('vcodec') != 'none'):
c57f7757
PH
3589 if res:
3590 res += ', '
3591 res += fdict['vcodec']
91c7271a 3592 if fdict.get('vbr') is not None:
c57f7757
PH
3593 res += '@'
3594 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3595 res += 'video@'
3596 if fdict.get('vbr') is not None:
3597 res += '%4dk' % fdict['vbr']
fbb21cf5 3598 if fdict.get('fps') is not None:
5d583bdf
S
3599 if res:
3600 res += ', '
3601 res += '%sfps' % fdict['fps']
c57f7757
PH
3602 if fdict.get('acodec') is not None:
3603 if res:
3604 res += ', '
3605 if fdict['acodec'] == 'none':
3606 res += 'video only'
3607 else:
3608 res += '%-5s' % fdict['acodec']
3609 elif fdict.get('abr') is not None:
3610 if res:
3611 res += ', '
3612 res += 'audio'
3613 if fdict.get('abr') is not None:
3614 res += '@%3dk' % fdict['abr']
3615 if fdict.get('asr') is not None:
3616 res += ' (%5dHz)' % fdict['asr']
3617 if fdict.get('filesize') is not None:
3618 if res:
3619 res += ', '
3620 res += format_bytes(fdict['filesize'])
9732d77e
PH
3621 elif fdict.get('filesize_approx') is not None:
3622 if res:
3623 res += ', '
3624 res += '~' + format_bytes(fdict['filesize_approx'])
c57f7757 3625 return res
91c7271a 3626
aebb4f4b 3627 def _get_formats(self, info_dict):
3628 if info_dict.get('formats') is None:
3629 if info_dict.get('url') and info_dict.get('_type', 'video') == 'video':
3630 return [info_dict]
3631 return []
3632 return info_dict['formats']
b69fd25c 3633
def render_formats_table(self, info_dict):
    """Render the available formats of info_dict as a table.

    Returns None when there are no formats. When the 'listformats_table'
    param is False, a simple 4-column table is produced; otherwise the
    detailed multi-column table is produced.
    """
    formats = self._get_formats(info_dict)
    if not formats:
        return
    # NOTE: `not X is not False` parses as `not (X is not False)`, i.e. `X is False`.
    # So this branch is taken only when the table view is explicitly disabled
    if not self.params.get('listformats_table', True) is not False:
        table = [
            [
                format_field(f, 'format_id'),
                format_field(f, 'ext'),
                self.format_resolution(f),
                self._format_note(f)
            ] for f in formats if (f.get('preference') or 0) >= -1000]
        return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)

    def simplified_codec(f, field):
        # Codec string limited to its first 4 dot-separated parts,
        # or a placeholder when the codec is missing or 'none'
        assert field in ('acodec', 'vcodec')
        codec = f.get(field, 'unknown')
        if not codec:
            return 'unknown'
        elif codec != 'none':
            return '.'.join(codec.split('.')[:4])

        # From here on the codec is 'none'
        if field == 'vcodec' and f.get('acodec') == 'none':
            return 'images'
        elif field == 'acodec' and f.get('vcodec') == 'none':
            return ''
        return self._format_out('audio only' if field == 'vcodec' else 'video only',
                                self.Styles.SUPPRESS)

    delim = self._format_out('\u2502', self.Styles.DELIM, '|', test_encoding=True)
    # The order of the cells below must match the order of the headers in header_line
    table = [
        [
            self._format_out(format_field(f, 'format_id'), self.Styles.ID),
            format_field(f, 'ext'),
            format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
            format_field(f, 'fps', '\t%d', func=round),
            format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
            format_field(f, 'audio_channels', '\t%s'),
            delim,
            format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
            format_field(f, 'tbr', '\t%dk', func=round),
            shorten_protocol_name(f.get('protocol', '')),
            delim,
            simplified_codec(f, 'vcodec'),
            format_field(f, 'vbr', '\t%dk', func=round),
            simplified_codec(f, 'acodec'),
            format_field(f, 'abr', '\t%dk', func=round),
            format_field(f, 'asr', '\t%s', func=format_decimal_suffix),
            join_nonempty(
                self._format_out('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
                self._format_out('DRM', 'light red') if f.get('has_drm') else None,
                format_field(f, 'language', '[%s]'),
                join_nonempty(format_field(f, 'format_note'),
                              format_field(f, 'container', ignore=(None, f.get('ext'))),
                              delim=', '),
                delim=' '),
        ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
    header_line = self._list_format_headers(
        'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', 'CH', delim, '\tFILESIZE', '\tTBR', 'PROTO',
        delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')

    return render_table(
        header_line, table, hide_empty=True,
        delim=self._format_out('\u2500', self.Styles.DELIM, '-', test_encoding=True))
8130779d 3698
def render_thumbnails_table(self, info_dict):
    """Render the thumbnails of info_dict as a table; return None when there are none."""
    thumbs = list(info_dict.get('thumbnails') or [])
    if not thumbs:
        return None

    headers = self._list_format_headers('ID', 'Width', 'Height', 'URL')
    rows = []
    for thumb in thumbs:
        rows.append([
            thumb.get('id'),
            thumb.get('width') or 'unknown',
            thumb.get('height') or 'unknown',
            thumb['url'],
        ])
    return render_table(headers, rows)
2412044c 3706
def render_subtitles_table(self, video_id, subtitles):
    """Render the subtitles mapping (language -> list of formats) as a table; return None when it is empty."""
    if not subtitles:
        return None

    def describe(lang, formats):
        # One table row per language: [language, names, extensions]
        pairs = [(fmt['ext'], fmt.get('name') or 'unknown') for fmt in reversed(formats)]
        exts, names = zip(*pairs)
        if len(set(names)) == 1:
            # All formats share one name: show it once, or not at all if it is unknown
            names = [] if names[0] == 'unknown' else names[:1]
        return [lang, ', '.join(names), ', '.join(exts)]

    headers = self._list_format_headers('Language', 'Name', 'Formats')
    rows = [describe(lang, formats) for lang, formats in subtitles.items()]
    return render_table(headers, rows, hide_empty=True)
3720
def __list_table(self, video_id, name, func, *args):
    """Print the table produced by func(*args), or a notice that video_id has no such items."""
    rendered = func(*args)
    if rendered:
        self.to_screen(f'[info] Available {name} for {video_id}:')
        self.to_stdout(rendered)
    else:
        self.to_screen(f'{video_id} has no {name}')
3728
def list_formats(self, info_dict):
    """Print the formats table of info_dict."""
    video_id = info_dict['id']
    self.__list_table(video_id, 'formats', self.render_formats_table, info_dict)
3731
def list_thumbnails(self, info_dict):
    """Print the thumbnails table of info_dict."""
    video_id = info_dict['id']
    self.__list_table(video_id, 'thumbnails', self.render_thumbnails_table, info_dict)
3734
def list_subtitles(self, video_id, subtitles, name='subtitles'):
    """Print the subtitles table for video_id; `name` is the label used in the printed messages."""
    renderer = self.render_subtitles_table
    self.__list_table(video_id, name, renderer, video_id, subtitles)
a504ced0 3737
dca08720
PH
def urlopen(self, req):
    """ Start an HTTP download """
    # Plain URL strings are first turned into a request object
    request = sanitized_Request(req) if isinstance(req, str) else req
    return self._opener.open(request, timeout=self._socket_timeout)
dca08720
PH
3743
def print_debug_header(self):
    """Write the debug header (encodings, version, executables, libraries, proxies, plugins).

    Does nothing unless the 'verbose' param is set. The lines go to the
    'logger' param's debug() when one is given.
    """
    if not self.params.get('verbose'):
        return

    from . import _IN_CLI  # Must be delayed import

    # These imports can be slow. So import them only as needed
    from .extractor.extractors import _LAZY_LOADER
    from .extractor.extractors import (
        _PLUGIN_CLASSES as plugin_ies,
        _PLUGIN_OVERRIDES as plugin_ie_overrides
    )

    def get_encoding(stream):
        # Encoding of the stream, annotated when terminal sequences are not supported
        ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
        if not supports_terminal_sequences(stream):
            from .utils import WINDOWS_VT_MODE  # Must be imported locally
            ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
        return ret

    encoding_str = 'Encodings: locale %s, fs %s, pref %s, %s' % (
        locale.getpreferredencoding(),
        sys.getfilesystemencoding(),
        self.get_encoding(),
        ', '.join(
            f'{key} {get_encoding(stream)}' for key, stream in self._out_files.items_
            if stream is not None and key != 'console')
    )

    logger = self.params.get('logger')
    if logger:
        write_debug = lambda msg: logger.debug(f'[debug] {msg}')
        write_debug(encoding_str)
    else:
        # The encodings line is written with encoding=None, unlike the later lines
        write_string(f'[debug] {encoding_str}\n', encoding=None)
        write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')

    source = detect_variant()
    if VARIANT not in (None, 'pip'):
        source += '*'
    klass = type(self)
    write_debug(join_nonempty(
        f'{"yt-dlp" if REPOSITORY == "yt-dlp/yt-dlp" else REPOSITORY} version',
        f'{CHANNEL}@{__version__}',
        f'[{RELEASE_GIT_HEAD[:9]}]' if RELEASE_GIT_HEAD else '',
        '' if source == 'unknown' else f'({source})',
        '' if _IN_CLI else 'API' if klass == YoutubeDL else f'API:{self.__module__}.{klass.__qualname__}',
        delim=' '))

    if not _IN_CLI:
        write_debug(f'params: {self.params}')

    if not _LAZY_LOADER:
        if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
            write_debug('Lazy loading extractors is forcibly disabled')
        else:
            write_debug('Lazy loading extractors is disabled')
    if self.params['compat_opts']:
        write_debug('Compatibility options: %s' % ', '.join(self.params['compat_opts']))

    if current_git_head():
        write_debug(f'Git HEAD: {current_git_head()}')
    write_debug(system_identifier())

    exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
    # Keep only the names of the features whose value is truthy
    ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
    if ffmpeg_features:
        exe_versions['ffmpeg'] += ' (%s)' % ','.join(sorted(ffmpeg_features))

    exe_versions['rtmpdump'] = rtmpdump_version()
    exe_versions['phantomjs'] = PhantomJSwrapper._version()
    # Executables with a falsy version are left out of the listing
    exe_str = ', '.join(
        f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
    ) or 'none'
    write_debug('exe versions: %s' % exe_str)

    from .compat.compat_utils import get_package_info
    from .dependencies import available_dependencies

    write_debug('Optional libraries: %s' % (', '.join(sorted({
        join_nonempty(*get_package_info(m)) for m in available_dependencies.values()
    })) or 'none'))

    self._setup_opener()
    # Collect the proxies of every handler of the opener that exposes them
    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    write_debug(f'Proxy map: {proxy_map}')

    for plugin_type, plugins in {'Extractor': plugin_ies, 'Post-Processor': plugin_pps}.items():
        display_list = ['%s%s' % (
            klass.__name__, '' if klass.__name__ == name else f' as {name}')
            for name, klass in plugins.items()]
        if plugin_type == 'Extractor':
            # Overrides are listed by the part of IE_NAME after '+', with the parent class in parentheses
            display_list.extend(f'{plugins[-1].IE_NAME.partition("+")[2]} ({parent.__name__})'
                                for parent, plugins in plugin_ie_overrides.items())
        if not display_list:
            continue
        write_debug(f'{plugin_type} Plugins: {", ".join(sorted(display_list))}')

    plugin_dirs = plugin_directories()
    if plugin_dirs:
        write_debug(f'Plugin directories: {plugin_dirs}')

    # Not implemented
    # (the `False and` makes this whole branch unreachable)
    if False and self.params.get('call_home'):
        ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode()
        write_debug('Public IP address: %s' % ipaddr)
        latest_version = self.urlopen(
            'https://yt-dl.org/latest/version').read().decode()
        if version_tuple(latest_version) > version_tuple(__version__):
            self.report_warning(
                'You are using an outdated version (newest version: %s)! '
                'See https://yt-dl.org/update if you need help updating.' %
                latest_version)
3860
e344693b 3861 def _setup_opener(self):
97ec5bc5 3862 if hasattr(self, '_opener'):
3863 return
6ad14cab 3864 timeout_val = self.params.get('socket_timeout')
17bddf3e 3865 self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
6ad14cab 3866
982ee69a 3867 opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
dca08720
PH
3868 opts_cookiefile = self.params.get('cookiefile')
3869 opts_proxy = self.params.get('proxy')
3870
982ee69a 3871 self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
dca08720 3872
6a3f4c3f 3873 cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
dca08720
PH
3874 if opts_proxy is not None:
3875 if opts_proxy == '':
3876 proxies = {}
3877 else:
3878 proxies = {'http': opts_proxy, 'https': opts_proxy}
3879 else:
ac668111 3880 proxies = urllib.request.getproxies()
067aa17e 3881 # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
dca08720
PH
3882 if 'http' in proxies and 'https' not in proxies:
3883 proxies['https'] = proxies['http']
91410c9b 3884 proxy_handler = PerRequestProxyHandler(proxies)
a0ddb8a2
PH
3885
3886 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
be4a824d
PH
3887 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3888 ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
fca6dba8 3889 redirect_handler = YoutubeDLRedirectHandler()
f9934b96 3890 data_handler = urllib.request.DataHandler()
6240b0a2
JMF
3891
3892 # When passing our own FileHandler instance, build_opener won't add the
3893 # default FileHandler and allows us to disable the file protocol, which
3894 # can be used for malicious purposes (see
067aa17e 3895 # https://github.com/ytdl-org/youtube-dl/issues/8227)
ac668111 3896 file_handler = urllib.request.FileHandler()
6240b0a2 3897
8300774c
M
3898 if not self.params.get('enable_file_urls'):
3899 def file_open(*args, **kwargs):
3900 raise urllib.error.URLError(
3901 'file:// URLs are explicitly disabled in yt-dlp for security reasons. '
3902 'Use --enable-file-urls to enable at your own risk.')
3903 file_handler.file_open = file_open
6240b0a2 3904
ac668111 3905 opener = urllib.request.build_opener(
fca6dba8 3906 proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
2461f79d 3907
dca08720
PH
3908 # Delete the default user-agent header, which would otherwise apply in
3909 # cases where our custom HTTP handler doesn't come into play
067aa17e 3910 # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
dca08720
PH
3911 opener.addheaders = []
3912 self._opener = opener
62fec3b2
PH
3913
def encode(self, s):
    """Encode s with the configured encoding; bytes are returned unchanged."""
    if isinstance(s, bytes):
        return s  # Already encoded

    try:
        encoded = s.encode(self.get_encoding())
    except UnicodeEncodeError as err:
        # Point the user at the likely cause before re-raising
        err.reason += '. Check your system encoding configuration or use the --encoding option.'
        raise
    return encoded
3923
def get_encoding(self):
    """Return the 'encoding' param, falling back to preferredencoding() when it is None."""
    configured = self.params.get('encoding')
    return preferredencoding() if configured is None else configured
ec82d85a 3929
e08a85d8 3930 def _write_info_json(self, label, ie_result, infofn, overwrite=None):
cb96c5be 3931 ''' Write infojson and returns True = written, 'exists' = Already exists, False = skip, None = error '''
e08a85d8 3932 if overwrite is None:
3933 overwrite = self.params.get('overwrites', True)
80c03fa9 3934 if not self.params.get('writeinfojson'):
3935 return False
3936 elif not infofn:
3937 self.write_debug(f'Skipping writing {label} infojson')
3938 return False
3939 elif not self._ensure_dir_exists(infofn):
3940 return None
e08a85d8 3941 elif not overwrite and os.path.exists(infofn):
80c03fa9 3942 self.to_screen(f'[info] {label.title()} metadata is already present')
cb96c5be 3943 return 'exists'
3944
3945 self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3946 try:
3947 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3948 return True
86e5f3ed 3949 except OSError:
cb96c5be 3950 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3951 return None
80c03fa9 3952
3953 def _write_description(self, label, ie_result, descfn):
3954 ''' Write description and returns True = written, False = skip, None = error '''
3955 if not self.params.get('writedescription'):
3956 return False
3957 elif not descfn:
3958 self.write_debug(f'Skipping writing {label} description')
3959 return False
3960 elif not self._ensure_dir_exists(descfn):
3961 return None
3962 elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3963 self.to_screen(f'[info] {label.title()} description is already present')
3964 elif ie_result.get('description') is None:
88fb9425 3965 self.to_screen(f'[info] There\'s no {label} description to write')
80c03fa9 3966 return False
3967 else:
3968 try:
3969 self.to_screen(f'[info] Writing {label} description to: {descfn}')
86e5f3ed 3970 with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
80c03fa9 3971 descfile.write(ie_result['description'])
86e5f3ed 3972 except OSError:
80c03fa9 3973 self.report_error(f'Cannot write {label} description file {descfn}')
3974 return None
3975 return True
3976
3977 def _write_subtitles(self, info_dict, filename):
3978 ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
3979 ret = []
3980 subtitles = info_dict.get('requested_subtitles')
88fb9425 3981 if not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
80c03fa9 3982 # subtitles download errors are already managed as troubles in relevant IE
3983 # that way it will silently go on when used with unsupporting IE
3984 return ret
88fb9425 3985 elif not subtitles:
3986 self.to_screen('[info] There\'s no subtitles for the requested languages')
3987 return ret
80c03fa9 3988 sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
3989 if not sub_filename_base:
3990 self.to_screen('[info] Skipping writing video subtitles')
3991 return ret
88fb9425 3992
80c03fa9 3993 for sub_lang, sub_info in subtitles.items():
3994 sub_format = sub_info['ext']
3995 sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
3996 sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
e04938ab 3997 existing_sub = self.existing_file((sub_filename_final, sub_filename))
3998 if existing_sub:
80c03fa9 3999 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
e04938ab 4000 sub_info['filepath'] = existing_sub
4001 ret.append((existing_sub, sub_filename_final))
80c03fa9 4002 continue
4003
4004 self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
4005 if sub_info.get('data') is not None:
4006 try:
4007 # Use newline='' to prevent conversion of newline characters
4008 # See https://github.com/ytdl-org/youtube-dl/issues/10268
86e5f3ed 4009 with open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
80c03fa9 4010 subfile.write(sub_info['data'])
4011 sub_info['filepath'] = sub_filename
4012 ret.append((sub_filename, sub_filename_final))
4013 continue
86e5f3ed 4014 except OSError:
80c03fa9 4015 self.report_error(f'Cannot write video subtitles file {sub_filename}')
4016 return None
4017
4018 try:
4019 sub_copy = sub_info.copy()
4020 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
4021 self.dl(sub_filename, sub_copy, subtitle=True)
4022 sub_info['filepath'] = sub_filename
4023 ret.append((sub_filename, sub_filename_final))
6020e05d 4024 except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
c70c418d 4025 msg = f'Unable to download video subtitles for {sub_lang!r}: {err}'
6020e05d 4026 if self.params.get('ignoreerrors') is not True: # False or 'only_download'
c70c418d 4027 if not self.params.get('ignoreerrors'):
4028 self.report_error(msg)
4029 raise DownloadError(msg)
4030 self.report_warning(msg)
519804a9 4031 return ret
80c03fa9 4032
4033 def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
4034 ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
6c4fd172 4035 write_all = self.params.get('write_all_thumbnails', False)
80c03fa9 4036 thumbnails, ret = [], []
6c4fd172 4037 if write_all or self.params.get('writethumbnail', False):
0202b52a 4038 thumbnails = info_dict.get('thumbnails') or []
88fb9425 4039 if not thumbnails:
4040 self.to_screen(f'[info] There\'s no {label} thumbnails to download')
4041 return ret
6c4fd172 4042 multiple = write_all and len(thumbnails) > 1
ec82d85a 4043
80c03fa9 4044 if thumb_filename_base is None:
4045 thumb_filename_base = filename
4046 if thumbnails and not thumb_filename_base:
4047 self.write_debug(f'Skipping writing {label} thumbnail')
4048 return ret
4049
dd0228ce 4050 for idx, t in list(enumerate(thumbnails))[::-1]:
80c03fa9 4051 thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
aa9369a2 4052 thumb_display_id = f'{label} thumbnail {t["id"]}'
80c03fa9 4053 thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
4054 thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
ec82d85a 4055
e04938ab 4056 existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
4057 if existing_thumb:
aa9369a2 4058 self.to_screen('[info] %s is already present' % (
4059 thumb_display_id if multiple else f'{label} thumbnail').capitalize())
e04938ab 4060 t['filepath'] = existing_thumb
4061 ret.append((existing_thumb, thumb_filename_final))
ec82d85a 4062 else:
80c03fa9 4063 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
ec82d85a 4064 try:
297e9952 4065 uf = self.urlopen(sanitized_Request(t['url'], headers=t.get('http_headers', {})))
80c03fa9 4066 self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
d3d89c32 4067 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
ec82d85a 4068 shutil.copyfileobj(uf, thumbf)
80c03fa9 4069 ret.append((thumb_filename, thumb_filename_final))
885cc0b7 4070 t['filepath'] = thumb_filename
3158150c 4071 except network_exceptions as err:
dd0228ce 4072 thumbnails.pop(idx)
80c03fa9 4073 self.report_warning(f'Unable to download {thumb_display_id}: {err}')
6c4fd172 4074 if ret and not write_all:
4075 break
0202b52a 4076 return ret