26e63931 1import collections
31bd3925 2import contextlib
9d2ecdbc 3import datetime
c1c9a79c 4import errno
31bd3925 5import fileinput
b5ae35ee 6import functools
8222d8de 7import io
b82f815f 8import itertools
8694c600 9import json
62fec3b2 10import locale
083c9df9 11import operator
8222d8de 12import os
f8271158 13import random
8222d8de 14import re
15import shutil
dca08720 16import subprocess
8222d8de 17import sys
21cd8fae 18import tempfile
8222d8de 19import time
67134eab 20import tokenize
8222d8de 21import traceback
524e2e4f 22import unicodedata
f9934b96 23import urllib.request
961ea474 24from string import ascii_letters
25
f8271158 26from .cache import Cache
14f25df2 27from .compat import compat_os_name, compat_shlex_quote
982ee69a 28from .cookies import load_cookies
f8271158 29from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
30from .downloader.rtmp import rtmpdump_version
f8271158 31from .extractor import gen_extractor_classes, get_info_extractor
fe7866d0 32from .extractor.common import UnsupportedURLIE
f8271158 33from .extractor.openload import PhantomJSwrapper
34from .minicurses import format_text
8e40b9d1 35from .plugins import directories as plugin_directories
f8271158 36from .postprocessor import _PLUGIN_CLASSES as plugin_postprocessors
37from .postprocessor import (
38 EmbedThumbnailPP,
39 FFmpegFixupDuplicateMoovPP,
40 FFmpegFixupDurationPP,
41 FFmpegFixupM3u8PP,
42 FFmpegFixupM4aPP,
43 FFmpegFixupStretchedPP,
44 FFmpegFixupTimestampPP,
45 FFmpegMergerPP,
46 FFmpegPostProcessor,
ca9def71 47 FFmpegVideoConvertorPP,
f8271158 48 MoveFilesAfterDownloadPP,
49 get_postprocessor,
50)
ca9def71 51from .postprocessor.ffmpeg import resolve_mapping as resolve_recode_mapping
b5e7a2e6 52from .update import REPOSITORY, current_git_head, detect_variant
8c25f81b 53from .utils import (
f8271158 54 DEFAULT_OUTTMPL,
7b2c3f47 55 IDENTITY,
f8271158 56 LINK_TEMPLATES,
8dc59305 57 MEDIA_EXTENSIONS,
f8271158 58 NO_DEFAULT,
1d485a1a 59 NUMBER_RE,
f8271158 60 OUTTMPL_TYPES,
61 POSTPROCESS_WHEN,
62 STR_FORMAT_RE_TMPL,
63 STR_FORMAT_TYPES,
64 ContentTooShortError,
65 DateRange,
66 DownloadCancelled,
67 DownloadError,
68 EntryNotInPlaylist,
69 ExistingVideoReached,
70 ExtractorError,
784320c9 71 FormatSorter,
f8271158 72 GeoRestrictedError,
73 HEADRequest,
f8271158 74 ISO3166Utils,
75 LazyList,
76 MaxDownloadsReached,
19a03940 77 Namespace,
f8271158 78 PagedList,
79 PerRequestProxyHandler,
7e88d7d7 80 PlaylistEntries,
f8271158 81 Popen,
82 PostProcessingError,
83 ReExtractInfo,
84 RejectedVideoReached,
85 SameFileError,
86 UnavailableVideoError,
693f0600 87 UserNotLive,
f8271158 88 YoutubeDLCookieProcessor,
89 YoutubeDLHandler,
90 YoutubeDLRedirectHandler,
eedb7ba5 91 age_restricted,
92 args_to_str,
cb794ee0 93 bug_reports_message,
ce02ed60 94 date_from_str,
da4db748 95 deprecation_warning,
ce02ed60 96 determine_ext,
b5559424 97 determine_protocol,
c0384f22 98 encode_compat_str,
ce02ed60 99 encodeFilename,
a06916d9 100 error_to_compat_str,
47cdc68e 101 escapeHTML,
590bc6f6 102 expand_path,
90137ca4 103 filter_dict,
e29663c6 104 float_or_none,
02dbf93f 105 format_bytes,
e0fd9573 106 format_decimal_suffix,
f8271158 107 format_field,
525ef922 108 formatSeconds,
fc61aff4 109 get_compatible_ext,
0bb322b9 110 get_domain,
c9969434 111 int_or_none,
732044af 112 iri_to_uri,
941e881e 113 is_path_like,
34921b43 114 join_nonempty,
ce02ed60 115 locked_file,
0647d925 116 make_archive_id,
0202b52a 117 make_dir,
dca08720 118 make_HTTPS_handler,
8b7539d2 119 merge_headers,
3158150c 120 network_exceptions,
ec11a9f4 121 number_of_digits,
cd6fc19e 122 orderedSet,
5314b521 123 orderedSet_from_options,
083c9df9 124 parse_filesize,
ce02ed60 125 preferredencoding,
eedb7ba5 126 prepend_extension,
51fb4995 127 register_socks_protocols,
3efb96a6 128 remove_terminal_sequences,
cfb56d1a 129 render_table,
eedb7ba5 130 replace_extension,
ce02ed60 131 sanitize_filename,
1bb5c511 132 sanitize_path,
dcf77cf1 133 sanitize_url,
67dda517 134 sanitized_Request,
e5660ee6 135 std_headers,
1211bb6d 136 str_or_none,
e29663c6 137 strftime_or_none,
ce02ed60 138 subtitles_filename,
819e0531 139 supports_terminal_sequences,
b1f94422 140 system_identifier,
f2ebc5c7 141 timetuple_from_msec,
732044af 142 to_high_limit_path,
324ad820 143 traverse_obj,
fc61aff4 144 try_call,
6033d980 145 try_get,
29eb5174 146 url_basename,
7d1eb38a 147 variadic,
58b1f00d 148 version_tuple,
53973b4d 149 windows_enable_vt_mode,
ce02ed60 150 write_json_file,
151 write_string,
4f026faf 152)
70b23409 153from .version import RELEASE_GIT_HEAD, VARIANT, __version__
8222d8de 154
e9c0cdd3 155if compat_os_name == 'nt':
156 import ctypes
157
2459b6e1 158
86e5f3ed 159class YoutubeDL:
8222d8de 160 """YoutubeDL class.
161
162 YoutubeDL objects are the ones responsible for downloading the
163 actual video file and writing it to disk if the user has requested
164 it, among other tasks. In most cases there should be one per
165 program. Since, given a video URL, the downloader doesn't know how to
166 extract all the needed information (a task that InfoExtractors do), it
167 has to pass the URL to one of them.
168
169 For this, YoutubeDL objects have a method that allows
170 InfoExtractors to be registered in a given order. When it is passed
171 a URL, the YoutubeDL object hands it to the first InfoExtractor it
172 finds that reports being able to handle it. The InfoExtractor extracts
173 all the information about the video or videos the URL refers to, and
174 YoutubeDL processes the extracted information, possibly using a File
175 Downloader to download the video.
176
177 YoutubeDL objects accept a lot of parameters. In order not to saturate
178 the object constructor with arguments, it receives a dictionary of
179 options instead. These options are available through the params
180 attribute for the InfoExtractors to use. The YoutubeDL also
181 registers itself as the downloader in charge of the InfoExtractors
182 that are added to it, so this is a "mutual registration".
183
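    A minimal usage sketch (the URL and the options shown are only
    illustrative; any option documented below can be passed in the dict):

        ydl_opts = {'format': 'bestvideo+bestaudio/best'}
        with YoutubeDL(ydl_opts) as ydl:
            ydl.download(['https://example.com/watch?v=some-video'])
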
184 Available options:
185
186 username: Username for authentication purposes.
187 password: Password for authentication purposes.
180940e0 188 videopassword: Password for accessing a video.
1da50aa3 189 ap_mso: Adobe Pass multiple-system operator identifier.
190 ap_username: Multiple-system operator account username.
191 ap_password: Multiple-system operator account password.
8222d8de 192 usenetrc: Use netrc for authentication instead.
193 verbose: Print additional info to stdout.
194 quiet: Do not print messages to stdout.
ad8915b7 195 no_warnings: Do not print out anything for warnings.
bb66c247 196 forceprint: A dict with keys WHEN mapped to a list of templates to
197 print to stdout. The allowed keys are video or any of the
198 items in utils.POSTPROCESS_WHEN.
ca30f449 199 For compatibility, a single list is also accepted
bb66c247 200 print_to_file: A dict with keys WHEN (same as forceprint) mapped to
201 a list of tuples with (template, filename)
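E.g. (an illustrative value): forceprint={'video': ['%(title)s', '%(webpage_url)s']}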
8694c600 202 forcejson: Force printing info_dict as JSON.
63e0be34 203 dump_single_json: Force printing the info_dict of the whole playlist
204 (or video) as a single JSON line.
c25228e5 205 force_write_download_archive: Force writing download archive regardless
206 of 'skip_download' or 'simulate'.
b7b04c78 207 simulate: Do not download the video files. If unset (or None),
208 simulate only if listsubtitles, listformats or list_thumbnails is used
eb8a4433 209 format: Video format code. see "FORMAT SELECTION" for more details.
093a1710 210 You can also pass a function. The function takes 'ctx' as
211 argument and returns the formats to download.
212 See "build_format_selector" for an implementation
63ad4d43 213 allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
b7da73eb 214 ignore_no_formats_error: Ignore "No video formats" error. Useful for
215 extracting metadata even if the video is not actually
216 available for download (experimental)
0930b11f 217 format_sort: A list of fields by which to sort the video formats.
218 See "Sorting Formats" for more details.
c25228e5 219 format_sort_force: Force the given format_sort. see "Sorting Formats"
220 for more details.
08d30158 221 prefer_free_formats: Whether to prefer video formats with free containers
222 over non-free ones of same quality.
c25228e5 223 allow_multiple_video_streams: Allow multiple video streams to be merged
224 into a single file
225 allow_multiple_audio_streams: Allow multiple audio streams to be merged
226 into a single file
0ba692ac 227 check_formats: Whether to test if the formats are downloadable.
9f1a1c36 228 Can be True (check all), False (check none),
229 'selected' (check selected formats),
0ba692ac 230 or None (check only if requested by extractor)
4524baf0 231 paths: Dictionary of output paths. The allowed keys are 'home',
232 'temp' and the keys of OUTTMPL_TYPES (in utils.py)
de6000d9 233 outtmpl: Dictionary of templates for output names. Allowed keys
4524baf0 234 are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
34488702 235 For compatibility with youtube-dl, a single string can also be used
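E.g. (illustrative values): paths={'home': '~/Videos', 'temp': '/tmp'},
outtmpl={'default': '%(title)s [%(id)s].%(ext)s'}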
a820dc72 236 outtmpl_na_placeholder: Placeholder for unavailable meta fields.
237 restrictfilenames: Do not allow "&" and spaces in file names
238 trim_file_name: Limit length of filename (extension excluded)
4524baf0 239 windowsfilenames: Force the filenames to be windows compatible
b1940459 240 ignoreerrors: Do not stop on download/postprocessing errors.
241 Can be 'only_download' to ignore only download errors.
242 Default is 'only_download' for CLI, but False for API
26e2805c 243 skip_playlist_after_errors: Number of allowed failures until the rest of
244 the playlist is skipped
fe7866d0 245 allowed_extractors: List of regexes to match against extractor names that are allowed
0c3d0f51 246 overwrites: Overwrite all video and metadata files if True,
247 overwrite only non-video files if None
248 and don't overwrite any file if False
34488702 249 For compatibility with youtube-dl,
250 "nooverwrites" may also be used instead
c14e88f0 251 playlist_items: Specific indices of playlist to download.
75822ca7 252 playlistrandom: Download playlist items in random order.
7e9a6125 253 lazy_playlist: Process playlist entries as they are received.
8222d8de 254 matchtitle: Download only matching titles.
255 rejecttitle: Reject downloads for matching titles.
8bf9319e 256 logger: Log messages to a logging.Logger instance.
17ffed18 257 logtostderr: Print everything to stderr instead of stdout.
258 consoletitle: Display progress in console window's titlebar.
8222d8de 259 writedescription: Write the video description to a .description file
260 writeinfojson: Write the video metadata to a .info.json file
75d43ca0 261 clean_infojson: Remove private fields from the infojson
34488702 262 getcomments: Extract video comments. This will not be written to disk
06167fbb 263 unless writeinfojson is also given
1fb07d10 264 writeannotations: Write the video annotations to a .annotations.xml file
8222d8de 265 writethumbnail: Write the thumbnail image to a file
c25228e5 266 allow_playlist_files: Whether to write playlists' description, infojson etc
267 also to disk when using the 'write*' options
ec82d85a 268 write_all_thumbnails: Write all thumbnail formats to files
732044af 269 writelink: Write an internet shortcut file, depending on the
270 current platform (.url/.webloc/.desktop)
271 writeurllink: Write a Windows internet shortcut file (.url)
272 writewebloclink: Write a macOS internet shortcut file (.webloc)
273 writedesktoplink: Write a Linux internet shortcut file (.desktop)
8222d8de 274 writesubtitles: Write the video subtitles to a file
741dd8ea 275 writeautomaticsub: Write the automatically generated subtitles to a file
8222d8de 276 listsubtitles: Lists all available subtitles for the video
a504ced0 277 subtitlesformat: The format code for subtitles
c32b0aab 278 subtitleslangs: List of languages of the subtitles to download (can be regex).
279 The list may contain "all" to refer to all the available
280 subtitles. The language can be prefixed with a "-" to
62b58c09 281 exclude it from the requested languages, e.g. ['all', '-live_chat']
8222d8de 282 keepvideo: Keep the video file after post-processing
283 daterange: A DateRange object, download only if the upload_date is in the range.
284 skip_download: Skip the actual download of the video file
c35f9e72 285 cachedir: Location of the cache files in the filesystem.
a0e07d31 286 False to disable filesystem cache.
47192f92 287 noplaylist: Download single video instead of a playlist if in doubt.
8dbe9899 288 age_limit: An integer representing the user's age in years.
289 Unsuitable videos for the given age are skipped.
5fe18bdb 290 min_views: An integer representing the minimum view count the video
291 must have in order to not be skipped.
292 Videos without view count information are always
293 downloaded. None for no limit.
294 max_views: An integer representing the maximum view count.
295 Videos that are more popular than that are not
296 downloaded.
297 Videos without view count information are always
298 downloaded. None for no limit.
ae103564 299 download_archive: A set, or the name of a file where all downloads are recorded.
300 Videos already present in the file are not downloaded again.
8a51f564 301 break_on_existing: Stop the download process after attempting to download a
302 file that is in the archive.
303 break_on_reject: Stop the download process when encountering a video that
304 has been filtered out.
b222c271 305 break_per_url: Whether break_on_reject and break_on_existing
306 should act on each input URL as opposed to for the entire queue
d76fa1f3 307 cookiefile: File name or text stream from where cookies should be read and dumped to
f59f5ef8 308 cookiesfrombrowser: A tuple containing the name of the browser, the profile
9bd13fe5 309 name/path from where cookies are loaded, the name of the keyring,
310 and the container name, e.g. ('chrome', ) or
311 ('vivaldi', 'default', 'BASICTEXT') or ('firefox', 'default', None, 'Meta')
f81c62a6 312 legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
313 support RFC 5746 secure renegotiation
f59f5ef8 314 nocheckcertificate: Do not verify SSL certificates
bb58c9ed 315 client_certificate: Path to client certificate file in PEM format. May include the private key
316 client_certificate_key: Path to private key file for client certificate
317 client_certificate_password: Password for client certificate private key, if encrypted.
318 If not provided and the key is encrypted, yt-dlp will ask interactively
7e8c0af0 319 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
c6e07cf1 320 (Only supported by some extractors)
8b7539d2 321 http_headers: A dictionary of custom headers to be used for all requests
a1ee09e8 322 proxy: URL of the proxy server to use
38cce791 323 geo_verification_proxy: URL of the proxy to use for IP address verification
504f20dd 324 on geo-restricted sites.
e344693b 325 socket_timeout: Time to wait for unresponsive hosts, in seconds
0783b09b 326 bidi_workaround: Work around buggy terminals without bidirectional text
327 support, using fribidi
a0ddb8a2 328 debug_printtraffic: Print out sent and received HTTP traffic
04b4d394 329 default_search: Prepend this string if an input URL is not valid.
330 'auto' for elaborate guessing
62fec3b2 331 encoding: Use this encoding instead of the system-specified.
134c913c 332 extract_flat: Whether to resolve and process url_results further
333 * False: Always process (default)
334 * True: Never process
335 * 'in_playlist': Do not process inside playlist/multi_video
336 * 'discard': Always process, but don't return the result
337 from inside playlist/multi_video
338 * 'discard_in_playlist': Same as "discard", but only for
339 playlists (not multi_video)
f2ebc5c7 340 wait_for_video: If given, wait for scheduled streams to become available.
341 The value should be a tuple containing the range
342 (min_secs, max_secs) to wait between retries
4f026faf 343 postprocessors: A list of dictionaries, each with an entry
71b640cc 344 * key: The name of the postprocessor. See
7a5c1cfe 345 yt_dlp/postprocessor/__init__.py for a list.
bb66c247 346 * when: When to run the postprocessor. Allowed values are
347 the entries of utils.POSTPROCESS_WHEN
56d868db 348 Assumed to be 'post_process' if not given
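E.g. (an illustrative value): [{'key': 'EmbedThumbnail', 'when': 'post_process'}]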
71b640cc 349 progress_hooks: A list of functions that get called on download
350 progress, with a dictionary with the entries
5cda4eda 351 * status: One of "downloading", "error", or "finished".
ee69b99a 352 Check this first and ignore unknown values.
3ba7740d 353 * info_dict: The extracted info_dict
71b640cc 354
5cda4eda 355 If status is one of "downloading", or "finished", the
ee69b99a 356 following properties may also be present:
357 * filename: The final filename (always present)
5cda4eda 358 * tmpfilename: The filename we're currently writing to
71b640cc 359 * downloaded_bytes: Bytes on disk
360 * total_bytes: Size of the whole file, None if unknown
5cda4eda 361 * total_bytes_estimate: Guess of the eventual file size,
362 None if unavailable.
363 * elapsed: The number of seconds since download started.
71b640cc 364 * eta: The estimated time in seconds, None if unknown
365 * speed: The download speed in bytes/second, None if
366 unknown
5cda4eda 367 * fragment_index: The counter of the currently
368 downloaded video fragment.
369 * fragment_count: The number of fragments (= individual
370 files that will be merged)
71b640cc 371
372 Progress hooks are guaranteed to be called at least once
373 (with status "finished") if the download is successful.
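An illustrative hook (only uses the documented keys above):
lambda d: print('Done:', d['filename']) if d['status'] == 'finished' else None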
819e0531 374 postprocessor_hooks: A list of functions that get called on postprocessing
375 progress, with a dictionary with the entries
376 * status: One of "started", "processing", or "finished".
377 Check this first and ignore unknown values.
378 * postprocessor: Name of the postprocessor
379 * info_dict: The extracted info_dict
380
381 Progress hooks are guaranteed to be called at least twice
382 (with status "started" and "finished") if the processing is successful.
fc61aff4 383 merge_output_format: "/" separated list of extensions to use when merging formats.
6b591b29 384 final_ext: Expected final extension; used to detect when the file was
59a7a13e 385 already downloaded and converted
6271f1ca 386 fixup: Automatically correct known faults of the file.
387 One of:
388 - "never": do nothing
389 - "warn": only emit a warning
390 - "detect_or_warn": check whether we can do anything
62cd676c 391 about it, warn otherwise (default)
504f20dd 392 source_address: Client-side IP address to bind to.
1cf376f5 393 sleep_interval_requests: Number of seconds to sleep between requests
394 during extraction
7aa589a5 395 sleep_interval: Number of seconds to sleep before each download when
396 used alone or a lower bound of a range for randomized
397 sleep before each download (minimum possible number
398 of seconds to sleep) when used along with
399 max_sleep_interval.
400 max_sleep_interval:Upper bound of a range for randomized sleep before each
401 download (maximum possible number of seconds to sleep).
402 Must only be used along with sleep_interval.
403 Actual sleep time will be a random float from range
404 [sleep_interval; max_sleep_interval].
1cf376f5 405 sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
cfb56d1a 406 listformats: Print an overview of available video formats and exit.
407 list_thumbnails: Print a table of all thumbnails and exit.
0a41f331 408 match_filter: A function that gets called for every video with the signature
409 (info_dict, *, incomplete: bool) -> Optional[str]
410 For backward compatibility with youtube-dl, the signature
411 (info_dict) -> Optional[str] is also allowed.
412 - If it returns a message, the video is ignored.
413 - If it returns None, the video is downloaded.
414 - If it returns utils.NO_DEFAULT, the user is interactively
415 asked whether to download the video.
347de493 416 match_filter_func in utils.py is one example of this.
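An illustrative filter using the documented signature:
lambda info_dict, *, incomplete: 'Too short' if (info_dict.get('duration') or 0) < 10 else None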
7e5db8c9 417 no_color: Do not emit color codes in output.
0a840f58 418 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
504f20dd 419 HTTP header
0a840f58 420 geo_bypass_country:
773f291d 421 Two-letter ISO 3166-2 country code that will be used for
422 explicit geographic restriction bypassing via faking
504f20dd 423 X-Forwarded-For HTTP header
5f95927a 424 geo_bypass_ip_block:
425 IP range in CIDR notation that will be used similarly to
504f20dd 426 geo_bypass_country
52a8a1e1 427 external_downloader: A dictionary of protocol keys and the executable of the
428 external downloader to use for it. The allowed protocols
429 are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
430 Set the value to 'native' to use the native downloader
53ed7066 431 compat_opts: Compatibility options. See "Differences in default behavior".
3acf6d38 432 The following options do not work when used through the API:
b5ae35ee 433 filename, abort-on-error, multistreams, no-live-chat, format-sort,
dac5df5a 434 no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
e4f02757 435 Refer to __init__.py for their implementation
819e0531 436 progress_template: Dictionary of templates for progress outputs.
437 Allowed keys are 'download', 'postprocess',
438 'download-title' (console title) and 'postprocess-title'.
439 The template is mapped on a dictionary with keys 'progress' and 'info'
23326151 440 retry_sleep_functions: Dictionary of functions that take the number of attempts
441 as argument and return the time to sleep in seconds.
442 Allowed keys are 'http', 'fragment', 'file_access'
0f446365 443 download_ranges: A callback function that gets called for every video with
444 the signature (info_dict, ydl) -> Iterable[Section].
445 Only the returned sections will be downloaded.
446 Each Section is a dict with the following keys:
5ec1b6b7 447 * start_time: Start time of the section in seconds
448 * end_time: End time of the section in seconds
449 * title: Section title (Optional)
450 * index: Section number (Optional)
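E.g. (an illustrative callback): lambda info_dict, ydl: [{'start_time': 60, 'end_time': 120}]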
0f446365 451 force_keyframes_at_cuts: Re-encode the video when downloading ranges to get precise cuts
a7dc6a89 452 noprogress: Do not print the progress bar
a831c2ea 453 live_from_start: Whether to download livestream videos from the start
fe7e0c98 454
8222d8de 455 The following parameters are not used by YoutubeDL itself, they are used by
7a5c1cfe 456 the downloader (see yt_dlp/downloader/common.py):
51d9739f 457 nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
205a0654 458 max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
a7dc6a89 459 continuedl, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
59a7a13e 460 external_downloader_args, concurrent_fragment_downloads.
76b1bd67 461
462 The following options are used by the post processors:
c0b7d117 463 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
464 to the binary or its containing directory.
43820c03 465 postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
34488702 466 and a list of additional command-line arguments for the
467 postprocessor/executable. The dict can also have "PP+EXE" keys
468 which are used when the given exe is used by the given PP.
469 Use 'default' as the name for arguments to be passed to all PP
470 For compatibility with youtube-dl, a single list of args
471 can also be used
e409895f 472
473 The following options are used by the extractors:
62bff2c1 474 extractor_retries: Number of times to retry for known errors
475 dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
e409895f 476 hls_split_discontinuity: Split HLS playlists to different formats at
62bff2c1 477 discontinuities such as ad breaks (default: False)
5d3a0e79 478 extractor_args: A dictionary of arguments to be passed to the extractors.
479 See "EXTRACTOR ARGUMENTS" for details.
62b58c09 480 E.g. {'youtube': {'skip': ['dash', 'hls']}}
88f23a18 481 mark_watched: Mark videos watched (even with --simulate). Only for YouTube
1890fc63 482
483 The following options are deprecated and may be removed in the future:
484
fe7866d0 485 force_generic_extractor: Force downloader to use the generic extractor
486 - Use allowed_extractors = ['generic', 'default']
7e9a6125 487 playliststart: - Use playlist_items
488 Playlist item to start at.
489 playlistend: - Use playlist_items
490 Playlist item to end at.
491 playlistreverse: - Use playlist_items
492 Download playlist items in reverse order.
1890fc63 493 forceurl: - Use forceprint
494 Force printing final URL.
495 forcetitle: - Use forceprint
496 Force printing title.
497 forceid: - Use forceprint
498 Force printing ID.
499 forcethumbnail: - Use forceprint
500 Force printing thumbnail URL.
501 forcedescription: - Use forceprint
502 Force printing description.
503 forcefilename: - Use forceprint
504 Force printing final filename.
505 forceduration: - Use forceprint
506 Force printing duration.
507 allsubtitles: - Use subtitleslangs = ['all']
508 Downloads all the subtitles of the video
509 (requires writesubtitles or writeautomaticsub)
510 include_ads: - Doesn't work
511 Download ads as well
512 call_home: - Not implemented
513 Boolean, true iff we are allowed to contact the
514 yt-dlp servers for debugging.
515 post_hooks: - Register a custom postprocessor
516 A list of functions that get called as the final step
517 for each video file, after all postprocessors have been
518 called. The filename will be passed as the only argument.
519 hls_prefer_native: - Use external_downloader = {'m3u8': 'native'} or {'m3u8': 'ffmpeg'}.
520 Use the native HLS downloader if True, ffmpeg/avconv
521 if False, or the downloader suggested by the extractor
522 if None.
523 prefer_ffmpeg: - avconv support is deprecated
524 If False, use avconv instead of ffmpeg if both are available,
525 otherwise prefer ffmpeg.
526 youtube_include_dash_manifest: - Use extractor_args
5d3a0e79 527 If True (default), DASH manifests and related
62bff2c1 528 data will be downloaded and processed by extractor.
529 You can reduce network I/O by disabling it if you don't
530 care about DASH. (only for youtube)
1890fc63 531 youtube_include_hls_manifest: - Use extractor_args
5d3a0e79 532 If True (default), HLS manifests and related
62bff2c1 533 data will be downloaded and processed by extractor.
534 You can reduce network I/O by disabling it if you don't
535 care about HLS. (only for youtube)
8222d8de 536 """
537
86e5f3ed 538 _NUMERIC_FIELDS = {
b8ed0f15 539 'width', 'height', 'asr', 'audio_channels', 'fps',
540 'tbr', 'abr', 'vbr', 'filesize', 'filesize_approx',
e6f21b3d 541 'timestamp', 'release_timestamp',
c9969434 542 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
543 'average_rating', 'comment_count', 'age_limit',
544 'start_time', 'end_time',
545 'chapter_number', 'season_number', 'episode_number',
546 'track_number', 'disc_number', 'release_year',
86e5f3ed 547 }
c9969434 548
6db9c4d5 549 _format_fields = {
550 # NB: Keep in sync with the docstring of extractor/common.py
a44ca5a4 551 'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note',
105bfd90 552 'width', 'height', 'aspect_ratio', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr', 'audio_channels',
d5d1df8a 553 'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx', 'rows', 'columns',
6db9c4d5 554 'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start',
555 'preference', 'language', 'language_preference', 'quality', 'source_preference',
556 'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'downloader_options',
557 'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'
558 }
48ee10ee 559 _format_selection_exts = {
8dc59305 560 'audio': set(MEDIA_EXTENSIONS.common_audio),
561 'video': set(MEDIA_EXTENSIONS.common_video + ('3gp', )),
562 'storyboards': set(MEDIA_EXTENSIONS.storyboards),
48ee10ee 563 }
564
3511266b 565 def __init__(self, params=None, auto_init=True):
883d4b1e 566 """Create a YoutubeDL object with the given options.
567 @param auto_init Whether to load the default extractors and print header (if verbose).
49a57e70 568 Set to 'no_verbose_header' to not print the header
883d4b1e 569 """
e9f9a10f 570 if params is None:
571 params = {}
592b7485 572 self.params = params
8b7491c8 573 self._ies = {}
56c73665 574 self._ies_instances = {}
1e43a6f7 575 self._pps = {k: [] for k in POSTPROCESS_WHEN}
b35496d8 576 self._printed_messages = set()
1cf376f5 577 self._first_webpage_request = True
ab8e5e51 578 self._post_hooks = []
933605d7 579 self._progress_hooks = []
819e0531 580 self._postprocessor_hooks = []
8222d8de 581 self._download_retcode = 0
582 self._num_downloads = 0
9c906919 583 self._num_videos = 0
592b7485 584 self._playlist_level = 0
585 self._playlist_urls = set()
a0e07d31 586 self.cache = Cache(self)
34308b30 587
819e0531 588 windows_enable_vt_mode()
591bb9d3 589 stdout = sys.stderr if self.params.get('logtostderr') else sys.stdout
590 self._out_files = Namespace(
591 out=stdout,
592 error=sys.stderr,
593 screen=sys.stderr if self.params.get('quiet') else stdout,
594 console=None if compat_os_name == 'nt' else next(
cf4f42cb 595 filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None)
591bb9d3 596 )
597 self._allow_colors = Namespace(**{
598 type_: not self.params.get('no_color') and supports_terminal_sequences(stream)
64fa820c 599 for type_, stream in self._out_files.items_ if type_ != 'console'
591bb9d3 600 })
819e0531 601
6929b41a 602 # The code is left like this to be reused for future deprecations
603 MIN_SUPPORTED, MIN_RECOMMENDED = (3, 7), (3, 7)
eff42759 604 current_version = sys.version_info[:2]
605 if current_version < MIN_RECOMMENDED:
9d339c41 606 msg = ('Support for Python version %d.%d has been deprecated. '
24093d52 607 'See https://github.com/yt-dlp/yt-dlp/issues/3764 for more details.'
c6e07cf1 608 '\n You will no longer receive updates on this version')
eff42759 609 if current_version < MIN_SUPPORTED:
610 msg = 'Python version %d.%d is no longer supported'
611 self.deprecation_warning(
612 f'{msg}! Please update to Python %d.%d or above' % (*current_version, *MIN_RECOMMENDED))
a61f4b28 613
88acdbc2 614 if self.params.get('allow_unplayable_formats'):
615 self.report_warning(
ec11a9f4 616 f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
819e0531 617 'This is a developer option intended for debugging. \n'
618 ' If you experience any issues while using this option, '
ec11a9f4 619 f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')
88acdbc2 620
497074f0 621 if self.params.get('bidi_workaround', False):
622 try:
623 import pty
624 master, slave = pty.openpty()
625 width = shutil.get_terminal_size().columns
626 width_args = [] if width is None else ['-w', str(width)]
627 sp_kwargs = {'stdin': subprocess.PIPE, 'stdout': slave, 'stderr': self._out_files.error}
628 try:
629 self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
630 except OSError:
631 self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
632 self._output_channel = os.fdopen(master, 'rb')
633 except OSError as ose:
634 if ose.errno == errno.ENOENT:
635 self.report_warning(
636 'Could not find fribidi executable, ignoring --bidi-workaround. '
637 'Make sure that fribidi is an executable file in one of the directories in your $PATH.')
638 else:
639 raise
640
641 self.params['compat_opts'] = set(self.params.get('compat_opts', ()))
642 if auto_init and auto_init != 'no_verbose_header':
643 self.print_debug_header()
644
be5df5ee 645 def check_deprecated(param, option, suggestion):
646 if self.params.get(param) is not None:
86e5f3ed 647 self.report_warning(f'{option} is deprecated. Use {suggestion} instead')
be5df5ee 648 return True
649 return False
650
651 if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
38cce791 652 if self.params.get('geo_verification_proxy') is None:
653 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
654
0d1bb027 655 check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
656 check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
53ed7066 657 check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
0d1bb027 658
49a57e70 659 for msg in self.params.get('_warnings', []):
0d1bb027 660 self.report_warning(msg)
ee8dd27a 661 for msg in self.params.get('_deprecation_warnings', []):
da4db748 662 self.deprecated_feature(msg)
0d1bb027 663
8a82af35 664 if 'list-formats' in self.params['compat_opts']:
ec11a9f4 665 self.params['listformats_table'] = False
666
b5ae35ee 667 if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
b868936c 668 # nooverwrites was unnecessarily changed to overwrites
669 # in 0c3d0f51778b153f65c21906031c2e091fcfb641
670 # This ensures compatibility with both keys
671 self.params['overwrites'] = not self.params['nooverwrites']
b5ae35ee 672 elif self.params.get('overwrites') is None:
673 self.params.pop('overwrites', None)
b868936c 674 else:
675 self.params['nooverwrites'] = not self.params['overwrites']
b9d973be 676
e4221b70 677 if self.params.get('simulate') is None and any((
678 self.params.get('list_thumbnails'),
679 self.params.get('listformats'),
680 self.params.get('listsubtitles'),
681 )):
682 self.params['simulate'] = 'list_only'
683
455a15e2 684 self.params.setdefault('forceprint', {})
685 self.params.setdefault('print_to_file', {})
bb66c247 686
687 # Compatibility with older syntax
ca30f449 688 if not isinstance(params['forceprint'], dict):
455a15e2 689 self.params['forceprint'] = {'video': params['forceprint']}
ca30f449 690
97ec5bc5 691 if auto_init:
97ec5bc5 692 self.add_default_info_extractors()
693
3089bc74 694 if (sys.platform != 'win32'
695 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
455a15e2 696 and not self.params.get('restrictfilenames', False)):
e9137224 697 # Unicode filesystem API will throw errors (#1474, #13027)
34308b30 698 self.report_warning(
6febd1c1 699 'Assuming --restrict-filenames since file system encoding '
1b725173 700 'cannot encode all characters. '
6febd1c1 701 'Set the LC_ALL environment variable to fix this.')
4a98cdbf 702 self.params['restrictfilenames'] = True
34308b30 703
bf1824b3 704 self._parse_outtmpl()
486dd09e 705
187986a8 706 # Creating format selector here allows us to catch syntax errors before the extraction
707 self.format_selector = (
fa9f30b8 708 self.params.get('format') if self.params.get('format') in (None, '-')
093a1710 709 else self.params['format'] if callable(self.params['format'])
187986a8 710 else self.build_format_selector(self.params['format']))
711
8b7539d2 712 # Set http_headers defaults according to std_headers
713 self.params['http_headers'] = merge_headers(std_headers, self.params.get('http_headers', {}))
714
013b50b7 715 hooks = {
716 'post_hooks': self.add_post_hook,
717 'progress_hooks': self.add_progress_hook,
718 'postprocessor_hooks': self.add_postprocessor_hook,
719 }
720 for opt, fn in hooks.items():
721 for ph in self.params.get(opt, []):
722 fn(ph)
71b640cc 723
5bfc8bee 724 for pp_def_raw in self.params.get('postprocessors', []):
725 pp_def = dict(pp_def_raw)
726 when = pp_def.pop('when', 'post_process')
727 self.add_post_processor(
f9934b96 728 get_postprocessor(pp_def.pop('key'))(self, **pp_def),
5bfc8bee 729 when=when)
730
97ec5bc5 731 self._setup_opener()
51fb4995 732 register_socks_protocols()
733
ed39cac5 734 def preload_download_archive(fn):
735 """Preload the archive, if any is specified"""
ae103564 736 archive = set()
ed39cac5 737 if fn is None:
ae103564 738 return archive
941e881e 739 elif not is_path_like(fn):
ae103564 740 return fn
741
49a57e70 742 self.write_debug(f'Loading archive file {fn!r}')
ed39cac5 743 try:
744 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
745 for line in archive_file:
ae103564 746 archive.add(line.strip())
86e5f3ed 747 except OSError as ioe:
ed39cac5 748 if ioe.errno != errno.ENOENT:
749 raise
ae103564 750 return archive
ed39cac5 751
ae103564 752 self.archive = preload_download_archive(self.params.get('download_archive'))
ed39cac5 753
7d4111ed 754 def warn_if_short_id(self, argv):
755 # short YouTube ID starting with dash?
756 idxs = [
757 i for i, a in enumerate(argv)
758 if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
759 if idxs:
760 correct_argv = (
7a5c1cfe 761 ['yt-dlp']
3089bc74 762 + [a for i, a in enumerate(argv) if i not in idxs]
763 + ['--'] + [argv[i] for i in idxs]
7d4111ed 764 )
765 self.report_warning(
766 'Long argument string detected. '
49a57e70 767 'Use -- to separate parameters and URLs, like this:\n%s' %
7d4111ed 768 args_to_str(correct_argv))
769
8222d8de 770 def add_info_extractor(self, ie):
771 """Add an InfoExtractor object to the end of the list."""
8b7491c8 772 ie_key = ie.ie_key()
773 self._ies[ie_key] = ie
e52d7f85 774 if not isinstance(ie, type):
8b7491c8 775 self._ies_instances[ie_key] = ie
e52d7f85 776 ie.set_downloader(self)
8222d8de 777
56c73665 778 def get_info_extractor(self, ie_key):
779 """
780 Get an instance of an IE with name ie_key. It will try to get one from
781 the _ies list; if there is no instance, it will create a new one and add
782 it to the extractor list.
783 """
784 ie = self._ies_instances.get(ie_key)
785 if ie is None:
786 ie = get_info_extractor(ie_key)()
787 self.add_info_extractor(ie)
788 return ie
789
023fa8c4 790 def add_default_info_extractors(self):
791 """
792 Add the InfoExtractors returned by gen_extractors to the end of the list
793 """
fe7866d0 794 all_ies = {ie.IE_NAME.lower(): ie for ie in gen_extractor_classes()}
795 all_ies['end'] = UnsupportedURLIE()
796 try:
797 ie_names = orderedSet_from_options(
798 self.params.get('allowed_extractors', ['default']), {
799 'all': list(all_ies),
800 'default': [name for name, ie in all_ies.items() if ie._ENABLED],
801 }, use_regex=True)
802 except re.error as e:
803 raise ValueError(f'Wrong regex for allowed_extractors: {e.pattern}')
804 for name in ie_names:
805 self.add_info_extractor(all_ies[name])
806 self.write_debug(f'Loaded {len(ie_names)} extractors')
023fa8c4 807
56d868db 808 def add_post_processor(self, pp, when='post_process'):
8222d8de 809 """Add a PostProcessor object to the end of the chain."""
8aa0e7cd 810 assert when in POSTPROCESS_WHEN, f'Invalid when={when}'
5bfa4862 811 self._pps[when].append(pp)
8222d8de 812 pp.set_downloader(self)
813
ab8e5e51 814 def add_post_hook(self, ph):
815 """Add the post hook"""
816 self._post_hooks.append(ph)
817
933605d7 818 def add_progress_hook(self, ph):
819e0531 819 """Add the download progress hook"""
933605d7 820 self._progress_hooks.append(ph)
8ab470f1 821
819e0531 822 def add_postprocessor_hook(self, ph):
823 """Add the postprocessing progress hook"""
824 self._postprocessor_hooks.append(ph)
5bfc8bee 825 for pps in self._pps.values():
826 for pp in pps:
827 pp.add_progress_hook(ph)
819e0531 828
1c088fa8 829 def _bidi_workaround(self, message):
5d681e96 830 if not hasattr(self, '_output_channel'):
1c088fa8 831 return message
832
5d681e96 833 assert hasattr(self, '_output_process')
14f25df2 834 assert isinstance(message, str)
6febd1c1 835 line_count = message.count('\n') + 1
0f06bcd7 836 self._output_process.stdin.write((message + '\n').encode())
5d681e96 837 self._output_process.stdin.flush()
0f06bcd7 838 res = ''.join(self._output_channel.readline().decode()
9e1a5b84 839 for _ in range(line_count))
6febd1c1 840 return res[:-len('\n')]
1c088fa8 841
b35496d8 842 def _write_string(self, message, out=None, only_once=False):
843 if only_once:
844 if message in self._printed_messages:
845 return
846 self._printed_messages.add(message)
847 write_string(message, out=out, encoding=self.params.get('encoding'))
734f90bb 848
cf4f42cb 849 def to_stdout(self, message, skip_eol=False, quiet=None):
0760b0a7 850 """Print message to stdout"""
cf4f42cb 851 if quiet is not None:
da4db748 852 self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument quiet. '
853 'Use "YoutubeDL.to_screen" instead')
8a82af35 854 if skip_eol is not False:
da4db748 855 self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument skip_eol. '
856 'Use "YoutubeDL.to_screen" instead')
0bf9dc1e 857 self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.out)
cf4f42cb 858
dfea94f8 859 def to_screen(self, message, skip_eol=False, quiet=None, only_once=False):
cf4f42cb 860 """Print message to screen if not in quiet mode"""
8bf9319e 861 if self.params.get('logger'):
43afe285 862 self.params['logger'].debug(message)
cf4f42cb 863 return
864 if (self.params.get('quiet') if quiet is None else quiet) and not self.params.get('verbose'):
865 return
866 self._write_string(
867 '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
dfea94f8 868 self._out_files.screen, only_once=only_once)
8222d8de 869
b35496d8 870 def to_stderr(self, message, only_once=False):
0760b0a7 871 """Print message to stderr"""
14f25df2 872 assert isinstance(message, str)
8bf9319e 873 if self.params.get('logger'):
43afe285 874 self.params['logger'].error(message)
875 else:
5792c950 876 self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.error, only_once=only_once)
cf4f42cb 877
878 def _send_console_code(self, code):
591bb9d3 879 if compat_os_name == 'nt' or not self._out_files.console:
cf4f42cb 880 return
591bb9d3 881 self._write_string(code, self._out_files.console)
8222d8de 882
1e5b9a95 883 def to_console_title(self, message):
884 if not self.params.get('consoletitle', False):
885 return
3efb96a6 886 message = remove_terminal_sequences(message)
4bede0d8 887 if compat_os_name == 'nt':
888 if ctypes.windll.kernel32.GetConsoleWindow():
889 # c_wchar_p() might not be necessary if `message` is
890 # already of type unicode()
891 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
cf4f42cb 892 else:
893 self._send_console_code(f'\033]0;{message}\007')
1e5b9a95 894
bdde425c 895 def save_console_title(self):
cf4f42cb 896 if not self.params.get('consoletitle') or self.params.get('simulate'):
bdde425c 897 return
592b7485 898 self._send_console_code('\033[22;0t') # Save the title on stack
bdde425c 899
900 def restore_console_title(self):
cf4f42cb 901 if not self.params.get('consoletitle') or self.params.get('simulate'):
bdde425c 902 return
592b7485 903 self._send_console_code('\033[23;0t') # Restore the title from stack
bdde425c 904
905 def __enter__(self):
906 self.save_console_title()
907 return self
908
909 def __exit__(self, *args):
910 self.restore_console_title()
f89197d7 911
dca08720 912 if self.params.get('cookiefile') is not None:
1bab3437 913 self.cookiejar.save(ignore_discard=True, ignore_expires=True)
bdde425c 914
fa9f30b8 915 def trouble(self, message=None, tb=None, is_error=True):
8222d8de 916 """Determine action to take when a download problem appears.
917
918 Depending on whether the downloader has been configured to ignore
919 download errors or not, this method may throw an exception or
920 not when errors are found, after printing the message.
921
fa9f30b8 922 @param tb If given, is additional traceback information
923 @param is_error Whether to raise error according to ignoreerrors
8222d8de 924 """
925 if message is not None:
926 self.to_stderr(message)
927 if self.params.get('verbose'):
928 if tb is None:
929 if sys.exc_info()[0]: # if .trouble has been called from an except block
6febd1c1 930 tb = ''
8222d8de 931 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
6febd1c1 932 tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
c0384f22 933 tb += encode_compat_str(traceback.format_exc())
8222d8de 934 else:
935 tb_data = traceback.format_list(traceback.extract_stack())
6febd1c1 936 tb = ''.join(tb_data)
c19bc311 937 if tb:
938 self.to_stderr(tb)
fa9f30b8 939 if not is_error:
940 return
b1940459 941 if not self.params.get('ignoreerrors'):
8222d8de 942 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
943 exc_info = sys.exc_info()[1].exc_info
944 else:
945 exc_info = sys.exc_info()
946 raise DownloadError(message, exc_info)
947 self._download_retcode = 1
948
19a03940 949 Styles = Namespace(
950 HEADERS='yellow',
951 EMPHASIS='light blue',
492272fe 952 FILENAME='green',
19a03940 953 ID='green',
954 DELIM='blue',
955 ERROR='red',
956 WARNING='yellow',
957 SUPPRESS='light black',
958 )
ec11a9f4 959
7578d77d 960 def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
e5a998f3 961 text = str(text)
ec11a9f4 962 if test_encoding:
963 original_text = text
5c104538 964 # handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711
965 encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii'
ec11a9f4 966 text = text.encode(encoding, 'ignore').decode(encoding)
967 if fallback is not None and text != original_text:
968 text = fallback
7578d77d 969 return format_text(text, f) if allow_colors else text if fallback is None else fallback
ec11a9f4 970
591bb9d3 971 def _format_out(self, *args, **kwargs):
972 return self._format_text(self._out_files.out, self._allow_colors.out, *args, **kwargs)
973
ec11a9f4 974 def _format_screen(self, *args, **kwargs):
591bb9d3 975 return self._format_text(self._out_files.screen, self._allow_colors.screen, *args, **kwargs)
ec11a9f4 976
977 def _format_err(self, *args, **kwargs):
591bb9d3 978 return self._format_text(self._out_files.error, self._allow_colors.error, *args, **kwargs)
819e0531 979
c84aeac6 980 def report_warning(self, message, only_once=False):
8222d8de 981 '''
982 Print the message to stderr; it will be prefixed with 'WARNING:'.
983 If stderr is a tty, the 'WARNING:' will be colored
984 '''
6d07ce01 985 if self.params.get('logger') is not None:
986 self.params['logger'].warning(message)
8222d8de 987 else:
ad8915b7 988 if self.params.get('no_warnings'):
989 return
ec11a9f4 990 self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)
8222d8de 991
da4db748 992 def deprecation_warning(self, message, *, stacklevel=0):
993 deprecation_warning(
994 message, stacklevel=stacklevel + 1, printer=self.report_error, is_error=False)
995
996 def deprecated_feature(self, message):
ee8dd27a 997 if self.params.get('logger') is not None:
da4db748 998 self.params['logger'].warning(f'Deprecated Feature: {message}')
999 self.to_stderr(f'{self._format_err("Deprecated Feature:", self.Styles.ERROR)} {message}', True)
ee8dd27a 1000
fa9f30b8 1001 def report_error(self, message, *args, **kwargs):
8222d8de 1002 '''
1003 Do the same as trouble, but prefix the message with 'ERROR:', colored
1004 in red if stderr is a tty.
1005 '''
fa9f30b8 1006 self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)
8222d8de 1007
b35496d8 1008 def write_debug(self, message, only_once=False):
0760b0a7 1009 '''Log debug message or Print message to stderr'''
1010 if not self.params.get('verbose', False):
1011 return
8a82af35 1012 message = f'[debug] {message}'
0760b0a7 1013 if self.params.get('logger'):
1014 self.params['logger'].debug(message)
1015 else:
b35496d8 1016 self.to_stderr(message, only_once)
0760b0a7 1017
8222d8de 1018 def report_file_already_downloaded(self, file_name):
1019 """Report file has already been fully downloaded."""
1020 try:
6febd1c1 1021 self.to_screen('[download] %s has already been downloaded' % file_name)
ce02ed60 1022 except UnicodeEncodeError:
6febd1c1 1023 self.to_screen('[download] The file has already been downloaded')
8222d8de 1024
0c3d0f51 1025 def report_file_delete(self, file_name):
1026 """Report that existing file will be deleted."""
1027 try:
c25228e5 1028 self.to_screen('Deleting existing file %s' % file_name)
0c3d0f51 1029 except UnicodeEncodeError:
c25228e5 1030 self.to_screen('Deleting existing file')
0c3d0f51 1031
319b6059 1032 def raise_no_formats(self, info, forced=False, *, msg=None):
0a5a191a 1033 has_drm = info.get('_has_drm')
319b6059 1034 ignored, expected = self.params.get('ignore_no_formats_error'), bool(msg)
1035 msg = msg or has_drm and 'This video is DRM protected' or 'No video formats found!'
1036 if forced or not ignored:
1151c407 1037 raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
319b6059 1038 expected=has_drm or ignored or expected)
88acdbc2 1039 else:
1040 self.report_warning(msg)
1041
de6000d9 1042 def parse_outtmpl(self):
bf1824b3 1043 self.deprecation_warning('"YoutubeDL.parse_outtmpl" is deprecated and may be removed in a future version')
1044 self._parse_outtmpl()
1045 return self.params['outtmpl']
1046
1047 def _parse_outtmpl(self):
7b2c3f47 1048 sanitize = IDENTITY
bf1824b3 1049 if self.params.get('restrictfilenames'): # Remove spaces in the default template
71ce444a 1050 sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')
bf1824b3 1051
1052 outtmpl = self.params.setdefault('outtmpl', {})
1053 if not isinstance(outtmpl, dict):
1054 self.params['outtmpl'] = outtmpl = {'default': outtmpl}
1055 outtmpl.update({k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items() if outtmpl.get(k) is None})
de6000d9 1056
21cd8fae 1057 def get_output_path(self, dir_type='', filename=None):
1058 paths = self.params.get('paths', {})
d2c8aadf 1059 assert isinstance(paths, dict), '"paths" parameter must be a dictionary'
21cd8fae 1060 path = os.path.join(
1061 expand_path(paths.get('home', '').strip()),
1062 expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
1063 filename or '')
21cd8fae 1064 return sanitize_path(path, force=self.params.get('windowsfilenames'))
1065
76a264ac 1066 @staticmethod
901130bb 1067 def _outtmpl_expandpath(outtmpl):
1068 # expand_path translates '%%' into '%' and '$$' into '$'
1069 # which is not what we want, since we need to keep
1070 # '%%' intact for the template dict substitution step. Working around
1071 # with a boundary-alike separator hack.
efa944f4 1072 sep = ''.join(random.choices(ascii_letters, k=32))
86e5f3ed 1073 outtmpl = outtmpl.replace('%%', f'%{sep}%').replace('$$', f'${sep}$')
901130bb 1074
1075 # outtmpl should be expand_path'ed before template dict substitution
1076 # because meta fields may contain env variables we don't want to
62b58c09 1077 # be expanded. E.g. for outtmpl "%(title)s.%(ext)s" and
901130bb 1078 # title "Hello $PATH", we don't want `$PATH` to be expanded.
1079 return expand_path(outtmpl).replace(sep, '')
1080
1081 @staticmethod
1082 def escape_outtmpl(outtmpl):
1083 ''' Escape any remaining strings like %s, %abc% etc. '''
1084 return re.sub(
1085 STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
1086 lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
1087 outtmpl)
1088
1089 @classmethod
1090 def validate_outtmpl(cls, outtmpl):
76a264ac 1091 ''' @return None or Exception object '''
7d1eb38a 1092 outtmpl = re.sub(
47cdc68e 1093 STR_FORMAT_RE_TMPL.format('[^)]*', '[ljhqBUDS]'),
7d1eb38a 1094 lambda mobj: f'{mobj.group(0)[:-1]}s',
1095 cls._outtmpl_expandpath(outtmpl))
76a264ac 1096 try:
7d1eb38a 1097 cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
76a264ac 1098 return None
1099 except ValueError as err:
1100 return err
1101
03b4de72 1102 @staticmethod
1103 def _copy_infodict(info_dict):
1104 info_dict = dict(info_dict)
09b49e1f 1105 info_dict.pop('__postprocessors', None)
415f8d51 1106 info_dict.pop('__pending_error', None)
03b4de72 1107 return info_dict
1108
e0fd9573 1109 def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
1110 """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
1111 @param sanitize Whether to sanitize the output as a filename.
1112 For backward compatibility, a function can also be passed
1113 """
1114
6e84b215 1115 info_dict.setdefault('epoch', int(time.time())) # keep epoch consistent once set
143db31d 1116
03b4de72 1117 info_dict = self._copy_infodict(info_dict)
752cda38 1118 info_dict['duration_string'] = ( # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
53c18592 1119 formatSeconds(info_dict['duration'], '-' if sanitize else ':')
143db31d 1120 if info_dict.get('duration', None) is not None
1121 else None)
1d485a1a 1122 info_dict['autonumber'] = int(self.params.get('autonumber_start', 1) - 1 + self._num_downloads)
9c906919 1123 info_dict['video_autonumber'] = self._num_videos
752cda38 1124 if info_dict.get('resolution') is None:
1125 info_dict['resolution'] = self.format_resolution(info_dict, default=None)
143db31d 1126
e6f21b3d 1127 # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
143db31d 1128 # of %(field)s to %(field)0Nd for backward compatibility
1129 field_size_compat_map = {
0a5a191a 1130 'playlist_index': number_of_digits(info_dict.get('__last_playlist_index') or 0),
ec11a9f4 1131 'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
752cda38 1132 'autonumber': self.params.get('autonumber_size') or 5,
143db31d 1133 }
752cda38 1134
385a27fa 1135 TMPL_DICT = {}
47cdc68e 1136 EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljhqBUDS]'))
385a27fa 1137 MATH_FUNCTIONS = {
1138 '+': float.__add__,
1139 '-': float.__sub__,
1140 }
e625be0d 1141 # Field is of the form key1.key2...
07a1250e 1142 # where keys (except first) can be string, int, slice or "{field, ...}"
1143 FIELD_INNER_RE = r'(?:\w+|%(num)s|%(num)s?(?::%(num)s?){1,2})' % {'num': r'(?:-?\d+)'}
1144 FIELD_RE = r'\w*(?:\.(?:%(inner)s|{%(field)s(?:,%(field)s)*}))*' % {
1145 'inner': FIELD_INNER_RE,
1146 'field': rf'\w*(?:\.{FIELD_INNER_RE})*'
1147 }
1d485a1a 1148 MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})'
385a27fa 1149 MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
1d485a1a 1150 INTERNAL_FORMAT_RE = re.compile(rf'''(?x)
e625be0d 1151 (?P<negate>-)?
1d485a1a 1152 (?P<fields>{FIELD_RE})
1153 (?P<maths>(?:{MATH_OPERATORS_RE}{MATH_FIELD_RE})*)
e625be0d 1154 (?:>(?P<strf_format>.+?))?
34baa9fd 1155 (?P<remaining>
1156 (?P<alternate>(?<!\\),[^|&)]+)?
1157 (?:&(?P<replacement>.*?))?
1158 (?:\|(?P<default>.*?))?
1d485a1a 1159 )$''')
752cda38 1160
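# Illustrative examples of the syntax matched above (for orientation only):
# '%(title|Unknown)s' uses the |default branch when title is missing, and
# '%(duration>%H-%M-%S)s' passes the value through the >strf_format branch.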
07a1250e 1161 def _traverse_infodict(fields):
1162 fields = [f for x in re.split(r'\.({.+?})\.?', fields)
1163 for f in ([x] if x.startswith('{') else x.split('.'))]
1164 for i in (0, -1):
1165 if fields and not fields[i]:
1166 fields.pop(i)
1167
1168 for i, f in enumerate(fields):
1169 if not f.startswith('{'):
1170 continue
1171 assert f.endswith('}'), f'No closing brace for {f} in {fields}'
1172 fields[i] = {k: k.split('.') for k in f[1:-1].split(',')}
1173
1174 return traverse_obj(info_dict, fields, is_user_input=True, traverse_string=True)
76a264ac 1175
752cda38 1176 def get_value(mdict):
1177 # Object traversal
2b8a2973 1178 value = _traverse_infodict(mdict['fields'])
752cda38 1179 # Negative
1180 if mdict['negate']:
1181 value = float_or_none(value)
1182 if value is not None:
1183 value *= -1
1184 # Do maths
385a27fa 1185 offset_key = mdict['maths']
1186 if offset_key:
752cda38 1187 value = float_or_none(value)
1188 operator = None
385a27fa 1189 while offset_key:
1190 item = re.match(
1191 MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
1192 offset_key).group(0)
1193 offset_key = offset_key[len(item):]
1194 if operator is None:
752cda38 1195 operator = MATH_FUNCTIONS[item]
385a27fa 1196 continue
1197 item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
1198 offset = float_or_none(item)
1199 if offset is None:
2b8a2973 1200 offset = float_or_none(_traverse_infodict(item))
385a27fa 1201 try:
1202 value = operator(value, multiplier * offset)
1203 except (TypeError, ZeroDivisionError):
1204 return None
1205 operator = None
752cda38 1206 # Datetime formatting
1207 if mdict['strf_format']:
7c37ff97 1208 value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))
752cda38 1209
a6bcaf71 1210 # XXX: Workaround for https://github.com/yt-dlp/yt-dlp/issues/4485
1211 if sanitize and value == '':
1212 value = None
752cda38 1213 return value
1214
b868936c 1215 na = self.params.get('outtmpl_na_placeholder', 'NA')
1216
e0fd9573 1217 def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
5c3895ff 1218 return sanitize_filename(str(value), restricted=restricted, is_id=(
1219 bool(re.search(r'(^|[_.])id(\.|$)', key))
8a82af35 1220 if 'filename-sanitization' in self.params['compat_opts']
5c3895ff 1221 else NO_DEFAULT))
e0fd9573 1222
1223 sanitizer = sanitize if callable(sanitize) else filename_sanitizer
1224 sanitize = bool(sanitize)
1225
6e84b215 1226 def _dumpjson_default(obj):
1227 if isinstance(obj, (set, LazyList)):
1228 return list(obj)
adbc4ec4 1229 return repr(obj)
6e84b215 1230
752cda38 1231 def create_key(outer_mobj):
1232 if not outer_mobj.group('has_key'):
b836dc94 1233 return outer_mobj.group(0)
752cda38 1234 key = outer_mobj.group('key')
752cda38 1235 mobj = re.match(INTERNAL_FORMAT_RE, key)
e0fd9573 1236 initial_field = mobj.group('fields') if mobj else ''
e978789f 1237 value, replacement, default = None, None, na
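# Try each comma-separated alternate field in turn until one yields a value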
7c37ff97 1238 while mobj:
e625be0d 1239 mobj = mobj.groupdict()
7c37ff97 1240 default = mobj['default'] if mobj['default'] is not None else default
752cda38 1241 value = get_value(mobj)
e978789f 1242 replacement = mobj['replacement']
7c37ff97 1243 if value is None and mobj['alternate']:
34baa9fd 1244 mobj = re.match(INTERNAL_FORMAT_RE, mobj['remaining'][1:])
7c37ff97 1245 else:
1246 break
752cda38 1247
b868936c 1248 fmt = outer_mobj.group('format')
752cda38 1249 if fmt == 's' and value is not None and key in field_size_compat_map.keys():
86e5f3ed 1250 fmt = f'0{field_size_compat_map[key]:d}d'
752cda38 1251
e978789f 1252 value = default if value is None else value if replacement is None else replacement
752cda38 1253
4476d2c7 1254 flags = outer_mobj.group('conversion') or ''
7d1eb38a 1255 str_fmt = f'{fmt[:-1]}s'
524e2e4f 1256 if fmt[-1] == 'l': # list
4476d2c7 1257 delim = '\n' if '#' in flags else ', '
9e907ebd 1258 value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
524e2e4f 1259 elif fmt[-1] == 'j': # json
deae7c17 1260 value, fmt = json.dumps(
1261 value, default=_dumpjson_default,
9b9dad11 1262 indent=4 if '#' in flags else None, ensure_ascii='+' not in flags), str_fmt
47cdc68e 1263 elif fmt[-1] == 'h': # html
deae7c17 1264 value, fmt = escapeHTML(str(value)), str_fmt
524e2e4f 1265 elif fmt[-1] == 'q': # quoted
4476d2c7 1266 value = map(str, variadic(value) if '#' in flags else [value])
1267 value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
524e2e4f 1268 elif fmt[-1] == 'B': # bytes
0f06bcd7 1269 value = f'%{str_fmt}'.encode() % str(value).encode()
f5aa5cfb 1270 value, fmt = value.decode('utf-8', 'ignore'), 's'
524e2e4f 1271 elif fmt[-1] == 'U': # unicode normalized
524e2e4f 1272 value, fmt = unicodedata.normalize(
1273 # "+" = compatibility equivalence, "#" = NFD
4476d2c7 1274 'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
524e2e4f 1275 value), str_fmt
e0fd9573 1276 elif fmt[-1] == 'D': # decimal suffix
abbeeebc 1277 num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
1278 value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
1279 factor=1024 if '#' in flags else 1000)
37893bb0 1280 elif fmt[-1] == 'S': # filename sanitization
e0fd9573 1281 value, fmt = filename_sanitizer(initial_field, value, restricted='#' in flags), str_fmt
7d1eb38a 1282 elif fmt[-1] == 'c':
524e2e4f 1283 if value:
1284 value = str(value)[0]
76a264ac 1285 else:
524e2e4f 1286 fmt = str_fmt
76a264ac 1287 elif fmt[-1] not in 'rs': # numeric
a439a3a4 1288 value = float_or_none(value)
752cda38 1289 if value is None:
1290 value, fmt = default, 's'
901130bb 1291
752cda38 1292 if sanitize:
1293 if fmt[-1] == 'r':
1294 # If value is an object, sanitize might convert it to a string
1295 # So we convert it to repr first
7d1eb38a 1296 value, fmt = repr(value), str_fmt
639f1cea 1297 if fmt[-1] in 'csr':
e0fd9573 1298 value = sanitizer(initial_field, value)
901130bb 1299
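# Escape literal '%' with a NUL marker and append the conversion spec,
# so each (field, format) pair gets a unique placeholder key in TMPL_DICT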
b868936c 1300 key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
385a27fa 1301 TMPL_DICT[key] = value
b868936c 1302 return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))
752cda38 1303
385a27fa 1304 return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
143db31d 1305
819e0531 1306 def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
1307 outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
1308 return self.escape_outtmpl(outtmpl) % info_dict
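# Minimal usage sketch (assuming `ydl` is a YoutubeDL instance and `info` is an extracted info dict):
#   ydl.evaluate_outtmpl('%(title)s [%(id)s].%(ext)s', info)
#   ydl.evaluate_outtmpl('%(upload_date>%Y-%m-%d)s - %(title).50s', info)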
1309
5127e92a 1310 def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None):
1311 assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive'
1312 if outtmpl is None:
bf1824b3 1313 outtmpl = self.params['outtmpl'].get(tmpl_type or 'default', self.params['outtmpl']['default'])
8222d8de 1314 try:
5127e92a 1315 outtmpl = self._outtmpl_expandpath(outtmpl)
e0fd9573 1316 filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
6a0546e3 1317 if not filename:
1318 return None
15da37c7 1319
5127e92a 1320 if tmpl_type in ('', 'temp'):
6a0546e3 1321 final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')
1322 if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):
1323 filename = replace_extension(filename, ext, final_ext)
5127e92a 1324 elif tmpl_type:
6a0546e3 1325 force_ext = OUTTMPL_TYPES[tmpl_type]
1326 if force_ext:
1327 filename = replace_extension(filename, force_ext, info_dict.get('ext'))
de6000d9 1328
bdc3fd2f
U
1329 # https://github.com/blackjack4494/youtube-dlc/issues/85
1330 trim_file_name = self.params.get('trim_file_name', False)
1331 if trim_file_name:
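# rsplit keeps at most the last two suffixes (e.g. '.info.json') intact
# and trims only the stem to `trim_file_name` characters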
5c22c63d 1332 no_ext, *ext = filename.rsplit('.', 2)
1333 filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')
bdc3fd2f 1334
0202b52a 1335 return filename
8222d8de 1336 except ValueError as err:
6febd1c1 1337 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
8222d8de
JMF
1338 return None
1339
5127e92a 1340 def prepare_filename(self, info_dict, dir_type='', *, outtmpl=None, warn=False):
1341 """Generate the output filename"""
1342 if outtmpl:
1343 assert not dir_type, 'outtmpl and dir_type are mutually exclusive'
1344 dir_type = None
1345 filename = self._prepare_filename(info_dict, tmpl_type=dir_type, outtmpl=outtmpl)
80c03fa9 1346 if not filename and dir_type not in ('', 'temp'):
1347 return ''
de6000d9 1348
c84aeac6 1349 if warn:
21cd8fae 1350 if not self.params.get('paths'):
de6000d9 1351 pass
1352 elif filename == '-':
c84aeac6 1353 self.report_warning('--paths is ignored when outputting to stdout', only_once=True)
de6000d9 1354 elif os.path.isabs(filename):
c84aeac6 1355 self.report_warning('--paths is ignored since an absolute path is given in the output template', only_once=True)
de6000d9 1356 if filename == '-' or not filename:
1357 return filename
1358
21cd8fae 1359 return self.get_output_path(dir_type, filename)
0202b52a 1360
120fe513 1361 def _match_entry(self, info_dict, incomplete=False, silent=False):
6368e2e6 1362 """Returns None if the file should be downloaded"""
d7b460d0 1363 _type = info_dict.get('_type', 'video')
1364 assert incomplete or _type == 'video', 'Only video result can be considered complete'
8222d8de 1365
3bec830a 1366 video_title = info_dict.get('title', info_dict.get('id', 'entry'))
c77495e3 1367
8b0d7497 1368 def check_filter():
d7b460d0 1369 if _type in ('playlist', 'multi_video'):
1370 return
1371 elif _type in ('url', 'url_transparent') and not try_call(
1372 lambda: self.get_info_extractor(info_dict['ie_key']).is_single_video(info_dict['url'])):
1373 return
1374
8b0d7497 1375 if 'title' in info_dict:
1376 # This can happen when we're just evaluating the playlist
1377 title = info_dict['title']
1378 matchtitle = self.params.get('matchtitle', False)
1379 if matchtitle:
1380 if not re.search(matchtitle, title, re.IGNORECASE):
1381 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1382 rejecttitle = self.params.get('rejecttitle', False)
1383 if rejecttitle:
1384 if re.search(rejecttitle, title, re.IGNORECASE):
1385 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
6368e2e6 1386
8b0d7497 1387 date = info_dict.get('upload_date')
1388 if date is not None:
1389 dateRange = self.params.get('daterange', DateRange())
1390 if date not in dateRange:
86e5f3ed 1391 return f'{date_from_str(date).isoformat()} upload date is not in range {dateRange}'
8b0d7497 1392 view_count = info_dict.get('view_count')
1393 if view_count is not None:
1394 min_views = self.params.get('min_views')
1395 if min_views is not None and view_count < min_views:
 1396 return 'Skipping %s because it has not reached the minimum view count (%d/%d)' % (video_title, view_count, min_views)
1397 max_views = self.params.get('max_views')
1398 if max_views is not None and view_count > max_views:
 1399 return 'Skipping %s because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1400 if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1401 return 'Skipping "%s" because it is age restricted' % video_title
8b0d7497 1402
8f18aca8 1403 match_filter = self.params.get('match_filter')
1404 if match_filter is not None:
1405 try:
1406 ret = match_filter(info_dict, incomplete=incomplete)
1407 except TypeError:
1408 # For backward compatibility
1409 ret = None if incomplete else match_filter(info_dict)
492272fe 1410 if ret is NO_DEFAULT:
1411 while True:
1412 filename = self._format_screen(self.prepare_filename(info_dict), self.Styles.FILENAME)
1413 reply = input(self._format_screen(
1414 f'Download "{filename}"? (Y/n): ', self.Styles.EMPHASIS)).lower().strip()
1415 if reply in {'y', ''}:
1416 return None
1417 elif reply == 'n':
1418 return f'Skipping {video_title}'
492272fe 1419 elif ret is not None:
8f18aca8 1420 return ret
8b0d7497 1421 return None
1422
c77495e3 1423 if self.in_download_archive(info_dict):
1424 reason = '%s has already been recorded in the archive' % video_title
1425 break_opt, break_err = 'break_on_existing', ExistingVideoReached
1426 else:
1427 reason = check_filter()
1428 break_opt, break_err = 'break_on_reject', RejectedVideoReached
8b0d7497 1429 if reason is not None:
120fe513 1430 if not silent:
1431 self.to_screen('[download] ' + reason)
c77495e3 1432 if self.params.get(break_opt, False):
1433 raise break_err()
8b0d7497 1434 return reason
fe7e0c98 1435
b6c45014
JMF
1436 @staticmethod
1437 def add_extra_info(info_dict, extra_info):
1438 '''Set the keys from extra_info in info dict if they are missing'''
1439 for key, value in extra_info.items():
1440 info_dict.setdefault(key, value)
1441
409e1828 1442 def extract_info(self, url, download=True, ie_key=None, extra_info=None,
61aa5ba3 1443 process=True, force_generic_extractor=False):
41d1cca3 1444 """
17ffed18 1445 Extract and return the information dictionary of the URL
41d1cca3 1446
1447 Arguments:
17ffed18 1448 @param url URL to extract
41d1cca3 1449
1450 Keyword arguments:
17ffed18 1451 @param download Whether to download videos
1452 @param process Whether to resolve all unresolved references (URLs, playlist items).
1453 Must be True for download to work
1454 @param ie_key Use only the extractor with this key
1455
1456 @param extra_info Dictionary containing the extra values to add to the info (For internal use only)
 1457 @param force_generic_extractor Force using the generic extractor (Deprecated; use ie_key='Generic')
41d1cca3 1458 """
fe7e0c98 1459
409e1828 1460 if extra_info is None:
1461 extra_info = {}
1462
61aa5ba3 1463 if not ie_key and force_generic_extractor:
d22dec74
S
1464 ie_key = 'Generic'
1465
8222d8de 1466 if ie_key:
fe7866d0 1467 ies = {ie_key: self._ies[ie_key]} if ie_key in self._ies else {}
8222d8de
JMF
1468 else:
1469 ies = self._ies
1470
fe7866d0 1471 for key, ie in ies.items():
8222d8de
JMF
1472 if not ie.suitable(url):
1473 continue
1474
1475 if not ie.working():
6febd1c1
PH
1476 self.report_warning('The program functionality for this site has been marked as broken, '
1477 'and will probably not work.')
8222d8de 1478
1151c407 1479 temp_id = ie.get_temp_id(url)
fe7866d0 1480 if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': key}):
1481 self.to_screen(f'[{key}] {temp_id}: has already been recorded in the archive')
5e5be0c0 1482 if self.params.get('break_on_existing', False):
1483 raise ExistingVideoReached()
a0566bbf 1484 break
fe7866d0 1485 return self.__extract_info(url, self.get_info_extractor(key), download, extra_info, process)
a0566bbf 1486 else:
fe7866d0 1487 extractors_restricted = self.params.get('allowed_extractors') not in (None, ['default'])
1488 self.report_error(f'No suitable extractor{format_field(ie_key, None, " (%s)")} found for URL {url}',
1489 tb=False if extractors_restricted else None)
a0566bbf 1490
7e88d7d7 1491 def _handle_extraction_exceptions(func):
b5ae35ee 1492 @functools.wraps(func)
a0566bbf 1493 def wrapper(self, *args, **kwargs):
6da22e7d 1494 while True:
1495 try:
1496 return func(self, *args, **kwargs)
1497 except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
8222d8de 1498 raise
6da22e7d 1499 except ReExtractInfo as e:
1500 if e.expected:
1501 self.to_screen(f'{e}; Re-extracting data')
1502 else:
1503 self.to_stderr('\r')
1504 self.report_warning(f'{e}; Re-extracting data')
1505 continue
1506 except GeoRestrictedError as e:
1507 msg = e.msg
1508 if e.countries:
1509 msg += '\nThis video is available in %s.' % ', '.join(
1510 map(ISO3166Utils.short2full, e.countries))
 1511 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to work around this.'
1512 self.report_error(msg)
1513 except ExtractorError as e: # An error we somewhat expected
1514 self.report_error(str(e), e.format_traceback())
1515 except Exception as e:
1516 if self.params.get('ignoreerrors'):
1517 self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
1518 else:
1519 raise
1520 break
a0566bbf 1521 return wrapper
1522
693f0600 1523 def _wait_for_video(self, ie_result={}):
f2ebc5c7 1524 if (not self.params.get('wait_for_video')
1525 or ie_result.get('_type', 'video') != 'video'
1526 or ie_result.get('formats') or ie_result.get('url')):
1527 return
1528
1529 format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
1530 last_msg = ''
1531
1532 def progress(msg):
1533 nonlocal last_msg
a7dc6a89 1534 full_msg = f'{msg}\n'
1535 if not self.params.get('noprogress'):
1536 full_msg = msg + ' ' * (len(last_msg) - len(msg)) + '\r'
1537 elif last_msg:
1538 return
1539 self.to_screen(full_msg, skip_eol=True)
f2ebc5c7 1540 last_msg = msg
1541
1542 min_wait, max_wait = self.params.get('wait_for_video')
1543 diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
1544 if diff is None and ie_result.get('live_status') == 'is_upcoming':
16c620bc 1545 diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0)
f2ebc5c7 1546 self.report_warning('Release time of video is not known')
693f0600 1547 elif ie_result and (diff or 0) <= 0:
f2ebc5c7 1548 self.report_warning('Video should already be available according to extracted info')
38d79fd1 1549 diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
f2ebc5c7 1550 self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1551
1552 wait_till = time.time() + diff
1553 try:
1554 while True:
1555 diff = wait_till - time.time()
1556 if diff <= 0:
1557 progress('')
1558 raise ReExtractInfo('[wait] Wait period ended', expected=True)
1559 progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1560 time.sleep(1)
1561 except KeyboardInterrupt:
1562 progress('')
1563 raise ReExtractInfo('[wait] Interrupted by user', expected=True)
1564 except BaseException as e:
1565 if not isinstance(e, ReExtractInfo):
1566 self.to_screen('')
1567 raise
1568
7e88d7d7 1569 @_handle_extraction_exceptions
58f197b7 1570 def __extract_info(self, url, ie, download, extra_info, process):
693f0600 1571 try:
1572 ie_result = ie.extract(url)
1573 except UserNotLive as e:
1574 if process:
1575 if self.params.get('wait_for_video'):
1576 self.report_warning(e)
1577 self._wait_for_video()
1578 raise
a0566bbf 1579 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
cb794ee0 1580 self.report_warning(f'Extractor {ie.IE_NAME} returned nothing{bug_reports_message()}')
a0566bbf 1581 return
1582 if isinstance(ie_result, list):
1583 # Backwards compatibility: old IE result format
1584 ie_result = {
1585 '_type': 'compat_list',
1586 'entries': ie_result,
1587 }
e37d0efb 1588 if extra_info.get('original_url'):
1589 ie_result.setdefault('original_url', extra_info['original_url'])
a0566bbf 1590 self.add_default_extra_info(ie_result, ie, url)
1591 if process:
f2ebc5c7 1592 self._wait_for_video(ie_result)
a0566bbf 1593 return self.process_ie_result(ie_result, download, extra_info)
8222d8de 1594 else:
a0566bbf 1595 return ie_result
fe7e0c98 1596
ea38e55f 1597 def add_default_extra_info(self, ie_result, ie, url):
6033d980 1598 if url is not None:
1599 self.add_extra_info(ie_result, {
1600 'webpage_url': url,
1601 'original_url': url,
57ebfca3 1602 })
1603 webpage_url = ie_result.get('webpage_url')
1604 if webpage_url:
1605 self.add_extra_info(ie_result, {
1606 'webpage_url_basename': url_basename(webpage_url),
1607 'webpage_url_domain': get_domain(webpage_url),
6033d980 1608 })
1609 if ie is not None:
1610 self.add_extra_info(ie_result, {
1611 'extractor': ie.IE_NAME,
1612 'extractor_key': ie.ie_key(),
1613 })
ea38e55f 1614
58adec46 1615 def process_ie_result(self, ie_result, download=True, extra_info=None):
8222d8de
JMF
1616 """
 1617 Take the result of the ie (may be modified) and resolve all unresolved
1618 references (URLs, playlist items).
1619
 1620 It will also download the videos if 'download' is True.
1621 Returns the resolved ie_result.
1622 """
58adec46 1623 if extra_info is None:
1624 extra_info = {}
e8ee972c
PH
1625 result_type = ie_result.get('_type', 'video')
1626
057a5206 1627 if result_type in ('url', 'url_transparent'):
8f97a15d 1628 ie_result['url'] = sanitize_url(
1629 ie_result['url'], scheme='http' if self.params.get('prefer_insecure') else 'https')
8791e78c 1630 if ie_result.get('original_url') and not extra_info.get('original_url'):
1631 extra_info = {'original_url': ie_result['original_url'], **extra_info}
e37d0efb 1632
057a5206 1633 extract_flat = self.params.get('extract_flat', False)
3089bc74
S
1634 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1635 or extract_flat is True):
ecb54191 1636 info_copy = ie_result.copy()
6033d980 1637 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
360167b9 1638 if ie and not ie_result.get('id'):
4614bc22 1639 info_copy['id'] = ie.get_temp_id(ie_result['url'])
6033d980 1640 self.add_default_extra_info(info_copy, ie, ie_result['url'])
4614bc22 1641 self.add_extra_info(info_copy, extra_info)
b5475f11 1642 info_copy, _ = self.pre_process(info_copy)
94dc8604 1643 self._fill_common_fields(info_copy, False)
ecb54191 1644 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
415f8d51 1645 self._raise_pending_errors(info_copy)
4614bc22 1646 if self.params.get('force_write_download_archive', False):
1647 self.record_download_archive(info_copy)
e8ee972c
PH
1648 return ie_result
1649
8222d8de 1650 if result_type == 'video':
b6c45014 1651 self.add_extra_info(ie_result, extra_info)
9c2b75b5 1652 ie_result = self.process_video_result(ie_result, download=download)
415f8d51 1653 self._raise_pending_errors(ie_result)
28b0eb0f 1654 additional_urls = (ie_result or {}).get('additional_urls')
9c2b75b5 1655 if additional_urls:
e9f4ccd1 1656 # TODO: Improve MetadataParserPP to allow setting a list
14f25df2 1657 if isinstance(additional_urls, str):
9c2b75b5 1658 additional_urls = [additional_urls]
1659 self.to_screen(
1660 '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1661 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1662 ie_result['additional_entries'] = [
1663 self.extract_info(
b69fd25c 1664 url, download, extra_info=extra_info,
9c2b75b5 1665 force_generic_extractor=self.params.get('force_generic_extractor'))
1666 for url in additional_urls
1667 ]
1668 return ie_result
8222d8de
JMF
1669 elif result_type == 'url':
1670 # We have to add extra_info to the results because it may be
1671 # contained in a playlist
07cce701 1672 return self.extract_info(
1673 ie_result['url'], download,
1674 ie_key=ie_result.get('ie_key'),
1675 extra_info=extra_info)
7fc3fa05
PH
1676 elif result_type == 'url_transparent':
1677 # Use the information from the embedding page
1678 info = self.extract_info(
1679 ie_result['url'], ie_key=ie_result.get('ie_key'),
1680 extra_info=extra_info, download=False, process=False)
1681
1640eb09
S
1682 # extract_info may return None when ignoreerrors is enabled and
 1683 # extraction failed with an error; don't crash and return early
1684 # in this case
1685 if not info:
1686 return info
1687
3975b4d2 1688 exempted_fields = {'_type', 'url', 'ie_key'}
1689 if not ie_result.get('section_end') and ie_result.get('section_start') is None:
 1690 # For video clips, the id, extractor, etc. of the clip extractor should be used
1691 exempted_fields |= {'id', 'extractor', 'extractor_key'}
1692
412c617d 1693 new_result = info.copy()
3975b4d2 1694 new_result.update(filter_dict(ie_result, lambda k, v: v is not None and k not in exempted_fields))
7fc3fa05 1695
0563f7ac
S
1696 # Extracted info may not be a video result (i.e.
 1697 # info.get('_type', 'video') != 'video') but rather a 'url' or
 1698 # 'url_transparent' result. In such cases outer metadata (from ie_result)
 1699 # should be propagated to the inner one (info). For this to happen
1700 # _type of info should be overridden with url_transparent. This
067aa17e 1701 # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
0563f7ac
S
1702 if new_result.get('_type') == 'url':
1703 new_result['_type'] = 'url_transparent'
7fc3fa05
PH
1704
1705 return self.process_ie_result(
1706 new_result, download=download, extra_info=extra_info)
40fcba5e 1707 elif result_type in ('playlist', 'multi_video'):
30a074c2 1708 # Protect from infinite recursion due to recursively nested playlists
1709 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
0bd5a039 1710 webpage_url = ie_result.get('webpage_url') # Playlists may not have webpage_url
1711 if webpage_url and webpage_url in self._playlist_urls:
7e85e872 1712 self.to_screen(
30a074c2 1713 '[download] Skipping already downloaded playlist: %s'
 1714 % (ie_result.get('title') or ie_result.get('id')))
1715 return
7e85e872 1716
30a074c2 1717 self._playlist_level += 1
1718 self._playlist_urls.add(webpage_url)
03f83004 1719 self._fill_common_fields(ie_result, False)
bc516a3f 1720 self._sanitize_thumbnails(ie_result)
30a074c2 1721 try:
1722 return self.__process_playlist(ie_result, download)
1723 finally:
1724 self._playlist_level -= 1
1725 if not self._playlist_level:
1726 self._playlist_urls.clear()
8222d8de 1727 elif result_type == 'compat_list':
c9bf4114
PH
1728 self.report_warning(
1729 'Extractor %s returned a compat_list result. '
1730 'It needs to be updated.' % ie_result.get('extractor'))
5f6a1245 1731
8222d8de 1732 def _fixup(r):
b868936c 1733 self.add_extra_info(r, {
1734 'extractor': ie_result['extractor'],
1735 'webpage_url': ie_result['webpage_url'],
1736 'webpage_url_basename': url_basename(ie_result['webpage_url']),
0bb322b9 1737 'webpage_url_domain': get_domain(ie_result['webpage_url']),
b868936c 1738 'extractor_key': ie_result['extractor_key'],
1739 })
8222d8de
JMF
1740 return r
1741 ie_result['entries'] = [
b6c45014 1742 self.process_ie_result(_fixup(r), download, extra_info)
8222d8de
JMF
1743 for r in ie_result['entries']
1744 ]
1745 return ie_result
1746 else:
1747 raise Exception('Invalid result type: %s' % result_type)
1748
e92caff5 1749 def _ensure_dir_exists(self, path):
1750 return make_dir(path, self.report_error)
1751
3b603dbd 1752 @staticmethod
3bec830a 1753 def _playlist_infodict(ie_result, strict=False, **kwargs):
1754 info = {
1755 'playlist_count': ie_result.get('playlist_count'),
3b603dbd 1756 'playlist': ie_result.get('title') or ie_result.get('id'),
1757 'playlist_id': ie_result.get('id'),
1758 'playlist_title': ie_result.get('title'),
1759 'playlist_uploader': ie_result.get('uploader'),
1760 'playlist_uploader_id': ie_result.get('uploader_id'),
3b603dbd 1761 **kwargs,
1762 }
3bec830a 1763 if strict:
1764 return info
0bd5a039 1765 if ie_result.get('webpage_url'):
1766 info.update({
1767 'webpage_url': ie_result['webpage_url'],
1768 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1769 'webpage_url_domain': get_domain(ie_result['webpage_url']),
1770 })
3bec830a 1771 return {
1772 **info,
1773 'playlist_index': 0,
1774 '__last_playlist_index': max(ie_result['requested_entries'] or (0, 0)),
1775 'extractor': ie_result['extractor'],
3bec830a 1776 'extractor_key': ie_result['extractor_key'],
1777 }
3b603dbd 1778
30a074c2 1779 def __process_playlist(self, ie_result, download):
7e88d7d7 1780 """Process each entry in the playlist"""
f5ea4748 1781 assert ie_result['_type'] in ('playlist', 'multi_video')
1782
3bec830a 1783 common_info = self._playlist_infodict(ie_result, strict=True)
3955b207 1784 title = common_info.get('playlist') or '<Untitled>'
3bec830a 1785 if self._match_entry(common_info, incomplete=True) is not None:
1786 return
c6e07cf1 1787 self.to_screen(f'[download] Downloading {ie_result["_type"]}: {title}')
f0d785d3 1788
7e88d7d7 1789 all_entries = PlaylistEntries(self, ie_result)
7e9a6125 1790 entries = orderedSet(all_entries.get_requested_items(), lazy=True)
1791
1792 lazy = self.params.get('lazy_playlist')
1793 if lazy:
1794 resolved_entries, n_entries = [], 'N/A'
1795 ie_result['requested_entries'], ie_result['entries'] = None, None
1796 else:
1797 entries = resolved_entries = list(entries)
1798 n_entries = len(resolved_entries)
1799 ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*resolved_entries)) or ([], [])
1800 if not ie_result.get('playlist_count'):
1801 # Better to do this after potentially exhausting entries
1802 ie_result['playlist_count'] = all_entries.get_full_count()
498f5606 1803
0647d925 1804 extra = self._playlist_infodict(ie_result, n_entries=int_or_none(n_entries))
1805 ie_copy = collections.ChainMap(ie_result, extra)
3bec830a 1806
e08a85d8 1807 _infojson_written = False
0bfc53d0 1808 write_playlist_files = self.params.get('allow_playlist_files', True)
1809 if write_playlist_files and self.params.get('list_thumbnails'):
1810 self.list_thumbnails(ie_result)
1811 if write_playlist_files and not self.params.get('simulate'):
e08a85d8 1812 _infojson_written = self._write_info_json(
1813 'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
1814 if _infojson_written is None:
80c03fa9 1815 return
1816 if self._write_description('playlist', ie_result,
1817 self.prepare_filename(ie_copy, 'pl_description')) is None:
1818 return
681de68e 1819 # TODO: This should be passed to ThumbnailsConvertor if necessary
3bec830a 1820 self._write_thumbnails('playlist', ie_result, self.prepare_filename(ie_copy, 'pl_thumbnail'))
30a074c2 1821
7e9a6125 1822 if lazy:
1823 if self.params.get('playlistreverse') or self.params.get('playlistrandom'):
1824 self.report_warning('playlistreverse and playlistrandom are not supported with lazy_playlist', only_once=True)
1825 elif self.params.get('playlistreverse'):
1826 entries.reverse()
1827 elif self.params.get('playlistrandom'):
30a074c2 1828 random.shuffle(entries)
1829
bc5c2f8a 1830 self.to_screen(f'[{ie_result["extractor"]}] Playlist {title}: Downloading {n_entries} items'
7e88d7d7 1831 f'{format_field(ie_result, "playlist_count", " of %s")}')
30a074c2 1832
134c913c 1833 keep_resolved_entries = self.params.get('extract_flat') != 'discard'
1834 if self.params.get('extract_flat') == 'discard_in_playlist':
1835 keep_resolved_entries = ie_result['_type'] != 'playlist'
1836 if keep_resolved_entries:
1837 self.write_debug('The information of all playlist entries will be held in memory')
1838
26e2805c 1839 failures = 0
1840 max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
7e9a6125 1841 for i, (playlist_index, entry) in enumerate(entries):
1842 if lazy:
1843 resolved_entries.append((playlist_index, entry))
3bec830a 1844 if not entry:
7e88d7d7 1845 continue
1846
7e88d7d7 1847 entry['__x_forwarded_for_ip'] = ie_result.get('__x_forwarded_for_ip')
7e9a6125 1848 if not lazy and 'playlist-index' in self.params.get('compat_opts', []):
1849 playlist_index = ie_result['requested_entries'][i]
1850
0647d925 1851 entry_copy = collections.ChainMap(entry, {
3bec830a 1852 **common_info,
3955b207 1853 'n_entries': int_or_none(n_entries),
71729754 1854 'playlist_index': playlist_index,
7e9a6125 1855 'playlist_autonumber': i + 1,
0647d925 1856 })
3bec830a 1857
0647d925 1858 if self._match_entry(entry_copy, incomplete=True) is not None:
f0ad6f8c 1859 # For compatibility with youtube-dl. See https://github.com/yt-dlp/yt-dlp/issues/4369
1860 resolved_entries[i] = (playlist_index, NO_DEFAULT)
3bec830a 1861 continue
1862
bc5c2f8a 1863 self.to_screen('[download] Downloading item %s of %s' % (
3bec830a 1864 self._format_screen(i + 1, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))
1865
ec54bd43 1866 entry_result = self.__process_iterable_entry(entry, download, collections.ChainMap({
a6ca61d4 1867 'playlist_index': playlist_index,
1868 'playlist_autonumber': i + 1,
ec54bd43 1869 }, extra))
26e2805c 1870 if not entry_result:
1871 failures += 1
1872 if failures >= max_failures:
1873 self.report_error(
7e88d7d7 1874 f'Skipping the remaining entries in playlist "{title}" since {failures} items failed extraction')
26e2805c 1875 break
134c913c 1876 if keep_resolved_entries:
1877 resolved_entries[i] = (playlist_index, entry_result)
7e88d7d7 1878
1879 # Update with processed data
f0ad6f8c 1880 ie_result['entries'] = [e for _, e in resolved_entries if e is not NO_DEFAULT]
bc5c2f8a 1881 ie_result['requested_entries'] = [i for i, e in resolved_entries if e is not NO_DEFAULT]
1882 if ie_result['requested_entries'] == try_call(lambda: list(range(1, ie_result['playlist_count'] + 1))):
1883 # Do not set for full playlist
1884 ie_result.pop('requested_entries')
e08a85d8 1885
1886 # Write the updated info to json
cb96c5be 1887 if _infojson_written is True and self._write_info_json(
e08a85d8 1888 'updated playlist', ie_result,
1889 self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
1890 return
ca30f449 1891
ed5835b4 1892 ie_result = self.run_all_pps('playlist', ie_result)
7e88d7d7 1893 self.to_screen(f'[download] Finished downloading playlist: {title}')
30a074c2 1894 return ie_result
1895
7e88d7d7 1896 @_handle_extraction_exceptions
a0566bbf 1897 def __process_iterable_entry(self, entry, download, extra_info):
1898 return self.process_ie_result(
1899 entry, download=download, extra_info=extra_info)
1900
67134eab
JMF
1901 def _build_format_filter(self, filter_spec):
1902 " Returns a function to filter the formats according to the filter_spec "
083c9df9
PH
1903
1904 OPERATORS = {
1905 '<': operator.lt,
1906 '<=': operator.le,
1907 '>': operator.gt,
1908 '>=': operator.ge,
1909 '=': operator.eq,
1910 '!=': operator.ne,
1911 }
67134eab 1912 operator_rex = re.compile(r'''(?x)\s*
187986a8 1913 (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1914 (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1915 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
083c9df9 1916 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
187986a8 1917 m = operator_rex.fullmatch(filter_spec)
9ddb6925
S
1918 if m:
1919 try:
1920 comparison_value = int(m.group('value'))
1921 except ValueError:
1922 comparison_value = parse_filesize(m.group('value'))
1923 if comparison_value is None:
1924 comparison_value = parse_filesize(m.group('value') + 'B')
1925 if comparison_value is None:
1926 raise ValueError(
1927 'Invalid value %r in format specification %r' % (
67134eab 1928 m.group('value'), filter_spec))
9ddb6925
S
1929 op = OPERATORS[m.group('op')]
1930
083c9df9 1931 if not m:
9ddb6925
S
1932 STR_OPERATORS = {
1933 '=': operator.eq,
10d33b34
YCH
1934 '^=': lambda attr, value: attr.startswith(value),
1935 '$=': lambda attr, value: attr.endswith(value),
1936 '*=': lambda attr, value: value in attr,
1ce9a3cb 1937 '~=': lambda attr, value: value.search(attr) is not None
9ddb6925 1938 }
187986a8 1939 str_operator_rex = re.compile(r'''(?x)\s*
1940 (?P<key>[a-zA-Z0-9._-]+)\s*
1ce9a3cb
LF
1941 (?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?
1942 (?P<quote>["'])?
1943 (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
1944 (?(quote)(?P=quote))\s*
9ddb6925 1945 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
187986a8 1946 m = str_operator_rex.fullmatch(filter_spec)
9ddb6925 1947 if m:
1ce9a3cb
LF
1948 if m.group('op') == '~=':
1949 comparison_value = re.compile(m.group('value'))
1950 else:
1951 comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value'))
2cc779f4
S
1952 str_op = STR_OPERATORS[m.group('op')]
1953 if m.group('negation'):
e118a879 1954 op = lambda attr, value: not str_op(attr, value)
2cc779f4
S
1955 else:
1956 op = str_op
083c9df9 1957
9ddb6925 1958 if not m:
187986a8 1959 raise SyntaxError('Invalid filter specification %r' % filter_spec)
083c9df9
PH
1960
1961 def _filter(f):
1962 actual_value = f.get(m.group('key'))
1963 if actual_value is None:
1964 return m.group('none_inclusive')
1965 return op(actual_value, comparison_value)
67134eab
JMF
1966 return _filter
1967
9f1a1c36 1968 def _check_formats(self, formats):
1969 for f in formats:
1970 self.to_screen('[info] Testing format %s' % f['format_id'])
75689fe5 1971 path = self.get_output_path('temp')
1972 if not self._ensure_dir_exists(f'{path}/'):
1973 continue
1974 temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
9f1a1c36 1975 temp_file.close()
1976 try:
1977 success, _ = self.dl(temp_file.name, f, test=True)
8a82af35 1978 except (DownloadError, OSError, ValueError) + network_exceptions:
9f1a1c36 1979 success = False
1980 finally:
1981 if os.path.exists(temp_file.name):
1982 try:
1983 os.remove(temp_file.name)
1984 except OSError:
1985 self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1986 if success:
1987 yield f
1988 else:
1989 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1990
0017d9ad 1991 def _default_format_spec(self, info_dict, download=True):
0017d9ad 1992
af0f7428
S
1993 def can_merge():
1994 merger = FFmpegMergerPP(self)
1995 return merger.available and merger.can_merge()
1996
91ebc640 1997 prefer_best = (
b7b04c78 1998 not self.params.get('simulate')
91ebc640 1999 and download
2000 and (
2001 not can_merge()
21633673 2002 or info_dict.get('is_live') and not self.params.get('live_from_start')
bf1824b3 2003 or self.params['outtmpl']['default'] == '-'))
53ed7066 2004 compat = (
2005 prefer_best
2006 or self.params.get('allow_multiple_audio_streams', False)
8a82af35 2007 or 'format-spec' in self.params['compat_opts'])
91ebc640 2008
2009 return (
53ed7066 2010 'best/bestvideo+bestaudio' if prefer_best
2011 else 'bestvideo*+bestaudio/best' if not compat
91ebc640 2012 else 'bestvideo+bestaudio/best')
0017d9ad 2013
67134eab
JMF
2014 def build_format_selector(self, format_spec):
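# format_spec examples accepted by this parser: 'best', '137+140',
# 'bv*+ba/b', 'bestvideo[height<=1080]+bestaudio/best',
# '(bv+ba/b)[protocol^=http]' and comma-separated lists like 'bv,ba'
# (per the documented -f/--format syntax)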
2015 def syntax_error(note, start):
2016 message = (
2017 'Invalid format specification: '
86e5f3ed 2018 '{}\n\t{}\n\t{}^'.format(note, format_spec, ' ' * start[1]))
67134eab
JMF
2019 return SyntaxError(message)
2020
2021 PICKFIRST = 'PICKFIRST'
2022 MERGE = 'MERGE'
2023 SINGLE = 'SINGLE'
0130afb7 2024 GROUP = 'GROUP'
67134eab
JMF
2025 FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
2026
91ebc640 2027 allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
2028 'video': self.params.get('allow_multiple_video_streams', False)}
909d24dd 2029
9f1a1c36 2030 check_formats = self.params.get('check_formats') == 'selected'
e8e73840 2031
67134eab
JMF
2032 def _parse_filter(tokens):
2033 filter_parts = []
2034 for type, string, start, _, _ in tokens:
2035 if type == tokenize.OP and string == ']':
2036 return ''.join(filter_parts)
2037 else:
2038 filter_parts.append(string)
2039
232541df 2040 def _remove_unused_ops(tokens):
62b58c09
L
2041 # Remove operators that we don't use and join them with the surrounding strings.
2042 # E.g. 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
232541df
JMF
2043 ALLOWED_OPS = ('/', '+', ',', '(', ')')
2044 last_string, last_start, last_end, last_line = None, None, None, None
2045 for type, string, start, end, line in tokens:
2046 if type == tokenize.OP and string == '[':
2047 if last_string:
2048 yield tokenize.NAME, last_string, last_start, last_end, last_line
2049 last_string = None
2050 yield type, string, start, end, line
2051 # everything inside brackets will be handled by _parse_filter
2052 for type, string, start, end, line in tokens:
2053 yield type, string, start, end, line
2054 if type == tokenize.OP and string == ']':
2055 break
2056 elif type == tokenize.OP and string in ALLOWED_OPS:
2057 if last_string:
2058 yield tokenize.NAME, last_string, last_start, last_end, last_line
2059 last_string = None
2060 yield type, string, start, end, line
2061 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
2062 if not last_string:
2063 last_string = string
2064 last_start = start
2065 last_end = end
2066 else:
2067 last_string += string
2068 if last_string:
2069 yield tokenize.NAME, last_string, last_start, last_end, last_line
2070
cf2ac6df 2071 def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
67134eab
JMF
2072 selectors = []
2073 current_selector = None
2074 for type, string, start, _, _ in tokens:
2075 # ENCODING is only defined in python 3.x
2076 if type == getattr(tokenize, 'ENCODING', None):
2077 continue
2078 elif type in [tokenize.NAME, tokenize.NUMBER]:
2079 current_selector = FormatSelector(SINGLE, string, [])
2080 elif type == tokenize.OP:
cf2ac6df
JMF
2081 if string == ')':
2082 if not inside_group:
2083 # ')' will be handled by the parentheses group
2084 tokens.restore_last_token()
67134eab 2085 break
cf2ac6df 2086 elif inside_merge and string in ['/', ',']:
0130afb7
JMF
2087 tokens.restore_last_token()
2088 break
cf2ac6df
JMF
2089 elif inside_choice and string == ',':
2090 tokens.restore_last_token()
2091 break
2092 elif string == ',':
0a31a350
JMF
2093 if not current_selector:
2094 raise syntax_error('"," must follow a format selector', start)
67134eab
JMF
2095 selectors.append(current_selector)
2096 current_selector = None
2097 elif string == '/':
d96d604e
JMF
2098 if not current_selector:
2099 raise syntax_error('"/" must follow a format selector', start)
67134eab 2100 first_choice = current_selector
cf2ac6df 2101 second_choice = _parse_format_selection(tokens, inside_choice=True)
f5f4a27a 2102 current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
67134eab
JMF
2103 elif string == '[':
2104 if not current_selector:
2105 current_selector = FormatSelector(SINGLE, 'best', [])
2106 format_filter = _parse_filter(tokens)
2107 current_selector.filters.append(format_filter)
0130afb7
JMF
2108 elif string == '(':
2109 if current_selector:
2110 raise syntax_error('Unexpected "("', start)
cf2ac6df
JMF
2111 group = _parse_format_selection(tokens, inside_group=True)
2112 current_selector = FormatSelector(GROUP, group, [])
67134eab 2113 elif string == '+':
d03cfdce 2114 if not current_selector:
2115 raise syntax_error('Unexpected "+"', start)
2116 selector_1 = current_selector
2117 selector_2 = _parse_format_selection(tokens, inside_merge=True)
2118 if not selector_2:
2119 raise syntax_error('Expected a selector', start)
2120 current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
67134eab 2121 else:
86e5f3ed 2122 raise syntax_error(f'Operator not recognized: "{string}"', start)
67134eab
JMF
2123 elif type == tokenize.ENDMARKER:
2124 break
2125 if current_selector:
2126 selectors.append(current_selector)
2127 return selectors
2128
f8d4ad9a 2129 def _merge(formats_pair):
2130 format_1, format_2 = formats_pair
2131
2132 formats_info = []
2133 formats_info.extend(format_1.get('requested_formats', (format_1,)))
2134 formats_info.extend(format_2.get('requested_formats', (format_2,)))
2135
2136 if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
551f9388 2137 get_no_more = {'video': False, 'audio': False}
f8d4ad9a 2138 for (i, fmt_info) in enumerate(formats_info):
551f9388 2139 if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
2140 formats_info.pop(i)
2141 continue
2142 for aud_vid in ['audio', 'video']:
f8d4ad9a 2143 if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
2144 if get_no_more[aud_vid]:
2145 formats_info.pop(i)
f5510afe 2146 break
f8d4ad9a 2147 get_no_more[aud_vid] = True
2148
2149 if len(formats_info) == 1:
2150 return formats_info[0]
2151
2152 video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
2153 audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
2154
2155 the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
2156 the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
2157
fc61aff4
LL
2158 output_ext = get_compatible_ext(
2159 vcodecs=[f.get('vcodec') for f in video_fmts],
2160 acodecs=[f.get('acodec') for f in audio_fmts],
2161 vexts=[f['ext'] for f in video_fmts],
2162 aexts=[f['ext'] for f in audio_fmts],
2163 preferences=(try_call(lambda: self.params['merge_output_format'].split('/'))
2164 or self.params.get('prefer_free_formats') and ('webm', 'mkv')))
f8d4ad9a 2165
975a0d0d 2166 filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
2167
f8d4ad9a 2168 new_dict = {
2169 'requested_formats': formats_info,
975a0d0d 2170 'format': '+'.join(filtered('format')),
2171 'format_id': '+'.join(filtered('format_id')),
f8d4ad9a 2172 'ext': output_ext,
975a0d0d 2173 'protocol': '+'.join(map(determine_protocol, formats_info)),
093a1710 2174 'language': '+'.join(orderedSet(filtered('language'))) or None,
2175 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
2176 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
975a0d0d 2177 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
f8d4ad9a 2178 }
2179
2180 if the_only_video:
2181 new_dict.update({
2182 'width': the_only_video.get('width'),
2183 'height': the_only_video.get('height'),
2184 'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
2185 'fps': the_only_video.get('fps'),
49a57e70 2186 'dynamic_range': the_only_video.get('dynamic_range'),
f8d4ad9a 2187 'vcodec': the_only_video.get('vcodec'),
2188 'vbr': the_only_video.get('vbr'),
2189 'stretched_ratio': the_only_video.get('stretched_ratio'),
105bfd90 2190 'aspect_ratio': the_only_video.get('aspect_ratio'),
f8d4ad9a 2191 })
2192
2193 if the_only_audio:
2194 new_dict.update({
2195 'acodec': the_only_audio.get('acodec'),
2196 'abr': the_only_audio.get('abr'),
975a0d0d 2197 'asr': the_only_audio.get('asr'),
b8ed0f15 2198 'audio_channels': the_only_audio.get('audio_channels')
f8d4ad9a 2199 })
2200
2201 return new_dict
2202
e8e73840 2203 def _check_formats(formats):
981052c9 2204 if not check_formats:
2205 yield from formats
b5ac45b1 2206 return
9f1a1c36 2207 yield from self._check_formats(formats)
e8e73840 2208
67134eab 2209 def _build_selector_function(selector):
909d24dd 2210 if isinstance(selector, list): # ,
67134eab
JMF
2211 fs = [_build_selector_function(s) for s in selector]
2212
317f7ab6 2213 def selector_function(ctx):
67134eab 2214 for f in fs:
981052c9 2215 yield from f(ctx)
67134eab 2216 return selector_function
909d24dd 2217
2218 elif selector.type == GROUP: # ()
0130afb7 2219 selector_function = _build_selector_function(selector.selector)
909d24dd 2220
2221 elif selector.type == PICKFIRST: # /
67134eab
JMF
2222 fs = [_build_selector_function(s) for s in selector.selector]
2223
317f7ab6 2224 def selector_function(ctx):
67134eab 2225 for f in fs:
317f7ab6 2226 picked_formats = list(f(ctx))
67134eab
JMF
2227 if picked_formats:
2228 return picked_formats
2229 return []
67134eab 2230
981052c9 2231 elif selector.type == MERGE: # +
2232 selector_1, selector_2 = map(_build_selector_function, selector.selector)
2233
2234 def selector_function(ctx):
adbc4ec4 2235 for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
981052c9 2236 yield _merge(pair)
2237
909d24dd 2238 elif selector.type == SINGLE: # atom
598d185d 2239 format_spec = selector.selector or 'best'
909d24dd 2240
f8d4ad9a 2241 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
909d24dd 2242 if format_spec == 'all':
2243 def selector_function(ctx):
9222c381 2244 yield from _check_formats(ctx['formats'][::-1])
f8d4ad9a 2245 elif format_spec == 'mergeall':
2246 def selector_function(ctx):
316f2650 2247 formats = list(_check_formats(
2248 f for f in ctx['formats'] if f.get('vcodec') != 'none' or f.get('acodec') != 'none'))
e01d6aa4 2249 if not formats:
2250 return
921b76ca 2251 merged_format = formats[-1]
2252 for f in formats[-2::-1]:
f8d4ad9a 2253 merged_format = _merge((merged_format, f))
2254 yield merged_format
909d24dd 2255
2256 else:
85e801a9 2257 format_fallback, separate_fallback, format_reverse, format_idx = False, None, True, 1
eff63539 2258 mobj = re.match(
2259 r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
2260 format_spec)
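# e.g. 'b'/'best' (best format with both audio and video), 'bv' (best
# video-only), 'ba*' (best format containing audio), 'b.2' (2nd best),
# per the documented format-selection shortcuts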
2261 if mobj is not None:
2262 format_idx = int_or_none(mobj.group('n'), default=1)
e8e73840 2263 format_reverse = mobj.group('bw')[0] == 'b'
eff63539 2264 format_type = (mobj.group('type') or [None])[0]
2265 not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
2266 format_modified = mobj.group('mod') is not None
909d24dd 2267
2268 format_fallback = not format_type and not format_modified # for b, w
8326b00a 2269 _filter_f = (
eff63539 2270 (lambda f: f.get('%scodec' % format_type) != 'none')
2271 if format_type and format_modified # bv*, ba*, wv*, wa*
2272 else (lambda f: f.get('%scodec' % not_format_type) == 'none')
2273 if format_type # bv, ba, wv, wa
2274 else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
2275 if not format_modified # b, w
8326b00a 2276 else lambda f: True) # b*, w*
2277 filter_f = lambda f: _filter_f(f) and (
2278 f.get('vcodec') != 'none' or f.get('acodec') != 'none')
67134eab 2279 else:
48ee10ee 2280 if format_spec in self._format_selection_exts['audio']:
b11c04a8 2281 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
48ee10ee 2282 elif format_spec in self._format_selection_exts['video']:
b11c04a8 2283 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
85e801a9 2284 separate_fallback = lambda f: f.get('ext') == format_spec and f.get('vcodec') != 'none'
48ee10ee 2285 elif format_spec in self._format_selection_exts['storyboards']:
b11c04a8 2286 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
2287 else:
b5ae35ee 2288 filter_f = lambda f: f.get('format_id') == format_spec # id
909d24dd 2289
2290 def selector_function(ctx):
2291 formats = list(ctx['formats'])
909d24dd 2292 matches = list(filter(filter_f, formats)) if filter_f is not None else formats
85e801a9 2293 if not matches:
2294 if format_fallback and ctx['incomplete_formats']:
2295 # for extractors with incomplete formats (audio only (soundcloud)
 2296 # or video only (imgur)) best/worst will fall back to
2297 # best/worst {video,audio}-only format
2298 matches = formats
 2299 elif separate_fallback and not ctx['has_merged_format']:
2300 # for compatibility with youtube-dl when there is no pre-merged format
 2301 matches = list(filter(separate_fallback, formats))
981052c9 2302 matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
2303 try:
e8e73840 2304 yield matches[format_idx - 1]
4abea8ca 2305 except LazyList.IndexError:
981052c9 2306 return
083c9df9 2307
67134eab 2308 filters = [self._build_format_filter(f) for f in selector.filters]
083c9df9 2309
317f7ab6 2310 def final_selector(ctx):
adbc4ec4 2311 ctx_copy = dict(ctx)
67134eab 2312 for _filter in filters:
317f7ab6
S
2313 ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
2314 return selector_function(ctx_copy)
67134eab 2315 return final_selector
083c9df9 2316
0f06bcd7 2317 stream = io.BytesIO(format_spec.encode())
0130afb7 2318 try:
f9934b96 2319 tokens = list(_remove_unused_ops(tokenize.tokenize(stream.readline)))
0130afb7
JMF
2320 except tokenize.TokenError:
2321 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2322
86e5f3ed 2323 class TokenIterator:
0130afb7
JMF
2324 def __init__(self, tokens):
2325 self.tokens = tokens
2326 self.counter = 0
2327
2328 def __iter__(self):
2329 return self
2330
2331 def __next__(self):
2332 if self.counter >= len(self.tokens):
2333 raise StopIteration()
2334 value = self.tokens[self.counter]
2335 self.counter += 1
2336 return value
2337
2338 next = __next__
2339
2340 def restore_last_token(self):
2341 self.counter -= 1
2342
2343 parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
67134eab 2344 return _build_selector_function(parsed_selector)
a9c58ad9 2345
e5660ee6 2346 def _calc_headers(self, info_dict):
8b7539d2 2347 res = merge_headers(self.params['http_headers'], info_dict.get('http_headers') or {})
e5660ee6 2348
c487cf00 2349 cookies = self._calc_cookies(info_dict['url'])
e5660ee6
JMF
2350 if cookies:
2351 res['Cookie'] = cookies
2352
0016b84e
S
2353 if 'X-Forwarded-For' not in res:
2354 x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2355 if x_forwarded_for_ip:
2356 res['X-Forwarded-For'] = x_forwarded_for_ip
2357
e5660ee6
JMF
2358 return res
2359
c487cf00 2360 def _calc_cookies(self, url):
2361 pr = sanitized_Request(url)
e5660ee6 2362 self.cookiejar.add_cookie_header(pr)
662435f7 2363 return pr.get_header('Cookie')
e5660ee6 2364
9f1a1c36 2365 def _sort_thumbnails(self, thumbnails):
2366 thumbnails.sort(key=lambda t: (
2367 t.get('preference') if t.get('preference') is not None else -1,
2368 t.get('width') if t.get('width') is not None else -1,
2369 t.get('height') if t.get('height') is not None else -1,
2370 t.get('id') if t.get('id') is not None else '',
2371 t.get('url')))
2372
b0249bca 2373 def _sanitize_thumbnails(self, info_dict):
bc516a3f 2374 thumbnails = info_dict.get('thumbnails')
2375 if thumbnails is None:
2376 thumbnail = info_dict.get('thumbnail')
2377 if thumbnail:
2378 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
9f1a1c36 2379 if not thumbnails:
2380 return
2381
2382 def check_thumbnails(thumbnails):
2383 for t in thumbnails:
2384 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2385 try:
2386 self.urlopen(HEADRequest(t['url']))
2387 except network_exceptions as err:
2388 self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2389 continue
2390 yield t
2391
2392 self._sort_thumbnails(thumbnails)
2393 for i, t in enumerate(thumbnails):
2394 if t.get('id') is None:
2395 t['id'] = '%d' % i
2396 if t.get('width') and t.get('height'):
2397 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2398 t['url'] = sanitize_url(t['url'])
2399
2400 if self.params.get('check_formats') is True:
282f5709 2401 info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
9f1a1c36 2402 else:
2403 info_dict['thumbnails'] = thumbnails
bc516a3f 2404
94dc8604 2405 def _fill_common_fields(self, info_dict, final=True):
03f83004 2406 # TODO: move sanitization here
94dc8604 2407 if final:
d4736fdb 2408 title = info_dict.get('title', NO_DEFAULT)
2409 if title is NO_DEFAULT:
03f83004
LNO
2410 raise ExtractorError('Missing "title" field in extractor result',
2411 video_id=info_dict['id'], ie=info_dict['extractor'])
d4736fdb 2412 info_dict['fulltitle'] = title
2413 if not title:
2414 if title == '':
2415 self.write_debug('Extractor gave empty title. Creating a generic title')
2416 else:
2417 self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
1d485a1a 2418 info_dict['title'] = f'{info_dict["extractor"].replace(":", "-")} video #{info_dict["id"]}'
03f83004
LNO
2419
2420 if info_dict.get('duration') is not None:
2421 info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2422
2423 for ts_key, date_key in (
2424 ('timestamp', 'upload_date'),
2425 ('release_timestamp', 'release_date'),
2426 ('modified_timestamp', 'modified_date'),
2427 ):
2428 if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2429 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2430 # see http://bugs.python.org/issue1646728)
19a03940 2431 with contextlib.suppress(ValueError, OverflowError, OSError):
03f83004
LNO
2432 upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2433 info_dict[date_key] = upload_date.strftime('%Y%m%d')
03f83004
LNO
2434
2435 live_keys = ('is_live', 'was_live')
2436 live_status = info_dict.get('live_status')
2437 if live_status is None:
2438 for key in live_keys:
2439 if info_dict.get(key) is False:
2440 continue
2441 if info_dict.get(key):
2442 live_status = key
2443 break
2444 if all(info_dict.get(key) is False for key in live_keys):
2445 live_status = 'not_live'
2446 if live_status:
2447 info_dict['live_status'] = live_status
2448 for key in live_keys:
2449 if info_dict.get(key) is None:
2450 info_dict[key] = (live_status == key)
a057779d 2451 if live_status == 'post_live':
2452 info_dict['was_live'] = True
03f83004
LNO
2453
2454 # Auto generate title fields corresponding to the *_number fields when missing
2455 # in order to always have clean titles. This is very common for TV series.
2456 for field in ('chapter', 'season', 'episode'):
94dc8604 2457 if final and info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
03f83004
LNO
2458 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2459
415f8d51 2460 def _raise_pending_errors(self, info):
2461 err = info.pop('__pending_error', None)
2462 if err:
2463 self.report_error(err, tb=False)
2464
784320c9 2465 def sort_formats(self, info_dict):
2466 formats = self._get_formats(info_dict)
2467 if not formats:
2468 return
2469 # Backward compatibility with InfoExtractor._sort_formats
2470 field_preference = formats[0].pop('__sort_fields', None)
2471 if field_preference:
2472 info_dict['_format_sort_fields'] = field_preference
2473
2474 formats.sort(key=FormatSorter(
2475 self, info_dict.get('_format_sort_fields', [])).calculate_preference)
2476
dd82ffea
JMF
2477 def process_video_result(self, info_dict, download=True):
2478 assert info_dict.get('_type', 'video') == 'video'
9c906919 2479 self._num_videos += 1
dd82ffea 2480
bec1fad2 2481 if 'id' not in info_dict:
fc08bdd6 2482 raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
2483 elif not info_dict.get('id'):
2484 raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])
455a15e2 2485
c9969434
S
2486 def report_force_conversion(field, field_not, conversion):
2487 self.report_warning(
2488 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2489 % (field, field_not, conversion))
2490
2491 def sanitize_string_field(info, string_field):
2492 field = info.get(string_field)
14f25df2 2493 if field is None or isinstance(field, str):
c9969434
S
2494 return
2495 report_force_conversion(string_field, 'a string', 'string')
14f25df2 2496 info[string_field] = str(field)
c9969434
S
2497
2498 def sanitize_numeric_fields(info):
2499 for numeric_field in self._NUMERIC_FIELDS:
2500 field = info.get(numeric_field)
f9934b96 2501 if field is None or isinstance(field, (int, float)):
c9969434
S
2502 continue
2503 report_force_conversion(numeric_field, 'numeric', 'int')
2504 info[numeric_field] = int_or_none(field)
2505
2506 sanitize_string_field(info_dict, 'id')
2507 sanitize_numeric_fields(info_dict)
3975b4d2 2508 if info_dict.get('section_end') and info_dict.get('section_start') is not None:
2509 info_dict['duration'] = round(info_dict['section_end'] - info_dict['section_start'], 3)
4c3f8c3f 2510 if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):
50e93e03 2511 self.report_warning('"duration" field is negative, there is an error in extractor')
be6217b2 2512
9eef7c4e 2513 chapters = info_dict.get('chapters') or []
a3976e07 2514 if chapters and chapters[0].get('start_time'):
2515 chapters.insert(0, {'start_time': 0})
2516
9eef7c4e 2517 dummy_chapter = {'end_time': 0, 'start_time': info_dict.get('duration')}
a3976e07 2518 for idx, (prev, current, next_) in enumerate(zip(
2519 (dummy_chapter, *chapters), chapters, (*chapters[1:], dummy_chapter)), 1):
9eef7c4e 2520 if current.get('start_time') is None:
2521 current['start_time'] = prev.get('end_time')
2522 if not current.get('end_time'):
2523 current['end_time'] = next_.get('start_time')
a3976e07 2524 if not current.get('title'):
2525 current['title'] = f'<Untitled Chapter {idx}>'
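# Worked example (assumed input, for illustration only): with duration == 100 and
# chapters == [{'start_time': 10, 'title': 'Intro'}], a {'start_time': 0} chapter is
# prepended, the first chapter gets end_time == 10 and title '<Untitled Chapter 1>',
# and 'Intro' gets end_time == 100 (the dummy chapter's start_time).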
9eef7c4e 2526
dd82ffea
JMF
2527 if 'playlist' not in info_dict:
2528 # It isn't part of a playlist
2529 info_dict['playlist'] = None
2530 info_dict['playlist_index'] = None
2531
bc516a3f 2532 self._sanitize_thumbnails(info_dict)
d5519808 2533
536a55da 2534 thumbnail = info_dict.get('thumbnail')
bc516a3f 2535 thumbnails = info_dict.get('thumbnails')
536a55da
S
2536 if thumbnail:
2537 info_dict['thumbnail'] = sanitize_url(thumbnail)
2538 elif thumbnails:
d5519808
PH
2539 info_dict['thumbnail'] = thumbnails[-1]['url']
2540
ae30b840 2541 if info_dict.get('display_id') is None and 'id' in info_dict:
0afef30b
PH
2542 info_dict['display_id'] = info_dict['id']
2543
03f83004 2544 self._fill_common_fields(info_dict)
33d2fc2f 2545
05108a49
S
2546 for cc_kind in ('subtitles', 'automatic_captions'):
2547 cc = info_dict.get(cc_kind)
2548 if cc:
2549 for _, subtitle in cc.items():
2550 for subtitle_format in subtitle:
2551 if subtitle_format.get('url'):
2552 subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2553 if subtitle_format.get('ext') is None:
2554 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2555
2556 automatic_captions = info_dict.get('automatic_captions')
4bba3716 2557 subtitles = info_dict.get('subtitles')
4bba3716 2558
360e1ca5 2559 info_dict['requested_subtitles'] = self.process_subtitles(
05108a49 2560 info_dict['id'], subtitles, automatic_captions)
a504ced0 2561
784320c9 2562 self.sort_formats(info_dict)
aebb4f4b 2563 formats = self._get_formats(info_dict)
dd82ffea 2564
0a5a191a 2565 # or None ensures --clean-infojson removes it
2566 info_dict['_has_drm'] = any(f.get('has_drm') for f in formats) or None
88acdbc2 2567 if not self.params.get('allow_unplayable_formats'):
2568 formats = [f for f in formats if not f.get('has_drm')]
17ffed18 2569
2570 if formats and all(f.get('acodec') == f.get('vcodec') == 'none' for f in formats):
2571 self.report_warning(
2572 f'{"This video is DRM protected and " if info_dict["_has_drm"] else ""}'
2573 'only images are available for download. Use --list-formats to see them'.capitalize())
88acdbc2 2574
319b6059 2575 get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
2576 if not get_from_start:
2577 info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
2578 if info_dict.get('is_live') and formats:
adbc4ec4 2579 formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
319b6059 2580 if get_from_start and not formats:
a44ca5a4 2581 self.raise_no_formats(info_dict, msg=(
2582 '--live-from-start is passed, but there are no formats that can be downloaded from the start. '
2583 'If you want to download from the current time, use --no-live-from-start'))
adbc4ec4 2584
73af5cc8
S
2585 def is_wellformed(f):
2586 url = f.get('url')
a5ac0c47 2587 if not url:
73af5cc8
S
2588 self.report_warning(
2589 '"url" field is missing or empty - skipping format, '
2590 'there is an error in extractor')
a5ac0c47
S
2591 return False
2592 if isinstance(url, bytes):
2593 sanitize_string_field(f, 'url')
2594 return True
73af5cc8
S
2595
2596 # Filter out malformed formats for better extraction robustness
1ac7f461 2597 formats = list(filter(is_wellformed, formats or []))
2598
2599 if not formats:
2600 self.raise_no_formats(info_dict)
73af5cc8 2601
181c7053
S
2602 formats_dict = {}
2603
dd82ffea 2604 # We check that all the formats have the format and format_id fields
db95dc13 2605 for i, format in enumerate(formats):
c9969434
S
2606 sanitize_string_field(format, 'format_id')
2607 sanitize_numeric_fields(format)
dcf77cf1 2608 format['url'] = sanitize_url(format['url'])
e74e3b63 2609 if not format.get('format_id'):
14f25df2 2610 format['format_id'] = str(i)
e2effb08
S
2611 else:
2612 # Sanitize format_id from characters used in format selector expression
ec85ded8 2613 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
181c7053
S
2614 format_id = format['format_id']
2615 if format_id not in formats_dict:
2616 formats_dict[format_id] = []
2617 formats_dict[format_id].append(format)
2618
2619 # Make sure all formats have unique format_id
03b4de72 2620 common_exts = set(itertools.chain(*self._format_selection_exts.values()))
181c7053 2621 for format_id, ambiguous_formats in formats_dict.items():
48ee10ee 2622 ambiguous_id = len(ambiguous_formats) > 1
2623 for i, format in enumerate(ambiguous_formats):
2624 if ambiguous_id:
181c7053 2625 format['format_id'] = '%s-%d' % (format_id, i)
48ee10ee 2626 if format.get('ext') is None:
2627 format['ext'] = determine_ext(format['url']).lower()
2628 # Ensure there is no conflict between id and ext in format selection
2629 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2630 if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
2631 format['format_id'] = 'f%s' % format['format_id']
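# Illustrative effect of the two loops above (assumed format_ids): 'hls 1080p' is
# first rewritten to 'hls_1080p'; two formats both named 'hls' become 'hls-0' and
# 'hls-1'; and an id that collides with a selectable extension, such as 'mp4',
# becomes 'fmp4' so that selecting by extension stays unambiguous.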
181c7053
S
2632
2633 for i, format in enumerate(formats):
8c51aa65 2634 if format.get('format') is None:
6febd1c1 2635 format['format'] = '{id} - {res}{note}'.format(
8c51aa65
JMF
2636 id=format['format_id'],
2637 res=self.format_resolution(format),
b868936c 2638 note=format_field(format, 'format_note', ' (%s)'),
8c51aa65 2639 )
6f0be937 2640 if format.get('protocol') is None:
b5559424 2641 format['protocol'] = determine_protocol(format)
239df021 2642 if format.get('resolution') is None:
2643 format['resolution'] = self.format_resolution(format, default=None)
176f1866 2644 if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
2645 format['dynamic_range'] = 'SDR'
105bfd90 2646 if format.get('aspect_ratio') is None:
2647 format['aspect_ratio'] = try_call(lambda: round(format['width'] / format['height'], 2))
f2fe69c7 2648 if (info_dict.get('duration') and format.get('tbr')
2649 and not format.get('filesize') and not format.get('filesize_approx')):
56ba69e4 2650 format['filesize_approx'] = int(info_dict['duration'] * format['tbr'] * (1024 / 8))
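# Rough numbers for the two derived fields above (illustrative): width 1920 and
# height 1080 give aspect_ratio 1.78, and a 60 s video at tbr 1000 (kbit/s)
# gives filesize_approx 60 * 1000 * 128 = 7680000 bytes (~7.3 MiB).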
f2fe69c7 2651
e5660ee6
JMF
2652 # Add HTTP headers, so that external programs can use them from the
2653 # json output
2654 full_format_info = info_dict.copy()
2655 full_format_info.update(format)
2656 format['http_headers'] = self._calc_headers(full_format_info)
0016b84e
S
2657 # Remove private housekeeping stuff
2658 if '__x_forwarded_for_ip' in info_dict:
2659 del info_dict['__x_forwarded_for_ip']
dd82ffea 2660
9f1a1c36 2661 if self.params.get('check_formats') is True:
282f5709 2662 formats = LazyList(self._check_formats(formats[::-1]), reverse=True)
9f1a1c36 2663
88acdbc2 2664 if not formats or formats[0] is not info_dict:
b3d9ef88
JMF
2665 # only set the 'formats' field if the original info_dict lists them
2666 # otherwise we end up with a circular reference, the first (and unique)
f89197d7 2667 # element in the 'formats' field in info_dict is info_dict itself,
dfb1b146 2668 # which can't be exported to json
b3d9ef88 2669 info_dict['formats'] = formats
4ec82a72 2670
2671 info_dict, _ = self.pre_process(info_dict)
2672
6db9c4d5 2673 if self._match_entry(info_dict, incomplete=self._format_fields) is not None:
09b49e1f 2674 return info_dict
2675
2676 self.post_extract(info_dict)
2677 info_dict, _ = self.pre_process(info_dict, 'after_filter')
2678
093a1710 2679 # The pre-processors may have modified the formats
aebb4f4b 2680 formats = self._get_formats(info_dict)
093a1710 2681
e4221b70 2682 list_only = self.params.get('simulate') == 'list_only'
fa9f30b8 2683 interactive_format_selection = not list_only and self.format_selector == '-'
b7b04c78 2684 if self.params.get('list_thumbnails'):
2685 self.list_thumbnails(info_dict)
b7b04c78 2686 if self.params.get('listsubtitles'):
2687 if 'automatic_captions' in info_dict:
2688 self.list_subtitles(
2689 info_dict['id'], automatic_captions, 'automatic captions')
2690 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
fa9f30b8 2691 if self.params.get('listformats') or interactive_format_selection:
b69fd25c 2692 self.list_formats(info_dict)
169dbde9 2693 if list_only:
b7b04c78 2694 # Without this printing, -F --print-json will not work
169dbde9 2695 self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
c487cf00 2696 return info_dict
bfaae0a7 2697
187986a8 2698 format_selector = self.format_selector
2699 if format_selector is None:
0017d9ad 2700 req_format = self._default_format_spec(info_dict, download=download)
0760b0a7 2701 self.write_debug('Default format spec: %s' % req_format)
187986a8 2702 format_selector = self.build_format_selector(req_format)
317f7ab6 2703
fa9f30b8 2704 while True:
2705 if interactive_format_selection:
2706 req_format = input(
2707 self._format_screen('\nEnter format selector: ', self.Styles.EMPHASIS))
2708 try:
2709 format_selector = self.build_format_selector(req_format)
2710 except SyntaxError as err:
2711 self.report_error(err, tb=False, is_error=False)
2712 continue
2713
85e801a9 2714 formats_to_download = list(format_selector({
fa9f30b8 2715 'formats': formats,
85e801a9 2716 'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
2717 'incomplete_formats': (
2718 # All formats are video-only or
2719 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
2720 # all formats are audio-only
2721 or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats)),
2722 }))
fa9f30b8 2723 if interactive_format_selection and not formats_to_download:
2724 self.report_error('Requested format is not available', tb=False, is_error=False)
2725 continue
2726 break
317f7ab6 2727
dd82ffea 2728 if not formats_to_download:
b7da73eb 2729 if not self.params.get('ignore_no_formats_error'):
c0b6e5c7 2730 raise ExtractorError(
2731 'Requested format is not available. Use --list-formats for a list of available formats',
2732 expected=True, video_id=info_dict['id'], ie=info_dict['extractor'])
b62fa6d7 2733 self.report_warning('Requested format is not available')
2734 # Process what we can, even without any available formats.
2735 formats_to_download = [{}]
a13e6848 2736
0500ee3d 2737 requested_ranges = tuple(self.params.get('download_ranges', lambda *_: [{}])(info_dict, self))
5ec1b6b7 2738 best_format, downloaded_formats = formats_to_download[-1], []
b62fa6d7 2739 if download:
0500ee3d 2740 if best_format and requested_ranges:
5ec1b6b7 2741 def to_screen(*msg):
2742 self.to_screen(f'[info] {info_dict["id"]}: {" ".join(", ".join(variadic(m)) for m in msg)}')
2743
2744 to_screen(f'Downloading {len(formats_to_download)} format(s):',
2745 (f['format_id'] for f in formats_to_download))
0500ee3d 2746 if requested_ranges != ({}, ):
5ec1b6b7 2747 to_screen(f'Downloading {len(requested_ranges)} time ranges:',
fc2ba496 2748 (f'{c["start_time"]:.1f}-{c["end_time"]:.1f}' for c in requested_ranges))
a13e6848 2749 max_downloads_reached = False
5ec1b6b7 2750
0500ee3d 2751 for fmt, chapter in itertools.product(formats_to_download, requested_ranges):
5ec1b6b7 2752 new_info = self._copy_infodict(info_dict)
b7da73eb 2753 new_info.update(fmt)
3975b4d2 2754 offset, duration = info_dict.get('section_start') or 0, info_dict.get('duration') or float('inf')
fc2ba496 2755 end_time = offset + min(chapter.get('end_time', duration), duration)
3975b4d2 2756 if chapter or offset:
5ec1b6b7 2757 new_info.update({
3975b4d2 2758 'section_start': offset + chapter.get('start_time', 0),
2576d53a 2759 # duration may not be accurate. So allow deviations <1sec
2760 'section_end': end_time if end_time <= offset + duration + 1 else None,
5ec1b6b7 2761 'section_title': chapter.get('title'),
2762 'section_number': chapter.get('index'),
2763 })
2764 downloaded_formats.append(new_info)
a13e6848 2765 try:
2766 self.process_info(new_info)
2767 except MaxDownloadsReached:
2768 max_downloads_reached = True
415f8d51 2769 self._raise_pending_errors(new_info)
f46e2f9d 2770 # Remove copied info
2771 for key, val in tuple(new_info.items()):
2772 if info_dict.get(key) == val:
2773 new_info.pop(key)
a13e6848 2774 if max_downloads_reached:
2775 break
ebed8b37 2776
5ec1b6b7 2777 write_archive = {f.get('__write_download_archive', False) for f in downloaded_formats}
a13e6848 2778 assert write_archive.issubset({True, False, 'ignore'})
2779 if True in write_archive and False not in write_archive:
2780 self.record_download_archive(info_dict)
be72c624 2781
5ec1b6b7 2782 info_dict['requested_downloads'] = downloaded_formats
ed5835b4 2783 info_dict = self.run_all_pps('after_video', info_dict)
a13e6848 2784 if max_downloads_reached:
2785 raise MaxDownloadsReached()
ebed8b37 2786
49a57e70 2787 # We update the info dict with the selected best quality format (backwards compatibility)
be72c624 2788 info_dict.update(best_format)
dd82ffea
JMF
2789 return info_dict
2790
98c70d6f 2791 def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
a504ced0 2792 """Select the requested subtitles and their format"""
d8a58ddc 2793 available_subs, normal_sub_langs = {}, []
98c70d6f
JMF
2794 if normal_subtitles and self.params.get('writesubtitles'):
2795 available_subs.update(normal_subtitles)
d8a58ddc 2796 normal_sub_langs = tuple(normal_subtitles.keys())
98c70d6f
JMF
2797 if automatic_captions and self.params.get('writeautomaticsub'):
2798 for lang, cap_info in automatic_captions.items():
360e1ca5
JMF
2799 if lang not in available_subs:
2800 available_subs[lang] = cap_info
2801
d2c8aadf 2802 if not available_subs or (
2803 not self.params.get('writesubtitles')
2804 and not self.params.get('writeautomaticsub')):
4d171848 2805 return None
a504ced0 2806
d8a58ddc 2807 all_sub_langs = tuple(available_subs.keys())
a504ced0 2808 if self.params.get('allsubtitles', False):
c32b0aab 2809 requested_langs = all_sub_langs
2810 elif self.params.get('subtitleslangs', False):
5314b521 2811 try:
2812 requested_langs = orderedSet_from_options(
2813 self.params.get('subtitleslangs'), {'all': all_sub_langs}, use_regex=True)
2814 except re.error as e:
2815 raise ValueError(f'Wrong regex for subtitleslangs: {e.pattern}')
d8a58ddc 2816 elif normal_sub_langs:
2817 requested_langs = ['en'] if 'en' in normal_sub_langs else normal_sub_langs[:1]
a504ced0 2818 else:
d8a58ddc 2819 requested_langs = ['en'] if 'en' in all_sub_langs else all_sub_langs[:1]
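# Selection examples (assumed parameters, for illustration): with
# subtitleslangs == ['en.*', 'ja'] each entry is matched as a regex against the
# available language codes in order; with no explicit languages, 'en' is preferred
# when present, otherwise the first available language is used.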
ad3dc496 2820 if requested_langs:
d2c8aadf 2821 self.to_screen(f'[info] {video_id}: Downloading subtitles: {", ".join(requested_langs)}')
a504ced0
JMF
2822
2823 formats_query = self.params.get('subtitlesformat', 'best')
2824 formats_preference = formats_query.split('/') if formats_query else []
2825 subs = {}
2826 for lang in requested_langs:
2827 formats = available_subs.get(lang)
2828 if formats is None:
86e5f3ed 2829 self.report_warning(f'{lang} subtitles not available for {video_id}')
a504ced0 2830 continue
a504ced0
JMF
2831 for ext in formats_preference:
2832 if ext == 'best':
2833 f = formats[-1]
2834 break
2835 matches = list(filter(lambda f: f['ext'] == ext, formats))
2836 if matches:
2837 f = matches[-1]
2838 break
2839 else:
2840 f = formats[-1]
2841 self.report_warning(
2842 'No subtitle format found matching "%s" for language %s, '
2843 'using %s' % (formats_query, lang, f['ext']))
2844 subs[lang] = f
2845 return subs
2846
bb66c247 2847 def _forceprint(self, key, info_dict):
2848 if info_dict is None:
2849 return
2850 info_copy = info_dict.copy()
2851 info_copy['formats_table'] = self.render_formats_table(info_dict)
2852 info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)
2853 info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
2854 info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))
2855
2856 def format_tmpl(tmpl):
48c8424b 2857 mobj = re.fullmatch(r'([\w.:,]|-\d|(?P<dict>{([\w.:,]|-\d)+}))+=?', tmpl)
07a1250e 2858 if not mobj:
2859 return tmpl
48c8424b 2860
2861 fmt = '%({})s'
2862 if tmpl.startswith('{'):
2863 tmpl = f'.{tmpl}'
2864 if tmpl.endswith('='):
2865 tmpl, fmt = tmpl[:-1], '{0} = %({0})#j'
2866 return '\n'.join(map(fmt.format, [tmpl] if mobj.group('dict') else tmpl.split(',')))
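# Examples of what format_tmpl produces (illustrative):
#   'id,title'  -> '%(id)s\n%(title)s'   (one field per line)
#   'title='    -> 'title = %(title)#j'  (field name plus JSON-formatted value)
#   '%(title)s' -> returned unchanged, since it is already an output template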
8130779d 2867
bb66c247 2868 for tmpl in self.params['forceprint'].get(key, []):
2869 self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))
2870
2871 for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):
5127e92a 2872 filename = self.prepare_filename(info_dict, outtmpl=file_tmpl)
bb66c247 2873 tmpl = format_tmpl(tmpl)
2874 self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
8d93e69d 2875 if self._ensure_dir_exists(filename):
86e5f3ed 2876 with open(filename, 'a', encoding='utf-8') as f:
8d93e69d 2877 f.write(self.evaluate_outtmpl(tmpl, info_copy) + '\n')
ca30f449 2878
d06daf23 2879 def __forced_printings(self, info_dict, filename, incomplete):
53c18592 2880 def print_mandatory(field, actual_field=None):
2881 if actual_field is None:
2882 actual_field = field
d06daf23 2883 if (self.params.get('force%s' % field, False)
53c18592 2884 and (not incomplete or info_dict.get(actual_field) is not None)):
2885 self.to_stdout(info_dict[actual_field])
d06daf23
S
2886
2887 def print_optional(field):
2888 if (self.params.get('force%s' % field, False)
2889 and info_dict.get(field) is not None):
2890 self.to_stdout(info_dict[field])
2891
53c18592 2892 info_dict = info_dict.copy()
2893 if filename is not None:
2894 info_dict['filename'] = filename
2895 if info_dict.get('requested_formats') is not None:
2896 # For RTMP URLs, also include the playpath
2897 info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
10331a26 2898 elif info_dict.get('url'):
53c18592 2899 info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2900
bb66c247 2901 if (self.params.get('forcejson')
2902 or self.params['forceprint'].get('video')
2903 or self.params['print_to_file'].get('video')):
2b8a2973 2904 self.post_extract(info_dict)
bb66c247 2905 self._forceprint('video', info_dict)
53c18592 2906
d06daf23
S
2907 print_mandatory('title')
2908 print_mandatory('id')
53c18592 2909 print_mandatory('url', 'urls')
d06daf23
S
2910 print_optional('thumbnail')
2911 print_optional('description')
53c18592 2912 print_optional('filename')
b868936c 2913 if self.params.get('forceduration') and info_dict.get('duration') is not None:
d06daf23
S
2914 self.to_stdout(formatSeconds(info_dict['duration']))
2915 print_mandatory('format')
53c18592 2916
2b8a2973 2917 if self.params.get('forcejson'):
6e84b215 2918 self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
d06daf23 2919
e8e73840 2920 def dl(self, name, info, subtitle=False, test=False):
88acdbc2 2921 if not info.get('url'):
1151c407 2922 self.raise_no_formats(info, True)
e8e73840 2923
2924 if test:
2925 verbose = self.params.get('verbose')
2926 params = {
2927 'test': True,
a169858f 2928 'quiet': self.params.get('quiet') or not verbose,
e8e73840 2929 'verbose': verbose,
2930 'noprogress': not verbose,
2931 'nopart': True,
2932 'skip_unavailable_fragments': False,
2933 'keep_fragments': False,
2934 'overwrites': True,
2935 '_no_ytdl_file': True,
2936 }
2937 else:
2938 params = self.params
96fccc10 2939 fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
e8e73840 2940 if not test:
2941 for ph in self._progress_hooks:
2942 fd.add_progress_hook(ph)
42676437
M
2943 urls = '", "'.join(
2944 (f['url'].split(',')[0] + ',<data>' if f['url'].startswith('data:') else f['url'])
2945 for f in info.get('requested_formats', []) or [info])
3a408f9d 2946 self.write_debug(f'Invoking {fd.FD_NAME} downloader on "{urls}"')
03b4de72 2947
adbc4ec4
THD
2948 # Note: Ideally, info should be deep-copied so that hooks cannot modify it.
2949 # But it may contain objects that are not deep-copyable
2950 new_info = self._copy_infodict(info)
e8e73840 2951 if new_info.get('http_headers') is None:
2952 new_info['http_headers'] = self._calc_headers(new_info)
2953 return fd.download(name, new_info, subtitle)
2954
e04938ab 2955 def existing_file(self, filepaths, *, default_overwrite=True):
2956 existing_files = list(filter(os.path.exists, orderedSet(filepaths)))
2957 if existing_files and not self.params.get('overwrites', default_overwrite):
2958 return existing_files[0]
2959
2960 for file in existing_files:
2961 self.report_file_delete(file)
2962 os.remove(file)
2963 return None
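# In short (illustrative): with --no-overwrites the first path that already exists
# is returned and reused; otherwise any existing candidates are deleted and None
# is returned so the caller downloads afresh.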
2964
8222d8de 2965 def process_info(self, info_dict):
09b49e1f 2966 """Process a single resolved IE result. (Modifies it in-place)"""
8222d8de
JMF
2967
2968 assert info_dict.get('_type', 'video') == 'video'
f46e2f9d 2969 original_infodict = info_dict
fd288278 2970
4513a41a 2971 if 'format' not in info_dict and 'ext' in info_dict:
8222d8de
JMF
2972 info_dict['format'] = info_dict['ext']
2973
c77495e3 2974 if self._match_entry(info_dict) is not None:
9e907ebd 2975 info_dict['__write_download_archive'] = 'ignore'
8222d8de
JMF
2976 return
2977
09b49e1f 2978 # Does nothing under normal operation - for backward compatibility of process_info
277d6ff5 2979 self.post_extract(info_dict)
119e40ef 2980
2981 def replace_info_dict(new_info):
2982 nonlocal info_dict
2983 if new_info == info_dict:
2984 return
2985 info_dict.clear()
2986 info_dict.update(new_info)
2987
2988 new_info, _ = self.pre_process(info_dict, 'video')
2989 replace_info_dict(new_info)
0c14d66a 2990 self._num_downloads += 1
8222d8de 2991
dcf64d43 2992 # info_dict['_filename'] needs to be set for backward compatibility
de6000d9 2993 info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2994 temp_filename = self.prepare_filename(info_dict, 'temp')
0202b52a 2995 files_to_move = {}
8222d8de
JMF
2996
2997 # Forced printings
4513a41a 2998 self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
8222d8de 2999
ca6d59d2 3000 def check_max_downloads():
3001 if self._num_downloads >= float(self.params.get('max_downloads') or 'inf'):
3002 raise MaxDownloadsReached()
3003
b7b04c78 3004 if self.params.get('simulate'):
9e907ebd 3005 info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
ca6d59d2 3006 check_max_downloads()
8222d8de
JMF
3007 return
3008
de6000d9 3009 if full_filename is None:
8222d8de 3010 return
e92caff5 3011 if not self._ensure_dir_exists(encodeFilename(full_filename)):
0202b52a 3012 return
e92caff5 3013 if not self._ensure_dir_exists(encodeFilename(temp_filename)):
8222d8de
JMF
3014 return
3015
80c03fa9 3016 if self._write_description('video', info_dict,
3017 self.prepare_filename(info_dict, 'description')) is None:
3018 return
3019
3020 sub_files = self._write_subtitles(info_dict, temp_filename)
3021 if sub_files is None:
3022 return
3023 files_to_move.update(dict(sub_files))
3024
3025 thumb_files = self._write_thumbnails(
3026 'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
3027 if thumb_files is None:
3028 return
3029 files_to_move.update(dict(thumb_files))
8222d8de 3030
80c03fa9 3031 infofn = self.prepare_filename(info_dict, 'infojson')
3032 _infojson_written = self._write_info_json('video', info_dict, infofn)
3033 if _infojson_written:
dac5df5a 3034 info_dict['infojson_filename'] = infofn
e75bb0d6 3035 # For backward compatibility, even though it was a private field
80c03fa9 3036 info_dict['__infojson_filename'] = infofn
3037 elif _infojson_written is None:
3038 return
3039
3040 # Note: Annotations are deprecated
3041 annofn = None
1fb07d10 3042 if self.params.get('writeannotations', False):
de6000d9 3043 annofn = self.prepare_filename(info_dict, 'annotation')
80c03fa9 3044 if annofn:
e92caff5 3045 if not self._ensure_dir_exists(encodeFilename(annofn)):
0202b52a 3046 return
0c3d0f51 3047 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
6febd1c1 3048 self.to_screen('[info] Video annotations are already present')
ffddb112
RA
3049 elif not info_dict.get('annotations'):
3050 self.report_warning('There are no annotations to write.')
7b6fefc9
PH
3051 else:
3052 try:
6febd1c1 3053 self.to_screen('[info] Writing video annotations to: ' + annofn)
86e5f3ed 3054 with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
7b6fefc9
PH
3055 annofile.write(info_dict['annotations'])
3056 except (KeyError, TypeError):
6febd1c1 3057 self.report_warning('There are no annotations to write.')
86e5f3ed 3058 except OSError:
6febd1c1 3059 self.report_error('Cannot write annotations file: ' + annofn)
7b6fefc9 3060 return
1fb07d10 3061
732044af 3062 # Write internet shortcut files
08438d2c 3063 def _write_link_file(link_type):
60f3e995 3064 url = try_get(info_dict['webpage_url'], iri_to_uri)
3065 if not url:
3066 self.report_warning(
3067 f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
3068 return True
08438d2c 3069 linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
0e6b018a
Z
3070 if not self._ensure_dir_exists(encodeFilename(linkfn)):
3071 return False
10e3742e 3072 if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
08438d2c 3073 self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
3074 return True
3075 try:
3076 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
86e5f3ed 3077 with open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
3078 newline='\r\n' if link_type == 'url' else '\n') as linkfile:
60f3e995 3079 template_vars = {'url': url}
08438d2c 3080 if link_type == 'desktop':
3081 template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
3082 linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
86e5f3ed 3083 except OSError:
08438d2c 3084 self.report_error(f'Cannot write internet shortcut {linkfn}')
3085 return False
732044af 3086 return True
3087
08438d2c 3088 write_links = {
3089 'url': self.params.get('writeurllink'),
3090 'webloc': self.params.get('writewebloclink'),
3091 'desktop': self.params.get('writedesktoplink'),
3092 }
3093 if self.params.get('writelink'):
3094 link_type = ('webloc' if sys.platform == 'darwin'
3095 else 'desktop' if sys.platform.startswith('linux')
3096 else 'url')
3097 write_links[link_type] = True
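# e.g. (illustrative): --write-link picks .webloc on macOS, .desktop on Linux and
# .url elsewhere, in addition to any explicitly requested link types.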
3098
3099 if any(should_write and not _write_link_file(link_type)
3100 for link_type, should_write in write_links.items()):
3101 return
732044af 3102
415f8d51 3103 new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
3104 replace_info_dict(new_info)
56d868db 3105
a13e6848 3106 if self.params.get('skip_download'):
56d868db 3107 info_dict['filepath'] = temp_filename
3108 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
3109 info_dict['__files_to_move'] = files_to_move
f46e2f9d 3110 replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
9e907ebd 3111 info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
56d868db 3112 else:
3113 # Download
b868936c 3114 info_dict.setdefault('__postprocessors', [])
4340deca 3115 try:
0202b52a 3116
e04938ab 3117 def existing_video_file(*filepaths):
6b591b29 3118 ext = info_dict.get('ext')
e04938ab 3119 converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
3120 file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
3121 default_overwrite=False)
3122 if file:
3123 info_dict['ext'] = os.path.splitext(file)[1][1:]
3124 return file
0202b52a 3125
7b2c3f47 3126 fd, success = None, True
fccf90e7 3127 if info_dict.get('protocol') or info_dict.get('url'):
56ba69e4 3128 fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
71df9b7f 3129 if fd is not FFmpegFD and 'no-direct-merge' not in self.params['compat_opts'] and (
56ba69e4 3130 info_dict.get('section_start') or info_dict.get('section_end')):
7b2c3f47 3131 msg = ('This format cannot be partially downloaded' if FFmpegFD.available()
56ba69e4 3132 else 'You have requested downloading the video partially, but ffmpeg is not installed')
3133 self.report_error(f'{msg}. Aborting')
5ec1b6b7 3134 return
5ec1b6b7 3135
4340deca 3136 if info_dict.get('requested_formats') is not None:
81cd954a 3137 requested_formats = info_dict['requested_formats']
0202b52a 3138 old_ext = info_dict['ext']
4e3b637d 3139 if self.params.get('merge_output_format') is None:
4e3b637d 3140 if (info_dict['ext'] == 'webm'
3141 and info_dict.get('thumbnails')
3142 # check with type instead of pp_key, __name__, or isinstance
3143 # since we don't want any custom PPs to trigger this
c487cf00 3144 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])): # noqa: E721
4e3b637d 3145 info_dict['ext'] = 'mkv'
3146 self.report_warning(
3147 'webm doesn\'t support embedding a thumbnail, mkv will be used')
124bc071 3148 new_ext = info_dict['ext']
0202b52a 3149
124bc071 3150 def correct_ext(filename, ext=new_ext):
96fccc10 3151 if filename == '-':
3152 return filename
0202b52a 3153 filename_real_ext = os.path.splitext(filename)[1][1:]
3154 filename_wo_ext = (
3155 os.path.splitext(filename)[0]
124bc071 3156 if filename_real_ext in (old_ext, new_ext)
0202b52a 3157 else filename)
86e5f3ed 3158 return f'{filename_wo_ext}.{ext}'
0202b52a 3159
38c6902b 3160 # Ensure filename always has a correct extension for successful merge
0202b52a 3161 full_filename = correct_ext(full_filename)
3162 temp_filename = correct_ext(temp_filename)
e04938ab 3163 dl_filename = existing_video_file(full_filename, temp_filename)
1ea24129 3164 info_dict['__real_download'] = False
18e674b4 3165
7b2c3f47 3166 merger = FFmpegMergerPP(self)
adbc4ec4 3167 downloaded = []
dbf5416a 3168 if dl_filename is not None:
6c7274ec 3169 self.report_file_already_downloaded(dl_filename)
adbc4ec4
THD
3170 elif fd:
3171 for f in requested_formats if fd != FFmpegFD else []:
3172 f['filepath'] = fname = prepend_extension(
3173 correct_ext(temp_filename, info_dict['ext']),
3174 'f%s' % f['format_id'], info_dict['ext'])
3175 downloaded.append(fname)
dbf5416a 3176 info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
3177 success, real_download = self.dl(temp_filename, info_dict)
3178 info_dict['__real_download'] = real_download
18e674b4 3179 else:
18e674b4 3180 if self.params.get('allow_unplayable_formats'):
3181 self.report_warning(
3182 'You have requested merging of multiple formats '
3183 'while also allowing unplayable formats to be downloaded. '
3184 'The formats won\'t be merged to prevent data corruption.')
3185 elif not merger.available:
e8969bda 3186 msg = 'You have requested merging of multiple formats but ffmpeg is not installed'
3187 if not self.params.get('ignoreerrors'):
3188 self.report_error(f'{msg}. Aborting due to --abort-on-error')
3189 return
3190 self.report_warning(f'{msg}. The formats won\'t be merged')
18e674b4 3191
96fccc10 3192 if temp_filename == '-':
adbc4ec4 3193 reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
96fccc10 3194 else 'but the formats are incompatible for simultaneous download' if merger.available
3195 else 'but ffmpeg is not installed')
3196 self.report_warning(
3197 f'You have requested downloading multiple formats to stdout {reason}. '
3198 'The formats will be streamed one after the other')
3199 fname = temp_filename
dbf5416a 3200 for f in requested_formats:
3201 new_info = dict(info_dict)
3202 del new_info['requested_formats']
3203 new_info.update(f)
96fccc10 3204 if temp_filename != '-':
124bc071 3205 fname = prepend_extension(
3206 correct_ext(temp_filename, new_info['ext']),
3207 'f%s' % f['format_id'], new_info['ext'])
96fccc10 3208 if not self._ensure_dir_exists(fname):
3209 return
a21e0ab1 3210 f['filepath'] = fname
96fccc10 3211 downloaded.append(fname)
dbf5416a 3212 partial_success, real_download = self.dl(fname, new_info)
3213 info_dict['__real_download'] = info_dict['__real_download'] or real_download
3214 success = success and partial_success
adbc4ec4
THD
3215
3216 if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
3217 info_dict['__postprocessors'].append(merger)
3218 info_dict['__files_to_merge'] = downloaded
3219 # Even if there were no downloads, the merge itself only happens now
3220 info_dict['__real_download'] = True
3221 else:
3222 for file in downloaded:
3223 files_to_move[file] = None
4340deca
P
3224 else:
3225 # Just a single file
e04938ab 3226 dl_filename = existing_video_file(full_filename, temp_filename)
6c7274ec 3227 if dl_filename is None or dl_filename == temp_filename:
3228 # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
3229 # So we should try to resume the download
e8e73840 3230 success, real_download = self.dl(temp_filename, info_dict)
0202b52a 3231 info_dict['__real_download'] = real_download
6c7274ec 3232 else:
3233 self.report_file_already_downloaded(dl_filename)
0202b52a 3234
0202b52a 3235 dl_filename = dl_filename or temp_filename
c571435f 3236 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
0202b52a 3237
3158150c 3238 except network_exceptions as err:
7960b056 3239 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
4340deca 3240 return
86e5f3ed 3241 except OSError as err:
4340deca
P
3242 raise UnavailableVideoError(err)
3243 except (ContentTooShortError, ) as err:
86e5f3ed 3244 self.report_error(f'content too short (expected {err.expected} bytes and served {err.downloaded})')
4340deca 3245 return
8222d8de 3246
415f8d51 3247 self._raise_pending_errors(info_dict)
de6000d9 3248 if success and full_filename != '-':
f17f8651 3249
fd7cfb64 3250 def fixup():
3251 do_fixup = True
3252 fixup_policy = self.params.get('fixup')
3253 vid = info_dict['id']
3254
3255 if fixup_policy in ('ignore', 'never'):
3256 return
3257 elif fixup_policy == 'warn':
3fe75fdc 3258 do_fixup = 'warn'
f89b3e2d 3259 elif fixup_policy != 'force':
3260 assert fixup_policy in ('detect_or_warn', None)
3261 if not info_dict.get('__real_download'):
3262 do_fixup = False
fd7cfb64 3263
3264 def ffmpeg_fixup(cndn, msg, cls):
3fe75fdc 3265 if not (do_fixup and cndn):
fd7cfb64 3266 return
3fe75fdc 3267 elif do_fixup == 'warn':
fd7cfb64 3268 self.report_warning(f'{vid}: {msg}')
3269 return
3270 pp = cls(self)
3271 if pp.available:
3272 info_dict['__postprocessors'].append(pp)
3273 else:
3274 self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
3275
3276 stretched_ratio = info_dict.get('stretched_ratio')
ca9def71
LNO
3277 ffmpeg_fixup(stretched_ratio not in (1, None),
3278 f'Non-uniform pixel ratio {stretched_ratio}',
3279 FFmpegFixupStretchedPP)
fd7cfb64 3280
993191c0 3281 downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
24146491 3282 downloader = downloader.FD_NAME if downloader else None
adbc4ec4 3283
ca9def71
LNO
3284 ext = info_dict.get('ext')
3285 postprocessed_by_ffmpeg = info_dict.get('requested_formats') or any((
3286 isinstance(pp, FFmpegVideoConvertorPP)
3287 and resolve_recode_mapping(ext, pp.mapping)[0] not in (ext, None)
3288 ) for pp in self._pps['post_process'])
3289
3290 if not postprocessed_by_ffmpeg:
3291 ffmpeg_fixup(ext == 'm4a' and info_dict.get('container') == 'm4a_dash',
f2df4071 3292 'writing DASH m4a. Only some players support this container',
3293 FFmpegFixupM4aPP)
24146491 3294 ffmpeg_fixup(downloader == 'hlsnative' and not self.params.get('hls_use_mpegts')
494f5230 3295 or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None,
adbc4ec4
THD
3296 'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
3297 FFmpegFixupM3u8PP)
3298 ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD',
3299 'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
3300
24146491 3301 ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
3302 ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed duration detected', FFmpegFixupDurationPP)
fd7cfb64 3303
3304 fixup()
8222d8de 3305 try:
f46e2f9d 3306 replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
af819c21 3307 except PostProcessingError as err:
3308 self.report_error('Postprocessing: %s' % str(err))
8222d8de 3309 return
ab8e5e51
AM
3310 try:
3311 for ph in self._post_hooks:
23c1a667 3312 ph(info_dict['filepath'])
ab8e5e51
AM
3313 except Exception as err:
3314 self.report_error('post hooks: %s' % str(err))
3315 return
9e907ebd 3316 info_dict['__write_download_archive'] = True
2d30509f 3317
c487cf00 3318 assert info_dict is original_infodict # Make sure the info_dict was modified in-place
a13e6848 3319 if self.params.get('force_write_download_archive'):
9e907ebd 3320 info_dict['__write_download_archive'] = True
ca6d59d2 3321 check_max_downloads()
8222d8de 3322
aa9369a2 3323 def __download_wrapper(self, func):
3324 @functools.wraps(func)
3325 def wrapper(*args, **kwargs):
3326 try:
3327 res = func(*args, **kwargs)
3328 except UnavailableVideoError as e:
3329 self.report_error(e)
b222c271 3330 except DownloadCancelled as e:
3331 self.to_screen(f'[info] {e}')
3332 if not self.params.get('break_per_url'):
3333 raise
fd404bec 3334 self._num_downloads = 0
aa9369a2 3335 else:
3336 if self.params.get('dump_single_json', False):
3337 self.post_extract(res)
3338 self.to_stdout(json.dumps(self.sanitize_info(res)))
3339 return wrapper
3340
8222d8de
JMF
3341 def download(self, url_list):
3342 """Download a given list of URLs."""
aa9369a2 3343 url_list = variadic(url_list) # Passing a single URL is a common mistake
bf1824b3 3344 outtmpl = self.params['outtmpl']['default']
3089bc74
S
3345 if (len(url_list) > 1
3346 and outtmpl != '-'
3347 and '%' not in outtmpl
3348 and self.params.get('max_downloads') != 1):
acd69589 3349 raise SameFileError(outtmpl)
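# Illustrative failure this guards against: `-o video.mp4 URL1 URL2` would write
# both downloads to the same fixed filename, so it is rejected up front.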
8222d8de
JMF
3350
3351 for url in url_list:
aa9369a2 3352 self.__download_wrapper(self.extract_info)(
3353 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
8222d8de
JMF
3354
3355 return self._download_retcode
3356
1dcc4c0c 3357 def download_with_info_file(self, info_filename):
31bd3925
JMF
3358 with contextlib.closing(fileinput.FileInput(
3359 [info_filename], mode='r',
3360 openhook=fileinput.hook_encoded('utf-8'))) as f:
3361 # FileInput doesn't have a read method, so we can't call json.load
8012d892 3362 info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
d4943898 3363 try:
aa9369a2 3364 self.__download_wrapper(self.process_ie_result)(info, download=True)
f2ebc5c7 3365 except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
bf5f605e 3366 if not isinstance(e, EntryNotInPlaylist):
3367 self.to_stderr('\r')
d4943898
JMF
3368 webpage_url = info.get('webpage_url')
3369 if webpage_url is not None:
aa9369a2 3370 self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
d4943898
JMF
3371 return self.download([webpage_url])
3372 else:
3373 raise
3374 return self._download_retcode
1dcc4c0c 3375
cb202fd2 3376 @staticmethod
8012d892 3377 def sanitize_info(info_dict, remove_private_keys=False):
3378 ''' Sanitize the infodict for converting to json '''
3ad56b42 3379 if info_dict is None:
3380 return info_dict
6e84b215 3381 info_dict.setdefault('epoch', int(time.time()))
6a5a30f9 3382 info_dict.setdefault('_type', 'video')
b5e7a2e6 3383 info_dict.setdefault('_version', {
3384 'version': __version__,
3385 'current_git_head': current_git_head(),
3386 'release_git_head': RELEASE_GIT_HEAD,
3387 'repository': REPOSITORY,
3388 })
09b49e1f 3389
8012d892 3390 if remove_private_keys:
0a5a191a 3391 reject = lambda k, v: v is None or k.startswith('__') or k in {
f46e2f9d 3392 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
0a5a191a 3393 'entries', 'filepath', '_filename', 'infojson_filename', 'original_url', 'playlist_autonumber',
6e84b215 3394 }
ae8f99e6 3395 else:
09b49e1f 3396 reject = lambda k, v: False
adbc4ec4
THD
3397
3398 def filter_fn(obj):
3399 if isinstance(obj, dict):
3400 return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
3401 elif isinstance(obj, (list, tuple, set, LazyList)):
3402 return list(map(filter_fn, obj))
3403 elif obj is None or isinstance(obj, (str, int, float, bool)):
3404 return obj
3405 else:
3406 return repr(obj)
3407
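# Illustrative effect (assumed input): with remove_private_keys=True, keys such as
# '_filename', 'filepath', '__files_to_move' and any None values are dropped, while
# non-JSON-serializable objects are replaced by their repr().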
5226731e 3408 return filter_fn(info_dict)
cb202fd2 3409
8012d892 3410 @staticmethod
3411 def filter_requested_info(info_dict, actually_filter=True):
3412 ''' Alias of sanitize_info for backward compatibility '''
3413 return YoutubeDL.sanitize_info(info_dict, actually_filter)
3414
43d7f5a5 3415 def _delete_downloaded_files(self, *files_to_delete, info={}, msg=None):
3416 for filename in set(filter(None, files_to_delete)):
3417 if msg:
3418 self.to_screen(msg % filename)
3419 try:
3420 os.remove(filename)
3421 except OSError:
3422 self.report_warning(f'Unable to delete file {filename}')
3423 if filename in info.get('__files_to_move', []): # NB: Delete even if None
3424 del info['__files_to_move'][filename]
3425
ed5835b4 3426 @staticmethod
3427 def post_extract(info_dict):
3428 def actual_post_extract(info_dict):
3429 if info_dict.get('_type') in ('playlist', 'multi_video'):
3430 for video_dict in info_dict.get('entries', {}):
3431 actual_post_extract(video_dict or {})
3432 return
3433
09b49e1f 3434 post_extractor = info_dict.pop('__post_extractor', None) or (lambda: {})
3435 info_dict.update(post_extractor())
ed5835b4 3436
3437 actual_post_extract(info_dict or {})
3438
dcf64d43 3439 def run_pp(self, pp, infodict):
5bfa4862 3440 files_to_delete = []
dcf64d43 3441 if '__files_to_move' not in infodict:
3442 infodict['__files_to_move'] = {}
b1940459 3443 try:
3444 files_to_delete, infodict = pp.run(infodict)
3445 except PostProcessingError as e:
3446 # Must be True and not 'only_download'
3447 if self.params.get('ignoreerrors') is True:
3448 self.report_error(e)
3449 return infodict
3450 raise
3451
5bfa4862 3452 if not files_to_delete:
dcf64d43 3453 return infodict
5bfa4862 3454 if self.params.get('keepvideo', False):
3455 for f in files_to_delete:
dcf64d43 3456 infodict['__files_to_move'].setdefault(f, '')
5bfa4862 3457 else:
43d7f5a5 3458 self._delete_downloaded_files(
3459 *files_to_delete, info=infodict, msg='Deleting original file %s (pass -k to keep)')
dcf64d43 3460 return infodict
5bfa4862 3461
ed5835b4 3462 def run_all_pps(self, key, info, *, additional_pps=None):
bb66c247 3463 self._forceprint(key, info)
ed5835b4 3464 for pp in (additional_pps or []) + self._pps[key]:
dc5f409c 3465 info = self.run_pp(pp, info)
ed5835b4 3466 return info
277d6ff5 3467
56d868db 3468 def pre_process(self, ie_info, key='pre_process', files_to_move=None):
5bfa4862 3469 info = dict(ie_info)
56d868db 3470 info['__files_to_move'] = files_to_move or {}
415f8d51 3471 try:
3472 info = self.run_all_pps(key, info)
3473 except PostProcessingError as err:
3474 msg = f'Preprocessing: {err}'
3475 info.setdefault('__pending_error', msg)
3476 self.report_error(msg, is_error=False)
56d868db 3477 return info, info.pop('__files_to_move', None)
5bfa4862 3478
f46e2f9d 3479 def post_process(self, filename, info, files_to_move=None):
8222d8de 3480 """Run all the postprocessors on the given file."""
8222d8de 3481 info['filepath'] = filename
dcf64d43 3482 info['__files_to_move'] = files_to_move or {}
ed5835b4 3483 info = self.run_all_pps('post_process', info, additional_pps=info.get('__postprocessors'))
dcf64d43 3484 info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3485 del info['__files_to_move']
ed5835b4 3486 return self.run_all_pps('after_move', info)
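# Ordering as wired above: postprocessors queued during download (merger, fixups)
# run together with the 'post_process' PPs, then MoveFilesAfterDownloadPP moves
# files to their final location, then the 'after_move' PPs run.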
c1c9a79c 3487
5db07df6 3488 def _make_archive_id(self, info_dict):
e9fef7ee
S
3489 video_id = info_dict.get('id')
3490 if not video_id:
3491 return
5db07df6
PH
3492 # Future-proof against any change in case
3493 # and backwards compatibility with prior versions
e9fef7ee 3494 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
7012b23c 3495 if extractor is None:
1211bb6d
S
3496 url = str_or_none(info_dict.get('url'))
3497 if not url:
3498 return
e9fef7ee 3499 # Try to find matching extractor for the URL and take its ie_key
8b7491c8 3500 for ie_key, ie in self._ies.items():
1211bb6d 3501 if ie.suitable(url):
8b7491c8 3502 extractor = ie_key
e9fef7ee
S
3503 break
3504 else:
3505 return
0647d925 3506 return make_archive_id(extractor, video_id)
5db07df6
PH
3507
3508 def in_download_archive(self, info_dict):
ae103564 3509 if not self.archive:
5db07df6
PH
3510 return False
3511
1e8fe57e 3512 vid_ids = [self._make_archive_id(info_dict)]
c200096c 3513 vid_ids.extend(info_dict.get('_old_archive_ids') or [])
1e8fe57e 3514 return any(id_ in self.archive for id_ in vid_ids)
c1c9a79c
PH
3515
3516 def record_download_archive(self, info_dict):
3517 fn = self.params.get('download_archive')
3518 if fn is None:
3519 return
5db07df6
PH
3520 vid_id = self._make_archive_id(info_dict)
3521 assert vid_id
ae103564 3522
a13e6848 3523 self.write_debug(f'Adding to archive: {vid_id}')
9c935fbc 3524 if is_path_like(fn):
ae103564 3525 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3526 archive_file.write(vid_id + '\n')
a45e8619 3527 self.archive.add(vid_id)
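# Archive entries are single lines of the form '<extractor key, lowercased> <video id>'
# (roughly, e.g., 'youtube dQw4w9WgXcQ') as produced by make_archive_id().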
dd82ffea 3528
8c51aa65 3529 @staticmethod
8abeeb94 3530 def format_resolution(format, default='unknown'):
9359f3d4 3531 if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
fb04e403 3532 return 'audio only'
f49d89ee
PH
3533 if format.get('resolution') is not None:
3534 return format['resolution']
35615307 3535 if format.get('width') and format.get('height'):
ff51ed58 3536 return '%dx%d' % (format['width'], format['height'])
35615307 3537 elif format.get('height'):
ff51ed58 3538 return '%sp' % format['height']
35615307 3539 elif format.get('width'):
ff51ed58 3540 return '%dx?' % format['width']
3541 return default
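# Examples (illustrative): {'width': 1920, 'height': 1080} -> '1920x1080',
# {'height': 720} -> '720p', and a format whose vcodec is 'none' but which has
# audio is reported as 'audio only'.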
8c51aa65 3542
8130779d 3543 def _list_format_headers(self, *headers):
3544 if self.params.get('listformats_table', True) is not False:
591bb9d3 3545 return [self._format_out(header, self.Styles.HEADERS) for header in headers]
8130779d 3546 return headers
3547
c57f7757
PH
3548 def _format_note(self, fdict):
3549 res = ''
3550 if fdict.get('ext') in ['f4f', 'f4m']:
f304da8a 3551 res += '(unsupported)'
32f90364
PH
3552 if fdict.get('language'):
3553 if res:
3554 res += ' '
f304da8a 3555 res += '[%s]' % fdict['language']
c57f7757 3556 if fdict.get('format_note') is not None:
f304da8a 3557 if res:
3558 res += ' '
3559 res += fdict['format_note']
c57f7757 3560 if fdict.get('tbr') is not None:
f304da8a 3561 if res:
3562 res += ', '
3563 res += '%4dk' % fdict['tbr']
c57f7757
PH
3564 if fdict.get('container') is not None:
3565 if res:
3566 res += ', '
3567 res += '%s container' % fdict['container']
3089bc74
S
3568 if (fdict.get('vcodec') is not None
3569 and fdict.get('vcodec') != 'none'):
c57f7757
PH
3570 if res:
3571 res += ', '
3572 res += fdict['vcodec']
91c7271a 3573 if fdict.get('vbr') is not None:
c57f7757
PH
3574 res += '@'
3575 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3576 res += 'video@'
3577 if fdict.get('vbr') is not None:
3578 res += '%4dk' % fdict['vbr']
fbb21cf5 3579 if fdict.get('fps') is not None:
5d583bdf
S
3580 if res:
3581 res += ', '
3582 res += '%sfps' % fdict['fps']
c57f7757
PH
3583 if fdict.get('acodec') is not None:
3584 if res:
3585 res += ', '
3586 if fdict['acodec'] == 'none':
3587 res += 'video only'
3588 else:
3589 res += '%-5s' % fdict['acodec']
3590 elif fdict.get('abr') is not None:
3591 if res:
3592 res += ', '
3593 res += 'audio'
3594 if fdict.get('abr') is not None:
3595 res += '@%3dk' % fdict['abr']
3596 if fdict.get('asr') is not None:
3597 res += ' (%5dHz)' % fdict['asr']
3598 if fdict.get('filesize') is not None:
3599 if res:
3600 res += ', '
3601 res += format_bytes(fdict['filesize'])
9732d77e
PH
3602 elif fdict.get('filesize_approx') is not None:
3603 if res:
3604 res += ', '
3605 res += '~' + format_bytes(fdict['filesize_approx'])
c57f7757 3606 return res
91c7271a 3607
aebb4f4b 3608 def _get_formats(self, info_dict):
3609 if info_dict.get('formats') is None:
3610 if info_dict.get('url') and info_dict.get('_type', 'video') == 'video':
3611 return [info_dict]
3612 return []
3613 return info_dict['formats']
b69fd25c 3614
aebb4f4b 3615 def render_formats_table(self, info_dict):
3616 formats = self._get_formats(info_dict)
3617 if not formats:
3618 return
8130779d 3619 if self.params.get('listformats_table', True) is False:
76d321f6 3620 table = [
3621 [
3622 format_field(f, 'format_id'),
3623 format_field(f, 'ext'),
3624 self.format_resolution(f),
8130779d 3625 self._format_note(f)
d5d1df8a 3626 ] for f in formats if (f.get('preference') or 0) >= -1000]
8130779d 3627 return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)
3628
d816f61f 3629 def simplified_codec(f, field):
3630 assert field in ('acodec', 'vcodec')
3631 codec = f.get(field, 'unknown')
f5ea4748 3632 if not codec:
3633 return 'unknown'
3634 elif codec != 'none':
d816f61f 3635 return '.'.join(codec.split('.')[:4])
3636
3637 if field == 'vcodec' and f.get('acodec') == 'none':
3638 return 'images'
3639 elif field == 'acodec' and f.get('vcodec') == 'none':
3640 return ''
3641 return self._format_out('audio only' if field == 'vcodec' else 'video only',
3642 self.Styles.SUPPRESS)
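# Examples (illustrative): a long codec string such as 'av01.0.08M.08.0.110' is
# shortened to its first four dot-separated parts ('av01.0.08M.08'); when both
# codecs are 'none' (e.g. storyboards) the VCODEC column shows 'images', while an
# audio-only format shows a dimmed 'audio only' there instead.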
3643
591bb9d3 3644 delim = self._format_out('\u2502', self.Styles.DELIM, '|', test_encoding=True)
8130779d 3645 table = [
3646 [
591bb9d3 3647 self._format_out(format_field(f, 'format_id'), self.Styles.ID),
8130779d 3648 format_field(f, 'ext'),
3649 format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
563e0bf8 3650 format_field(f, 'fps', '\t%d', func=round),
8130779d 3651 format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
b8ed0f15 3652 format_field(f, 'audio_channels', '\t%s'),
8130779d 3653 delim,
3654 format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
563e0bf8 3655 format_field(f, 'tbr', '\t%dk', func=round),
8130779d 3656 shorten_protocol_name(f.get('protocol', '')),
3657 delim,
d816f61f 3658 simplified_codec(f, 'vcodec'),
563e0bf8 3659 format_field(f, 'vbr', '\t%dk', func=round),
d816f61f 3660 simplified_codec(f, 'acodec'),
563e0bf8 3661 format_field(f, 'abr', '\t%dk', func=round),
ae61d108 3662 format_field(f, 'asr', '\t%s', func=format_decimal_suffix),
8130779d 3663 join_nonempty(
591bb9d3 3664 self._format_out('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
8130779d 3665 format_field(f, 'language', '[%s]'),
3666 join_nonempty(format_field(f, 'format_note'),
3667 format_field(f, 'container', ignore=(None, f.get('ext'))),
3668 delim=', '),
3669 delim=' '),
3670 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3671 header_line = self._list_format_headers(
b8ed0f15 3672 'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', 'CH', delim, '\tFILESIZE', '\tTBR', 'PROTO',
8130779d 3673 delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
3674
3675 return render_table(
3676 header_line, table, hide_empty=True,
591bb9d3 3677 delim=self._format_out('\u2500', self.Styles.DELIM, '-', test_encoding=True))
8130779d 3678
3679 def render_thumbnails_table(self, info_dict):
88f23a18 3680 thumbnails = list(info_dict.get('thumbnails') or [])
cfb56d1a 3681 if not thumbnails:
8130779d 3682 return None
3683 return render_table(
ec11a9f4 3684 self._list_format_headers('ID', 'Width', 'Height', 'URL'),
177662e0 3685 [[t.get('id'), t.get('width') or 'unknown', t.get('height') or 'unknown', t['url']] for t in thumbnails])
2412044c 3686
8130779d 3687 def render_subtitles_table(self, video_id, subtitles):
2412044c 3688 def _row(lang, formats):
49c258e1 3689 exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
2412044c 3690 if len(set(names)) == 1:
7aee40c1 3691 names = [] if names[0] == 'unknown' else names[:1]
2412044c 3692 return [lang, ', '.join(names), ', '.join(exts)]
3693
8130779d 3694 if not subtitles:
3695 return None
3696 return render_table(
ec11a9f4 3697 self._list_format_headers('Language', 'Name', 'Formats'),
2412044c 3698 [_row(lang, formats) for lang, formats in subtitles.items()],
8130779d 3699 hide_empty=True)
3700
3701 def __list_table(self, video_id, name, func, *args):
3702 table = func(*args)
3703 if not table:
3704 self.to_screen(f'{video_id} has no {name}')
3705 return
3706 self.to_screen(f'[info] Available {name} for {video_id}:')
3707 self.to_stdout(table)
3708
3709 def list_formats(self, info_dict):
3710 self.__list_table(info_dict['id'], 'formats', self.render_formats_table, info_dict)
3711
3712 def list_thumbnails(self, info_dict):
3713 self.__list_table(info_dict['id'], 'thumbnails', self.render_thumbnails_table, info_dict)
3714
3715 def list_subtitles(self, video_id, subtitles, name='subtitles'):
3716 self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles)
a504ced0 3717
dca08720
PH
3718 def urlopen(self, req):
3719 """ Start an HTTP download """
f9934b96 3720 if isinstance(req, str):
67dda517 3721 req = sanitized_Request(req)
19a41fc6 3722 return self._opener.open(req, timeout=self._socket_timeout)
dca08720
PH
3723
3724 def print_debug_header(self):
3725 if not self.params.get('verbose'):
3726 return
49a57e70 3727
a057779d 3728 from . import _IN_CLI # Must be delayed import
3729
560738f3 3730 # These imports can be slow. So import them only as needed
3731 from .extractor.extractors import _LAZY_LOADER
3732 from .extractor.extractors import _PLUGIN_CLASSES as plugin_extractors
3733
49a57e70 3734 def get_encoding(stream):
2a938746 3735 ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
49a57e70 3736 if not supports_terminal_sequences(stream):
53973b4d 3737 from .utils import WINDOWS_VT_MODE # Must be imported locally
e3c7d495 3738 ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
49a57e70 3739 return ret
3740
591bb9d3 3741 encoding_str = 'Encodings: locale %s, fs %s, pref %s, %s' % (
49a57e70 3742 locale.getpreferredencoding(),
3743 sys.getfilesystemencoding(),
591bb9d3 3744 self.get_encoding(),
3745 ', '.join(
64fa820c 3746 f'{key} {get_encoding(stream)}' for key, stream in self._out_files.items_
591bb9d3 3747 if stream is not None and key != 'console')
3748 )
883d4b1e 3749
3750 logger = self.params.get('logger')
3751 if logger:
3752 write_debug = lambda msg: logger.debug(f'[debug] {msg}')
3753 write_debug(encoding_str)
3754 else:
96565c7e 3755 write_string(f'[debug] {encoding_str}\n', encoding=None)
49a57e70 3756 write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')
734f90bb 3757
4c88ff87 3758 source = detect_variant()
70b23409 3759 if VARIANT not in (None, 'pip'):
3760 source += '*'
36eaf303 3761 write_debug(join_nonempty(
b5e7a2e6 3762 f'{"yt-dlp" if REPOSITORY == "yt-dlp/yt-dlp" else REPOSITORY} version',
3763 __version__,
36eaf303 3764 f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
3765 '' if source == 'unknown' else f'({source})',
a057779d 3766 '' if _IN_CLI else 'API',
36eaf303 3767 delim=' '))
497074f0 3768
3769 if not _IN_CLI:
3770 write_debug(f'params: {self.params}')
3771
6e21fdd2 3772 if not _LAZY_LOADER:
3773 if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
49a57e70 3774 write_debug('Lazy loading extractors is forcibly disabled')
6e21fdd2 3775 else:
49a57e70 3776 write_debug('Lazy loading extractors is disabled')
8a82af35 3777 if self.params['compat_opts']:
3778 write_debug('Compatibility options: %s' % ', '.join(self.params['compat_opts']))
36eaf303 3779
b5e7a2e6 3780 if current_git_head():
3781 write_debug(f'Git HEAD: {current_git_head()}')
b1f94422 3782 write_debug(system_identifier())
d28b5171 3783
8913ef74 3784 exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
3785 ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
3786 if ffmpeg_features:
19a03940 3787 exe_versions['ffmpeg'] += ' (%s)' % ','.join(sorted(ffmpeg_features))
8913ef74 3788
4c83c967 3789 exe_versions['rtmpdump'] = rtmpdump_version()
feee8d32 3790 exe_versions['phantomjs'] = PhantomJSwrapper._version()
d28b5171 3791 exe_str = ', '.join(
2831b468 3792 f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
3793 ) or 'none'
49a57e70 3794 write_debug('exe versions: %s' % exe_str)
dca08720 3795
1d485a1a 3796 from .compat.compat_utils import get_package_info
9b8ee23b 3797 from .dependencies import available_dependencies
3798
3799 write_debug('Optional libraries: %s' % (', '.join(sorted({
1d485a1a 3800 join_nonempty(*get_package_info(m)) for m in available_dependencies.values()
9b8ee23b 3801 })) or 'none'))
2831b468 3802
97ec5bc5 3803 self._setup_opener()
dca08720
PH
3804 proxy_map = {}
3805 for handler in self._opener.handlers:
3806 if hasattr(handler, 'proxies'):
3807 proxy_map.update(handler.proxies)
49a57e70 3808 write_debug(f'Proxy map: {proxy_map}')
dca08720 3809
8e40b9d1
M
3810 for plugin_type, plugins in {'Extractor': plugin_extractors, 'Post-Processor': plugin_postprocessors}.items():
3811 if not plugins:
3812 continue
3813 write_debug(f'{plugin_type} Plugins: %s' % (', '.join(sorted(('%s%s' % (
3814 klass.__name__, '' if klass.__name__ == name else f' as {name}')
3815 for name, klass in plugins.items())))))
3816 plugin_dirs = plugin_directories()
3817 if plugin_dirs:
3818 write_debug(f'Plugin directories: {plugin_dirs}')
3819
49a57e70 3820 # Not implemented
3821 if False and self.params.get('call_home'):
0f06bcd7 3822 ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode()
49a57e70 3823 write_debug('Public IP address: %s' % ipaddr)
58b1f00d 3824 latest_version = self.urlopen(
0f06bcd7 3825 'https://yt-dl.org/latest/version').read().decode()
58b1f00d
PH
3826 if version_tuple(latest_version) > version_tuple(__version__):
3827 self.report_warning(
3828 'You are using an outdated version (newest version: %s)! '
3829 'See https://yt-dl.org/update if you need help updating.' %
3830 latest_version)
3831
e344693b 3832 def _setup_opener(self):
97ec5bc5 3833 if hasattr(self, '_opener'):
3834 return
6ad14cab 3835 timeout_val = self.params.get('socket_timeout')
17bddf3e 3836 self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
6ad14cab 3837
982ee69a 3838 opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
dca08720
PH
3839 opts_cookiefile = self.params.get('cookiefile')
3840 opts_proxy = self.params.get('proxy')
3841
982ee69a 3842 self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
dca08720 3843
6a3f4c3f 3844 cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
dca08720
PH
3845 if opts_proxy is not None:
3846 if opts_proxy == '':
3847 proxies = {}
3848 else:
3849 proxies = {'http': opts_proxy, 'https': opts_proxy}
3850 else:
ac668111 3851 proxies = urllib.request.getproxies()
067aa17e 3852 # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
dca08720
PH
3853 if 'http' in proxies and 'https' not in proxies:
3854 proxies['https'] = proxies['http']
91410c9b 3855 proxy_handler = PerRequestProxyHandler(proxies)
a0ddb8a2
PH
3856
3857 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
be4a824d
PH
3858 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3859 ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
fca6dba8 3860 redirect_handler = YoutubeDLRedirectHandler()
f9934b96 3861 data_handler = urllib.request.DataHandler()
6240b0a2
JMF
3862
3863 # When passing our own FileHandler instance, build_opener won't add the
3864 # default FileHandler; this lets us disable the file protocol, which
3865 # can be abused for malicious purposes (see
067aa17e 3866 # https://github.com/ytdl-org/youtube-dl/issues/8227)
ac668111 3867 file_handler = urllib.request.FileHandler()
6240b0a2
JMF
3868
3869 def file_open(*args, **kwargs):
ac668111 3870 raise urllib.error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
6240b0a2
JMF
3871 file_handler.file_open = file_open
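# Effect of the override above, sketched (path is a placeholder):
#   self.urlopen('file:///etc/passwd')
#   # raises urllib.error.URLError('file:// scheme is explicitly disabled ...')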
3872
ac668111 3873 opener = urllib.request.build_opener(
fca6dba8 3874 proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
2461f79d 3875
dca08720
PH
3876 # Delete the default user-agent header, which would otherwise apply in
3877 # cases where our custom HTTP handler doesn't come into play
067aa17e 3878 # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
dca08720
PH
3879 opener.addheaders = []
3880 self._opener = opener
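# Sketch of the proxy mapping built above (URL is a placeholder): with
# params={'proxy': 'http://127.0.0.1:8080'}, the handler installed is
#   PerRequestProxyHandler({'http': 'http://127.0.0.1:8080',
#                           'https': 'http://127.0.0.1:8080'})
# while an empty 'proxy' string disables proxying entirely.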
62fec3b2
PH
3881
3882 def encode(self, s):
3883 if isinstance(s, bytes):
3884 return s # Already encoded
3885
3886 try:
3887 return s.encode(self.get_encoding())
3888 except UnicodeEncodeError as err:
3889 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3890 raise
3891
3892 def get_encoding(self):
3893 encoding = self.params.get('encoding')
3894 if encoding is None:
3895 encoding = preferredencoding()
3896 return encoding
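# Sketch of the encode/get_encoding pair above: the 'encoding' param wins,
# otherwise the locale's preferred encoding is used, e.g. (hedged)
#   YoutubeDL({'encoding': 'utf-8'}).encode('naïve')  # -> b'na\xc3\xafve'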
ec82d85a 3897
e08a85d8 3898 def _write_info_json(self, label, ie_result, infofn, overwrite=None):
cb96c5be 3899 ''' Write infojson and return True = written, 'exists' = already exists, False = skip, None = error '''
e08a85d8 3900 if overwrite is None:
3901 overwrite = self.params.get('overwrites', True)
80c03fa9 3902 if not self.params.get('writeinfojson'):
3903 return False
3904 elif not infofn:
3905 self.write_debug(f'Skipping writing {label} infojson')
3906 return False
3907 elif not self._ensure_dir_exists(infofn):
3908 return None
e08a85d8 3909 elif not overwrite and os.path.exists(infofn):
80c03fa9 3910 self.to_screen(f'[info] {label.title()} metadata is already present')
cb96c5be 3911 return 'exists'
3912
3913 self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3914 try:
3915 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3916 return True
86e5f3ed 3917 except OSError:
cb96c5be 3918 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3919 return None
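# Return-value sketch (hedged; `info` and `path` are placeholders):
#   status = self._write_info_json('video', info, path)
#   # True -> freshly written, 'exists' -> kept existing file,
#   # False -> writing skipped, None -> error while writing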
80c03fa9 3920
3921 def _write_description(self, label, ie_result, descfn):
3922 ''' Write description and return True = written, False = skip, None = error '''
3923 if not self.params.get('writedescription'):
3924 return False
3925 elif not descfn:
3926 self.write_debug(f'Skipping writing {label} description')
3927 return False
3928 elif not self._ensure_dir_exists(descfn):
3929 return None
3930 elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3931 self.to_screen(f'[info] {label.title()} description is already present')
3932 elif ie_result.get('description') is None:
3933 self.report_warning(f'There\'s no {label} description to write')
3934 return False
3935 else:
3936 try:
3937 self.to_screen(f'[info] Writing {label} description to: {descfn}')
86e5f3ed 3938 with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
80c03fa9 3939 descfile.write(ie_result['description'])
86e5f3ed 3940 except OSError:
80c03fa9 3941 self.report_error(f'Cannot write {label} description file {descfn}')
3942 return None
3943 return True
3944
3945 def _write_subtitles(self, info_dict, filename):
3946 ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error '''
3947 ret = []
3948 subtitles = info_dict.get('requested_subtitles')
3949 if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
3950 # Subtitle download errors are already handled in the relevant IE,
3951 # so this silently continues when the IE does not support subtitles
3952 return ret
3953
3954 sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
3955 if not sub_filename_base:
3956 self.to_screen('[info] Skipping writing video subtitles')
3957 return ret
3958 for sub_lang, sub_info in subtitles.items():
3959 sub_format = sub_info['ext']
3960 sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
3961 sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
e04938ab 3962 existing_sub = self.existing_file((sub_filename_final, sub_filename))
3963 if existing_sub:
80c03fa9 3964 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
e04938ab 3965 sub_info['filepath'] = existing_sub
3966 ret.append((existing_sub, sub_filename_final))
80c03fa9 3967 continue
3968
3969 self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
3970 if sub_info.get('data') is not None:
3971 try:
3972 # Use newline='' to prevent conversion of newline characters
3973 # See https://github.com/ytdl-org/youtube-dl/issues/10268
86e5f3ed 3974 with open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
80c03fa9 3975 subfile.write(sub_info['data'])
3976 sub_info['filepath'] = sub_filename
3977 ret.append((sub_filename, sub_filename_final))
3978 continue
86e5f3ed 3979 except OSError:
80c03fa9 3980 self.report_error(f'Cannot write video subtitles file {sub_filename}')
3981 return None
3982
3983 try:
3984 sub_copy = sub_info.copy()
3985 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
3986 self.dl(sub_filename, sub_copy, subtitle=True)
3987 sub_info['filepath'] = sub_filename
3988 ret.append((sub_filename, sub_filename_final))
6020e05d 3989 except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
c70c418d 3990 msg = f'Unable to download video subtitles for {sub_lang!r}: {err}'
6020e05d 3991 if self.params.get('ignoreerrors') is not True: # False or 'only_download'
c70c418d 3992 if not self.params.get('ignoreerrors'):
3993 self.report_error(msg)
3994 raise DownloadError(msg)
3995 self.report_warning(msg)
519804a9 3996 return ret
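# Return-value sketch (names are placeholders): each entry pairs the file
# actually written with its intended final name, e.g.
#   [('Title [id].en.vtt', 'Title [id].en.vtt')]
# while None signals a hard error.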
80c03fa9 3997
3998 def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
3999 ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
6c4fd172 4000 write_all = self.params.get('write_all_thumbnails', False)
80c03fa9 4001 thumbnails, ret = [], []
6c4fd172 4002 if write_all or self.params.get('writethumbnail', False):
0202b52a 4003 thumbnails = info_dict.get('thumbnails') or []
6c4fd172 4004 multiple = write_all and len(thumbnails) > 1
ec82d85a 4005
80c03fa9 4006 if thumb_filename_base is None:
4007 thumb_filename_base = filename
4008 if thumbnails and not thumb_filename_base:
4009 self.write_debug(f'Skipping writing {label} thumbnail')
4010 return ret
4011
dd0228ce 4012 for idx, t in list(enumerate(thumbnails))[::-1]:
80c03fa9 4013 thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
aa9369a2 4014 thumb_display_id = f'{label} thumbnail {t["id"]}'
80c03fa9 4015 thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
4016 thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
ec82d85a 4017
e04938ab 4018 existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
4019 if existing_thumb:
aa9369a2 4020 self.to_screen('[info] %s is already present' % (
4021 thumb_display_id if multiple else f'{label} thumbnail').capitalize())
e04938ab 4022 t['filepath'] = existing_thumb
4023 ret.append((existing_thumb, thumb_filename_final))
ec82d85a 4024 else:
80c03fa9 4025 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
ec82d85a 4026 try:
297e9952 4027 uf = self.urlopen(sanitized_Request(t['url'], headers=t.get('http_headers', {})))
80c03fa9 4028 self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
d3d89c32 4029 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
ec82d85a 4030 shutil.copyfileobj(uf, thumbf)
80c03fa9 4031 ret.append((thumb_filename, thumb_filename_final))
885cc0b7 4032 t['filepath'] = thumb_filename
3158150c 4033 except network_exceptions as err:
dd0228ce 4034 thumbnails.pop(idx)
80c03fa9 4035 self.report_warning(f'Unable to download {thumb_display_id}: {err}')
6c4fd172 4036 if ret and not write_all:
4037 break
0202b52a 4038 return ret
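# Return-value sketch, mirroring _write_subtitles: a list of
# (thumb_filename, thumb_filename_final) pairs. Without 'write_all_thumbnails'
# the loop above stops after the first thumbnail that downloads successfully;
# with it, only thumbnails that fail to download are dropped from the list.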