import collections
import contextlib
import datetime
import errno
import fileinput
import functools
import io
import itertools
import json
import locale
import operator
import os
import random
import re
import shutil
import subprocess
import sys
import tempfile
import time
import tokenize
import traceback
import unicodedata
import urllib.request

from string import ascii_letters

from .cache import Cache
from .compat import compat_os_name, compat_shlex_quote
from .cookies import load_cookies
from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
from .downloader.rtmp import rtmpdump_version
from .extractor import gen_extractor_classes, get_info_extractor
from .extractor.common import UnsupportedURLIE
from .extractor.openload import PhantomJSwrapper
from .minicurses import format_text
from .postprocessor import _PLUGIN_CLASSES as plugin_postprocessors
from .postprocessor import (
    EmbedThumbnailPP,
    FFmpegFixupDuplicateMoovPP,
    FFmpegFixupDurationPP,
    FFmpegFixupM3u8PP,
    FFmpegFixupM4aPP,
    FFmpegFixupStretchedPP,
    FFmpegFixupTimestampPP,
    FFmpegMergerPP,
    FFmpegPostProcessor,
    FFmpegVideoConvertorPP,
    MoveFilesAfterDownloadPP,
    get_postprocessor,
)
from .postprocessor.ffmpeg import resolve_mapping as resolve_recode_mapping
from .update import REPOSITORY, current_git_head, detect_variant
from .utils import (
    DEFAULT_OUTTMPL,
    IDENTITY,
    LINK_TEMPLATES,
    MEDIA_EXTENSIONS,
    NO_DEFAULT,
    NUMBER_RE,
    OUTTMPL_TYPES,
    POSTPROCESS_WHEN,
    STR_FORMAT_RE_TMPL,
    STR_FORMAT_TYPES,
    ContentTooShortError,
    DateRange,
    DownloadCancelled,
    DownloadError,
    EntryNotInPlaylist,
    ExistingVideoReached,
    ExtractorError,
    FormatSorter,
    GeoRestrictedError,
    HEADRequest,
    ISO3166Utils,
    LazyList,
    MaxDownloadsReached,
    Namespace,
    PagedList,
    PerRequestProxyHandler,
    PlaylistEntries,
    Popen,
    PostProcessingError,
    ReExtractInfo,
    RejectedVideoReached,
    SameFileError,
    UnavailableVideoError,
    UserNotLive,
    YoutubeDLCookieProcessor,
    YoutubeDLHandler,
    YoutubeDLRedirectHandler,
    age_restricted,
    args_to_str,
    bug_reports_message,
    date_from_str,
    deprecation_warning,
    determine_ext,
    determine_protocol,
    encode_compat_str,
    encodeFilename,
    error_to_compat_str,
    escapeHTML,
    expand_path,
    filter_dict,
    float_or_none,
    format_bytes,
    format_decimal_suffix,
    format_field,
    formatSeconds,
    get_compatible_ext,
    get_domain,
    int_or_none,
    iri_to_uri,
    is_path_like,
    join_nonempty,
    locked_file,
    make_archive_id,
    make_dir,
    make_HTTPS_handler,
    merge_headers,
    network_exceptions,
    number_of_digits,
    orderedSet,
    orderedSet_from_options,
    parse_filesize,
    preferredencoding,
    prepend_extension,
    register_socks_protocols,
    remove_terminal_sequences,
    render_table,
    replace_extension,
    sanitize_filename,
    sanitize_path,
    sanitize_url,
    sanitized_Request,
    std_headers,
    str_or_none,
    strftime_or_none,
    subtitles_filename,
    supports_terminal_sequences,
    system_identifier,
    timetuple_from_msec,
    to_high_limit_path,
    traverse_obj,
    try_call,
    try_get,
    url_basename,
    variadic,
    version_tuple,
    windows_enable_vt_mode,
    write_json_file,
    write_string,
)
from .version import RELEASE_GIT_HEAD, VARIANT, __version__

if compat_os_name == 'nt':
    import ctypes


class YoutubeDL:
    """YoutubeDL class.

    YoutubeDL objects are the ones responsible for downloading the actual
    video file and writing it to disk if the user has requested it, among
    some other tasks. In most cases there should be one per program.
    Given a video URL, the downloader doesn't know how to extract all the
    needed information; that is the task of the InfoExtractors, so it has
    to pass the URL to one of them.

    For this, YoutubeDL objects have a method that allows InfoExtractors
    to be registered in a given order. When it is passed a URL, the
    YoutubeDL object hands it to the first InfoExtractor it finds that
    reports being able to handle it. The InfoExtractor extracts all the
    information about the video or videos the URL refers to, and YoutubeDL
    processes the extracted information, possibly using a File Downloader
    to download the video.

    YoutubeDL objects accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The YoutubeDL also registers
    itself as the downloader in charge of the InfoExtractors that are
    added to it, so this is a "mutual registration".

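    A minimal usage sketch (illustrative only; the option values below are
    examples, not defaults):

        from yt_dlp import YoutubeDL

        params = {'format': 'best', 'outtmpl': {'default': '%(title)s.%(ext)s'}}
        with YoutubeDL(params) as ydl:
            ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
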
    Available options:

    username:          Username for authentication purposes.
    password:          Password for authentication purposes.
    videopassword:     Password for accessing a video.
    ap_mso:            Adobe Pass multiple-system operator identifier.
    ap_username:       Multiple-system operator account username.
    ap_password:       Multiple-system operator account password.
    usenetrc:          Use netrc for authentication instead.
    verbose:           Print additional info to stdout.
    quiet:             Do not print messages to stdout.
    no_warnings:       Do not print out anything for warnings.
    forceprint:        A dict with keys WHEN mapped to a list of templates to
                       print to stdout. The allowed keys are video or any of the
                       items in utils.POSTPROCESS_WHEN.
                       For compatibility, a single list is also accepted
    print_to_file:     A dict with keys WHEN (same as forceprint) mapped to
                       a list of tuples with (template, filename)
    forcejson:         Force printing info_dict as JSON.
    dump_single_json:  Force printing the info_dict of the whole playlist
                       (or video) as a single JSON line.
    force_write_download_archive: Force writing download archive regardless
                       of 'skip_download' or 'simulate'.
    simulate:          Do not download the video files. If unset (or None),
                       simulate only if listsubtitles, listformats or list_thumbnails is used
    format:            Video format code. see "FORMAT SELECTION" for more details.
                       You can also pass a function. The function takes 'ctx' as
                       argument and returns the formats to download.
                       See "build_format_selector" for an implementation
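                       E.g. (an illustrative sketch, assuming ctx['formats'] holds
                       the extracted formats sorted worst-to-best; see
                       "build_format_selector" for the authoritative behaviour):

                           def worst_video(ctx):
                               formats = ctx['formats']
                               yield formats[0]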
    allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
    ignore_no_formats_error: Ignore "No video formats" error. Useful for
                       extracting metadata even if the video is not actually
                       available for download (experimental)
    format_sort:       A list of fields by which to sort the video formats.
                       See "Sorting Formats" for more details.
    format_sort_force: Force the given format_sort. see "Sorting Formats"
                       for more details.
    prefer_free_formats: Whether to prefer video formats with free containers
                       over non-free ones of same quality.
    allow_multiple_video_streams: Allow multiple video streams to be merged
                       into a single file
    allow_multiple_audio_streams: Allow multiple audio streams to be merged
                       into a single file
    check_formats:     Whether to test if the formats are downloadable.
                       Can be True (check all), False (check none),
                       'selected' (check selected formats),
                       or None (check only if requested by extractor)
    paths:             Dictionary of output paths. The allowed keys are 'home'
                       'temp' and the keys of OUTTMPL_TYPES (in utils.py)
    outtmpl:           Dictionary of templates for output names. Allowed keys
                       are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
                       For compatibility with youtube-dl, a single string can also be used
    outtmpl_na_placeholder: Placeholder for unavailable meta fields.
    restrictfilenames: Do not allow "&" and spaces in file names
    trim_file_name:    Limit length of filename (extension excluded)
    windowsfilenames:  Force the filenames to be windows compatible
    ignoreerrors:      Do not stop on download/postprocessing errors.
                       Can be 'only_download' to ignore only download errors.
                       Default is 'only_download' for CLI, but False for API
    skip_playlist_after_errors: Number of allowed failures until the rest of
                       the playlist is skipped
    allowed_extractors: List of regexes to match against extractor names that are allowed
    overwrites:        Overwrite all video and metadata files if True,
                       overwrite only non-video files if None
                       and don't overwrite any file if False
                       For compatibility with youtube-dl,
                       "nooverwrites" may also be used instead
    playlist_items:    Specific indices of playlist to download.
    playlistrandom:    Download playlist items in random order.
    lazy_playlist:     Process playlist entries as they are received.
    matchtitle:        Download only matching titles.
    rejecttitle:       Reject downloads for matching titles.
    logger:            Log messages to a logging.Logger instance.
    logtostderr:       Print everything to stderr instead of stdout.
    consoletitle:      Display progress in console window's titlebar.
    writedescription:  Write the video description to a .description file
    writeinfojson:     Write the video description to a .info.json file
    clean_infojson:    Remove private fields from the infojson
    getcomments:       Extract video comments. This will not be written to disk
                       unless writeinfojson is also given
    writeannotations:  Write the video annotations to a .annotations.xml file
    writethumbnail:    Write the thumbnail image to a file
    allow_playlist_files: Whether to write playlists' description, infojson etc
                       also to disk when using the 'write*' options
    write_all_thumbnails: Write all thumbnail formats to files
    writelink:         Write an internet shortcut file, depending on the
                       current platform (.url/.webloc/.desktop)
    writeurllink:      Write a Windows internet shortcut file (.url)
    writewebloclink:   Write a macOS internet shortcut file (.webloc)
    writedesktoplink:  Write a Linux internet shortcut file (.desktop)
    writesubtitles:    Write the video subtitles to a file
    writeautomaticsub: Write the automatically generated subtitles to a file
    listsubtitles:     Lists all available subtitles for the video
    subtitlesformat:   The format code for subtitles
    subtitleslangs:    List of languages of the subtitles to download (can be regex).
                       The list may contain "all" to refer to all the available
                       subtitles. The language can be prefixed with a "-" to
                       exclude it from the requested languages, e.g. ['all', '-live_chat']
    keepvideo:         Keep the video file after post-processing
    daterange:         A DateRange object, download only if the upload_date is in the range.
    skip_download:     Skip the actual download of the video file
    cachedir:          Location of the cache files in the filesystem.
                       False to disable filesystem cache.
    noplaylist:        Download single video instead of a playlist if in doubt.
    age_limit:         An integer representing the user's age in years.
                       Unsuitable videos for the given age are skipped.
    min_views:         An integer representing the minimum view count the video
                       must have in order to not be skipped.
                       Videos without view count information are always
                       downloaded. None for no limit.
    max_views:         An integer representing the maximum view count.
                       Videos that are more popular than that are not
                       downloaded.
                       Videos without view count information are always
                       downloaded. None for no limit.
    download_archive:  A set, or the name of a file where all downloads are recorded.
                       Videos already present in the file are not downloaded again.
    break_on_existing: Stop the download process after attempting to download a
                       file that is in the archive.
    break_on_reject:   Stop the download process when encountering a video that
                       has been filtered out.
    break_per_url:     Whether break_on_reject and break_on_existing
                       should act on each input URL as opposed to for the entire queue
    cookiefile:        File name or text stream from where cookies should be read and dumped to
    cookiesfrombrowser: A tuple containing the name of the browser, the profile
                       name/path from where cookies are loaded, the name of the keyring,
                       and the container name, e.g. ('chrome', ) or
                       ('vivaldi', 'default', 'BASICTEXT') or ('firefox', 'default', None, 'Meta')
    legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
                       support RFC 5746 secure renegotiation
    nocheckcertificate: Do not verify SSL certificates
    client_certificate: Path to client certificate file in PEM format. May include the private key
    client_certificate_key: Path to private key file for client certificate
    client_certificate_password: Password for client certificate private key, if encrypted.
                       If not provided and the key is encrypted, yt-dlp will ask interactively
    prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
                       (Only supported by some extractors)
    http_headers:      A dictionary of custom headers to be used for all requests
    proxy:             URL of the proxy server to use
    geo_verification_proxy: URL of the proxy to use for IP address verification
                       on geo-restricted sites.
    socket_timeout:    Time to wait for unresponsive hosts, in seconds
    bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
    debug_printtraffic:Print out sent and received HTTP traffic
    default_search:    Prepend this string if an input url is not valid.
                       'auto' for elaborate guessing
    encoding:          Use this encoding instead of the system-specified.
    extract_flat:      Whether to resolve and process url_results further
                       * False:     Always process (default)
                       * True:      Never process
                       * 'in_playlist': Do not process inside playlist/multi_video
                       * 'discard': Always process, but don't return the result
                                    from inside playlist/multi_video
                       * 'discard_in_playlist': Same as "discard", but only for
                                    playlists (not multi_video)
    wait_for_video:    If given, wait for scheduled streams to become available.
                       The value should be a tuple containing the range
                       (min_secs, max_secs) to wait between retries
    postprocessors:    A list of dictionaries, each with an entry
                       * key:  The name of the postprocessor. See
                               yt_dlp/postprocessor/__init__.py for a list.
                       * when: When to run the postprocessor. Allowed values are
                               the entries of utils.POSTPROCESS_WHEN
                               Assumed to be 'post_process' if not given
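                       E.g. (an illustrative sketch; the key and its argument
                       are examples, not an exhaustive reference):

                           'postprocessors': [{
                               'key': 'FFmpegExtractAudio',
                               'preferredcodec': 'm4a',
                           }]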
    progress_hooks:    A list of functions that get called on download
                       progress, with a dictionary with the entries
                       * status: One of "downloading", "error", or "finished".
                                 Check this first and ignore unknown values.
                       * info_dict: The extracted info_dict

                       If status is one of "downloading", or "finished", the
                       following properties may also be present:
                       * filename: The final filename (always present)
                       * tmpfilename: The filename we're currently writing to
                       * downloaded_bytes: Bytes on disk
                       * total_bytes: Size of the whole file, None if unknown
                       * total_bytes_estimate: Guess of the eventual file size,
                                               None if unavailable.
                       * elapsed: The number of seconds since download started.
                       * eta: The estimated time in seconds, None if unknown
                       * speed: The download speed in bytes/second, None if
                                unknown
                       * fragment_index: The counter of the currently
                                         downloaded video fragment.
                       * fragment_count: The number of fragments (= individual
                                         files that will be merged)

                       Progress hooks are guaranteed to be called at least once
                       (with status "finished") if the download is successful.
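                       E.g. (a minimal sketch of such a hook):

                           def my_hook(d):
                               if d['status'] == 'finished':
                                   print('Done downloading', d['filename'])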
    postprocessor_hooks: A list of functions that get called on postprocessing
                       progress, with a dictionary with the entries
                       * status: One of "started", "processing", or "finished".
                                 Check this first and ignore unknown values.
                       * postprocessor: Name of the postprocessor
                       * info_dict: The extracted info_dict

                       Progress hooks are guaranteed to be called at least twice
                       (with status "started" and "finished") if the processing is successful.
    merge_output_format: "/" separated list of extensions to use when merging formats.
    final_ext:         Expected final extension; used to detect when the file was
                       already downloaded and converted
    fixup:             Automatically correct known faults of the file.
                       One of:
                       - "never": do nothing
                       - "warn": only emit a warning
                       - "detect_or_warn": check whether we can do anything
                                           about it, warn otherwise (default)
    source_address:    Client-side IP address to bind to.
    sleep_interval_requests: Number of seconds to sleep between requests
                       during extraction
    sleep_interval:    Number of seconds to sleep before each download when
                       used alone or a lower bound of a range for randomized
                       sleep before each download (minimum possible number
                       of seconds to sleep) when used along with
                       max_sleep_interval.
    max_sleep_interval:Upper bound of a range for randomized sleep before each
                       download (maximum possible number of seconds to sleep).
                       Must only be used along with sleep_interval.
                       Actual sleep time will be a random float from range
                       [sleep_interval; max_sleep_interval].
    sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
    listformats:       Print an overview of available video formats and exit.
    list_thumbnails:   Print a table of all thumbnails and exit.
    match_filter:      A function that gets called for every video with the signature
                       (info_dict, *, incomplete: bool) -> Optional[str]
                       For backward compatibility with youtube-dl, the signature
                       (info_dict) -> Optional[str] is also allowed.
                       - If it returns a message, the video is ignored.
                       - If it returns None, the video is downloaded.
                       - If it returns utils.NO_DEFAULT, the user is interactively
                         asked whether to download the video.
                       match_filter_func in utils.py is one example of this.
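                       E.g. (an illustrative sketch of a custom filter):

                           def longer_than_a_minute(info_dict, *, incomplete=False):
                               duration = info_dict.get('duration')
                               if duration and duration < 60:
                                   return 'The video is too short'  # skipped
                               return None  # downloaded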
    no_color:          Do not emit color codes in output.
    geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
                       HTTP header
    geo_bypass_country:
                       Two-letter ISO 3166-2 country code that will be used for
                       explicit geographic restriction bypassing via faking
                       X-Forwarded-For HTTP header
    geo_bypass_ip_block:
                       IP range in CIDR notation that will be used similarly to
                       geo_bypass_country
    external_downloader: A dictionary of protocol keys and the executable of the
                       external downloader to use for it. The allowed protocols
                       are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
                       Set the value to 'native' to use the native downloader
    compat_opts:       Compatibility options. See "Differences in default behavior".
                       The following options do not work when used through the API:
                       filename, abort-on-error, multistreams, no-live-chat, format-sort
                       no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
                       Refer to __init__.py for their implementation
    progress_template: Dictionary of templates for progress outputs.
                       Allowed keys are 'download', 'postprocess',
                       'download-title' (console title) and 'postprocess-title'.
                       The template is mapped on a dictionary with keys 'progress' and 'info'
    retry_sleep_functions: Dictionary of functions that take the number of attempts
                       as argument and return the time to sleep in seconds.
                       Allowed keys are 'http', 'fragment', 'file_access'
    download_ranges:   A callback function that gets called for every video with
                       the signature (info_dict, ydl) -> Iterable[Section].
                       Only the returned sections will be downloaded.
                       Each Section is a dict with the following keys:
                       * start_time: Start time of the section in seconds
                       * end_time: End time of the section in seconds
                       * title: Section title (Optional)
                       * index: Section number (Optional)
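                       E.g. (a minimal sketch that downloads only the first minute):

                           def first_minute(info_dict, ydl):
                               yield {'start_time': 0, 'end_time': 60}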
    force_keyframes_at_cuts: Re-encode the video when downloading ranges to get precise cuts
    noprogress:        Do not print the progress bar
    live_from_start:   Whether to download livestream videos from the start

    The following parameters are not used by YoutubeDL itself; they are used by
    the downloader (see yt_dlp/downloader/common.py):
    nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
    max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
    continuedl, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
    external_downloader_args, concurrent_fragment_downloads.

    The following options are used by the post processors:
    ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
                       to the binary or its containing directory.
    postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
                       and a list of additional command-line arguments for the
                       postprocessor/executable. The dict can also have "PP+EXE" keys
                       which are used when the given exe is used by the given PP.
                       Use 'default' as the name for arguments to be passed to all PP
                       For compatibility with youtube-dl, a single list of args
                       can also be used

    The following options are used by the extractors:
    extractor_retries: Number of times to retry for known errors
    dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
    hls_split_discontinuity: Split HLS playlists to different formats at
                       discontinuities such as ad breaks (default: False)
    extractor_args:    A dictionary of arguments to be passed to the extractors.
                       See "EXTRACTOR ARGUMENTS" for details.
                       E.g. {'youtube': {'skip': ['dash', 'hls']}}
    mark_watched:      Mark videos watched (even with --simulate). Only for YouTube

    The following options are deprecated and may be removed in the future:

    force_generic_extractor: Force downloader to use the generic extractor
                       - Use allowed_extractors = ['generic', 'default']
    playliststart:     - Use playlist_items
                       Playlist item to start at.
    playlistend:       - Use playlist_items
                       Playlist item to end at.
    playlistreverse:   - Use playlist_items
                       Download playlist items in reverse order.
    forceurl:          - Use forceprint
                       Force printing final URL.
    forcetitle:        - Use forceprint
                       Force printing title.
    forceid:           - Use forceprint
                       Force printing ID.
    forcethumbnail:    - Use forceprint
                       Force printing thumbnail URL.
    forcedescription:  - Use forceprint
                       Force printing description.
    forcefilename:     - Use forceprint
                       Force printing final filename.
    forceduration:     - Use forceprint
                       Force printing duration.
    allsubtitles:      - Use subtitleslangs = ['all']
                       Downloads all the subtitles of the video
                       (requires writesubtitles or writeautomaticsub)
    include_ads:       - Doesn't work
                       Download ads as well
    call_home:         - Not implemented
                       Boolean, true iff we are allowed to contact the
                       yt-dlp servers for debugging.
    post_hooks:        - Register a custom postprocessor
                       A list of functions that get called as the final step
                       for each video file, after all postprocessors have been
                       called. The filename will be passed as the only argument.
    hls_prefer_native: - Use external_downloader = {'m3u8': 'native'} or {'m3u8': 'ffmpeg'}.
                       Use the native HLS downloader instead of ffmpeg/avconv
                       if True, otherwise use ffmpeg/avconv if False, otherwise
                       use downloader suggested by extractor if None.
    prefer_ffmpeg:     - avconv support is deprecated
                       If False, use avconv instead of ffmpeg if both are available,
                       otherwise prefer ffmpeg.
    youtube_include_dash_manifest: - Use extractor_args
                       If True (default), DASH manifests and related
                       data will be downloaded and processed by extractor.
                       You can reduce network I/O by disabling it if you don't
                       care about DASH. (only for youtube)
    youtube_include_hls_manifest: - Use extractor_args
                       If True (default), HLS manifests and related
                       data will be downloaded and processed by extractor.
                       You can reduce network I/O by disabling it if you don't
                       care about HLS. (only for youtube)
    """

    _NUMERIC_FIELDS = {
        'width', 'height', 'asr', 'audio_channels', 'fps',
        'tbr', 'abr', 'vbr', 'filesize', 'filesize_approx',
        'timestamp', 'release_timestamp',
        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
        'average_rating', 'comment_count', 'age_limit',
        'start_time', 'end_time',
        'chapter_number', 'season_number', 'episode_number',
        'track_number', 'disc_number', 'release_year',
    }

    _format_fields = {
        # NB: Keep in sync with the docstring of extractor/common.py
        'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note',
        'width', 'height', 'aspect_ratio', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr', 'audio_channels',
        'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx', 'rows', 'columns',
        'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start',
        'preference', 'language', 'language_preference', 'quality', 'source_preference',
        'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'downloader_options',
        'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'
    }
    _format_selection_exts = {
        'audio': set(MEDIA_EXTENSIONS.common_audio),
        'video': set(MEDIA_EXTENSIONS.common_video + ('3gp', )),
        'storyboards': set(MEDIA_EXTENSIONS.storyboards),
    }

    def __init__(self, params=None, auto_init=True):
        """Create a YoutubeDL object with the given options.
        @param auto_init    Whether to load the default extractors and print header (if verbose).
                            Set to 'no_verbose_header' to not print the header
        """
        if params is None:
            params = {}
        self.params = params
        self._ies = {}
        self._ies_instances = {}
        self._pps = {k: [] for k in POSTPROCESS_WHEN}
        self._printed_messages = set()
        self._first_webpage_request = True
        self._post_hooks = []
        self._progress_hooks = []
        self._postprocessor_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        self._num_videos = 0
        self._playlist_level = 0
        self._playlist_urls = set()
        self.cache = Cache(self)

        windows_enable_vt_mode()
        stdout = sys.stderr if self.params.get('logtostderr') else sys.stdout
        self._out_files = Namespace(
            out=stdout,
            error=sys.stderr,
            screen=sys.stderr if self.params.get('quiet') else stdout,
            console=None if compat_os_name == 'nt' else next(
                filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None)
        )
        self._allow_colors = Namespace(**{
            type_: not self.params.get('no_color') and supports_terminal_sequences(stream)
            for type_, stream in self._out_files.items_ if type_ != 'console'
        })

        # The code is left like this to be reused for future deprecations
        MIN_SUPPORTED, MIN_RECOMMENDED = (3, 7), (3, 7)
        current_version = sys.version_info[:2]
        if current_version < MIN_RECOMMENDED:
            msg = ('Support for Python version %d.%d has been deprecated. '
                   'See https://github.com/yt-dlp/yt-dlp/issues/3764 for more details.'
                   '\n You will no longer receive updates on this version')
            if current_version < MIN_SUPPORTED:
                msg = 'Python version %d.%d is no longer supported'
            self.deprecation_warning(
                f'{msg}! Please update to Python %d.%d or above' % (*current_version, *MIN_RECOMMENDED))

        if self.params.get('allow_unplayable_formats'):
            self.report_warning(
                f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
                'This is a developer option intended for debugging. \n'
                ' If you experience any issues while using this option, '
                f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')

        if self.params.get('bidi_workaround', False):
            try:
                import pty
                master, slave = pty.openpty()
                width = shutil.get_terminal_size().columns
                width_args = [] if width is None else ['-w', str(width)]
                sp_kwargs = {'stdin': subprocess.PIPE, 'stdout': slave, 'stderr': self._out_files.error}
                try:
                    self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
                except OSError:
                    self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == errno.ENOENT:
                    self.report_warning(
                        'Could not find fribidi executable, ignoring --bidi-workaround. '
                        'Make sure that fribidi is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        self.params['compat_opts'] = set(self.params.get('compat_opts', ()))
        if auto_init and auto_init != 'no_verbose_header':
            self.print_debug_header()

        def check_deprecated(param, option, suggestion):
            if self.params.get(param) is not None:
                self.report_warning(f'{option} is deprecated. Use {suggestion} instead')
                return True
            return False

        if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
        check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
        check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

        for msg in self.params.get('_warnings', []):
            self.report_warning(msg)
        for msg in self.params.get('_deprecation_warnings', []):
            self.deprecated_feature(msg)

        if 'list-formats' in self.params['compat_opts']:
            self.params['listformats_table'] = False

        if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
            # nooverwrites was unnecessarily changed to overwrites
            # in 0c3d0f51778b153f65c21906031c2e091fcfb641
            # This ensures compatibility with both keys
            self.params['overwrites'] = not self.params['nooverwrites']
        elif self.params.get('overwrites') is None:
            self.params.pop('overwrites', None)
        else:
            self.params['nooverwrites'] = not self.params['overwrites']

        if self.params.get('simulate') is None and any((
            self.params.get('list_thumbnails'),
            self.params.get('listformats'),
            self.params.get('listsubtitles'),
        )):
            self.params['simulate'] = 'list_only'

        self.params.setdefault('forceprint', {})
        self.params.setdefault('print_to_file', {})

        # Compatibility with older syntax
        if not isinstance(params['forceprint'], dict):
            self.params['forceprint'] = {'video': params['forceprint']}

        if auto_init:
            self.add_default_info_extractors()

        if (sys.platform != 'win32'
                and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not self.params.get('restrictfilenames', False)):
            # Unicode filesystem API will throw errors (#1474, #13027)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        self._parse_outtmpl()

        # Creating format selector here allows us to catch syntax errors before the extraction
        self.format_selector = (
            self.params.get('format') if self.params.get('format') in (None, '-')
            else self.params['format'] if callable(self.params['format'])
            else self.build_format_selector(self.params['format']))

        # Set http_headers defaults according to std_headers
        self.params['http_headers'] = merge_headers(std_headers, self.params.get('http_headers', {}))

        hooks = {
            'post_hooks': self.add_post_hook,
            'progress_hooks': self.add_progress_hook,
            'postprocessor_hooks': self.add_postprocessor_hook,
        }
        for opt, fn in hooks.items():
            for ph in self.params.get(opt, []):
                fn(ph)

        for pp_def_raw in self.params.get('postprocessors', []):
            pp_def = dict(pp_def_raw)
            when = pp_def.pop('when', 'post_process')
            self.add_post_processor(
                get_postprocessor(pp_def.pop('key'))(self, **pp_def),
                when=when)

        self._setup_opener()
        register_socks_protocols()

        def preload_download_archive(fn):
            """Preload the archive, if any is specified"""
            archive = set()
            if fn is None:
                return archive
            elif not is_path_like(fn):
                return fn

            self.write_debug(f'Loading archive file {fn!r}')
            try:
                with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                    for line in archive_file:
                        archive.add(line.strip())
            except OSError as ioe:
                if ioe.errno != errno.ENOENT:
                    raise
            return archive

        self.archive = preload_download_archive(self.params.get('download_archive'))

    def warn_if_short_id(self, argv):
        # short YouTube ID starting with dash?
        idxs = [
            i for i, a in enumerate(argv)
            if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
        if idxs:
            correct_argv = (
                ['yt-dlp']
                + [a for i, a in enumerate(argv) if i not in idxs]
                + ['--'] + [argv[i] for i in idxs]
            )
            self.report_warning(
                'Long argument string detected. '
                'Use -- to separate parameters and URLs, like this:\n%s' %
                args_to_str(correct_argv))

    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        ie_key = ie.ie_key()
        self._ies[ie_key] = ie
        if not isinstance(ie, type):
            self._ies_instances[ie_key] = ie
            ie.set_downloader(self)

    def get_info_extractor(self, ie_key):
        """
        Get an instance of an IE with name ie_key; it will try to get one from
        the _ies list, and if there's no instance it will create a new one and
        add it to the extractor list.
        """
        ie = self._ies_instances.get(ie_key)
        if ie is None:
            ie = get_info_extractor(ie_key)()
            self.add_info_extractor(ie)
        return ie

    def add_default_info_extractors(self):
        """
        Add the InfoExtractors returned by gen_extractors to the end of the list
        """
        all_ies = {ie.IE_NAME.lower(): ie for ie in gen_extractor_classes()}
        all_ies['end'] = UnsupportedURLIE()
        try:
            ie_names = orderedSet_from_options(
                self.params.get('allowed_extractors', ['default']), {
                    'all': list(all_ies),
                    'default': [name for name, ie in all_ies.items() if ie._ENABLED],
                }, use_regex=True)
        except re.error as e:
            raise ValueError(f'Wrong regex for allowed_extractors: {e.pattern}')
        for name in ie_names:
            self.add_info_extractor(all_ies[name])
        self.write_debug(f'Loaded {len(ie_names)} extractors')

    def add_post_processor(self, pp, when='post_process'):
        """Add a PostProcessor object to the end of the chain."""
        assert when in POSTPROCESS_WHEN, f'Invalid when={when}'
        self._pps[when].append(pp)
        pp.set_downloader(self)

    def add_post_hook(self, ph):
        """Add the post hook"""
        self._post_hooks.append(ph)

    def add_progress_hook(self, ph):
        """Add the download progress hook"""
        self._progress_hooks.append(ph)

    def add_postprocessor_hook(self, ph):
        """Add the postprocessing progress hook"""
        self._postprocessor_hooks.append(ph)
        for pps in self._pps.values():
            for pp in pps:
                pp.add_progress_hook(ph)

    def _bidi_workaround(self, message):
        if not hasattr(self, '_output_channel'):
            return message

        assert hasattr(self, '_output_process')
        assert isinstance(message, str)
        line_count = message.count('\n') + 1
        self._output_process.stdin.write((message + '\n').encode())
        self._output_process.stdin.flush()
        res = ''.join(self._output_channel.readline().decode()
                      for _ in range(line_count))
        return res[:-len('\n')]

    def _write_string(self, message, out=None, only_once=False):
        if only_once:
            if message in self._printed_messages:
                return
            self._printed_messages.add(message)
        write_string(message, out=out, encoding=self.params.get('encoding'))

    def to_stdout(self, message, skip_eol=False, quiet=None):
        """Print message to stdout"""
        if quiet is not None:
            self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument quiet. '
                                     'Use "YoutubeDL.to_screen" instead')
        if skip_eol is not False:
            self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument skip_eol. '
                                     'Use "YoutubeDL.to_screen" instead')
        self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.out)

    def to_screen(self, message, skip_eol=False, quiet=None, only_once=False):
        """Print message to screen if not in quiet mode"""
        if self.params.get('logger'):
            self.params['logger'].debug(message)
            return
        if (self.params.get('quiet') if quiet is None else quiet) and not self.params.get('verbose'):
            return
        self._write_string(
            '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
            self._out_files.screen, only_once=only_once)

    def to_stderr(self, message, only_once=False):
        """Print message to stderr"""
        assert isinstance(message, str)
        if self.params.get('logger'):
            self.params['logger'].error(message)
        else:
            self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.error, only_once=only_once)

    def _send_console_code(self, code):
        if compat_os_name == 'nt' or not self._out_files.console:
            return
        self._write_string(code, self._out_files.console)

    def to_console_title(self, message):
        if not self.params.get('consoletitle', False):
            return
        message = remove_terminal_sequences(message)
        if compat_os_name == 'nt':
            if ctypes.windll.kernel32.GetConsoleWindow():
                # c_wchar_p() might not be necessary if `message` is
                # already of type unicode()
                ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        else:
            self._send_console_code(f'\033]0;{message}\007')

    def save_console_title(self):
        if not self.params.get('consoletitle') or self.params.get('simulate'):
            return
        self._send_console_code('\033[22;0t')  # Save the title on stack

    def restore_console_title(self):
        if not self.params.get('consoletitle') or self.params.get('simulate'):
            return
        self._send_console_code('\033[23;0t')  # Restore the title from stack

    def __enter__(self):
        self.save_console_title()
        return self

    def __exit__(self, *args):
        self.restore_console_title()

        if self.params.get('cookiefile') is not None:
            self.cookiejar.save(ignore_discard=True, ignore_expires=True)

    def trouble(self, message=None, tb=None, is_error=True):
        """Determine action to take when a download problem appears.

        Depending on whether the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        @param tb          If given, is additional traceback information
        @param is_error    Whether to raise error according to ignoreerrors
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += encode_compat_str(traceback.format_exc())
                else:
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            if tb:
                self.to_stderr(tb)
        if not is_error:
            return
        if not self.params.get('ignoreerrors'):
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1

    Styles = Namespace(
        HEADERS='yellow',
        EMPHASIS='light blue',
        FILENAME='green',
        ID='green',
        DELIM='blue',
        ERROR='red',
        WARNING='yellow',
        SUPPRESS='light black',
    )

    def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
        text = str(text)
        if test_encoding:
            original_text = text
            # handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711
            encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii'
            text = text.encode(encoding, 'ignore').decode(encoding)
            if fallback is not None and text != original_text:
                text = fallback
        return format_text(text, f) if allow_colors else text if fallback is None else fallback

    def _format_out(self, *args, **kwargs):
        return self._format_text(self._out_files.out, self._allow_colors.out, *args, **kwargs)

    def _format_screen(self, *args, **kwargs):
        return self._format_text(self._out_files.screen, self._allow_colors.screen, *args, **kwargs)

    def _format_err(self, *args, **kwargs):
        return self._format_text(self._out_files.error, self._allow_colors.error, *args, **kwargs)

    def report_warning(self, message, only_once=False):
        '''
        Print the message to stderr; it will be prefixed with 'WARNING:'.
        If stderr is a tty file, the 'WARNING:' will be colored.
        '''
        if self.params.get('logger') is not None:
            self.params['logger'].warning(message)
        else:
            if self.params.get('no_warnings'):
                return
            self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)

    def deprecation_warning(self, message, *, stacklevel=0):
        deprecation_warning(
            message, stacklevel=stacklevel + 1, printer=self.report_error, is_error=False)

    def deprecated_feature(self, message):
        if self.params.get('logger') is not None:
            self.params['logger'].warning(f'Deprecated Feature: {message}')
        self.to_stderr(f'{self._format_err("Deprecated Feature:", self.Styles.ERROR)} {message}', True)

    def report_error(self, message, *args, **kwargs):
        '''
        Do the same as trouble, but prefixes the message with 'ERROR:', colored
        in red if stderr is a tty file.
        '''
        self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)

    def write_debug(self, message, only_once=False):
        '''Log a debug message, or print it to stderr'''
        if not self.params.get('verbose', False):
            return
        message = f'[debug] {message}'
        if self.params.get('logger'):
            self.params['logger'].debug(message)
        else:
            self.to_stderr(message, only_once)

    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        try:
            self.to_screen('[download] %s has already been downloaded' % file_name)
        except UnicodeEncodeError:
            self.to_screen('[download] The file has already been downloaded')

    def report_file_delete(self, file_name):
        """Report that existing file will be deleted."""
        try:
            self.to_screen('Deleting existing file %s' % file_name)
        except UnicodeEncodeError:
            self.to_screen('Deleting existing file')

    def raise_no_formats(self, info, forced=False, *, msg=None):
        has_drm = info.get('_has_drm')
        ignored, expected = self.params.get('ignore_no_formats_error'), bool(msg)
        msg = msg or has_drm and 'This video is DRM protected' or 'No video formats found!'
        if forced or not ignored:
            raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
                                 expected=has_drm or ignored or expected)
        else:
            self.report_warning(msg)

    def parse_outtmpl(self):
        self.deprecation_warning('"YoutubeDL.parse_outtmpl" is deprecated and may be removed in a future version')
        self._parse_outtmpl()
        return self.params['outtmpl']

    def _parse_outtmpl(self):
        sanitize = IDENTITY
        if self.params.get('restrictfilenames'):  # Remove spaces in the default template
            sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')

        outtmpl = self.params.setdefault('outtmpl', {})
        if not isinstance(outtmpl, dict):
            self.params['outtmpl'] = outtmpl = {'default': outtmpl}
        outtmpl.update({k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items() if outtmpl.get(k) is None})

    def get_output_path(self, dir_type='', filename=None):
        paths = self.params.get('paths', {})
        assert isinstance(paths, dict), '"paths" parameter must be a dictionary'
        path = os.path.join(
            expand_path(paths.get('home', '').strip()),
            expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
            filename or '')
        return sanitize_path(path, force=self.params.get('windowsfilenames'))

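    # Illustrative behaviour of get_output_path (an assumed example, not a test case):
    # with params['paths'] == {'home': '/videos', 'thumbnail': 'thumbs'},
    # get_output_path('thumbnail', 'a.jpg') resolves to '/videos/thumbs/a.jpg'.
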
    @staticmethod
    def _outtmpl_expandpath(outtmpl):
        # expand_path translates '%%' into '%' and '$$' into '$'
        # correspondingly that is not what we want since we need to keep
        # '%%' intact for template dict substitution step. Working around
        # with boundary-alike separator hack.
        sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
        outtmpl = outtmpl.replace('%%', f'%{sep}%').replace('$$', f'${sep}$')

        # outtmpl should be expand_path'ed before template dict substitution
        # because meta fields may contain env variables we don't want to
        # be expanded. E.g. for outtmpl "%(title)s.%(ext)s" and
        # title "Hello $PATH", we don't want `$PATH` to be expanded.
        return expand_path(outtmpl).replace(sep, '')

    @staticmethod
    def escape_outtmpl(outtmpl):
        ''' Escape any remaining strings like %s, %abc% etc. '''
        return re.sub(
            STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
            lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
            outtmpl)

    @classmethod
    def validate_outtmpl(cls, outtmpl):
        ''' @return None or Exception object '''
        outtmpl = re.sub(
            STR_FORMAT_RE_TMPL.format('[^)]*', '[ljhqBUDS]'),
            lambda mobj: f'{mobj.group(0)[:-1]}s',
            cls._outtmpl_expandpath(outtmpl))
        try:
            cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
            return None
        except ValueError as err:
            return err

    @staticmethod
    def _copy_infodict(info_dict):
        info_dict = dict(info_dict)
        info_dict.pop('__postprocessors', None)
        info_dict.pop('__pending_error', None)
        return info_dict

    def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
        """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
        @param sanitize    Whether to sanitize the output as a filename.
                           For backward compatibility, a function can also be passed
        """

        info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

        info_dict = self._copy_infodict(info_dict)
        info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
            formatSeconds(info_dict['duration'], '-' if sanitize else ':')
            if info_dict.get('duration', None) is not None
            else None)
        info_dict['autonumber'] = int(self.params.get('autonumber_start', 1) - 1 + self._num_downloads)
        info_dict['video_autonumber'] = self._num_videos
        if info_dict.get('resolution') is None:
            info_dict['resolution'] = self.format_resolution(info_dict, default=None)

        # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
        # of %(field)s to %(field)0Nd for backward compatibility
        field_size_compat_map = {
            'playlist_index': number_of_digits(info_dict.get('__last_playlist_index') or 0),
            'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
            'autonumber': self.params.get('autonumber_size') or 5,
        }

        TMPL_DICT = {}
        EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljhqBUDS]'))
        MATH_FUNCTIONS = {
            '+': float.__add__,
            '-': float.__sub__,
        }
        # Field is of the form key1.key2...
        # where keys (except first) can be string, int, slice or "{field, ...}"
        FIELD_INNER_RE = r'(?:\w+|%(num)s|%(num)s?(?::%(num)s?){1,2})' % {'num': r'(?:-?\d+)'}
        FIELD_RE = r'\w*(?:\.(?:%(inner)s|{%(field)s(?:,%(field)s)*}))*' % {
            'inner': FIELD_INNER_RE,
            'field': rf'\w*(?:\.{FIELD_INNER_RE})*'
        }
        MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})'
        MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
        INTERNAL_FORMAT_RE = re.compile(rf'''(?x)
            (?P<negate>-)?
            (?P<fields>{FIELD_RE})
            (?P<maths>(?:{MATH_OPERATORS_RE}{MATH_FIELD_RE})*)
            (?:>(?P<strf_format>.+?))?
            (?P<remaining>
                (?P<alternate>(?<!\\),[^|&)]+)?
                (?:&(?P<replacement>.*?))?
                (?:\|(?P<default>.*?))?
            )$''')
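        # Example fields this template regex is meant to accept (an illustrative
        # sketch; the authoritative syntax is documented under "OUTPUT TEMPLATE"):
        #   'title'                     plain field
        #   'playlist_index+10'         maths offset on a numeric field
        #   'upload_date>%Y-%m-%d'      strftime-style date formatting
        #   'uploader,creator|Unknown'  alternate fields with a literal default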

        def _traverse_infodict(fields):
            fields = [f for x in re.split(r'\.({.+?})\.?', fields)
                      for f in ([x] if x.startswith('{') else x.split('.'))]
            for i in (0, -1):
                if fields and not fields[i]:
                    fields.pop(i)

            for i, f in enumerate(fields):
                if not f.startswith('{'):
                    continue
                assert f.endswith('}'), f'No closing brace for {f} in {fields}'
                fields[i] = {k: k.split('.') for k in f[1:-1].split(',')}

            return traverse_obj(info_dict, fields, is_user_input=True, traverse_string=True)

        def get_value(mdict):
            # Object traversal
            value = _traverse_infodict(mdict['fields'])
            # Negative
            if mdict['negate']:
                value = float_or_none(value)
                if value is not None:
                    value *= -1
            # Do maths
            offset_key = mdict['maths']
            if offset_key:
                value = float_or_none(value)
                operator = None
                while offset_key:
                    item = re.match(
                        MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                        offset_key).group(0)
                    offset_key = offset_key[len(item):]
                    if operator is None:
                        operator = MATH_FUNCTIONS[item]
                        continue
                    item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                    offset = float_or_none(item)
                    if offset is None:
                        offset = float_or_none(_traverse_infodict(item))
                    try:
                        value = operator(value, multiplier * offset)
                    except (TypeError, ZeroDivisionError):
                        return None
                    operator = None
            # Datetime formatting
            if mdict['strf_format']:
                value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

            # XXX: Workaround for https://github.com/yt-dlp/yt-dlp/issues/4485
            if sanitize and value == '':
                value = None
            return value

        na = self.params.get('outtmpl_na_placeholder', 'NA')

        def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
            return sanitize_filename(str(value), restricted=restricted, is_id=(
                bool(re.search(r'(^|[_.])id(\.|$)', key))
                if 'filename-sanitization' in self.params['compat_opts']
                else NO_DEFAULT))

        sanitizer = sanitize if callable(sanitize) else filename_sanitizer
        sanitize = bool(sanitize)

        def _dumpjson_default(obj):
            if isinstance(obj, (set, LazyList)):
                return list(obj)
            return repr(obj)

        def create_key(outer_mobj):
            if not outer_mobj.group('has_key'):
                return outer_mobj.group(0)
            key = outer_mobj.group('key')
            mobj = re.match(INTERNAL_FORMAT_RE, key)
            initial_field = mobj.group('fields') if mobj else ''
            value, replacement, default = None, None, na
            while mobj:
                mobj = mobj.groupdict()
                default = mobj['default'] if mobj['default'] is not None else default
                value = get_value(mobj)
                replacement = mobj['replacement']
                if value is None and mobj['alternate']:
                    mobj = re.match(INTERNAL_FORMAT_RE, mobj['remaining'][1:])
                else:
                    break

            fmt = outer_mobj.group('format')
            if fmt == 's' and value is not None and key in field_size_compat_map.keys():
                fmt = f'0{field_size_compat_map[key]:d}d'

            value = default if value is None else value if replacement is None else replacement

            flags = outer_mobj.group('conversion') or ''
            str_fmt = f'{fmt[:-1]}s'
            if fmt[-1] == 'l':  # list
                delim = '\n' if '#' in flags else ', '
                value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
            elif fmt[-1] == 'j':  # json
                value, fmt = json.dumps(
                    value, default=_dumpjson_default,
                    indent=4 if '#' in flags else None, ensure_ascii='+' not in flags), str_fmt
            elif fmt[-1] == 'h':  # html
                value, fmt = escapeHTML(str(value)), str_fmt
            elif fmt[-1] == 'q':  # quoted
                value = map(str, variadic(value) if '#' in flags else [value])
                value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
            elif fmt[-1] == 'B':  # bytes
                value = f'%{str_fmt}'.encode() % str(value).encode()
                value, fmt = value.decode('utf-8', 'ignore'), 's'
            elif fmt[-1] == 'U':  # unicode normalized
                value, fmt = unicodedata.normalize(
                    # "+" = compatibility equivalence, "#" = NFD
                    'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
                    value), str_fmt
            elif fmt[-1] == 'D':  # decimal suffix
                num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
                value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
                                              factor=1024 if '#' in flags else 1000)
            elif fmt[-1] == 'S':  # filename sanitization
                value, fmt = filename_sanitizer(initial_field, value, restricted='#' in flags), str_fmt
            elif fmt[-1] == 'c':
                if value:
                    value = str(value)[0]
                else:
                    fmt = str_fmt
            elif fmt[-1] not in 'rs':  # numeric
                value = float_or_none(value)
                if value is None:
                    value, fmt = default, 's'

            if sanitize:
                if fmt[-1] == 'r':
1293 # If value is an object, sanitize might convert it to a string
1294 # So we convert it to repr first
7d1eb38a 1295 value, fmt = repr(value), str_fmt
639f1cea 1296 if fmt[-1] in 'csr':
e0fd9573 1297 value = sanitizer(initial_field, value)
901130bb 1298
b868936c 1299 key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
385a27fa 1300 TMPL_DICT[key] = value
b868936c 1301 return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))
752cda38 1302
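# A few of the conversion suffixes handled by create_key above, shown as
# illustrative outtmpl snippets (behaviour as documented for output templates):
#   %(tags)l    comma-separated list      %(tags)#l  newline-separated list
#   %(formats)j JSON dump                 %(title)q  shell-quoted
#   %(filesize)D decimal suffix (e.g. 10M) %(title)S  filename-sanitized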
385a27fa 1303 return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
143db31d 1304
819e0531 1305 def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
1306 outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
1307 return self.escape_outtmpl(outtmpl) % info_dict
1308
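# Minimal usage sketch (illustrative; assumes a constructed YoutubeDL instance
# `ydl` and a fully populated info_dict):
#   ydl.evaluate_outtmpl('%(title)s [%(id)s].%(ext)s', info_dict)
#   -> e.g. 'Some Video [abc123].mp4'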
5127e92a 1309 def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None):
1310 assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive'
1311 if outtmpl is None:
bf1824b3 1312 outtmpl = self.params['outtmpl'].get(tmpl_type or 'default', self.params['outtmpl']['default'])
8222d8de 1313 try:
5127e92a 1314 outtmpl = self._outtmpl_expandpath(outtmpl)
e0fd9573 1315 filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
6a0546e3 1316 if not filename:
1317 return None
15da37c7 1318
5127e92a 1319 if tmpl_type in ('', 'temp'):
6a0546e3 1320 final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')
1321 if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):
1322 filename = replace_extension(filename, ext, final_ext)
5127e92a 1323 elif tmpl_type:
6a0546e3 1324 force_ext = OUTTMPL_TYPES[tmpl_type]
1325 if force_ext:
1326 filename = replace_extension(filename, force_ext, info_dict.get('ext'))
de6000d9 1327
bdc3fd2f
U
1328 # https://github.com/blackjack4494/youtube-dlc/issues/85
1329 trim_file_name = self.params.get('trim_file_name', False)
1330 if trim_file_name:
5c22c63d 1331 no_ext, *ext = filename.rsplit('.', 2)
1332 filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')
bdc3fd2f 1333
0202b52a 1334 return filename
8222d8de 1335 except ValueError as err:
6febd1c1 1336 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
8222d8de
JMF
1337 return None
1338
5127e92a 1339 def prepare_filename(self, info_dict, dir_type='', *, outtmpl=None, warn=False):
1340 """Generate the output filename"""
1341 if outtmpl:
1342 assert not dir_type, 'outtmpl and dir_type are mutually exclusive'
1343 dir_type = None
1344 filename = self._prepare_filename(info_dict, tmpl_type=dir_type, outtmpl=outtmpl)
80c03fa9 1345 if not filename and dir_type not in ('', 'temp'):
1346 return ''
de6000d9 1347
c84aeac6 1348 if warn:
21cd8fae 1349 if not self.params.get('paths'):
de6000d9 1350 pass
1351 elif filename == '-':
c84aeac6 1352 self.report_warning('--paths is ignored when outputting to stdout', only_once=True)
de6000d9 1353 elif os.path.isabs(filename):
c84aeac6 1354 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
de6000d9 1355 if filename == '-' or not filename:
1356 return filename
1357
21cd8fae 1358 return self.get_output_path(dir_type, filename)
0202b52a 1359
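# Usage sketch (illustrative): prepare_filename renders the configured
# 'outtmpl' and joins it with any --paths entry for the given dir_type, e.g.
#   ydl.prepare_filename(info_dict)               -> final download path
#   ydl.prepare_filename(info_dict, 'thumbnail')  -> path for the thumbnail file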
120fe513 1360 def _match_entry(self, info_dict, incomplete=False, silent=False):
6368e2e6 1361 """Returns None if the file should be downloaded"""
d7b460d0 1362 _type = info_dict.get('_type', 'video')
1363 assert incomplete or _type == 'video', 'Only video result can be considered complete'
8222d8de 1364
3bec830a 1365 video_title = info_dict.get('title', info_dict.get('id', 'entry'))
c77495e3 1366
8b0d7497 1367 def check_filter():
d7b460d0 1368 if _type in ('playlist', 'multi_video'):
1369 return
1370 elif _type in ('url', 'url_transparent') and not try_call(
1371 lambda: self.get_info_extractor(info_dict['ie_key']).is_single_video(info_dict['url'])):
1372 return
1373
8b0d7497 1374 if 'title' in info_dict:
1375 # This can happen when we're just evaluating the playlist
1376 title = info_dict['title']
1377 matchtitle = self.params.get('matchtitle', False)
1378 if matchtitle:
1379 if not re.search(matchtitle, title, re.IGNORECASE):
1380 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1381 rejecttitle = self.params.get('rejecttitle', False)
1382 if rejecttitle:
1383 if re.search(rejecttitle, title, re.IGNORECASE):
1384 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
6368e2e6 1385
8b0d7497 1386 date = info_dict.get('upload_date')
1387 if date is not None:
1388 dateRange = self.params.get('daterange', DateRange())
1389 if date not in dateRange:
86e5f3ed 1390 return f'{date_from_str(date).isoformat()} upload date is not in range {dateRange}'
8b0d7497 1391 view_count = info_dict.get('view_count')
1392 if view_count is not None:
1393 min_views = self.params.get('min_views')
1394 if min_views is not None and view_count < min_views:
1395 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1396 max_views = self.params.get('max_views')
1397 if max_views is not None and view_count > max_views:
1398 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1399 if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1400 return 'Skipping "%s" because it is age restricted' % video_title
8b0d7497 1401
8f18aca8 1402 match_filter = self.params.get('match_filter')
1403 if match_filter is not None:
1404 try:
1405 ret = match_filter(info_dict, incomplete=incomplete)
1406 except TypeError:
1407 # For backward compatibility
1408 ret = None if incomplete else match_filter(info_dict)
492272fe 1409 if ret is NO_DEFAULT:
1410 while True:
1411 filename = self._format_screen(self.prepare_filename(info_dict), self.Styles.FILENAME)
1412 reply = input(self._format_screen(
1413 f'Download "{filename}"? (Y/n): ', self.Styles.EMPHASIS)).lower().strip()
1414 if reply in {'y', ''}:
1415 return None
1416 elif reply == 'n':
1417 return f'Skipping {video_title}'
492272fe 1418 elif ret is not None:
8f18aca8 1419 return ret
8b0d7497 1420 return None
1421
c77495e3 1422 if self.in_download_archive(info_dict):
1423 reason = '%s has already been recorded in the archive' % video_title
1424 break_opt, break_err = 'break_on_existing', ExistingVideoReached
1425 else:
1426 reason = check_filter()
1427 break_opt, break_err = 'break_on_reject', RejectedVideoReached
8b0d7497 1428 if reason is not None:
120fe513 1429 if not silent:
1430 self.to_screen('[download] ' + reason)
c77495e3 1431 if self.params.get(break_opt, False):
1432 raise break_err()
8b0d7497 1433 return reason
fe7e0c98 1434
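# Illustrative match_filter callable (hypothetical user code) as consumed by
# check_filter above: returning None keeps the item, a string skips it with
# that message, and NO_DEFAULT triggers the interactive prompt:
#   def skip_long_videos(info_dict, *, incomplete=False):
#       if not incomplete and (info_dict.get('duration') or 0) > 600:
#           return f'{info_dict["id"]} is longer than 10 minutes'
#       return None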
b6c45014
JMF
1435 @staticmethod
1436 def add_extra_info(info_dict, extra_info):
1437 '''Set the keys from extra_info in info dict if they are missing'''
1438 for key, value in extra_info.items():
1439 info_dict.setdefault(key, value)
1440
409e1828 1441 def extract_info(self, url, download=True, ie_key=None, extra_info=None,
61aa5ba3 1442 process=True, force_generic_extractor=False):
41d1cca3 1443 """
17ffed18 1444 Extract and return the information dictionary of the URL
41d1cca3 1445
1446 Arguments:
17ffed18 1447 @param url URL to extract
41d1cca3 1448
1449 Keyword arguments:
17ffed18 1450 @param download Whether to download videos
1451 @param process Whether to resolve all unresolved references (URLs, playlist items).
1452 Must be True for download to work
1453 @param ie_key Use only the extractor with this key
1454
1455 @param extra_info Dictionary containing the extra values to add to the info (For internal use only)
1456 @force_generic_extractor Force using the generic extractor (Deprecated; use ie_key='Generic')
41d1cca3 1457 """
fe7e0c98 1458
409e1828 1459 if extra_info is None:
1460 extra_info = {}
1461
61aa5ba3 1462 if not ie_key and force_generic_extractor:
d22dec74
S
1463 ie_key = 'Generic'
1464
8222d8de 1465 if ie_key:
fe7866d0 1466 ies = {ie_key: self._ies[ie_key]} if ie_key in self._ies else {}
8222d8de
JMF
1467 else:
1468 ies = self._ies
1469
fe7866d0 1470 for key, ie in ies.items():
8222d8de
JMF
1471 if not ie.suitable(url):
1472 continue
1473
1474 if not ie.working():
6febd1c1
PH
1475 self.report_warning('The program functionality for this site has been marked as broken, '
1476 'and will probably not work.')
8222d8de 1477
1151c407 1478 temp_id = ie.get_temp_id(url)
fe7866d0 1479 if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': key}):
1480 self.to_screen(f'[{key}] {temp_id}: has already been recorded in the archive')
5e5be0c0 1481 if self.params.get('break_on_existing', False):
1482 raise ExistingVideoReached()
a0566bbf 1483 break
fe7866d0 1484 return self.__extract_info(url, self.get_info_extractor(key), download, extra_info, process)
a0566bbf 1485 else:
fe7866d0 1486 extractors_restricted = self.params.get('allowed_extractors') not in (None, ['default'])
1487 self.report_error(f'No suitable extractor{format_field(ie_key, None, " (%s)")} found for URL {url}',
1488 tb=False if extractors_restricted else None)
a0566bbf 1489
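# Usage sketch (illustrative): metadata-only extraction, roughly what
# `yt-dlp --dump-json <url>` does internally (the URL is a placeholder):
#   with YoutubeDL({'skip_download': True}) as ydl:
#       info = ydl.extract_info('https://example.com/watch?v=xyz', download=False)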
7e88d7d7 1490 def _handle_extraction_exceptions(func):
b5ae35ee 1491 @functools.wraps(func)
a0566bbf 1492 def wrapper(self, *args, **kwargs):
6da22e7d 1493 while True:
1494 try:
1495 return func(self, *args, **kwargs)
1496 except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
8222d8de 1497 raise
6da22e7d 1498 except ReExtractInfo as e:
1499 if e.expected:
1500 self.to_screen(f'{e}; Re-extracting data')
1501 else:
1502 self.to_stderr('\r')
1503 self.report_warning(f'{e}; Re-extracting data')
1504 continue
1505 except GeoRestrictedError as e:
1506 msg = e.msg
1507 if e.countries:
1508 msg += '\nThis video is available in %s.' % ', '.join(
1509 map(ISO3166Utils.short2full, e.countries))
1510 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to work around this.'
1511 self.report_error(msg)
1512 except ExtractorError as e: # An error we somewhat expected
1513 self.report_error(str(e), e.format_traceback())
1514 except Exception as e:
1515 if self.params.get('ignoreerrors'):
1516 self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
1517 else:
1518 raise
1519 break
a0566bbf 1520 return wrapper
1521
693f0600 1522 def _wait_for_video(self, ie_result={}):
f2ebc5c7 1523 if (not self.params.get('wait_for_video')
1524 or ie_result.get('_type', 'video') != 'video'
1525 or ie_result.get('formats') or ie_result.get('url')):
1526 return
1527
1528 format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
1529 last_msg = ''
1530
1531 def progress(msg):
1532 nonlocal last_msg
a7dc6a89 1533 full_msg = f'{msg}\n'
1534 if not self.params.get('noprogress'):
1535 full_msg = msg + ' ' * (len(last_msg) - len(msg)) + '\r'
1536 elif last_msg:
1537 return
1538 self.to_screen(full_msg, skip_eol=True)
f2ebc5c7 1539 last_msg = msg
1540
1541 min_wait, max_wait = self.params.get('wait_for_video')
1542 diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
1543 if diff is None and ie_result.get('live_status') == 'is_upcoming':
16c620bc 1544 diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0)
f2ebc5c7 1545 self.report_warning('Release time of video is not known')
693f0600 1546 elif ie_result and (diff or 0) <= 0:
f2ebc5c7 1547 self.report_warning('Video should already be available according to extracted info')
38d79fd1 1548 diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
f2ebc5c7 1549 self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1550
1551 wait_till = time.time() + diff
1552 try:
1553 while True:
1554 diff = wait_till - time.time()
1555 if diff <= 0:
1556 progress('')
1557 raise ReExtractInfo('[wait] Wait period ended', expected=True)
1558 progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1559 time.sleep(1)
1560 except KeyboardInterrupt:
1561 progress('')
1562 raise ReExtractInfo('[wait] Interrupted by user', expected=True)
1563 except BaseException as e:
1564 if not isinstance(e, ReExtractInfo):
1565 self.to_screen('')
1566 raise
1567
7e88d7d7 1568 @_handle_extraction_exceptions
58f197b7 1569 def __extract_info(self, url, ie, download, extra_info, process):
693f0600 1570 try:
1571 ie_result = ie.extract(url)
1572 except UserNotLive as e:
1573 if process:
1574 if self.params.get('wait_for_video'):
1575 self.report_warning(e)
1576 self._wait_for_video()
1577 raise
a0566bbf 1578 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
cb794ee0 1579 self.report_warning(f'Extractor {ie.IE_NAME} returned nothing{bug_reports_message()}')
a0566bbf 1580 return
1581 if isinstance(ie_result, list):
1582 # Backwards compatibility: old IE result format
1583 ie_result = {
1584 '_type': 'compat_list',
1585 'entries': ie_result,
1586 }
e37d0efb 1587 if extra_info.get('original_url'):
1588 ie_result.setdefault('original_url', extra_info['original_url'])
a0566bbf 1589 self.add_default_extra_info(ie_result, ie, url)
1590 if process:
f2ebc5c7 1591 self._wait_for_video(ie_result)
a0566bbf 1592 return self.process_ie_result(ie_result, download, extra_info)
8222d8de 1593 else:
a0566bbf 1594 return ie_result
fe7e0c98 1595
ea38e55f 1596 def add_default_extra_info(self, ie_result, ie, url):
6033d980 1597 if url is not None:
1598 self.add_extra_info(ie_result, {
1599 'webpage_url': url,
1600 'original_url': url,
57ebfca3 1601 })
1602 webpage_url = ie_result.get('webpage_url')
1603 if webpage_url:
1604 self.add_extra_info(ie_result, {
1605 'webpage_url_basename': url_basename(webpage_url),
1606 'webpage_url_domain': get_domain(webpage_url),
6033d980 1607 })
1608 if ie is not None:
1609 self.add_extra_info(ie_result, {
1610 'extractor': ie.IE_NAME,
1611 'extractor_key': ie.ie_key(),
1612 })
ea38e55f 1613
58adec46 1614 def process_ie_result(self, ie_result, download=True, extra_info=None):
8222d8de
JMF
1615 """
1616 Take the result of the ie (may be modified) and resolve all unresolved
1617 references (URLs, playlist items).
1618
1619 It will also download the videos if 'download' is True.
1620 Returns the resolved ie_result.
1621 """
58adec46 1622 if extra_info is None:
1623 extra_info = {}
e8ee972c
PH
1624 result_type = ie_result.get('_type', 'video')
1625
057a5206 1626 if result_type in ('url', 'url_transparent'):
8f97a15d 1627 ie_result['url'] = sanitize_url(
1628 ie_result['url'], scheme='http' if self.params.get('prefer_insecure') else 'https')
e37d0efb 1629 if ie_result.get('original_url'):
1630 extra_info.setdefault('original_url', ie_result['original_url'])
1631
057a5206 1632 extract_flat = self.params.get('extract_flat', False)
3089bc74
S
1633 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1634 or extract_flat is True):
ecb54191 1635 info_copy = ie_result.copy()
6033d980 1636 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
360167b9 1637 if ie and not ie_result.get('id'):
4614bc22 1638 info_copy['id'] = ie.get_temp_id(ie_result['url'])
6033d980 1639 self.add_default_extra_info(info_copy, ie, ie_result['url'])
4614bc22 1640 self.add_extra_info(info_copy, extra_info)
b5475f11 1641 info_copy, _ = self.pre_process(info_copy)
94dc8604 1642 self._fill_common_fields(info_copy, False)
ecb54191 1643 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
415f8d51 1644 self._raise_pending_errors(info_copy)
4614bc22 1645 if self.params.get('force_write_download_archive', False):
1646 self.record_download_archive(info_copy)
e8ee972c
PH
1647 return ie_result
1648
8222d8de 1649 if result_type == 'video':
b6c45014 1650 self.add_extra_info(ie_result, extra_info)
9c2b75b5 1651 ie_result = self.process_video_result(ie_result, download=download)
415f8d51 1652 self._raise_pending_errors(ie_result)
28b0eb0f 1653 additional_urls = (ie_result or {}).get('additional_urls')
9c2b75b5 1654 if additional_urls:
e9f4ccd1 1655 # TODO: Improve MetadataParserPP to allow setting a list
14f25df2 1656 if isinstance(additional_urls, str):
9c2b75b5 1657 additional_urls = [additional_urls]
1658 self.to_screen(
1659 '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1660 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1661 ie_result['additional_entries'] = [
1662 self.extract_info(
b69fd25c 1663 url, download, extra_info=extra_info,
9c2b75b5 1664 force_generic_extractor=self.params.get('force_generic_extractor'))
1665 for url in additional_urls
1666 ]
1667 return ie_result
8222d8de
JMF
1668 elif result_type == 'url':
1669 # We have to add extra_info to the results because it may be
1670 # contained in a playlist
07cce701 1671 return self.extract_info(
1672 ie_result['url'], download,
1673 ie_key=ie_result.get('ie_key'),
1674 extra_info=extra_info)
7fc3fa05
PH
1675 elif result_type == 'url_transparent':
1676 # Use the information from the embedding page
1677 info = self.extract_info(
1678 ie_result['url'], ie_key=ie_result.get('ie_key'),
1679 extra_info=extra_info, download=False, process=False)
1680
1640eb09
S
1681 # extract_info may return None when ignoreerrors is enabled and
1682 # extraction failed with an error; don't crash, just return early
1683 # in this case
1684 if not info:
1685 return info
1686
3975b4d2 1687 exempted_fields = {'_type', 'url', 'ie_key'}
1688 if not ie_result.get('section_end') and ie_result.get('section_start') is None:
1689 # For video clips, the id etc of the clip extractor should be used
1690 exempted_fields |= {'id', 'extractor', 'extractor_key'}
1691
412c617d 1692 new_result = info.copy()
3975b4d2 1693 new_result.update(filter_dict(ie_result, lambda k, v: v is not None and k not in exempted_fields))
7fc3fa05 1694
0563f7ac
S
1695 # Extracted info may not be a video result (i.e.
1696 # info.get('_type', 'video') != 'video') but rather a URL or
1697 # url_transparent. In such cases outer metadata (from ie_result)
1698 # should be propagated to inner one (info). For this to happen
1699 # _type of info should be overridden with url_transparent. This
067aa17e 1700 # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
0563f7ac
S
1701 if new_result.get('_type') == 'url':
1702 new_result['_type'] = 'url_transparent'
7fc3fa05
PH
1703
1704 return self.process_ie_result(
1705 new_result, download=download, extra_info=extra_info)
40fcba5e 1706 elif result_type in ('playlist', 'multi_video'):
30a074c2 1707 # Protect from infinite recursion due to recursively nested playlists
1708 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
0bd5a039 1709 webpage_url = ie_result.get('webpage_url') # Playlists may not have a webpage_url
1710 if webpage_url and webpage_url in self._playlist_urls:
7e85e872 1711 self.to_screen(
30a074c2 1712 '[download] Skipping already downloaded playlist: %s'
1713 % (ie_result.get('title') or ie_result.get('id')))
1714 return
7e85e872 1715
30a074c2 1716 self._playlist_level += 1
1717 self._playlist_urls.add(webpage_url)
03f83004 1718 self._fill_common_fields(ie_result, False)
bc516a3f 1719 self._sanitize_thumbnails(ie_result)
30a074c2 1720 try:
1721 return self.__process_playlist(ie_result, download)
1722 finally:
1723 self._playlist_level -= 1
1724 if not self._playlist_level:
1725 self._playlist_urls.clear()
8222d8de 1726 elif result_type == 'compat_list':
c9bf4114
PH
1727 self.report_warning(
1728 'Extractor %s returned a compat_list result. '
1729 'It needs to be updated.' % ie_result.get('extractor'))
5f6a1245 1730
8222d8de 1731 def _fixup(r):
b868936c 1732 self.add_extra_info(r, {
1733 'extractor': ie_result['extractor'],
1734 'webpage_url': ie_result['webpage_url'],
1735 'webpage_url_basename': url_basename(ie_result['webpage_url']),
0bb322b9 1736 'webpage_url_domain': get_domain(ie_result['webpage_url']),
b868936c 1737 'extractor_key': ie_result['extractor_key'],
1738 })
8222d8de
JMF
1739 return r
1740 ie_result['entries'] = [
b6c45014 1741 self.process_ie_result(_fixup(r), download, extra_info)
8222d8de
JMF
1742 for r in ie_result['entries']
1743 ]
1744 return ie_result
1745 else:
1746 raise Exception('Invalid result type: %s' % result_type)
1747
e92caff5 1748 def _ensure_dir_exists(self, path):
1749 return make_dir(path, self.report_error)
1750
3b603dbd 1751 @staticmethod
3bec830a 1752 def _playlist_infodict(ie_result, strict=False, **kwargs):
1753 info = {
1754 'playlist_count': ie_result.get('playlist_count'),
3b603dbd 1755 'playlist': ie_result.get('title') or ie_result.get('id'),
1756 'playlist_id': ie_result.get('id'),
1757 'playlist_title': ie_result.get('title'),
1758 'playlist_uploader': ie_result.get('uploader'),
1759 'playlist_uploader_id': ie_result.get('uploader_id'),
3b603dbd 1760 **kwargs,
1761 }
3bec830a 1762 if strict:
1763 return info
0bd5a039 1764 if ie_result.get('webpage_url'):
1765 info.update({
1766 'webpage_url': ie_result['webpage_url'],
1767 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1768 'webpage_url_domain': get_domain(ie_result['webpage_url']),
1769 })
3bec830a 1770 return {
1771 **info,
1772 'playlist_index': 0,
1773 '__last_playlist_index': max(ie_result['requested_entries'] or (0, 0)),
1774 'extractor': ie_result['extractor'],
3bec830a 1775 'extractor_key': ie_result['extractor_key'],
1776 }
3b603dbd 1777
30a074c2 1778 def __process_playlist(self, ie_result, download):
7e88d7d7 1779 """Process each entry in the playlist"""
f5ea4748 1780 assert ie_result['_type'] in ('playlist', 'multi_video')
1781
3bec830a 1782 common_info = self._playlist_infodict(ie_result, strict=True)
3955b207 1783 title = common_info.get('playlist') or '<Untitled>'
3bec830a 1784 if self._match_entry(common_info, incomplete=True) is not None:
1785 return
c6e07cf1 1786 self.to_screen(f'[download] Downloading {ie_result["_type"]}: {title}')
f0d785d3 1787
7e88d7d7 1788 all_entries = PlaylistEntries(self, ie_result)
7e9a6125 1789 entries = orderedSet(all_entries.get_requested_items(), lazy=True)
1790
1791 lazy = self.params.get('lazy_playlist')
1792 if lazy:
1793 resolved_entries, n_entries = [], 'N/A'
1794 ie_result['requested_entries'], ie_result['entries'] = None, None
1795 else:
1796 entries = resolved_entries = list(entries)
1797 n_entries = len(resolved_entries)
1798 ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*resolved_entries)) or ([], [])
1799 if not ie_result.get('playlist_count'):
1800 # Better to do this after potentially exhausting entries
1801 ie_result['playlist_count'] = all_entries.get_full_count()
498f5606 1802
0647d925 1803 extra = self._playlist_infodict(ie_result, n_entries=int_or_none(n_entries))
1804 ie_copy = collections.ChainMap(ie_result, extra)
3bec830a 1805
e08a85d8 1806 _infojson_written = False
0bfc53d0 1807 write_playlist_files = self.params.get('allow_playlist_files', True)
1808 if write_playlist_files and self.params.get('list_thumbnails'):
1809 self.list_thumbnails(ie_result)
1810 if write_playlist_files and not self.params.get('simulate'):
e08a85d8 1811 _infojson_written = self._write_info_json(
1812 'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
1813 if _infojson_written is None:
80c03fa9 1814 return
1815 if self._write_description('playlist', ie_result,
1816 self.prepare_filename(ie_copy, 'pl_description')) is None:
1817 return
681de68e 1818 # TODO: This should be passed to ThumbnailsConvertor if necessary
3bec830a 1819 self._write_thumbnails('playlist', ie_result, self.prepare_filename(ie_copy, 'pl_thumbnail'))
30a074c2 1820
7e9a6125 1821 if lazy:
1822 if self.params.get('playlistreverse') or self.params.get('playlistrandom'):
1823 self.report_warning('playlistreverse and playlistrandom are not supported with lazy_playlist', only_once=True)
1824 elif self.params.get('playlistreverse'):
1825 entries.reverse()
1826 elif self.params.get('playlistrandom'):
30a074c2 1827 random.shuffle(entries)
1828
bc5c2f8a 1829 self.to_screen(f'[{ie_result["extractor"]}] Playlist {title}: Downloading {n_entries} items'
7e88d7d7 1830 f'{format_field(ie_result, "playlist_count", " of %s")}')
30a074c2 1831
134c913c 1832 keep_resolved_entries = self.params.get('extract_flat') != 'discard'
1833 if self.params.get('extract_flat') == 'discard_in_playlist':
1834 keep_resolved_entries = ie_result['_type'] != 'playlist'
1835 if keep_resolved_entries:
1836 self.write_debug('The information of all playlist entries will be held in memory')
1837
26e2805c 1838 failures = 0
1839 max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
7e9a6125 1840 for i, (playlist_index, entry) in enumerate(entries):
1841 if lazy:
1842 resolved_entries.append((playlist_index, entry))
3bec830a 1843 if not entry:
7e88d7d7 1844 continue
1845
7e88d7d7 1846 entry['__x_forwarded_for_ip'] = ie_result.get('__x_forwarded_for_ip')
7e9a6125 1847 if not lazy and 'playlist-index' in self.params.get('compat_opts', []):
1848 playlist_index = ie_result['requested_entries'][i]
1849
0647d925 1850 entry_copy = collections.ChainMap(entry, {
3bec830a 1851 **common_info,
3955b207 1852 'n_entries': int_or_none(n_entries),
71729754 1853 'playlist_index': playlist_index,
7e9a6125 1854 'playlist_autonumber': i + 1,
0647d925 1855 })
3bec830a 1856
0647d925 1857 if self._match_entry(entry_copy, incomplete=True) is not None:
f0ad6f8c 1858 # For compatibility with youtube-dl. See https://github.com/yt-dlp/yt-dlp/issues/4369
1859 resolved_entries[i] = (playlist_index, NO_DEFAULT)
3bec830a 1860 continue
1861
bc5c2f8a 1862 self.to_screen('[download] Downloading item %s of %s' % (
3bec830a 1863 self._format_screen(i + 1, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))
1864
a6ca61d4 1865 extra.update({
1866 'playlist_index': playlist_index,
1867 'playlist_autonumber': i + 1,
1868 })
3bec830a 1869 entry_result = self.__process_iterable_entry(entry, download, extra)
26e2805c 1870 if not entry_result:
1871 failures += 1
1872 if failures >= max_failures:
1873 self.report_error(
7e88d7d7 1874 f'Skipping the remaining entries in playlist "{title}" since {failures} items failed extraction')
26e2805c 1875 break
134c913c 1876 if keep_resolved_entries:
1877 resolved_entries[i] = (playlist_index, entry_result)
7e88d7d7 1878
1879 # Update with processed data
f0ad6f8c 1880 ie_result['entries'] = [e for _, e in resolved_entries if e is not NO_DEFAULT]
bc5c2f8a 1881 ie_result['requested_entries'] = [i for i, e in resolved_entries if e is not NO_DEFAULT]
1882 if ie_result['requested_entries'] == try_call(lambda: list(range(1, ie_result['playlist_count'] + 1))):
1883 # Do not set for full playlist
1884 ie_result.pop('requested_entries')
e08a85d8 1885
1886 # Write the updated info to json
cb96c5be 1887 if _infojson_written is True and self._write_info_json(
e08a85d8 1888 'updated playlist', ie_result,
1889 self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
1890 return
ca30f449 1891
ed5835b4 1892 ie_result = self.run_all_pps('playlist', ie_result)
7e88d7d7 1893 self.to_screen(f'[download] Finished downloading playlist: {title}')
30a074c2 1894 return ie_result
1895
7e88d7d7 1896 @_handle_extraction_exceptions
a0566bbf 1897 def __process_iterable_entry(self, entry, download, extra_info):
1898 return self.process_ie_result(
1899 entry, download=download, extra_info=extra_info)
1900
67134eab
JMF
1901 def _build_format_filter(self, filter_spec):
1902 " Returns a function to filter the formats according to the filter_spec "
083c9df9
PH
1903
1904 OPERATORS = {
1905 '<': operator.lt,
1906 '<=': operator.le,
1907 '>': operator.gt,
1908 '>=': operator.ge,
1909 '=': operator.eq,
1910 '!=': operator.ne,
1911 }
67134eab 1912 operator_rex = re.compile(r'''(?x)\s*
187986a8 1913 (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1914 (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1915 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
083c9df9 1916 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
187986a8 1917 m = operator_rex.fullmatch(filter_spec)
9ddb6925
S
1918 if m:
1919 try:
1920 comparison_value = int(m.group('value'))
1921 except ValueError:
1922 comparison_value = parse_filesize(m.group('value'))
1923 if comparison_value is None:
1924 comparison_value = parse_filesize(m.group('value') + 'B')
1925 if comparison_value is None:
1926 raise ValueError(
1927 'Invalid value %r in format specification %r' % (
67134eab 1928 m.group('value'), filter_spec))
9ddb6925
S
1929 op = OPERATORS[m.group('op')]
1930
083c9df9 1931 if not m:
9ddb6925
S
1932 STR_OPERATORS = {
1933 '=': operator.eq,
10d33b34
YCH
1934 '^=': lambda attr, value: attr.startswith(value),
1935 '$=': lambda attr, value: attr.endswith(value),
1936 '*=': lambda attr, value: value in attr,
1ce9a3cb 1937 '~=': lambda attr, value: value.search(attr) is not None
9ddb6925 1938 }
187986a8 1939 str_operator_rex = re.compile(r'''(?x)\s*
1940 (?P<key>[a-zA-Z0-9._-]+)\s*
1ce9a3cb
LF
1941 (?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?
1942 (?P<quote>["'])?
1943 (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
1944 (?(quote)(?P=quote))\s*
9ddb6925 1945 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
187986a8 1946 m = str_operator_rex.fullmatch(filter_spec)
9ddb6925 1947 if m:
1ce9a3cb
LF
1948 if m.group('op') == '~=':
1949 comparison_value = re.compile(m.group('value'))
1950 else:
1951 comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value'))
2cc779f4
S
1952 str_op = STR_OPERATORS[m.group('op')]
1953 if m.group('negation'):
e118a879 1954 op = lambda attr, value: not str_op(attr, value)
2cc779f4
S
1955 else:
1956 op = str_op
083c9df9 1957
9ddb6925 1958 if not m:
187986a8 1959 raise SyntaxError('Invalid filter specification %r' % filter_spec)
083c9df9
PH
1960
1961 def _filter(f):
1962 actual_value = f.get(m.group('key'))
1963 if actual_value is None:
1964 return m.group('none_inclusive')
1965 return op(actual_value, comparison_value)
67134eab
JMF
1966 return _filter
1967
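# Example filter_spec strings accepted above, as used inside -f brackets
# (illustrative, following the documented format filtering syntax):
#   'height<=720', 'filesize>100M', 'ext=mp4', 'vcodec^=avc1'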
9f1a1c36 1968 def _check_formats(self, formats):
1969 for f in formats:
1970 self.to_screen('[info] Testing format %s' % f['format_id'])
75689fe5 1971 path = self.get_output_path('temp')
1972 if not self._ensure_dir_exists(f'{path}/'):
1973 continue
1974 temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
9f1a1c36 1975 temp_file.close()
1976 try:
1977 success, _ = self.dl(temp_file.name, f, test=True)
8a82af35 1978 except (DownloadError, OSError, ValueError) + network_exceptions:
9f1a1c36 1979 success = False
1980 finally:
1981 if os.path.exists(temp_file.name):
1982 try:
1983 os.remove(temp_file.name)
1984 except OSError:
1985 self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1986 if success:
1987 yield f
1988 else:
1989 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1990
0017d9ad 1991 def _default_format_spec(self, info_dict, download=True):
0017d9ad 1992
af0f7428
S
1993 def can_merge():
1994 merger = FFmpegMergerPP(self)
1995 return merger.available and merger.can_merge()
1996
91ebc640 1997 prefer_best = (
b7b04c78 1998 not self.params.get('simulate')
91ebc640 1999 and download
2000 and (
2001 not can_merge()
21633673 2002 or info_dict.get('is_live') and not self.params.get('live_from_start')
bf1824b3 2003 or self.params['outtmpl']['default'] == '-'))
53ed7066 2004 compat = (
2005 prefer_best
2006 or self.params.get('allow_multiple_audio_streams', False)
8a82af35 2007 or 'format-spec' in self.params['compat_opts'])
91ebc640 2008
2009 return (
53ed7066 2010 'best/bestvideo+bestaudio' if prefer_best
2011 else 'bestvideo*+bestaudio/best' if not compat
91ebc640 2012 else 'bestvideo+bestaudio/best')
0017d9ad 2013
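# e.g. (illustrative): with a working ffmpeg and a normal file download this
# returns 'bestvideo*+bestaudio/best'; when merging is unavailable or output
# goes to stdout ('-') it falls back to 'best/bestvideo+bestaudio'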
67134eab
JMF
2014 def build_format_selector(self, format_spec):
2015 def syntax_error(note, start):
2016 message = (
2017 'Invalid format specification: '
86e5f3ed 2018 '{}\n\t{}\n\t{}^'.format(note, format_spec, ' ' * start[1]))
67134eab
JMF
2019 return SyntaxError(message)
2020
2021 PICKFIRST = 'PICKFIRST'
2022 MERGE = 'MERGE'
2023 SINGLE = 'SINGLE'
0130afb7 2024 GROUP = 'GROUP'
67134eab
JMF
2025 FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
2026
91ebc640 2027 allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
2028 'video': self.params.get('allow_multiple_video_streams', False)}
909d24dd 2029
9f1a1c36 2030 check_formats = self.params.get('check_formats') == 'selected'
e8e73840 2031
67134eab
JMF
2032 def _parse_filter(tokens):
2033 filter_parts = []
2034 for type, string, start, _, _ in tokens:
2035 if type == tokenize.OP and string == ']':
2036 return ''.join(filter_parts)
2037 else:
2038 filter_parts.append(string)
2039
232541df 2040 def _remove_unused_ops(tokens):
62b58c09
L
2041 # Remove operators that we don't use and join them with the surrounding strings.
2042 # E.g. 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
232541df
JMF
2043 ALLOWED_OPS = ('/', '+', ',', '(', ')')
2044 last_string, last_start, last_end, last_line = None, None, None, None
2045 for type, string, start, end, line in tokens:
2046 if type == tokenize.OP and string == '[':
2047 if last_string:
2048 yield tokenize.NAME, last_string, last_start, last_end, last_line
2049 last_string = None
2050 yield type, string, start, end, line
2051 # everything inside brackets will be handled by _parse_filter
2052 for type, string, start, end, line in tokens:
2053 yield type, string, start, end, line
2054 if type == tokenize.OP and string == ']':
2055 break
2056 elif type == tokenize.OP and string in ALLOWED_OPS:
2057 if last_string:
2058 yield tokenize.NAME, last_string, last_start, last_end, last_line
2059 last_string = None
2060 yield type, string, start, end, line
2061 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
2062 if not last_string:
2063 last_string = string
2064 last_start = start
2065 last_end = end
2066 else:
2067 last_string += string
2068 if last_string:
2069 yield tokenize.NAME, last_string, last_start, last_end, last_line
2070
cf2ac6df 2071 def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
67134eab
JMF
2072 selectors = []
2073 current_selector = None
2074 for type, string, start, _, _ in tokens:
2075 # ENCODING is only defined in python 3.x
2076 if type == getattr(tokenize, 'ENCODING', None):
2077 continue
2078 elif type in [tokenize.NAME, tokenize.NUMBER]:
2079 current_selector = FormatSelector(SINGLE, string, [])
2080 elif type == tokenize.OP:
cf2ac6df
JMF
2081 if string == ')':
2082 if not inside_group:
2083 # ')' will be handled by the parentheses group
2084 tokens.restore_last_token()
67134eab 2085 break
cf2ac6df 2086 elif inside_merge and string in ['/', ',']:
0130afb7
JMF
2087 tokens.restore_last_token()
2088 break
cf2ac6df
JMF
2089 elif inside_choice and string == ',':
2090 tokens.restore_last_token()
2091 break
2092 elif string == ',':
0a31a350
JMF
2093 if not current_selector:
2094 raise syntax_error('"," must follow a format selector', start)
67134eab
JMF
2095 selectors.append(current_selector)
2096 current_selector = None
2097 elif string == '/':
d96d604e
JMF
2098 if not current_selector:
2099 raise syntax_error('"/" must follow a format selector', start)
67134eab 2100 first_choice = current_selector
cf2ac6df 2101 second_choice = _parse_format_selection(tokens, inside_choice=True)
f5f4a27a 2102 current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
67134eab
JMF
2103 elif string == '[':
2104 if not current_selector:
2105 current_selector = FormatSelector(SINGLE, 'best', [])
2106 format_filter = _parse_filter(tokens)
2107 current_selector.filters.append(format_filter)
0130afb7
JMF
2108 elif string == '(':
2109 if current_selector:
2110 raise syntax_error('Unexpected "("', start)
cf2ac6df
JMF
2111 group = _parse_format_selection(tokens, inside_group=True)
2112 current_selector = FormatSelector(GROUP, group, [])
67134eab 2113 elif string == '+':
d03cfdce 2114 if not current_selector:
2115 raise syntax_error('Unexpected "+"', start)
2116 selector_1 = current_selector
2117 selector_2 = _parse_format_selection(tokens, inside_merge=True)
2118 if not selector_2:
2119 raise syntax_error('Expected a selector', start)
2120 current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
67134eab 2121 else:
86e5f3ed 2122 raise syntax_error(f'Operator not recognized: "{string}"', start)
67134eab
JMF
2123 elif type == tokenize.ENDMARKER:
2124 break
2125 if current_selector:
2126 selectors.append(current_selector)
2127 return selectors
2128
f8d4ad9a 2129 def _merge(formats_pair):
2130 format_1, format_2 = formats_pair
2131
2132 formats_info = []
2133 formats_info.extend(format_1.get('requested_formats', (format_1,)))
2134 formats_info.extend(format_2.get('requested_formats', (format_2,)))
2135
2136 if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
551f9388 2137 get_no_more = {'video': False, 'audio': False}
f8d4ad9a 2138 for (i, fmt_info) in enumerate(formats_info):
551f9388 2139 if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
2140 formats_info.pop(i)
2141 continue
2142 for aud_vid in ['audio', 'video']:
f8d4ad9a 2143 if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
2144 if get_no_more[aud_vid]:
2145 formats_info.pop(i)
f5510afe 2146 break
f8d4ad9a 2147 get_no_more[aud_vid] = True
2148
2149 if len(formats_info) == 1:
2150 return formats_info[0]
2151
2152 video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
2153 audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
2154
2155 the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
2156 the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
2157
fc61aff4
LL
2158 output_ext = get_compatible_ext(
2159 vcodecs=[f.get('vcodec') for f in video_fmts],
2160 acodecs=[f.get('acodec') for f in audio_fmts],
2161 vexts=[f['ext'] for f in video_fmts],
2162 aexts=[f['ext'] for f in audio_fmts],
2163 preferences=(try_call(lambda: self.params['merge_output_format'].split('/'))
2164 or self.params.get('prefer_free_formats') and ('webm', 'mkv')))
f8d4ad9a 2165
975a0d0d 2166 filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
2167
f8d4ad9a 2168 new_dict = {
2169 'requested_formats': formats_info,
975a0d0d 2170 'format': '+'.join(filtered('format')),
2171 'format_id': '+'.join(filtered('format_id')),
f8d4ad9a 2172 'ext': output_ext,
975a0d0d 2173 'protocol': '+'.join(map(determine_protocol, formats_info)),
093a1710 2174 'language': '+'.join(orderedSet(filtered('language'))) or None,
2175 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
2176 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
975a0d0d 2177 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
f8d4ad9a 2178 }
2179
2180 if the_only_video:
2181 new_dict.update({
2182 'width': the_only_video.get('width'),
2183 'height': the_only_video.get('height'),
2184 'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
2185 'fps': the_only_video.get('fps'),
49a57e70 2186 'dynamic_range': the_only_video.get('dynamic_range'),
f8d4ad9a 2187 'vcodec': the_only_video.get('vcodec'),
2188 'vbr': the_only_video.get('vbr'),
2189 'stretched_ratio': the_only_video.get('stretched_ratio'),
105bfd90 2190 'aspect_ratio': the_only_video.get('aspect_ratio'),
f8d4ad9a 2191 })
2192
2193 if the_only_audio:
2194 new_dict.update({
2195 'acodec': the_only_audio.get('acodec'),
2196 'abr': the_only_audio.get('abr'),
975a0d0d 2197 'asr': the_only_audio.get('asr'),
b8ed0f15 2198 'audio_channels': the_only_audio.get('audio_channels')
f8d4ad9a 2199 })
2200
2201 return new_dict
2202
e8e73840 2203 def _check_formats(formats):
981052c9 2204 if not check_formats:
2205 yield from formats
b5ac45b1 2206 return
9f1a1c36 2207 yield from self._check_formats(formats)
e8e73840 2208
67134eab 2209 def _build_selector_function(selector):
909d24dd 2210 if isinstance(selector, list): # ,
67134eab
JMF
2211 fs = [_build_selector_function(s) for s in selector]
2212
317f7ab6 2213 def selector_function(ctx):
67134eab 2214 for f in fs:
981052c9 2215 yield from f(ctx)
67134eab 2216 return selector_function
909d24dd 2217
2218 elif selector.type == GROUP: # ()
0130afb7 2219 selector_function = _build_selector_function(selector.selector)
909d24dd 2220
2221 elif selector.type == PICKFIRST: # /
67134eab
JMF
2222 fs = [_build_selector_function(s) for s in selector.selector]
2223
317f7ab6 2224 def selector_function(ctx):
67134eab 2225 for f in fs:
317f7ab6 2226 picked_formats = list(f(ctx))
67134eab
JMF
2227 if picked_formats:
2228 return picked_formats
2229 return []
67134eab 2230
981052c9 2231 elif selector.type == MERGE: # +
2232 selector_1, selector_2 = map(_build_selector_function, selector.selector)
2233
2234 def selector_function(ctx):
adbc4ec4 2235 for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
981052c9 2236 yield _merge(pair)
2237
909d24dd 2238 elif selector.type == SINGLE: # atom
598d185d 2239 format_spec = selector.selector or 'best'
909d24dd 2240
f8d4ad9a 2241 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
909d24dd 2242 if format_spec == 'all':
2243 def selector_function(ctx):
9222c381 2244 yield from _check_formats(ctx['formats'][::-1])
f8d4ad9a 2245 elif format_spec == 'mergeall':
2246 def selector_function(ctx):
316f2650 2247 formats = list(_check_formats(
2248 f for f in ctx['formats'] if f.get('vcodec') != 'none' or f.get('acodec') != 'none'))
e01d6aa4 2249 if not formats:
2250 return
921b76ca 2251 merged_format = formats[-1]
2252 for f in formats[-2::-1]:
f8d4ad9a 2253 merged_format = _merge((merged_format, f))
2254 yield merged_format
909d24dd 2255
2256 else:
85e801a9 2257 format_fallback, seperate_fallback, format_reverse, format_idx = False, None, True, 1
eff63539 2258 mobj = re.match(
2259 r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
2260 format_spec)
2261 if mobj is not None:
2262 format_idx = int_or_none(mobj.group('n'), default=1)
e8e73840 2263 format_reverse = mobj.group('bw')[0] == 'b'
eff63539 2264 format_type = (mobj.group('type') or [None])[0]
2265 not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
2266 format_modified = mobj.group('mod') is not None
909d24dd 2267
2268 format_fallback = not format_type and not format_modified # for b, w
8326b00a 2269 _filter_f = (
eff63539 2270 (lambda f: f.get('%scodec' % format_type) != 'none')
2271 if format_type and format_modified # bv*, ba*, wv*, wa*
2272 else (lambda f: f.get('%scodec' % not_format_type) == 'none')
2273 if format_type # bv, ba, wv, wa
2274 else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
2275 if not format_modified # b, w
8326b00a 2276 else lambda f: True) # b*, w*
2277 filter_f = lambda f: _filter_f(f) and (
2278 f.get('vcodec') != 'none' or f.get('acodec') != 'none')
67134eab 2279 else:
48ee10ee 2280 if format_spec in self._format_selection_exts['audio']:
b11c04a8 2281 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
48ee10ee 2282 elif format_spec in self._format_selection_exts['video']:
b11c04a8 2283 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
85e801a9 2284 seperate_fallback = lambda f: f.get('ext') == format_spec and f.get('vcodec') != 'none'
48ee10ee 2285 elif format_spec in self._format_selection_exts['storyboards']:
b11c04a8 2286 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
2287 else:
b5ae35ee 2288 filter_f = lambda f: f.get('format_id') == format_spec # id
909d24dd 2289
2290 def selector_function(ctx):
2291 formats = list(ctx['formats'])
909d24dd 2292 matches = list(filter(filter_f, formats)) if filter_f is not None else formats
85e801a9 2293 if not matches:
2294 if format_fallback and ctx['incomplete_formats']:
2295 # for extractors with incomplete formats (audio only (soundcloud)
2296 # or video only (imgur)) best/worst will fall back to
2297 # best/worst {video,audio}-only format
2298 matches = formats
2299 elif seperate_fallback and not ctx['has_merged_format']:
2300 # for compatibility with youtube-dl when there is no pre-merged format
2301 matches = list(filter(seperate_fallback, formats))
981052c9 2302 matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
2303 try:
e8e73840 2304 yield matches[format_idx - 1]
4abea8ca 2305 except LazyList.IndexError:
981052c9 2306 return
083c9df9 2307
67134eab 2308 filters = [self._build_format_filter(f) for f in selector.filters]
083c9df9 2309
317f7ab6 2310 def final_selector(ctx):
adbc4ec4 2311 ctx_copy = dict(ctx)
67134eab 2312 for _filter in filters:
317f7ab6
S
2313 ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
2314 return selector_function(ctx_copy)
67134eab 2315 return final_selector
083c9df9 2316
0f06bcd7 2317 stream = io.BytesIO(format_spec.encode())
0130afb7 2318 try:
f9934b96 2319 tokens = list(_remove_unused_ops(tokenize.tokenize(stream.readline)))
0130afb7
JMF
2320 except tokenize.TokenError:
2321 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2322
86e5f3ed 2323 class TokenIterator:
0130afb7
JMF
2324 def __init__(self, tokens):
2325 self.tokens = tokens
2326 self.counter = 0
2327
2328 def __iter__(self):
2329 return self
2330
2331 def __next__(self):
2332 if self.counter >= len(self.tokens):
2333 raise StopIteration()
2334 value = self.tokens[self.counter]
2335 self.counter += 1
2336 return value
2337
2338 next = __next__
2339
2340 def restore_last_token(self):
2341 self.counter -= 1
2342
2343 parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
67134eab 2344 return _build_selector_function(parsed_selector)
a9c58ad9 2345
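# Example format selector strings parsed above (documented -f values, shown
# for illustration):
#   'bv*+ba/b'                          best video* + best audio, else best single file
#   'bestvideo[height<=720]+bestaudio'  merge with a filter applied to the video part
#   '(mp4,webm)[height<480]'            grouped alternatives sharing one filter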
e5660ee6 2346 def _calc_headers(self, info_dict):
8b7539d2 2347 res = merge_headers(self.params['http_headers'], info_dict.get('http_headers') or {})
e5660ee6 2348
c487cf00 2349 cookies = self._calc_cookies(info_dict['url'])
e5660ee6
JMF
2350 if cookies:
2351 res['Cookie'] = cookies
2352
0016b84e
S
2353 if 'X-Forwarded-For' not in res:
2354 x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2355 if x_forwarded_for_ip:
2356 res['X-Forwarded-For'] = x_forwarded_for_ip
2357
e5660ee6
JMF
2358 return res
2359
c487cf00 2360 def _calc_cookies(self, url):
2361 pr = sanitized_Request(url)
e5660ee6 2362 self.cookiejar.add_cookie_header(pr)
662435f7 2363 return pr.get_header('Cookie')
e5660ee6 2364
9f1a1c36 2365 def _sort_thumbnails(self, thumbnails):
2366 thumbnails.sort(key=lambda t: (
2367 t.get('preference') if t.get('preference') is not None else -1,
2368 t.get('width') if t.get('width') is not None else -1,
2369 t.get('height') if t.get('height') is not None else -1,
2370 t.get('id') if t.get('id') is not None else '',
2371 t.get('url')))
2372
b0249bca 2373 def _sanitize_thumbnails(self, info_dict):
bc516a3f 2374 thumbnails = info_dict.get('thumbnails')
2375 if thumbnails is None:
2376 thumbnail = info_dict.get('thumbnail')
2377 if thumbnail:
2378 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
9f1a1c36 2379 if not thumbnails:
2380 return
2381
2382 def check_thumbnails(thumbnails):
2383 for t in thumbnails:
2384 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2385 try:
2386 self.urlopen(HEADRequest(t['url']))
2387 except network_exceptions as err:
2388 self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2389 continue
2390 yield t
2391
2392 self._sort_thumbnails(thumbnails)
2393 for i, t in enumerate(thumbnails):
2394 if t.get('id') is None:
2395 t['id'] = '%d' % i
2396 if t.get('width') and t.get('height'):
2397 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2398 t['url'] = sanitize_url(t['url'])
2399
2400 if self.params.get('check_formats') is True:
282f5709 2401 info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
9f1a1c36 2402 else:
2403 info_dict['thumbnails'] = thumbnails
bc516a3f 2404
94dc8604 2405 def _fill_common_fields(self, info_dict, final=True):
03f83004 2406 # TODO: move sanitization here
94dc8604 2407 if final:
d4736fdb 2408 title = info_dict.get('title', NO_DEFAULT)
2409 if title is NO_DEFAULT:
03f83004
LNO
2410 raise ExtractorError('Missing "title" field in extractor result',
2411 video_id=info_dict['id'], ie=info_dict['extractor'])
d4736fdb 2412 info_dict['fulltitle'] = title
2413 if not title:
2414 if title == '':
2415 self.write_debug('Extractor gave empty title. Creating a generic title')
2416 else:
2417 self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
1d485a1a 2418 info_dict['title'] = f'{info_dict["extractor"].replace(":", "-")} video #{info_dict["id"]}'
03f83004
LNO
2419
2420 if info_dict.get('duration') is not None:
2421 info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2422
2423 for ts_key, date_key in (
2424 ('timestamp', 'upload_date'),
2425 ('release_timestamp', 'release_date'),
2426 ('modified_timestamp', 'modified_date'),
2427 ):
2428 if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2429 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2430 # see http://bugs.python.org/issue1646728)
19a03940 2431 with contextlib.suppress(ValueError, OverflowError, OSError):
03f83004
LNO
2432 upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2433 info_dict[date_key] = upload_date.strftime('%Y%m%d')
03f83004
LNO
2434
2435 live_keys = ('is_live', 'was_live')
2436 live_status = info_dict.get('live_status')
2437 if live_status is None:
2438 for key in live_keys:
2439 if info_dict.get(key) is False:
2440 continue
2441 if info_dict.get(key):
2442 live_status = key
2443 break
2444 if all(info_dict.get(key) is False for key in live_keys):
2445 live_status = 'not_live'
2446 if live_status:
2447 info_dict['live_status'] = live_status
2448 for key in live_keys:
2449 if info_dict.get(key) is None:
2450 info_dict[key] = (live_status == key)
a057779d 2451 if live_status == 'post_live':
2452 info_dict['was_live'] = True
03f83004
LNO
2453
2454 # Auto generate title fields corresponding to the *_number fields when missing
2455 # in order to always have clean titles. This is very common for TV series.
2456 for field in ('chapter', 'season', 'episode'):
94dc8604 2457 if final and info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
03f83004
LNO
2458 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2459
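# e.g. (illustrative): an entry carrying only 'episode_number': 3 gains
# 'episode': 'Episode 3', and a 'timestamp' without 'upload_date' gains the
# corresponding UTC '%Y%m%d' date via the loop above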
415f8d51 2460 def _raise_pending_errors(self, info):
2461 err = info.pop('__pending_error', None)
2462 if err:
2463 self.report_error(err, tb=False)
2464
784320c9 2465 def sort_formats(self, info_dict):
2466 formats = self._get_formats(info_dict)
2467 if not formats:
2468 return
2469 # Backward compatibility with InfoExtractor._sort_formats
2470 field_preference = formats[0].pop('__sort_fields', None)
2471 if field_preference:
2472 info_dict['_format_sort_fields'] = field_preference
2473
2474 formats.sort(key=FormatSorter(
2475 self, info_dict.get('_format_sort_fields', [])).calculate_preference)
2476
dd82ffea
JMF
2477 def process_video_result(self, info_dict, download=True):
2478 assert info_dict.get('_type', 'video') == 'video'
9c906919 2479 self._num_videos += 1
dd82ffea 2480
bec1fad2 2481 if 'id' not in info_dict:
fc08bdd6 2482 raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
2483 elif not info_dict.get('id'):
2484 raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])
455a15e2 2485
c9969434
S
2486 def report_force_conversion(field, field_not, conversion):
2487 self.report_warning(
2488 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2489 % (field, field_not, conversion))
2490
2491 def sanitize_string_field(info, string_field):
2492 field = info.get(string_field)
14f25df2 2493 if field is None or isinstance(field, str):
c9969434
S
2494 return
2495 report_force_conversion(string_field, 'a string', 'string')
14f25df2 2496 info[string_field] = str(field)
c9969434
S
2497
2498 def sanitize_numeric_fields(info):
2499 for numeric_field in self._NUMERIC_FIELDS:
2500 field = info.get(numeric_field)
f9934b96 2501 if field is None or isinstance(field, (int, float)):
c9969434
S
2502 continue
2503 report_force_conversion(numeric_field, 'numeric', 'int')
2504 info[numeric_field] = int_or_none(field)
2505
2506 sanitize_string_field(info_dict, 'id')
2507 sanitize_numeric_fields(info_dict)
3975b4d2 2508 if info_dict.get('section_end') and info_dict.get('section_start') is not None:
2509 info_dict['duration'] = round(info_dict['section_end'] - info_dict['section_start'], 3)
4c3f8c3f 2510 if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):
50e93e03 2511 self.report_warning('"duration" field is negative, there is an error in extractor')
be6217b2 2512
9eef7c4e 2513 chapters = info_dict.get('chapters') or []
a3976e07 2514 if chapters and chapters[0].get('start_time'):
2515 chapters.insert(0, {'start_time': 0})
2516
9eef7c4e 2517 dummy_chapter = {'end_time': 0, 'start_time': info_dict.get('duration')}
a3976e07 2518 for idx, (prev, current, next_) in enumerate(zip(
2519 (dummy_chapter, *chapters), chapters, (*chapters[1:], dummy_chapter)), 1):
9eef7c4e 2520 if current.get('start_time') is None:
2521 current['start_time'] = prev.get('end_time')
2522 if not current.get('end_time'):
2523 current['end_time'] = next_.get('start_time')
a3976e07 2524 if not current.get('title'):
2525 current['title'] = f'<Untitled Chapter {idx}>'
9eef7c4e 2526
dd82ffea
JMF
2527 if 'playlist' not in info_dict:
2528 # It isn't part of a playlist
2529 info_dict['playlist'] = None
2530 info_dict['playlist_index'] = None
2531
bc516a3f 2532 self._sanitize_thumbnails(info_dict)
d5519808 2533
536a55da 2534 thumbnail = info_dict.get('thumbnail')
bc516a3f 2535 thumbnails = info_dict.get('thumbnails')
536a55da
S
2536 if thumbnail:
2537 info_dict['thumbnail'] = sanitize_url(thumbnail)
2538 elif thumbnails:
d5519808
PH
2539 info_dict['thumbnail'] = thumbnails[-1]['url']
2540
ae30b840 2541 if info_dict.get('display_id') is None and 'id' in info_dict:
0afef30b
PH
2542 info_dict['display_id'] = info_dict['id']
2543
03f83004 2544 self._fill_common_fields(info_dict)
33d2fc2f 2545
05108a49
S
2546 for cc_kind in ('subtitles', 'automatic_captions'):
2547 cc = info_dict.get(cc_kind)
2548 if cc:
2549 for _, subtitle in cc.items():
2550 for subtitle_format in subtitle:
2551 if subtitle_format.get('url'):
2552 subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2553 if subtitle_format.get('ext') is None:
2554 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2555
2556 automatic_captions = info_dict.get('automatic_captions')
4bba3716 2557 subtitles = info_dict.get('subtitles')
4bba3716 2558
360e1ca5 2559 info_dict['requested_subtitles'] = self.process_subtitles(
05108a49 2560 info_dict['id'], subtitles, automatic_captions)
a504ced0 2561
784320c9 2562 self.sort_formats(info_dict)
aebb4f4b 2563 formats = self._get_formats(info_dict)
dd82ffea 2564
0a5a191a 2565 # or None ensures --clean-infojson removes it
2566 info_dict['_has_drm'] = any(f.get('has_drm') for f in formats) or None
88acdbc2 2567 if not self.params.get('allow_unplayable_formats'):
2568 formats = [f for f in formats if not f.get('has_drm')]
17ffed18 2569
2570 if formats and all(f.get('acodec') == f.get('vcodec') == 'none' for f in formats):
2571 self.report_warning(
2572 f'{"This video is DRM protected and " if info_dict["_has_drm"] else ""}'
2573 'only images are available for download. Use --list-formats to see them'.capitalize())
88acdbc2 2574
319b6059 2575 get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
2576 if not get_from_start:
2577 info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
2578 if info_dict.get('is_live') and formats:
adbc4ec4 2579 formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
319b6059 2580 if get_from_start and not formats:
a44ca5a4 2581 self.raise_no_formats(info_dict, msg=(
2582 '--live-from-start is passed, but there are no formats that can be downloaded from the start. '
2583 'If you want to download from the current time, use --no-live-from-start'))
adbc4ec4 2584
73af5cc8
S
2585 def is_wellformed(f):
2586 url = f.get('url')
a5ac0c47 2587 if not url:
73af5cc8
S
2588 self.report_warning(
2589 '"url" field is missing or empty - skipping format, '
2590 'there is an error in extractor')
a5ac0c47
S
2591 return False
2592 if isinstance(url, bytes):
2593 sanitize_string_field(f, 'url')
2594 return True
73af5cc8
S
2595
2596 # Filter out malformed formats for better extraction robustness
1ac7f461 2597 formats = list(filter(is_wellformed, formats or []))
2598
2599 if not formats:
2600 self.raise_no_formats(info_dict)
73af5cc8 2601
181c7053
S
2602 formats_dict = {}
2603
dd82ffea 2604 # We check that all the formats have the format and format_id fields
db95dc13 2605 for i, format in enumerate(formats):
c9969434
S
2606 sanitize_string_field(format, 'format_id')
2607 sanitize_numeric_fields(format)
dcf77cf1 2608 format['url'] = sanitize_url(format['url'])
e74e3b63 2609 if not format.get('format_id'):
14f25df2 2610 format['format_id'] = str(i)
e2effb08
S
2611 else:
2612 # Sanitize format_id from characters used in format selector expression
ec85ded8 2613 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
181c7053
S
2614 format_id = format['format_id']
2615 if format_id not in formats_dict:
2616 formats_dict[format_id] = []
2617 formats_dict[format_id].append(format)
2618
2619 # Make sure all formats have unique format_id
03b4de72 2620 common_exts = set(itertools.chain(*self._format_selection_exts.values()))
181c7053 2621 for format_id, ambiguous_formats in formats_dict.items():
48ee10ee 2622 ambigious_id = len(ambiguous_formats) > 1
2623 for i, format in enumerate(ambiguous_formats):
2624 if ambigious_id:
181c7053 2625 format['format_id'] = '%s-%d' % (format_id, i)
48ee10ee 2626 if format.get('ext') is None:
2627 format['ext'] = determine_ext(format['url']).lower()
2628 # Ensure there is no conflict between id and ext in format selection
2629 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2630 if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
2631 format['format_id'] = 'f%s' % format['format_id']
181c7053
S
2632
2633 for i, format in enumerate(formats):
8c51aa65 2634 if format.get('format') is None:
6febd1c1 2635 format['format'] = '{id} - {res}{note}'.format(
8c51aa65
JMF
2636 id=format['format_id'],
2637 res=self.format_resolution(format),
b868936c 2638 note=format_field(format, 'format_note', ' (%s)'),
8c51aa65 2639 )
6f0be937 2640 if format.get('protocol') is None:
b5559424 2641 format['protocol'] = determine_protocol(format)
239df021 2642 if format.get('resolution') is None:
2643 format['resolution'] = self.format_resolution(format, default=None)
176f1866 2644 if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
2645 format['dynamic_range'] = 'SDR'
105bfd90 2646 if format.get('aspect_ratio') is None:
2647 format['aspect_ratio'] = try_call(lambda: round(format['width'] / format['height'], 2))
f2fe69c7 2648 if (info_dict.get('duration') and format.get('tbr')
2649 and not format.get('filesize') and not format.get('filesize_approx')):
56ba69e4 2650 format['filesize_approx'] = int(info_dict['duration'] * format['tbr'] * (1024 / 8))
f2fe69c7 2651
e5660ee6
JMF
2652 # Add HTTP headers, so that external programs can use them from the
2653 # json output
2654 full_format_info = info_dict.copy()
2655 full_format_info.update(format)
2656 format['http_headers'] = self._calc_headers(full_format_info)
0016b84e
S
2657 # Remove private housekeeping stuff
2658 if '__x_forwarded_for_ip' in info_dict:
2659 del info_dict['__x_forwarded_for_ip']
dd82ffea 2660
9f1a1c36 2661 if self.params.get('check_formats') is True:
282f5709 2662 formats = LazyList(self._check_formats(formats[::-1]), reverse=True)
9f1a1c36 2663
88acdbc2 2664 if not formats or formats[0] is not info_dict:
b3d9ef88
JMF
2665 # only set the 'formats' field if the original info_dict lists them
2666 # otherwise we end up with a circular reference, the first (and unique)
f89197d7 2667 # element in the 'formats' field in info_dict is info_dict itself,
dfb1b146 2668 # which can't be exported to json
b3d9ef88 2669 info_dict['formats'] = formats
4ec82a72 2670
2671 info_dict, _ = self.pre_process(info_dict)
2672
6db9c4d5 2673 if self._match_entry(info_dict, incomplete=self._format_fields) is not None:
09b49e1f 2674 return info_dict
2675
2676 self.post_extract(info_dict)
2677 info_dict, _ = self.pre_process(info_dict, 'after_filter')
2678
093a1710 2679 # The pre-processors may have modified the formats
aebb4f4b 2680 formats = self._get_formats(info_dict)
093a1710 2681
e4221b70 2682 list_only = self.params.get('simulate') == 'list_only'
fa9f30b8 2683 interactive_format_selection = not list_only and self.format_selector == '-'
b7b04c78 2684 if self.params.get('list_thumbnails'):
2685 self.list_thumbnails(info_dict)
b7b04c78 2686 if self.params.get('listsubtitles'):
2687 if 'automatic_captions' in info_dict:
2688 self.list_subtitles(
2689 info_dict['id'], automatic_captions, 'automatic captions')
2690 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
fa9f30b8 2691 if self.params.get('listformats') or interactive_format_selection:
b69fd25c 2692 self.list_formats(info_dict)
169dbde9 2693 if list_only:
b7b04c78 2694 # Without this printing, -F --print-json will not work
169dbde9 2695 self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
c487cf00 2696 return info_dict
bfaae0a7 2697
187986a8 2698 format_selector = self.format_selector
2699 if format_selector is None:
0017d9ad 2700 req_format = self._default_format_spec(info_dict, download=download)
0760b0a7 2701 self.write_debug('Default format spec: %s' % req_format)
187986a8 2702 format_selector = self.build_format_selector(req_format)
317f7ab6 2703
fa9f30b8 2704 while True:
2705 if interactive_format_selection:
2706 req_format = input(
2707 self._format_screen('\nEnter format selector: ', self.Styles.EMPHASIS))
2708 try:
2709 format_selector = self.build_format_selector(req_format)
2710 except SyntaxError as err:
2711 self.report_error(err, tb=False, is_error=False)
2712 continue
2713
85e801a9 2714 formats_to_download = list(format_selector({
fa9f30b8 2715 'formats': formats,
85e801a9 2716 'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
2717 'incomplete_formats': (
2718 # All formats are video-only or
2719 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
2720 # all formats are audio-only
2721 or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats)),
2722 }))
fa9f30b8 2723 if interactive_format_selection and not formats_to_download:
2724 self.report_error('Requested format is not available', tb=False, is_error=False)
2725 continue
2726 break
317f7ab6 2727
dd82ffea 2728 if not formats_to_download:
b7da73eb 2729 if not self.params.get('ignore_no_formats_error'):
c0b6e5c7 2730 raise ExtractorError(
2731 'Requested format is not available. Use --list-formats for a list of available formats',
2732 expected=True, video_id=info_dict['id'], ie=info_dict['extractor'])
b62fa6d7 2733 self.report_warning('Requested format is not available')
2734 # Process what we can, even without any available formats.
2735 formats_to_download = [{}]
a13e6848 2736
0500ee3d 2737 requested_ranges = tuple(self.params.get('download_ranges', lambda *_: [{}])(info_dict, self))
5ec1b6b7 2738 best_format, downloaded_formats = formats_to_download[-1], []
b62fa6d7 2739 if download:
0500ee3d 2740 if best_format and requested_ranges:
5ec1b6b7 2741 def to_screen(*msg):
2742 self.to_screen(f'[info] {info_dict["id"]}: {" ".join(", ".join(variadic(m)) for m in msg)}')
2743
2744 to_screen(f'Downloading {len(formats_to_download)} format(s):',
2745 (f['format_id'] for f in formats_to_download))
0500ee3d 2746 if requested_ranges != ({}, ):
5ec1b6b7 2747 to_screen(f'Downloading {len(requested_ranges)} time ranges:',
fc2ba496 2748 (f'{c["start_time"]:.1f}-{c["end_time"]:.1f}' for c in requested_ranges))
a13e6848 2749 max_downloads_reached = False
5ec1b6b7 2750
0500ee3d 2751 for fmt, chapter in itertools.product(formats_to_download, requested_ranges):
5ec1b6b7 2752 new_info = self._copy_infodict(info_dict)
b7da73eb 2753 new_info.update(fmt)
3975b4d2 2754 offset, duration = info_dict.get('section_start') or 0, info_dict.get('duration') or float('inf')
fc2ba496 2755 end_time = offset + min(chapter.get('end_time', duration), duration)
3975b4d2 2756 if chapter or offset:
5ec1b6b7 2757 new_info.update({
3975b4d2 2758 'section_start': offset + chapter.get('start_time', 0),
2576d53a 2759 # duration may not be accurate. So allow deviations <1sec
2760 'section_end': end_time if end_time <= offset + duration + 1 else None,
5ec1b6b7 2761 'section_title': chapter.get('title'),
2762 'section_number': chapter.get('index'),
2763 })
2764 downloaded_formats.append(new_info)
a13e6848 2765 try:
2766 self.process_info(new_info)
2767 except MaxDownloadsReached:
2768 max_downloads_reached = True
415f8d51 2769 self._raise_pending_errors(new_info)
f46e2f9d 2770 # Remove copied info
2771 for key, val in tuple(new_info.items()):
2772 if info_dict.get(key) == val:
2773 new_info.pop(key)
a13e6848 2774 if max_downloads_reached:
2775 break
ebed8b37 2776
5ec1b6b7 2777 write_archive = {f.get('__write_download_archive', False) for f in downloaded_formats}
a13e6848 2778 assert write_archive.issubset({True, False, 'ignore'})
2779 if True in write_archive and False not in write_archive:
2780 self.record_download_archive(info_dict)
be72c624 2781
5ec1b6b7 2782 info_dict['requested_downloads'] = downloaded_formats
ed5835b4 2783 info_dict = self.run_all_pps('after_video', info_dict)
a13e6848 2784 if max_downloads_reached:
2785 raise MaxDownloadsReached()
ebed8b37 2786
49a57e70 2787 # We update the info dict with the selected best quality format (backwards compatibility)
be72c624 2788 info_dict.update(best_format)
dd82ffea
JMF
2789 return info_dict
2790
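# Illustrative embedding sketch (not from YoutubeDL.py itself): the requested_ranges handled
# above come from the 'download_ranges' callable; download_range_func from yt_dlp.utils
# (the helper behind --download-sections) builds one. Times and URL are example values.
import yt_dlp
from yt_dlp.utils import download_range_func

opts = {
    # Download only the 10s-70s section; partial downloads need ffmpeg, as checked later
    'download_ranges': download_range_func([], [(10, 70)]),
    'force_keyframes_at_cuts': True,
}
with yt_dlp.YoutubeDL(opts) as ydl:
    ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])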
98c70d6f 2791 def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
a504ced0 2792 """Select the requested subtitles and their format"""
d8a58ddc 2793 available_subs, normal_sub_langs = {}, []
98c70d6f
JMF
2794 if normal_subtitles and self.params.get('writesubtitles'):
2795 available_subs.update(normal_subtitles)
d8a58ddc 2796 normal_sub_langs = tuple(normal_subtitles.keys())
98c70d6f
JMF
2797 if automatic_captions and self.params.get('writeautomaticsub'):
2798 for lang, cap_info in automatic_captions.items():
360e1ca5
JMF
2799 if lang not in available_subs:
2800 available_subs[lang] = cap_info
2801
d2c8aadf 2802 if not available_subs or (
2803 not self.params.get('writesubtitles')
2804 and not self.params.get('writeautomaticsub')):
4d171848 2805 return None
a504ced0 2806
d8a58ddc 2807 all_sub_langs = tuple(available_subs.keys())
a504ced0 2808 if self.params.get('allsubtitles', False):
c32b0aab 2809 requested_langs = all_sub_langs
2810 elif self.params.get('subtitleslangs', False):
5314b521 2811 try:
2812 requested_langs = orderedSet_from_options(
2813 self.params.get('subtitleslangs'), {'all': all_sub_langs}, use_regex=True)
2814 except re.error as e:
2815 raise ValueError(f'Wrong regex for subtitlelangs: {e.pattern}')
d8a58ddc 2816 elif normal_sub_langs:
2817 requested_langs = ['en'] if 'en' in normal_sub_langs else normal_sub_langs[:1]
a504ced0 2818 else:
d8a58ddc 2819 requested_langs = ['en'] if 'en' in all_sub_langs else all_sub_langs[:1]
ad3dc496 2820 if requested_langs:
d2c8aadf 2821 self.to_screen(f'[info] {video_id}: Downloading subtitles: {", ".join(requested_langs)}')
a504ced0
JMF
2822
2823 formats_query = self.params.get('subtitlesformat', 'best')
2824 formats_preference = formats_query.split('/') if formats_query else []
2825 subs = {}
2826 for lang in requested_langs:
2827 formats = available_subs.get(lang)
2828 if formats is None:
86e5f3ed 2829 self.report_warning(f'{lang} subtitles not available for {video_id}')
a504ced0 2830 continue
a504ced0
JMF
2831 for ext in formats_preference:
2832 if ext == 'best':
2833 f = formats[-1]
2834 break
2835 matches = list(filter(lambda f: f['ext'] == ext, formats))
2836 if matches:
2837 f = matches[-1]
2838 break
2839 else:
2840 f = formats[-1]
2841 self.report_warning(
2842 'No subtitle format found matching "%s" for language %s, '
2843 'using %s' % (formats_query, lang, f['ext']))
2844 subs[lang] = f
2845 return subs
2846
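# Illustrative embedding sketch (not from YoutubeDL.py itself): the subtitle selection logic
# above is driven by these options; the language patterns and URL are example values.
import yt_dlp

opts = {
    'writesubtitles': True,            # consider manually created subtitles
    'writeautomaticsub': True,         # also consider automatic captions
    'subtitleslangs': ['en.*', 'de'],  # regexes are allowed, as handled above
    'subtitlesformat': 'vtt/srt/best',
    'skip_download': True,
}
with yt_dlp.YoutubeDL(opts) as ydl:
    ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])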
bb66c247 2847 def _forceprint(self, key, info_dict):
2848 if info_dict is None:
2849 return
2850 info_copy = info_dict.copy()
2851 info_copy['formats_table'] = self.render_formats_table(info_dict)
2852 info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)
2853 info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
2854 info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))
2855
2856 def format_tmpl(tmpl):
48c8424b 2857 mobj = re.fullmatch(r'([\w.:,]|-\d|(?P<dict>{([\w.:,]|-\d)+}))+=?', tmpl)
07a1250e 2858 if not mobj:
2859 return tmpl
48c8424b 2860
2861 fmt = '%({})s'
2862 if tmpl.startswith('{'):
2863 tmpl = f'.{tmpl}'
2864 if tmpl.endswith('='):
2865 tmpl, fmt = tmpl[:-1], '{0} = %({0})#j'
2866 return '\n'.join(map(fmt.format, [tmpl] if mobj.group('dict') else tmpl.split(',')))
8130779d 2867
bb66c247 2868 for tmpl in self.params['forceprint'].get(key, []):
2869 self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))
2870
2871 for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):
5127e92a 2872 filename = self.prepare_filename(info_dict, outtmpl=file_tmpl)
bb66c247 2873 tmpl = format_tmpl(tmpl)
2874 self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
8d93e69d 2875 if self._ensure_dir_exists(filename):
86e5f3ed 2876 with open(filename, 'a', encoding='utf-8') as f:
8d93e69d 2877 f.write(self.evaluate_outtmpl(tmpl, info_copy) + '\n')
ca30f449 2878
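# Illustrative embedding sketch (not from YoutubeDL.py itself): _forceprint() above backs the
# 'forceprint' and 'print_to_file' options. Per format_tmpl(), 'title,id' expands to
# '%(title)s\n%(id)s' and a trailing '=' prints 'field = <json value>'.
import yt_dlp

opts = {
    'skip_download': True,
    'quiet': True,
    'forceprint': {'video': ['title,id', 'duration=']},
}
with yt_dlp.YoutubeDL(opts) as ydl:
    ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])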
d06daf23 2879 def __forced_printings(self, info_dict, filename, incomplete):
53c18592 2880 def print_mandatory(field, actual_field=None):
2881 if actual_field is None:
2882 actual_field = field
d06daf23 2883 if (self.params.get('force%s' % field, False)
53c18592 2884 and (not incomplete or info_dict.get(actual_field) is not None)):
2885 self.to_stdout(info_dict[actual_field])
d06daf23
S
2886
2887 def print_optional(field):
2888 if (self.params.get('force%s' % field, False)
2889 and info_dict.get(field) is not None):
2890 self.to_stdout(info_dict[field])
2891
53c18592 2892 info_dict = info_dict.copy()
2893 if filename is not None:
2894 info_dict['filename'] = filename
2895 if info_dict.get('requested_formats') is not None:
2896 # For RTMP URLs, also include the playpath
2897 info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
10331a26 2898 elif info_dict.get('url'):
53c18592 2899 info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2900
bb66c247 2901 if (self.params.get('forcejson')
2902 or self.params['forceprint'].get('video')
2903 or self.params['print_to_file'].get('video')):
2b8a2973 2904 self.post_extract(info_dict)
bb66c247 2905 self._forceprint('video', info_dict)
53c18592 2906
d06daf23
S
2907 print_mandatory('title')
2908 print_mandatory('id')
53c18592 2909 print_mandatory('url', 'urls')
d06daf23
S
2910 print_optional('thumbnail')
2911 print_optional('description')
53c18592 2912 print_optional('filename')
b868936c 2913 if self.params.get('forceduration') and info_dict.get('duration') is not None:
d06daf23
S
2914 self.to_stdout(formatSeconds(info_dict['duration']))
2915 print_mandatory('format')
53c18592 2916
2b8a2973 2917 if self.params.get('forcejson'):
6e84b215 2918 self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
d06daf23 2919
e8e73840 2920 def dl(self, name, info, subtitle=False, test=False):
88acdbc2 2921 if not info.get('url'):
1151c407 2922 self.raise_no_formats(info, True)
e8e73840 2923
2924 if test:
2925 verbose = self.params.get('verbose')
2926 params = {
2927 'test': True,
a169858f 2928 'quiet': self.params.get('quiet') or not verbose,
e8e73840 2929 'verbose': verbose,
2930 'noprogress': not verbose,
2931 'nopart': True,
2932 'skip_unavailable_fragments': False,
2933 'keep_fragments': False,
2934 'overwrites': True,
2935 '_no_ytdl_file': True,
2936 }
2937 else:
2938 params = self.params
96fccc10 2939 fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
e8e73840 2940 if not test:
2941 for ph in self._progress_hooks:
2942 fd.add_progress_hook(ph)
42676437
M
2943 urls = '", "'.join(
2944 (f['url'].split(',')[0] + ',<data>' if f['url'].startswith('data:') else f['url'])
2945 for f in info.get('requested_formats', []) or [info])
3a408f9d 2946 self.write_debug(f'Invoking {fd.FD_NAME} downloader on "{urls}"')
03b4de72 2947
adbc4ec4
THD
2948 # Note: Ideally, info should be deep-copied so that hooks cannot modify it.
2949 # But it may contain objects that are not deep-copyable
2950 new_info = self._copy_infodict(info)
e8e73840 2951 if new_info.get('http_headers') is None:
2952 new_info['http_headers'] = self._calc_headers(new_info)
2953 return fd.download(name, new_info, subtitle)
2954
e04938ab 2955 def existing_file(self, filepaths, *, default_overwrite=True):
2956 existing_files = list(filter(os.path.exists, orderedSet(filepaths)))
2957 if existing_files and not self.params.get('overwrites', default_overwrite):
2958 return existing_files[0]
2959
2960 for file in existing_files:
2961 self.report_file_delete(file)
2962 os.remove(file)
2963 return None
2964
8222d8de 2965 def process_info(self, info_dict):
09b49e1f 2966 """Process a single resolved IE result. (Modifies it in-place)"""
8222d8de
JMF
2967
2968 assert info_dict.get('_type', 'video') == 'video'
f46e2f9d 2969 original_infodict = info_dict
fd288278 2970
4513a41a 2971 if 'format' not in info_dict and 'ext' in info_dict:
8222d8de
JMF
2972 info_dict['format'] = info_dict['ext']
2973
c77495e3 2974 if self._match_entry(info_dict) is not None:
9e907ebd 2975 info_dict['__write_download_archive'] = 'ignore'
8222d8de
JMF
2976 return
2977
09b49e1f 2978 # Does nothing under normal operation - for backward compatibility of process_info
277d6ff5 2979 self.post_extract(info_dict)
0c14d66a 2980 self._num_downloads += 1
8222d8de 2981
dcf64d43 2982 # info_dict['_filename'] needs to be set for backward compatibility
de6000d9 2983 info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2984 temp_filename = self.prepare_filename(info_dict, 'temp')
0202b52a 2985 files_to_move = {}
8222d8de
JMF
2986
2987 # Forced printings
4513a41a 2988 self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
8222d8de 2989
ca6d59d2 2990 def check_max_downloads():
2991 if self._num_downloads >= float(self.params.get('max_downloads') or 'inf'):
2992 raise MaxDownloadsReached()
2993
b7b04c78 2994 if self.params.get('simulate'):
9e907ebd 2995 info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
ca6d59d2 2996 check_max_downloads()
8222d8de
JMF
2997 return
2998
de6000d9 2999 if full_filename is None:
8222d8de 3000 return
e92caff5 3001 if not self._ensure_dir_exists(encodeFilename(full_filename)):
0202b52a 3002 return
e92caff5 3003 if not self._ensure_dir_exists(encodeFilename(temp_filename)):
8222d8de
JMF
3004 return
3005
80c03fa9 3006 if self._write_description('video', info_dict,
3007 self.prepare_filename(info_dict, 'description')) is None:
3008 return
3009
3010 sub_files = self._write_subtitles(info_dict, temp_filename)
3011 if sub_files is None:
3012 return
3013 files_to_move.update(dict(sub_files))
3014
3015 thumb_files = self._write_thumbnails(
3016 'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
3017 if thumb_files is None:
3018 return
3019 files_to_move.update(dict(thumb_files))
8222d8de 3020
80c03fa9 3021 infofn = self.prepare_filename(info_dict, 'infojson')
3022 _infojson_written = self._write_info_json('video', info_dict, infofn)
3023 if _infojson_written:
dac5df5a 3024 info_dict['infojson_filename'] = infofn
e75bb0d6 3025 # For backward compatibility, even though it was a private field
80c03fa9 3026 info_dict['__infojson_filename'] = infofn
3027 elif _infojson_written is None:
3028 return
3029
3030 # Note: Annotations are deprecated
3031 annofn = None
1fb07d10 3032 if self.params.get('writeannotations', False):
de6000d9 3033 annofn = self.prepare_filename(info_dict, 'annotation')
80c03fa9 3034 if annofn:
e92caff5 3035 if not self._ensure_dir_exists(encodeFilename(annofn)):
0202b52a 3036 return
0c3d0f51 3037 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
6febd1c1 3038 self.to_screen('[info] Video annotations are already present')
ffddb112
RA
3039 elif not info_dict.get('annotations'):
3040 self.report_warning('There are no annotations to write.')
7b6fefc9
PH
3041 else:
3042 try:
6febd1c1 3043 self.to_screen('[info] Writing video annotations to: ' + annofn)
86e5f3ed 3044 with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
7b6fefc9
PH
3045 annofile.write(info_dict['annotations'])
3046 except (KeyError, TypeError):
6febd1c1 3047 self.report_warning('There are no annotations to write.')
86e5f3ed 3048 except OSError:
6febd1c1 3049 self.report_error('Cannot write annotations file: ' + annofn)
7b6fefc9 3050 return
1fb07d10 3051
732044af 3052 # Write internet shortcut files
08438d2c 3053 def _write_link_file(link_type):
60f3e995 3054 url = try_get(info_dict['webpage_url'], iri_to_uri)
3055 if not url:
3056 self.report_warning(
3057 f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
3058 return True
08438d2c 3059 linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
0e6b018a
Z
3060 if not self._ensure_dir_exists(encodeFilename(linkfn)):
3061 return False
10e3742e 3062 if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
08438d2c 3063 self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
3064 return True
3065 try:
3066 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
86e5f3ed 3067 with open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
3068 newline='\r\n' if link_type == 'url' else '\n') as linkfile:
60f3e995 3069 template_vars = {'url': url}
08438d2c 3070 if link_type == 'desktop':
3071 template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
3072 linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
86e5f3ed 3073 except OSError:
08438d2c 3074 self.report_error(f'Cannot write internet shortcut {linkfn}')
3075 return False
732044af 3076 return True
3077
08438d2c 3078 write_links = {
3079 'url': self.params.get('writeurllink'),
3080 'webloc': self.params.get('writewebloclink'),
3081 'desktop': self.params.get('writedesktoplink'),
3082 }
3083 if self.params.get('writelink'):
3084 link_type = ('webloc' if sys.platform == 'darwin'
3085 else 'desktop' if sys.platform.startswith('linux')
3086 else 'url')
3087 write_links[link_type] = True
3088
3089 if any(should_write and not _write_link_file(link_type)
3090 for link_type, should_write in write_links.items()):
3091 return
732044af 3092
f46e2f9d 3093 def replace_info_dict(new_info):
3094 nonlocal info_dict
3095 if new_info == info_dict:
3096 return
3097 info_dict.clear()
3098 info_dict.update(new_info)
3099
415f8d51 3100 new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
3101 replace_info_dict(new_info)
56d868db 3102
a13e6848 3103 if self.params.get('skip_download'):
56d868db 3104 info_dict['filepath'] = temp_filename
3105 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
3106 info_dict['__files_to_move'] = files_to_move
f46e2f9d 3107 replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
9e907ebd 3108 info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
56d868db 3109 else:
3110 # Download
b868936c 3111 info_dict.setdefault('__postprocessors', [])
4340deca 3112 try:
0202b52a 3113
e04938ab 3114 def existing_video_file(*filepaths):
6b591b29 3115 ext = info_dict.get('ext')
e04938ab 3116 converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
3117 file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
3118 default_overwrite=False)
3119 if file:
3120 info_dict['ext'] = os.path.splitext(file)[1][1:]
3121 return file
0202b52a 3122
7b2c3f47 3123 fd, success = None, True
fccf90e7 3124 if info_dict.get('protocol') or info_dict.get('url'):
56ba69e4 3125 fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
71df9b7f 3126 if fd is not FFmpegFD and 'no-direct-merge' not in self.params['compat_opts'] and (
56ba69e4 3127 info_dict.get('section_start') or info_dict.get('section_end')):
7b2c3f47 3128 msg = ('This format cannot be partially downloaded' if FFmpegFD.available()
56ba69e4 3129 else 'You have requested downloading the video partially, but ffmpeg is not installed')
3130 self.report_error(f'{msg}. Aborting')
5ec1b6b7 3131 return
5ec1b6b7 3132
4340deca 3133 if info_dict.get('requested_formats') is not None:
81cd954a 3134 requested_formats = info_dict['requested_formats']
0202b52a 3135 old_ext = info_dict['ext']
4e3b637d 3136 if self.params.get('merge_output_format') is None:
4e3b637d 3137 if (info_dict['ext'] == 'webm'
3138 and info_dict.get('thumbnails')
3139 # check with type instead of pp_key, __name__, or isinstance
3140 # since we don't want any custom PPs to trigger this
c487cf00 3141 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])): # noqa: E721
4e3b637d 3142 info_dict['ext'] = 'mkv'
3143 self.report_warning(
3144 'webm doesn\'t support embedding a thumbnail, mkv will be used')
124bc071 3145 new_ext = info_dict['ext']
0202b52a 3146
124bc071 3147 def correct_ext(filename, ext=new_ext):
96fccc10 3148 if filename == '-':
3149 return filename
0202b52a 3150 filename_real_ext = os.path.splitext(filename)[1][1:]
3151 filename_wo_ext = (
3152 os.path.splitext(filename)[0]
124bc071 3153 if filename_real_ext in (old_ext, new_ext)
0202b52a 3154 else filename)
86e5f3ed 3155 return f'{filename_wo_ext}.{ext}'
0202b52a 3156
38c6902b 3157 # Ensure filename always has a correct extension for successful merge
0202b52a 3158 full_filename = correct_ext(full_filename)
3159 temp_filename = correct_ext(temp_filename)
e04938ab 3160 dl_filename = existing_video_file(full_filename, temp_filename)
1ea24129 3161 info_dict['__real_download'] = False
18e674b4 3162
7b2c3f47 3163 merger = FFmpegMergerPP(self)
adbc4ec4 3164 downloaded = []
dbf5416a 3165 if dl_filename is not None:
6c7274ec 3166 self.report_file_already_downloaded(dl_filename)
adbc4ec4
THD
3167 elif fd:
3168 for f in requested_formats if fd != FFmpegFD else []:
3169 f['filepath'] = fname = prepend_extension(
3170 correct_ext(temp_filename, info_dict['ext']),
3171 'f%s' % f['format_id'], info_dict['ext'])
3172 downloaded.append(fname)
dbf5416a 3173 info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
3174 success, real_download = self.dl(temp_filename, info_dict)
3175 info_dict['__real_download'] = real_download
18e674b4 3176 else:
18e674b4 3177 if self.params.get('allow_unplayable_formats'):
3178 self.report_warning(
3179 'You have requested merging of multiple formats '
3180 'while also allowing unplayable formats to be downloaded. '
3181 'The formats won\'t be merged to prevent data corruption.')
3182 elif not merger.available:
e8969bda 3183 msg = 'You have requested merging of multiple formats but ffmpeg is not installed'
3184 if not self.params.get('ignoreerrors'):
3185 self.report_error(f'{msg}. Aborting due to --abort-on-error')
3186 return
3187 self.report_warning(f'{msg}. The formats won\'t be merged')
18e674b4 3188
96fccc10 3189 if temp_filename == '-':
adbc4ec4 3190 reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
96fccc10 3191 else 'but the formats are incompatible for simultaneous download' if merger.available
3192 else 'but ffmpeg is not installed')
3193 self.report_warning(
3194 f'You have requested downloading multiple formats to stdout {reason}. '
3195 'The formats will be streamed one after the other')
3196 fname = temp_filename
dbf5416a 3197 for f in requested_formats:
3198 new_info = dict(info_dict)
3199 del new_info['requested_formats']
3200 new_info.update(f)
96fccc10 3201 if temp_filename != '-':
124bc071 3202 fname = prepend_extension(
3203 correct_ext(temp_filename, new_info['ext']),
3204 'f%s' % f['format_id'], new_info['ext'])
96fccc10 3205 if not self._ensure_dir_exists(fname):
3206 return
a21e0ab1 3207 f['filepath'] = fname
96fccc10 3208 downloaded.append(fname)
dbf5416a 3209 partial_success, real_download = self.dl(fname, new_info)
3210 info_dict['__real_download'] = info_dict['__real_download'] or real_download
3211 success = success and partial_success
adbc4ec4
THD
3212
3213 if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
3214 info_dict['__postprocessors'].append(merger)
3215 info_dict['__files_to_merge'] = downloaded
3216 # Even if there were no downloads, it is being merged only now
3217 info_dict['__real_download'] = True
3218 else:
3219 for file in downloaded:
3220 files_to_move[file] = None
4340deca
P
3221 else:
3222 # Just a single file
e04938ab 3223 dl_filename = existing_video_file(full_filename, temp_filename)
6c7274ec 3224 if dl_filename is None or dl_filename == temp_filename:
3225 # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
3226 # So we should try to resume the download
e8e73840 3227 success, real_download = self.dl(temp_filename, info_dict)
0202b52a 3228 info_dict['__real_download'] = real_download
6c7274ec 3229 else:
3230 self.report_file_already_downloaded(dl_filename)
0202b52a 3231
0202b52a 3232 dl_filename = dl_filename or temp_filename
c571435f 3233 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
0202b52a 3234
3158150c 3235 except network_exceptions as err:
7960b056 3236 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
4340deca 3237 return
86e5f3ed 3238 except OSError as err:
4340deca
P
3239 raise UnavailableVideoError(err)
3240 except (ContentTooShortError, ) as err:
86e5f3ed 3241 self.report_error(f'content too short (expected {err.expected} bytes and served {err.downloaded})')
4340deca 3242 return
8222d8de 3243
415f8d51 3244 self._raise_pending_errors(info_dict)
de6000d9 3245 if success and full_filename != '-':
f17f8651 3246
fd7cfb64 3247 def fixup():
3248 do_fixup = True
3249 fixup_policy = self.params.get('fixup')
3250 vid = info_dict['id']
3251
3252 if fixup_policy in ('ignore', 'never'):
3253 return
3254 elif fixup_policy == 'warn':
3fe75fdc 3255 do_fixup = 'warn'
f89b3e2d 3256 elif fixup_policy != 'force':
3257 assert fixup_policy in ('detect_or_warn', None)
3258 if not info_dict.get('__real_download'):
3259 do_fixup = False
fd7cfb64 3260
3261 def ffmpeg_fixup(cndn, msg, cls):
3fe75fdc 3262 if not (do_fixup and cndn):
fd7cfb64 3263 return
3fe75fdc 3264 elif do_fixup == 'warn':
fd7cfb64 3265 self.report_warning(f'{vid}: {msg}')
3266 return
3267 pp = cls(self)
3268 if pp.available:
3269 info_dict['__postprocessors'].append(pp)
3270 else:
3271 self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
3272
3273 stretched_ratio = info_dict.get('stretched_ratio')
ca9def71
LNO
3274 ffmpeg_fixup(stretched_ratio not in (1, None),
3275 f'Non-uniform pixel ratio {stretched_ratio}',
3276 FFmpegFixupStretchedPP)
fd7cfb64 3277
993191c0 3278 downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
24146491 3279 downloader = downloader.FD_NAME if downloader else None
adbc4ec4 3280
ca9def71
LNO
3281 ext = info_dict.get('ext')
3282 postprocessed_by_ffmpeg = info_dict.get('requested_formats') or any((
3283 isinstance(pp, FFmpegVideoConvertorPP)
3284 and resolve_recode_mapping(ext, pp.mapping)[0] not in (ext, None)
3285 ) for pp in self._pps['post_process'])
3286
3287 if not postprocessed_by_ffmpeg:
3288 ffmpeg_fixup(ext == 'm4a' and info_dict.get('container') == 'm4a_dash',
f2df4071 3289 'writing DASH m4a. Only some players support this container',
3290 FFmpegFixupM4aPP)
24146491 3291 ffmpeg_fixup(downloader == 'hlsnative' and not self.params.get('hls_use_mpegts')
494f5230 3292 or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None,
adbc4ec4
THD
3293 'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
3294 FFmpegFixupM3u8PP)
3295 ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD',
3296 'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
3297
24146491 3298 ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
3299 ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed duration detected', FFmpegFixupDurationPP)
fd7cfb64 3300
3301 fixup()
8222d8de 3302 try:
f46e2f9d 3303 replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
af819c21 3304 except PostProcessingError as err:
3305 self.report_error('Postprocessing: %s' % str(err))
8222d8de 3306 return
ab8e5e51
AM
3307 try:
3308 for ph in self._post_hooks:
23c1a667 3309 ph(info_dict['filepath'])
ab8e5e51
AM
3310 except Exception as err:
3311 self.report_error('post hooks: %s' % str(err))
3312 return
9e907ebd 3313 info_dict['__write_download_archive'] = True
2d30509f 3314
c487cf00 3315 assert info_dict is original_infodict # Make sure the info_dict was modified in-place
a13e6848 3316 if self.params.get('force_write_download_archive'):
9e907ebd 3317 info_dict['__write_download_archive'] = True
ca6d59d2 3318 check_max_downloads()
8222d8de 3319
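# Illustrative embedding sketch (not from YoutubeDL.py itself): the _post_hooks invoked near
# the end of process_info() and the downloader progress hooks can be registered via options.
import yt_dlp

def on_progress(d):
    if d['status'] == 'finished':
        print('Downloaded', d.get('filename'))

def on_done(filepath):
    # Post hooks receive the final filepath, as called from process_info() above
    print('Finished processing', filepath)

opts = {'progress_hooks': [on_progress], 'post_hooks': [on_done]}
with yt_dlp.YoutubeDL(opts) as ydl:
    ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])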
aa9369a2 3320 def __download_wrapper(self, func):
3321 @functools.wraps(func)
3322 def wrapper(*args, **kwargs):
3323 try:
3324 res = func(*args, **kwargs)
3325 except UnavailableVideoError as e:
3326 self.report_error(e)
b222c271 3327 except DownloadCancelled as e:
3328 self.to_screen(f'[info] {e}')
3329 if not self.params.get('break_per_url'):
3330 raise
fd404bec 3331 self._num_downloads = 0
aa9369a2 3332 else:
3333 if self.params.get('dump_single_json', False):
3334 self.post_extract(res)
3335 self.to_stdout(json.dumps(self.sanitize_info(res)))
3336 return wrapper
3337
8222d8de
JMF
3338 def download(self, url_list):
3339 """Download a given list of URLs."""
aa9369a2 3340 url_list = variadic(url_list) # Passing a single URL is a common mistake
bf1824b3 3341 outtmpl = self.params['outtmpl']['default']
3089bc74
S
3342 if (len(url_list) > 1
3343 and outtmpl != '-'
3344 and '%' not in outtmpl
3345 and self.params.get('max_downloads') != 1):
acd69589 3346 raise SameFileError(outtmpl)
8222d8de
JMF
3347
3348 for url in url_list:
aa9369a2 3349 self.__download_wrapper(self.extract_info)(
3350 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
8222d8de
JMF
3351
3352 return self._download_retcode
3353
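# Illustrative embedding sketch (not from YoutubeDL.py itself): the basic use of download().
# Note that it expects a list of URLs, as the variadic() call above hints.
import yt_dlp

opts = {'outtmpl': '%(title)s [%(id)s].%(ext)s'}
with yt_dlp.YoutubeDL(opts) as ydl:
    retcode = ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
print('retcode:', retcode)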
1dcc4c0c 3354 def download_with_info_file(self, info_filename):
31bd3925
JMF
3355 with contextlib.closing(fileinput.FileInput(
3356 [info_filename], mode='r',
3357 openhook=fileinput.hook_encoded('utf-8'))) as f:
3358 # FileInput doesn't have a read method, so we can't call json.load
8012d892 3359 info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
d4943898 3360 try:
aa9369a2 3361 self.__download_wrapper(self.process_ie_result)(info, download=True)
f2ebc5c7 3362 except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
bf5f605e 3363 if not isinstance(e, EntryNotInPlaylist):
3364 self.to_stderr('\r')
d4943898
JMF
3365 webpage_url = info.get('webpage_url')
3366 if webpage_url is not None:
aa9369a2 3367 self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
d4943898
JMF
3368 return self.download([webpage_url])
3369 else:
3370 raise
3371 return self._download_retcode
1dcc4c0c 3372
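# Illustrative embedding sketch (not from YoutubeDL.py itself): re-running a download from a
# previously written .info.json, the API counterpart of --load-info-json. Filenames are
# example assumptions.
import yt_dlp

# First run: write the metadata without downloading media
with yt_dlp.YoutubeDL({'skip_download': True, 'writeinfojson': True, 'outtmpl': 'clip'}) as ydl:
    ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])

# Later run: download using only the stored metadata
with yt_dlp.YoutubeDL() as ydl:
    ydl.download_with_info_file('clip.info.json')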
cb202fd2 3373 @staticmethod
8012d892 3374 def sanitize_info(info_dict, remove_private_keys=False):
3375 ''' Sanitize the infodict for converting to json '''
3ad56b42 3376 if info_dict is None:
3377 return info_dict
6e84b215 3378 info_dict.setdefault('epoch', int(time.time()))
6a5a30f9 3379 info_dict.setdefault('_type', 'video')
b5e7a2e6 3380 info_dict.setdefault('_version', {
3381 'version': __version__,
3382 'current_git_head': current_git_head(),
3383 'release_git_head': RELEASE_GIT_HEAD,
3384 'repository': REPOSITORY,
3385 })
09b49e1f 3386
8012d892 3387 if remove_private_keys:
0a5a191a 3388 reject = lambda k, v: v is None or k.startswith('__') or k in {
f46e2f9d 3389 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
0a5a191a 3390 'entries', 'filepath', '_filename', 'infojson_filename', 'original_url', 'playlist_autonumber',
6e84b215 3391 }
ae8f99e6 3392 else:
09b49e1f 3393 reject = lambda k, v: False
adbc4ec4
THD
3394
3395 def filter_fn(obj):
3396 if isinstance(obj, dict):
3397 return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
3398 elif isinstance(obj, (list, tuple, set, LazyList)):
3399 return list(map(filter_fn, obj))
3400 elif obj is None or isinstance(obj, (str, int, float, bool)):
3401 return obj
3402 else:
3403 return repr(obj)
3404
5226731e 3405 return filter_fn(info_dict)
cb202fd2 3406
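# Illustrative embedding sketch (not from YoutubeDL.py itself): sanitize_info() is the
# supported way to make an extracted info dict JSON-serializable when embedding.
import json
import yt_dlp

with yt_dlp.YoutubeDL() as ydl:
    info = ydl.extract_info('https://www.youtube.com/watch?v=BaW_jenozKc', download=False)
    print(json.dumps(ydl.sanitize_info(info)))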
8012d892 3407 @staticmethod
3408 def filter_requested_info(info_dict, actually_filter=True):
3409 ''' Alias of sanitize_info for backward compatibility '''
3410 return YoutubeDL.sanitize_info(info_dict, actually_filter)
3411
43d7f5a5 3412 def _delete_downloaded_files(self, *files_to_delete, info={}, msg=None):
3413 for filename in set(filter(None, files_to_delete)):
3414 if msg:
3415 self.to_screen(msg % filename)
3416 try:
3417 os.remove(filename)
3418 except OSError:
3419 self.report_warning(f'Unable to delete file {filename}')
3420 if filename in info.get('__files_to_move', []): # NB: Delete even if None
3421 del info['__files_to_move'][filename]
3422
ed5835b4 3423 @staticmethod
3424 def post_extract(info_dict):
3425 def actual_post_extract(info_dict):
3426 if info_dict.get('_type') in ('playlist', 'multi_video'):
3427 for video_dict in info_dict.get('entries', {}):
3428 actual_post_extract(video_dict or {})
3429 return
3430
09b49e1f 3431 post_extractor = info_dict.pop('__post_extractor', None) or (lambda: {})
3432 info_dict.update(post_extractor())
ed5835b4 3433
3434 actual_post_extract(info_dict or {})
3435
dcf64d43 3436 def run_pp(self, pp, infodict):
5bfa4862 3437 files_to_delete = []
dcf64d43 3438 if '__files_to_move' not in infodict:
3439 infodict['__files_to_move'] = {}
b1940459 3440 try:
3441 files_to_delete, infodict = pp.run(infodict)
3442 except PostProcessingError as e:
3443 # Must be True and not 'only_download'
3444 if self.params.get('ignoreerrors') is True:
3445 self.report_error(e)
3446 return infodict
3447 raise
3448
5bfa4862 3449 if not files_to_delete:
dcf64d43 3450 return infodict
5bfa4862 3451 if self.params.get('keepvideo', False):
3452 for f in files_to_delete:
dcf64d43 3453 infodict['__files_to_move'].setdefault(f, '')
5bfa4862 3454 else:
43d7f5a5 3455 self._delete_downloaded_files(
3456 *files_to_delete, info=infodict, msg='Deleting original file %s (pass -k to keep)')
dcf64d43 3457 return infodict
5bfa4862 3458
ed5835b4 3459 def run_all_pps(self, key, info, *, additional_pps=None):
bb66c247 3460 self._forceprint(key, info)
ed5835b4 3461 for pp in (additional_pps or []) + self._pps[key]:
dc5f409c 3462 info = self.run_pp(pp, info)
ed5835b4 3463 return info
277d6ff5 3464
56d868db 3465 def pre_process(self, ie_info, key='pre_process', files_to_move=None):
5bfa4862 3466 info = dict(ie_info)
56d868db 3467 info['__files_to_move'] = files_to_move or {}
415f8d51 3468 try:
3469 info = self.run_all_pps(key, info)
3470 except PostProcessingError as err:
3471 msg = f'Preprocessing: {err}'
3472 info.setdefault('__pending_error', msg)
3473 self.report_error(msg, is_error=False)
56d868db 3474 return info, info.pop('__files_to_move', None)
5bfa4862 3475
f46e2f9d 3476 def post_process(self, filename, info, files_to_move=None):
8222d8de 3477 """Run all the postprocessors on the given file."""
8222d8de 3478 info['filepath'] = filename
dcf64d43 3479 info['__files_to_move'] = files_to_move or {}
ed5835b4 3480 info = self.run_all_pps('post_process', info, additional_pps=info.get('__postprocessors'))
dcf64d43 3481 info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3482 del info['__files_to_move']
ed5835b4 3483 return self.run_all_pps('after_move', info)
c1c9a79c 3484
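# Illustrative embedding sketch (not from YoutubeDL.py itself): a minimal custom postprocessor
# plugged into the run_all_pps() pipeline above.
import yt_dlp

class MyCustomPP(yt_dlp.postprocessor.PostProcessor):
    def run(self, info):
        self.to_screen('Doing stuff')
        return [], info  # (files_to_delete, updated info dict), as run_pp() expects

with yt_dlp.YoutubeDL() as ydl:
    ydl.add_post_processor(MyCustomPP(), when='pre_process')
    ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])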
5db07df6 3485 def _make_archive_id(self, info_dict):
e9fef7ee
S
3486 video_id = info_dict.get('id')
3487 if not video_id:
3488 return
5db07df6
PH
3489 # Future-proof against any change in case
3490 # and backwards compatibility with prior versions
e9fef7ee 3491 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
7012b23c 3492 if extractor is None:
1211bb6d
S
3493 url = str_or_none(info_dict.get('url'))
3494 if not url:
3495 return
e9fef7ee 3496 # Try to find matching extractor for the URL and take its ie_key
8b7491c8 3497 for ie_key, ie in self._ies.items():
1211bb6d 3498 if ie.suitable(url):
8b7491c8 3499 extractor = ie_key
e9fef7ee
S
3500 break
3501 else:
3502 return
0647d925 3503 return make_archive_id(extractor, video_id)
5db07df6
PH
3504
3505 def in_download_archive(self, info_dict):
ae103564 3506 if not self.archive:
5db07df6
PH
3507 return False
3508
1e8fe57e 3509 vid_ids = [self._make_archive_id(info_dict)]
c200096c 3510 vid_ids.extend(info_dict.get('_old_archive_ids') or [])
1e8fe57e 3511 return any(id_ in self.archive for id_ in vid_ids)
c1c9a79c
PH
3512
3513 def record_download_archive(self, info_dict):
3514 fn = self.params.get('download_archive')
3515 if fn is None:
3516 return
5db07df6
PH
3517 vid_id = self._make_archive_id(info_dict)
3518 assert vid_id
ae103564 3519
a13e6848 3520 self.write_debug(f'Adding to archive: {vid_id}')
9c935fbc 3521 if is_path_like(fn):
ae103564 3522 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3523 archive_file.write(vid_id + '\n')
a45e8619 3524 self.archive.add(vid_id)
dd82ffea 3525
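# Illustrative embedding sketch (not from YoutubeDL.py itself): the archive consulted by
# in_download_archive() and written by record_download_archive() is a plain text file with
# one '<extractor> <video id>' entry per line; the path below is an example.
import yt_dlp

opts = {'download_archive': 'downloaded.txt'}  # skip anything already recorded here
with yt_dlp.YoutubeDL(opts) as ydl:
    ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])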
8c51aa65 3526 @staticmethod
8abeeb94 3527 def format_resolution(format, default='unknown'):
9359f3d4 3528 if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
fb04e403 3529 return 'audio only'
f49d89ee
PH
3530 if format.get('resolution') is not None:
3531 return format['resolution']
35615307 3532 if format.get('width') and format.get('height'):
ff51ed58 3533 return '%dx%d' % (format['width'], format['height'])
35615307 3534 elif format.get('height'):
ff51ed58 3535 return '%sp' % format['height']
35615307 3536 elif format.get('width'):
ff51ed58 3537 return '%dx?' % format['width']
3538 return default
8c51aa65 3539
8130779d 3540 def _list_format_headers(self, *headers):
3541 if self.params.get('listformats_table', True) is not False:
591bb9d3 3542 return [self._format_out(header, self.Styles.HEADERS) for header in headers]
8130779d 3543 return headers
3544
c57f7757
PH
3545 def _format_note(self, fdict):
3546 res = ''
3547 if fdict.get('ext') in ['f4f', 'f4m']:
f304da8a 3548 res += '(unsupported)'
32f90364
PH
3549 if fdict.get('language'):
3550 if res:
3551 res += ' '
f304da8a 3552 res += '[%s]' % fdict['language']
c57f7757 3553 if fdict.get('format_note') is not None:
f304da8a 3554 if res:
3555 res += ' '
3556 res += fdict['format_note']
c57f7757 3557 if fdict.get('tbr') is not None:
f304da8a 3558 if res:
3559 res += ', '
3560 res += '%4dk' % fdict['tbr']
c57f7757
PH
3561 if fdict.get('container') is not None:
3562 if res:
3563 res += ', '
3564 res += '%s container' % fdict['container']
3089bc74
S
3565 if (fdict.get('vcodec') is not None
3566 and fdict.get('vcodec') != 'none'):
c57f7757
PH
3567 if res:
3568 res += ', '
3569 res += fdict['vcodec']
91c7271a 3570 if fdict.get('vbr') is not None:
c57f7757
PH
3571 res += '@'
3572 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3573 res += 'video@'
3574 if fdict.get('vbr') is not None:
3575 res += '%4dk' % fdict['vbr']
fbb21cf5 3576 if fdict.get('fps') is not None:
5d583bdf
S
3577 if res:
3578 res += ', '
3579 res += '%sfps' % fdict['fps']
c57f7757
PH
3580 if fdict.get('acodec') is not None:
3581 if res:
3582 res += ', '
3583 if fdict['acodec'] == 'none':
3584 res += 'video only'
3585 else:
3586 res += '%-5s' % fdict['acodec']
3587 elif fdict.get('abr') is not None:
3588 if res:
3589 res += ', '
3590 res += 'audio'
3591 if fdict.get('abr') is not None:
3592 res += '@%3dk' % fdict['abr']
3593 if fdict.get('asr') is not None:
3594 res += ' (%5dHz)' % fdict['asr']
3595 if fdict.get('filesize') is not None:
3596 if res:
3597 res += ', '
3598 res += format_bytes(fdict['filesize'])
9732d77e
PH
3599 elif fdict.get('filesize_approx') is not None:
3600 if res:
3601 res += ', '
3602 res += '~' + format_bytes(fdict['filesize_approx'])
c57f7757 3603 return res
91c7271a 3604
aebb4f4b 3605 def _get_formats(self, info_dict):
3606 if info_dict.get('formats') is None:
3607 if info_dict.get('url') and info_dict.get('_type', 'video') == 'video':
3608 return [info_dict]
3609 return []
3610 return info_dict['formats']
b69fd25c 3611
aebb4f4b 3612 def render_formats_table(self, info_dict):
3613 formats = self._get_formats(info_dict)
3614 if not formats:
3615 return
8130779d 3616 if not self.params.get('listformats_table', True) is not False:
76d321f6 3617 table = [
3618 [
3619 format_field(f, 'format_id'),
3620 format_field(f, 'ext'),
3621 self.format_resolution(f),
8130779d 3622 self._format_note(f)
d5d1df8a 3623 ] for f in formats if (f.get('preference') or 0) >= -1000]
8130779d 3624 return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)
3625
d816f61f 3626 def simplified_codec(f, field):
3627 assert field in ('acodec', 'vcodec')
3628 codec = f.get(field, 'unknown')
f5ea4748 3629 if not codec:
3630 return 'unknown'
3631 elif codec != 'none':
d816f61f 3632 return '.'.join(codec.split('.')[:4])
3633
3634 if field == 'vcodec' and f.get('acodec') == 'none':
3635 return 'images'
3636 elif field == 'acodec' and f.get('vcodec') == 'none':
3637 return ''
3638 return self._format_out('audio only' if field == 'vcodec' else 'video only',
3639 self.Styles.SUPPRESS)
3640
591bb9d3 3641 delim = self._format_out('\u2502', self.Styles.DELIM, '|', test_encoding=True)
8130779d 3642 table = [
3643 [
591bb9d3 3644 self._format_out(format_field(f, 'format_id'), self.Styles.ID),
8130779d 3645 format_field(f, 'ext'),
3646 format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
563e0bf8 3647 format_field(f, 'fps', '\t%d', func=round),
8130779d 3648 format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
b8ed0f15 3649 format_field(f, 'audio_channels', '\t%s'),
8130779d 3650 delim,
3651 format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
563e0bf8 3652 format_field(f, 'tbr', '\t%dk', func=round),
8130779d 3653 shorten_protocol_name(f.get('protocol', '')),
3654 delim,
d816f61f 3655 simplified_codec(f, 'vcodec'),
563e0bf8 3656 format_field(f, 'vbr', '\t%dk', func=round),
d816f61f 3657 simplified_codec(f, 'acodec'),
563e0bf8 3658 format_field(f, 'abr', '\t%dk', func=round),
ae61d108 3659 format_field(f, 'asr', '\t%s', func=format_decimal_suffix),
8130779d 3660 join_nonempty(
591bb9d3 3661 self._format_out('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
8130779d 3662 format_field(f, 'language', '[%s]'),
3663 join_nonempty(format_field(f, 'format_note'),
3664 format_field(f, 'container', ignore=(None, f.get('ext'))),
3665 delim=', '),
3666 delim=' '),
3667 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3668 header_line = self._list_format_headers(
b8ed0f15 3669 'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', 'CH', delim, '\tFILESIZE', '\tTBR', 'PROTO',
8130779d 3670 delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
3671
3672 return render_table(
3673 header_line, table, hide_empty=True,
591bb9d3 3674 delim=self._format_out('\u2500', self.Styles.DELIM, '-', test_encoding=True))
8130779d 3675
3676 def render_thumbnails_table(self, info_dict):
88f23a18 3677 thumbnails = list(info_dict.get('thumbnails') or [])
cfb56d1a 3678 if not thumbnails:
8130779d 3679 return None
3680 return render_table(
ec11a9f4 3681 self._list_format_headers('ID', 'Width', 'Height', 'URL'),
177662e0 3682 [[t.get('id'), t.get('width') or 'unknown', t.get('height') or 'unknown', t['url']] for t in thumbnails])
2412044c 3683
8130779d 3684 def render_subtitles_table(self, video_id, subtitles):
2412044c 3685 def _row(lang, formats):
49c258e1 3686 exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
2412044c 3687 if len(set(names)) == 1:
7aee40c1 3688 names = [] if names[0] == 'unknown' else names[:1]
2412044c 3689 return [lang, ', '.join(names), ', '.join(exts)]
3690
8130779d 3691 if not subtitles:
3692 return None
3693 return render_table(
ec11a9f4 3694 self._list_format_headers('Language', 'Name', 'Formats'),
2412044c 3695 [_row(lang, formats) for lang, formats in subtitles.items()],
8130779d 3696 hide_empty=True)
3697
3698 def __list_table(self, video_id, name, func, *args):
3699 table = func(*args)
3700 if not table:
3701 self.to_screen(f'{video_id} has no {name}')
3702 return
3703 self.to_screen(f'[info] Available {name} for {video_id}:')
3704 self.to_stdout(table)
3705
3706 def list_formats(self, info_dict):
3707 self.__list_table(info_dict['id'], 'formats', self.render_formats_table, info_dict)
3708
3709 def list_thumbnails(self, info_dict):
3710 self.__list_table(info_dict['id'], 'thumbnails', self.render_thumbnails_table, info_dict)
3711
3712 def list_subtitles(self, video_id, subtitles, name='subtitles'):
3713 self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles)
a504ced0 3714
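# Illustrative embedding sketch (not from YoutubeDL.py itself): the list_* helpers above are
# driven by these options (note the historical naming: 'listformats', 'listsubtitles',
# 'list_thumbnails', exactly as read in process_video_result()).
import yt_dlp

opts = {'listformats': True, 'listsubtitles': True, 'list_thumbnails': True}
with yt_dlp.YoutubeDL(opts) as ydl:
    ydl.extract_info('https://www.youtube.com/watch?v=BaW_jenozKc', download=False)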
dca08720
PH
3715 def urlopen(self, req):
3716 """ Start an HTTP download """
f9934b96 3717 if isinstance(req, str):
67dda517 3718 req = sanitized_Request(req)
19a41fc6 3719 return self._opener.open(req, timeout=self._socket_timeout)
dca08720
PH
3720
3721 def print_debug_header(self):
3722 if not self.params.get('verbose'):
3723 return
49a57e70 3724
a057779d 3725 from . import _IN_CLI # Must be delayed import
3726
560738f3 3727 # These imports can be slow. So import them only as needed
3728 from .extractor.extractors import _LAZY_LOADER
3729 from .extractor.extractors import _PLUGIN_CLASSES as plugin_extractors
3730
49a57e70 3731 def get_encoding(stream):
2a938746 3732 ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
49a57e70 3733 if not supports_terminal_sequences(stream):
53973b4d 3734 from .utils import WINDOWS_VT_MODE # Must be imported locally
e3c7d495 3735 ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
49a57e70 3736 return ret
3737
591bb9d3 3738 encoding_str = 'Encodings: locale %s, fs %s, pref %s, %s' % (
49a57e70 3739 locale.getpreferredencoding(),
3740 sys.getfilesystemencoding(),
591bb9d3 3741 self.get_encoding(),
3742 ', '.join(
64fa820c 3743 f'{key} {get_encoding(stream)}' for key, stream in self._out_files.items_
591bb9d3 3744 if stream is not None and key != 'console')
3745 )
883d4b1e 3746
3747 logger = self.params.get('logger')
3748 if logger:
3749 write_debug = lambda msg: logger.debug(f'[debug] {msg}')
3750 write_debug(encoding_str)
3751 else:
96565c7e 3752 write_string(f'[debug] {encoding_str}\n', encoding=None)
49a57e70 3753 write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')
734f90bb 3754
4c88ff87 3755 source = detect_variant()
70b23409 3756 if VARIANT not in (None, 'pip'):
3757 source += '*'
36eaf303 3758 write_debug(join_nonempty(
b5e7a2e6 3759 f'{"yt-dlp" if REPOSITORY == "yt-dlp/yt-dlp" else REPOSITORY} version',
3760 __version__,
36eaf303 3761 f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
3762 '' if source == 'unknown' else f'({source})',
a057779d 3763 '' if _IN_CLI else 'API',
36eaf303 3764 delim=' '))
497074f0 3765
3766 if not _IN_CLI:
3767 write_debug(f'params: {self.params}')
3768
6e21fdd2 3769 if not _LAZY_LOADER:
3770 if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
49a57e70 3771 write_debug('Lazy loading extractors is forcibly disabled')
6e21fdd2 3772 else:
49a57e70 3773 write_debug('Lazy loading extractors is disabled')
3ae5e797 3774 if plugin_extractors or plugin_postprocessors:
49a57e70 3775 write_debug('Plugins: %s' % [
3ae5e797 3776 '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
3777 for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
8a82af35 3778 if self.params['compat_opts']:
3779 write_debug('Compatibility options: %s' % ', '.join(self.params['compat_opts']))
36eaf303 3780
b5e7a2e6 3781 if current_git_head():
3782 write_debug(f'Git HEAD: {current_git_head()}')
b1f94422 3783 write_debug(system_identifier())
d28b5171 3784
8913ef74 3785 exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
3786 ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
3787 if ffmpeg_features:
19a03940 3788 exe_versions['ffmpeg'] += ' (%s)' % ','.join(sorted(ffmpeg_features))
8913ef74 3789
4c83c967 3790 exe_versions['rtmpdump'] = rtmpdump_version()
feee8d32 3791 exe_versions['phantomjs'] = PhantomJSwrapper._version()
d28b5171 3792 exe_str = ', '.join(
2831b468 3793 f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
3794 ) or 'none'
49a57e70 3795 write_debug('exe versions: %s' % exe_str)
dca08720 3796
1d485a1a 3797 from .compat.compat_utils import get_package_info
9b8ee23b 3798 from .dependencies import available_dependencies
3799
3800 write_debug('Optional libraries: %s' % (', '.join(sorted({
1d485a1a 3801 join_nonempty(*get_package_info(m)) for m in available_dependencies.values()
9b8ee23b 3802 })) or 'none'))
2831b468 3803
97ec5bc5 3804 self._setup_opener()
dca08720
PH
3805 proxy_map = {}
3806 for handler in self._opener.handlers:
3807 if hasattr(handler, 'proxies'):
3808 proxy_map.update(handler.proxies)
49a57e70 3809 write_debug(f'Proxy map: {proxy_map}')
dca08720 3810
49a57e70 3811 # Not implemented
3812 if False and self.params.get('call_home'):
0f06bcd7 3813 ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode()
49a57e70 3814 write_debug('Public IP address: %s' % ipaddr)
58b1f00d 3815 latest_version = self.urlopen(
0f06bcd7 3816 'https://yt-dl.org/latest/version').read().decode()
58b1f00d
PH
3817 if version_tuple(latest_version) > version_tuple(__version__):
3818 self.report_warning(
3819 'You are using an outdated version (newest version: %s)! '
3820 'See https://yt-dl.org/update if you need help updating.' %
3821 latest_version)
3822
e344693b 3823 def _setup_opener(self):
97ec5bc5 3824 if hasattr(self, '_opener'):
3825 return
6ad14cab 3826 timeout_val = self.params.get('socket_timeout')
17bddf3e 3827 self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
6ad14cab 3828
982ee69a 3829 opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
dca08720
PH
3830 opts_cookiefile = self.params.get('cookiefile')
3831 opts_proxy = self.params.get('proxy')
3832
982ee69a 3833 self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
dca08720 3834
6a3f4c3f 3835 cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
dca08720
PH
3836 if opts_proxy is not None:
3837 if opts_proxy == '':
3838 proxies = {}
3839 else:
3840 proxies = {'http': opts_proxy, 'https': opts_proxy}
3841 else:
ac668111 3842 proxies = urllib.request.getproxies()
067aa17e 3843 # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
dca08720
PH
3844 if 'http' in proxies and 'https' not in proxies:
3845 proxies['https'] = proxies['http']
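# Editor's note: at this point `proxies` maps scheme -> proxy URL, e.g.
#   {'http': 'socks5://127.0.0.1:1080', 'https': 'socks5://127.0.0.1:1080'}
# when --proxy is given, or whatever urllib.request.getproxies() found in the environment.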
91410c9b 3846 proxy_handler = PerRequestProxyHandler(proxies)
a0ddb8a2
PH
3847
3848 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
be4a824d
PH
3849 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3850 ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
fca6dba8 3851 redirect_handler = YoutubeDLRedirectHandler()
f9934b96 3852 data_handler = urllib.request.DataHandler()
6240b0a2
JMF
3853
3854 # When passing our own FileHandler instance, build_opener won't add the
3855 # default FileHandler, which lets us disable the file:// protocol that
3856 # can be used for malicious purposes (see
067aa17e 3857 # https://github.com/ytdl-org/youtube-dl/issues/8227)
ac668111 3858 file_handler = urllib.request.FileHandler()
6240b0a2
JMF
3859
3860 def file_open(*args, **kwargs):
ac668111 3861 raise urllib.error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
6240b0a2
JMF
3862 file_handler.file_open = file_open
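# Editor's note: with this override installed, any file:// request routed through the
# opener fails fast - e.g. self.urlopen('file:///etc/passwd') raises urllib.error.URLError
# instead of reading the local file.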
3863
ac668111 3864 opener = urllib.request.build_opener(
fca6dba8 3865 proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
2461f79d 3866
dca08720
PH
3867 # Delete the default user-agent header, which would otherwise apply in
3868 # cases where our custom HTTP handler doesn't come into play
067aa17e 3869 # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
dca08720
PH
3870 opener.addheaders = []
3871 self._opener = opener
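# Editor's note: the early hasattr() check above makes _setup_opener effectively
# idempotent - the opener and cookiejar are built once per YoutubeDL instance and
# then reused by every urlopen() call.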
62fec3b2
PH
3872
3873 def encode(self, s):
3874 if isinstance(s, bytes):
3875 return s # Already encoded
3876
3877 try:
3878 return s.encode(self.get_encoding())
3879 except UnicodeEncodeError as err:
3880 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3881 raise
3882
3883 def get_encoding(self):
3884 encoding = self.params.get('encoding')
3885 if encoding is None:
3886 encoding = preferredencoding()
3887 return encoding
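# Editor's sketch of the fallback described above (illustrative values): with no explicit
# 'encoding' parameter, get_encoding() falls back to the locale's preferred encoding, so
#   YoutubeDL({'encoding': 'utf-8'}).get_encoding()  # -> 'utf-8'
#   YoutubeDL({}).get_encoding()                     # -> preferredencoding()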
ec82d85a 3888
e08a85d8 3889 def _write_info_json(self, label, ie_result, infofn, overwrite=None):
cb96c5be 3890 ''' Write infojson and return True = written, 'exists' = already exists, False = skip, None = error '''
e08a85d8 3891 if overwrite is None:
3892 overwrite = self.params.get('overwrites', True)
80c03fa9 3893 if not self.params.get('writeinfojson'):
3894 return False
3895 elif not infofn:
3896 self.write_debug(f'Skipping writing {label} infojson')
3897 return False
3898 elif not self._ensure_dir_exists(infofn):
3899 return None
e08a85d8 3900 elif not overwrite and os.path.exists(infofn):
80c03fa9 3901 self.to_screen(f'[info] {label.title()} metadata is already present')
cb96c5be 3902 return 'exists'
3903
3904 self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3905 try:
3906 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3907 return True
86e5f3ed 3908 except OSError:
cb96c5be 3909 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3910 return None
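# Illustrative call-site pattern for the return contract above (editor's sketch,
# hypothetical names):
#   written = self._write_info_json('video', info_dict, infofn)
#   if written is None:        # error was already reported; abort this item
#       ...
#   elif written == 'exists':  # kept the existing file because overwrites are disabled
#       ...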
80c03fa9 3911
3912 def _write_description(self, label, ie_result, descfn):
3913 ''' Write description and return True = written, False = skip, None = error '''
3914 if not self.params.get('writedescription'):
3915 return False
3916 elif not descfn:
3917 self.write_debug(f'Skipping writing {label} description')
3918 return False
3919 elif not self._ensure_dir_exists(descfn):
3920 return None
3921 elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3922 self.to_screen(f'[info] {label.title()} description is already present')
3923 elif ie_result.get('description') is None:
3924 self.report_warning(f'There\'s no {label} description to write')
3925 return False
3926 else:
3927 try:
3928 self.to_screen(f'[info] Writing {label} description to: {descfn}')
86e5f3ed 3929 with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
80c03fa9 3930 descfile.write(ie_result['description'])
86e5f3ed 3931 except OSError:
80c03fa9 3932 self.report_error(f'Cannot write {label} description file {descfn}')
3933 return None
3934 return True
3935
3936 def _write_subtitles(self, info_dict, filename):
3937 ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
3938 ret = []
3939 subtitles = info_dict.get('requested_subtitles')
3940 if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
3941 # Subtitle download errors are already handled by the relevant IE,
3942 # so extraction silently continues when an IE doesn't support subtitles
3943 return ret
3944
3945 sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
3946 if not sub_filename_base:
3947 self.to_screen('[info] Skipping writing video subtitles')
3948 return ret
3949 for sub_lang, sub_info in subtitles.items():
3950 sub_format = sub_info['ext']
3951 sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
3952 sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
e04938ab 3953 existing_sub = self.existing_file((sub_filename_final, sub_filename))
3954 if existing_sub:
80c03fa9 3955 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
e04938ab 3956 sub_info['filepath'] = existing_sub
3957 ret.append((existing_sub, sub_filename_final))
80c03fa9 3958 continue
3959
3960 self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
3961 if sub_info.get('data') is not None:
3962 try:
3963 # Use newline='' to prevent conversion of newline characters
3964 # See https://github.com/ytdl-org/youtube-dl/issues/10268
86e5f3ed 3965 with open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
80c03fa9 3966 subfile.write(sub_info['data'])
3967 sub_info['filepath'] = sub_filename
3968 ret.append((sub_filename, sub_filename_final))
3969 continue
86e5f3ed 3970 except OSError:
80c03fa9 3971 self.report_error(f'Cannot write video subtitles file {sub_filename}')
3972 return None
3973
3974 try:
3975 sub_copy = sub_info.copy()
3976 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
3977 self.dl(sub_filename, sub_copy, subtitle=True)
3978 sub_info['filepath'] = sub_filename
3979 ret.append((sub_filename, sub_filename_final))
6020e05d 3980 except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
c70c418d 3981 msg = f'Unable to download video subtitles for {sub_lang!r}: {err}'
6020e05d 3982 if self.params.get('ignoreerrors') is not True: # False or 'only_download'
c70c418d 3983 if not self.params.get('ignoreerrors'):
3984 self.report_error(msg)
3985 raise DownloadError(msg)
3986 self.report_warning(msg)
519804a9 3987 return ret
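# Editor's note on the error handling above: ignoreerrors=True downgrades a failed
# subtitle download to a warning, ignoreerrors=False reports the error and raises
# DownloadError, and ignoreerrors='only_download' raises without the extra report_error.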
80c03fa9 3988
3989 def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
3990 ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
6c4fd172 3991 write_all = self.params.get('write_all_thumbnails', False)
80c03fa9 3992 thumbnails, ret = [], []
6c4fd172 3993 if write_all or self.params.get('writethumbnail', False):
0202b52a 3994 thumbnails = info_dict.get('thumbnails') or []
6c4fd172 3995 multiple = write_all and len(thumbnails) > 1
ec82d85a 3996
80c03fa9 3997 if thumb_filename_base is None:
3998 thumb_filename_base = filename
3999 if thumbnails and not thumb_filename_base:
4000 self.write_debug(f'Skipping writing {label} thumbnail')
4001 return ret
4002
dd0228ce 4003 for idx, t in list(enumerate(thumbnails))[::-1]:
80c03fa9 4004 thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
aa9369a2 4005 thumb_display_id = f'{label} thumbnail {t["id"]}'
80c03fa9 4006 thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
4007 thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
ec82d85a 4008
e04938ab 4009 existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
4010 if existing_thumb:
aa9369a2 4011 self.to_screen('[info] %s is already present' % (
4012 thumb_display_id if multiple else f'{label} thumbnail').capitalize())
e04938ab 4013 t['filepath'] = existing_thumb
4014 ret.append((existing_thumb, thumb_filename_final))
ec82d85a 4015 else:
80c03fa9 4016 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
ec82d85a 4017 try:
297e9952 4018 uf = self.urlopen(sanitized_Request(t['url'], headers=t.get('http_headers', {})))
80c03fa9 4019 self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
d3d89c32 4020 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
ec82d85a 4021 shutil.copyfileobj(uf, thumbf)
80c03fa9 4022 ret.append((thumb_filename, thumb_filename_final))
885cc0b7 4023 t['filepath'] = thumb_filename
3158150c 4024 except network_exceptions as err:
dd0228ce 4025 thumbnails.pop(idx)
80c03fa9 4026 self.report_warning(f'Unable to download {thumb_display_id}: {err}')
6c4fd172 4027 if ret and not write_all:
4028 break
0202b52a 4029 return ret
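# Editor's note on the loop above: thumbnails are visited from the end of the list (the
# highest-preference entry when info_dict['thumbnails'] is sorted), and unless write_all
# is set the loop stops after the first thumbnail that is written or already present, so
# the default case produces at most one thumbnail file.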