]> jfr.im git - yt-dlp.git/blame - yt_dlp/YoutubeDL.py
Add pre-processor stage `video`
[yt-dlp.git] / yt_dlp / YoutubeDL.py
CommitLineData
26e63931 1import collections
31bd3925 2import contextlib
9d2ecdbc 3import datetime
c1c9a79c 4import errno
31bd3925 5import fileinput
b5ae35ee 6import functools
8222d8de 7import io
b82f815f 8import itertools
8694c600 9import json
62fec3b2 10import locale
083c9df9 11import operator
8222d8de 12import os
f8271158 13import random
8222d8de
JMF
14import re
15import shutil
dca08720 16import subprocess
8222d8de 17import sys
21cd8fae 18import tempfile
8222d8de 19import time
67134eab 20import tokenize
8222d8de 21import traceback
524e2e4f 22import unicodedata
f9934b96 23import urllib.request
961ea474
S
24from string import ascii_letters
25
f8271158 26from .cache import Cache
14f25df2 27from .compat import compat_os_name, compat_shlex_quote
982ee69a 28from .cookies import load_cookies
f8271158 29from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
30from .downloader.rtmp import rtmpdump_version
f8271158 31from .extractor import gen_extractor_classes, get_info_extractor
fe7866d0 32from .extractor.common import UnsupportedURLIE
f8271158 33from .extractor.openload import PhantomJSwrapper
34from .minicurses import format_text
35from .postprocessor import _PLUGIN_CLASSES as plugin_postprocessors
36from .postprocessor import (
37 EmbedThumbnailPP,
38 FFmpegFixupDuplicateMoovPP,
39 FFmpegFixupDurationPP,
40 FFmpegFixupM3u8PP,
41 FFmpegFixupM4aPP,
42 FFmpegFixupStretchedPP,
43 FFmpegFixupTimestampPP,
44 FFmpegMergerPP,
45 FFmpegPostProcessor,
ca9def71 46 FFmpegVideoConvertorPP,
f8271158 47 MoveFilesAfterDownloadPP,
48 get_postprocessor,
49)
ca9def71 50from .postprocessor.ffmpeg import resolve_mapping as resolve_recode_mapping
b5e7a2e6 51from .update import REPOSITORY, current_git_head, detect_variant
8c25f81b 52from .utils import (
f8271158 53 DEFAULT_OUTTMPL,
7b2c3f47 54 IDENTITY,
f8271158 55 LINK_TEMPLATES,
8dc59305 56 MEDIA_EXTENSIONS,
f8271158 57 NO_DEFAULT,
1d485a1a 58 NUMBER_RE,
f8271158 59 OUTTMPL_TYPES,
60 POSTPROCESS_WHEN,
61 STR_FORMAT_RE_TMPL,
62 STR_FORMAT_TYPES,
63 ContentTooShortError,
64 DateRange,
65 DownloadCancelled,
66 DownloadError,
67 EntryNotInPlaylist,
68 ExistingVideoReached,
69 ExtractorError,
784320c9 70 FormatSorter,
f8271158 71 GeoRestrictedError,
72 HEADRequest,
f8271158 73 ISO3166Utils,
74 LazyList,
75 MaxDownloadsReached,
19a03940 76 Namespace,
f8271158 77 PagedList,
78 PerRequestProxyHandler,
7e88d7d7 79 PlaylistEntries,
f8271158 80 Popen,
81 PostProcessingError,
82 ReExtractInfo,
83 RejectedVideoReached,
84 SameFileError,
85 UnavailableVideoError,
693f0600 86 UserNotLive,
f8271158 87 YoutubeDLCookieProcessor,
88 YoutubeDLHandler,
89 YoutubeDLRedirectHandler,
eedb7ba5
S
90 age_restricted,
91 args_to_str,
cb794ee0 92 bug_reports_message,
ce02ed60 93 date_from_str,
da4db748 94 deprecation_warning,
ce02ed60 95 determine_ext,
b5559424 96 determine_protocol,
c0384f22 97 encode_compat_str,
ce02ed60 98 encodeFilename,
a06916d9 99 error_to_compat_str,
47cdc68e 100 escapeHTML,
590bc6f6 101 expand_path,
90137ca4 102 filter_dict,
e29663c6 103 float_or_none,
02dbf93f 104 format_bytes,
e0fd9573 105 format_decimal_suffix,
f8271158 106 format_field,
525ef922 107 formatSeconds,
fc61aff4 108 get_compatible_ext,
0bb322b9 109 get_domain,
c9969434 110 int_or_none,
732044af 111 iri_to_uri,
941e881e 112 is_path_like,
34921b43 113 join_nonempty,
ce02ed60 114 locked_file,
0647d925 115 make_archive_id,
0202b52a 116 make_dir,
dca08720 117 make_HTTPS_handler,
8b7539d2 118 merge_headers,
3158150c 119 network_exceptions,
ec11a9f4 120 number_of_digits,
cd6fc19e 121 orderedSet,
5314b521 122 orderedSet_from_options,
083c9df9 123 parse_filesize,
ce02ed60 124 preferredencoding,
eedb7ba5 125 prepend_extension,
51fb4995 126 register_socks_protocols,
3efb96a6 127 remove_terminal_sequences,
cfb56d1a 128 render_table,
eedb7ba5 129 replace_extension,
ce02ed60 130 sanitize_filename,
1bb5c511 131 sanitize_path,
dcf77cf1 132 sanitize_url,
67dda517 133 sanitized_Request,
e5660ee6 134 std_headers,
1211bb6d 135 str_or_none,
e29663c6 136 strftime_or_none,
ce02ed60 137 subtitles_filename,
819e0531 138 supports_terminal_sequences,
b1f94422 139 system_identifier,
f2ebc5c7 140 timetuple_from_msec,
732044af 141 to_high_limit_path,
324ad820 142 traverse_obj,
fc61aff4 143 try_call,
6033d980 144 try_get,
29eb5174 145 url_basename,
7d1eb38a 146 variadic,
58b1f00d 147 version_tuple,
53973b4d 148 windows_enable_vt_mode,
ce02ed60
PH
149 write_json_file,
150 write_string,
4f026faf 151)
70b23409 152from .version import RELEASE_GIT_HEAD, VARIANT, __version__
8222d8de 153
e9c0cdd3
YCH
154if compat_os_name == 'nt':
155 import ctypes
156
2459b6e1 157
86e5f3ed 158class YoutubeDL:
8222d8de
JMF
159 """YoutubeDL class.
160
161 YoutubeDL objects are the ones responsible of downloading the
162 actual video file and writing it to disk if the user has requested
163 it, among some other tasks. In most cases there should be one per
164 program. As, given a video URL, the downloader doesn't know how to
165 extract all the needed information, task that InfoExtractors do, it
166 has to pass the URL to one of them.
167
168 For this, YoutubeDL objects have a method that allows
169 InfoExtractors to be registered in a given order. When it is passed
170 a URL, the YoutubeDL object handles it to the first InfoExtractor it
171 finds that reports being able to handle it. The InfoExtractor extracts
172 all the information about the video or videos the URL refers to, and
173 YoutubeDL process the extracted information, possibly using a File
174 Downloader to download the video.
175
176 YoutubeDL objects accept a lot of parameters. In order not to saturate
177 the object constructor with arguments, it receives a dictionary of
178 options instead. These options are available through the params
179 attribute for the InfoExtractors to use. The YoutubeDL also
180 registers itself as the downloader in charge for the InfoExtractors
181 that are added to it, so this is a "mutual registration".
182
183 Available options:
184
185 username: Username for authentication purposes.
186 password: Password for authentication purposes.
180940e0 187 videopassword: Password for accessing a video.
1da50aa3
S
188 ap_mso: Adobe Pass multiple-system operator identifier.
189 ap_username: Multiple-system operator account username.
190 ap_password: Multiple-system operator account password.
8222d8de
JMF
191 usenetrc: Use netrc for authentication instead.
192 verbose: Print additional info to stdout.
193 quiet: Do not print messages to stdout.
ad8915b7 194 no_warnings: Do not print out anything for warnings.
bb66c247 195 forceprint: A dict with keys WHEN mapped to a list of templates to
196 print to stdout. The allowed keys are video or any of the
197 items in utils.POSTPROCESS_WHEN.
ca30f449 198 For compatibility, a single list is also accepted
bb66c247 199 print_to_file: A dict with keys WHEN (same as forceprint) mapped to
200 a list of tuples with (template, filename)
8694c600 201 forcejson: Force printing info_dict as JSON.
63e0be34
PH
202 dump_single_json: Force printing the info_dict of the whole playlist
203 (or video) as a single JSON line.
c25228e5 204 force_write_download_archive: Force writing download archive regardless
205 of 'skip_download' or 'simulate'.
b7b04c78 206 simulate: Do not download the video files. If unset (or None),
207 simulate only if listsubtitles, listformats or list_thumbnails is used
eb8a4433 208 format: Video format code. see "FORMAT SELECTION" for more details.
093a1710 209 You can also pass a function. The function takes 'ctx' as
210 argument and returns the formats to download.
211 See "build_format_selector" for an implementation
63ad4d43 212 allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
b7da73eb 213 ignore_no_formats_error: Ignore "No video formats" error. Usefull for
214 extracting metadata even if the video is not actually
215 available for download (experimental)
0930b11f 216 format_sort: A list of fields by which to sort the video formats.
217 See "Sorting Formats" for more details.
c25228e5 218 format_sort_force: Force the given format_sort. see "Sorting Formats"
219 for more details.
08d30158 220 prefer_free_formats: Whether to prefer video formats with free containers
221 over non-free ones of same quality.
c25228e5 222 allow_multiple_video_streams: Allow multiple video streams to be merged
223 into a single file
224 allow_multiple_audio_streams: Allow multiple audio streams to be merged
225 into a single file
0ba692ac 226 check_formats Whether to test if the formats are downloadable.
9f1a1c36 227 Can be True (check all), False (check none),
228 'selected' (check selected formats),
0ba692ac 229 or None (check only if requested by extractor)
4524baf0 230 paths: Dictionary of output paths. The allowed keys are 'home'
231 'temp' and the keys of OUTTMPL_TYPES (in utils.py)
de6000d9 232 outtmpl: Dictionary of templates for output names. Allowed keys
4524baf0 233 are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
34488702 234 For compatibility with youtube-dl, a single string can also be used
a820dc72
RA
235 outtmpl_na_placeholder: Placeholder for unavailable meta fields.
236 restrictfilenames: Do not allow "&" and spaces in file names
237 trim_file_name: Limit length of filename (extension excluded)
4524baf0 238 windowsfilenames: Force the filenames to be windows compatible
b1940459 239 ignoreerrors: Do not stop on download/postprocessing errors.
240 Can be 'only_download' to ignore only download errors.
241 Default is 'only_download' for CLI, but False for API
26e2805c 242 skip_playlist_after_errors: Number of allowed failures until the rest of
243 the playlist is skipped
fe7866d0 244 allowed_extractors: List of regexes to match against extractor names that are allowed
0c3d0f51 245 overwrites: Overwrite all video and metadata files if True,
246 overwrite only non-video files if None
247 and don't overwrite any file if False
34488702 248 For compatibility with youtube-dl,
249 "nooverwrites" may also be used instead
c14e88f0 250 playlist_items: Specific indices of playlist to download.
75822ca7 251 playlistrandom: Download playlist items in random order.
7e9a6125 252 lazy_playlist: Process playlist entries as they are received.
8222d8de
JMF
253 matchtitle: Download only matching titles.
254 rejecttitle: Reject downloads for matching titles.
8bf9319e 255 logger: Log messages to a logging.Logger instance.
17ffed18 256 logtostderr: Print everything to stderr instead of stdout.
257 consoletitle: Display progress in console window's titlebar.
8222d8de
JMF
258 writedescription: Write the video description to a .description file
259 writeinfojson: Write the video description to a .info.json file
75d43ca0 260 clean_infojson: Remove private fields from the infojson
34488702 261 getcomments: Extract video comments. This will not be written to disk
06167fbb 262 unless writeinfojson is also given
1fb07d10 263 writeannotations: Write the video annotations to a .annotations.xml file
8222d8de 264 writethumbnail: Write the thumbnail image to a file
c25228e5 265 allow_playlist_files: Whether to write playlists' description, infojson etc
266 also to disk when using the 'write*' options
ec82d85a 267 write_all_thumbnails: Write all thumbnail formats to files
732044af 268 writelink: Write an internet shortcut file, depending on the
269 current platform (.url/.webloc/.desktop)
270 writeurllink: Write a Windows internet shortcut file (.url)
271 writewebloclink: Write a macOS internet shortcut file (.webloc)
272 writedesktoplink: Write a Linux internet shortcut file (.desktop)
8222d8de 273 writesubtitles: Write the video subtitles to a file
741dd8ea 274 writeautomaticsub: Write the automatically generated subtitles to a file
8222d8de 275 listsubtitles: Lists all available subtitles for the video
a504ced0 276 subtitlesformat: The format code for subtitles
c32b0aab 277 subtitleslangs: List of languages of the subtitles to download (can be regex).
278 The list may contain "all" to refer to all the available
279 subtitles. The language can be prefixed with a "-" to
62b58c09 280 exclude it from the requested languages, e.g. ['all', '-live_chat']
8222d8de
JMF
281 keepvideo: Keep the video file after post-processing
282 daterange: A DateRange object, download only if the upload_date is in the range.
283 skip_download: Skip the actual download of the video file
c35f9e72 284 cachedir: Location of the cache files in the filesystem.
a0e07d31 285 False to disable filesystem cache.
47192f92 286 noplaylist: Download single video instead of a playlist if in doubt.
8dbe9899
PH
287 age_limit: An integer representing the user's age in years.
288 Unsuitable videos for the given age are skipped.
5fe18bdb
PH
289 min_views: An integer representing the minimum view count the video
290 must have in order to not be skipped.
291 Videos without view count information are always
292 downloaded. None for no limit.
293 max_views: An integer representing the maximum view count.
294 Videos that are more popular than that are not
295 downloaded.
296 Videos without view count information are always
297 downloaded. None for no limit.
ae103564 298 download_archive: A set, or the name of a file where all downloads are recorded.
299 Videos already present in the file are not downloaded again.
8a51f564 300 break_on_existing: Stop the download process after attempting to download a
301 file that is in the archive.
302 break_on_reject: Stop the download process when encountering a video that
303 has been filtered out.
b222c271 304 break_per_url: Whether break_on_reject and break_on_existing
305 should act on each input URL as opposed to for the entire queue
d76fa1f3 306 cookiefile: File name or text stream from where cookies should be read and dumped to
f59f5ef8 307 cookiesfrombrowser: A tuple containing the name of the browser, the profile
9bd13fe5 308 name/path from where cookies are loaded, the name of the keyring,
309 and the container name, e.g. ('chrome', ) or
310 ('vivaldi', 'default', 'BASICTEXT') or ('firefox', 'default', None, 'Meta')
f81c62a6 311 legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
312 support RFC 5746 secure renegotiation
f59f5ef8 313 nocheckcertificate: Do not verify SSL certificates
bb58c9ed 314 client_certificate: Path to client certificate file in PEM format. May include the private key
315 client_certificate_key: Path to private key file for client certificate
316 client_certificate_password: Password for client certificate private key, if encrypted.
317 If not provided and the key is encrypted, yt-dlp will ask interactively
7e8c0af0 318 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
c6e07cf1 319 (Only supported by some extractors)
8b7539d2 320 http_headers: A dictionary of custom headers to be used for all requests
a1ee09e8 321 proxy: URL of the proxy server to use
38cce791 322 geo_verification_proxy: URL of the proxy to use for IP address verification
504f20dd 323 on geo-restricted sites.
e344693b 324 socket_timeout: Time to wait for unresponsive hosts, in seconds
0783b09b
PH
325 bidi_workaround: Work around buggy terminals without bidirectional text
326 support, using fridibi
a0ddb8a2 327 debug_printtraffic:Print out sent and received HTTP traffic
04b4d394
PH
328 default_search: Prepend this string if an input url is not valid.
329 'auto' for elaborate guessing
62fec3b2 330 encoding: Use this encoding instead of the system-specified.
134c913c 331 extract_flat: Whether to resolve and process url_results further
332 * False: Always process (default)
333 * True: Never process
334 * 'in_playlist': Do not process inside playlist/multi_video
335 * 'discard': Always process, but don't return the result
336 from inside playlist/multi_video
337 * 'discard_in_playlist': Same as "discard", but only for
338 playlists (not multi_video)
f2ebc5c7 339 wait_for_video: If given, wait for scheduled streams to become available.
340 The value should be a tuple containing the range
341 (min_secs, max_secs) to wait between retries
4f026faf 342 postprocessors: A list of dictionaries, each with an entry
71b640cc 343 * key: The name of the postprocessor. See
7a5c1cfe 344 yt_dlp/postprocessor/__init__.py for a list.
bb66c247 345 * when: When to run the postprocessor. Allowed values are
346 the entries of utils.POSTPROCESS_WHEN
56d868db 347 Assumed to be 'post_process' if not given
71b640cc
PH
348 progress_hooks: A list of functions that get called on download
349 progress, with a dictionary with the entries
5cda4eda 350 * status: One of "downloading", "error", or "finished".
ee69b99a 351 Check this first and ignore unknown values.
3ba7740d 352 * info_dict: The extracted info_dict
71b640cc 353
5cda4eda 354 If status is one of "downloading", or "finished", the
ee69b99a
PH
355 following properties may also be present:
356 * filename: The final filename (always present)
5cda4eda 357 * tmpfilename: The filename we're currently writing to
71b640cc
PH
358 * downloaded_bytes: Bytes on disk
359 * total_bytes: Size of the whole file, None if unknown
5cda4eda
PH
360 * total_bytes_estimate: Guess of the eventual file size,
361 None if unavailable.
362 * elapsed: The number of seconds since download started.
71b640cc
PH
363 * eta: The estimated time in seconds, None if unknown
364 * speed: The download speed in bytes/second, None if
365 unknown
5cda4eda
PH
366 * fragment_index: The counter of the currently
367 downloaded video fragment.
368 * fragment_count: The number of fragments (= individual
369 files that will be merged)
71b640cc
PH
370
371 Progress hooks are guaranteed to be called at least once
372 (with status "finished") if the download is successful.
819e0531 373 postprocessor_hooks: A list of functions that get called on postprocessing
374 progress, with a dictionary with the entries
375 * status: One of "started", "processing", or "finished".
376 Check this first and ignore unknown values.
377 * postprocessor: Name of the postprocessor
378 * info_dict: The extracted info_dict
379
380 Progress hooks are guaranteed to be called at least twice
381 (with status "started" and "finished") if the processing is successful.
fc61aff4 382 merge_output_format: "/" separated list of extensions to use when merging formats.
6b591b29 383 final_ext: Expected final extension; used to detect when the file was
59a7a13e 384 already downloaded and converted
6271f1ca
PH
385 fixup: Automatically correct known faults of the file.
386 One of:
387 - "never": do nothing
388 - "warn": only emit a warning
389 - "detect_or_warn": check whether we can do anything
62cd676c 390 about it, warn otherwise (default)
504f20dd 391 source_address: Client-side IP address to bind to.
1cf376f5 392 sleep_interval_requests: Number of seconds to sleep between requests
393 during extraction
7aa589a5
S
394 sleep_interval: Number of seconds to sleep before each download when
395 used alone or a lower bound of a range for randomized
396 sleep before each download (minimum possible number
397 of seconds to sleep) when used along with
398 max_sleep_interval.
399 max_sleep_interval:Upper bound of a range for randomized sleep before each
400 download (maximum possible number of seconds to sleep).
401 Must only be used along with sleep_interval.
402 Actual sleep time will be a random float from range
403 [sleep_interval; max_sleep_interval].
1cf376f5 404 sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
cfb56d1a
PH
405 listformats: Print an overview of available video formats and exit.
406 list_thumbnails: Print a table of all thumbnails and exit.
0a41f331 407 match_filter: A function that gets called for every video with the signature
408 (info_dict, *, incomplete: bool) -> Optional[str]
409 For backward compatibility with youtube-dl, the signature
410 (info_dict) -> Optional[str] is also allowed.
411 - If it returns a message, the video is ignored.
412 - If it returns None, the video is downloaded.
413 - If it returns utils.NO_DEFAULT, the user is interactively
414 asked whether to download the video.
347de493 415 match_filter_func in utils.py is one example for this.
7e5db8c9 416 no_color: Do not emit color codes in output.
0a840f58 417 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
504f20dd 418 HTTP header
0a840f58 419 geo_bypass_country:
773f291d
S
420 Two-letter ISO 3166-2 country code that will be used for
421 explicit geographic restriction bypassing via faking
504f20dd 422 X-Forwarded-For HTTP header
5f95927a
S
423 geo_bypass_ip_block:
424 IP range in CIDR notation that will be used similarly to
504f20dd 425 geo_bypass_country
52a8a1e1 426 external_downloader: A dictionary of protocol keys and the executable of the
427 external downloader to use for it. The allowed protocols
428 are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
429 Set the value to 'native' to use the native downloader
53ed7066 430 compat_opts: Compatibility options. See "Differences in default behavior".
3acf6d38 431 The following options do not work when used through the API:
b5ae35ee 432 filename, abort-on-error, multistreams, no-live-chat, format-sort
dac5df5a 433 no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
e4f02757 434 Refer __init__.py for their implementation
819e0531 435 progress_template: Dictionary of templates for progress outputs.
436 Allowed keys are 'download', 'postprocess',
437 'download-title' (console title) and 'postprocess-title'.
438 The template is mapped on a dictionary with keys 'progress' and 'info'
23326151 439 retry_sleep_functions: Dictionary of functions that takes the number of attempts
440 as argument and returns the time to sleep in seconds.
441 Allowed keys are 'http', 'fragment', 'file_access'
0f446365
SW
442 download_ranges: A callback function that gets called for every video with
443 the signature (info_dict, ydl) -> Iterable[Section].
444 Only the returned sections will be downloaded.
445 Each Section is a dict with the following keys:
5ec1b6b7 446 * start_time: Start time of the section in seconds
447 * end_time: End time of the section in seconds
448 * title: Section title (Optional)
449 * index: Section number (Optional)
0f446365 450 force_keyframes_at_cuts: Re-encode the video when downloading ranges to get precise cuts
a7dc6a89 451 noprogress: Do not print the progress bar
a831c2ea 452 live_from_start: Whether to download livestreams videos from the start
fe7e0c98 453
8222d8de 454 The following parameters are not used by YoutubeDL itself, they are used by
7a5c1cfe 455 the downloader (see yt_dlp/downloader/common.py):
51d9739f 456 nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
205a0654 457 max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
a7dc6a89 458 continuedl, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
59a7a13e 459 external_downloader_args, concurrent_fragment_downloads.
76b1bd67
JMF
460
461 The following options are used by the post processors:
c0b7d117
S
462 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
463 to the binary or its containing directory.
43820c03 464 postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
34488702 465 and a list of additional command-line arguments for the
466 postprocessor/executable. The dict can also have "PP+EXE" keys
467 which are used when the given exe is used by the given PP.
468 Use 'default' as the name for arguments to passed to all PP
469 For compatibility with youtube-dl, a single list of args
470 can also be used
e409895f 471
472 The following options are used by the extractors:
62bff2c1 473 extractor_retries: Number of times to retry for known errors
474 dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
e409895f 475 hls_split_discontinuity: Split HLS playlists to different formats at
62bff2c1 476 discontinuities such as ad breaks (default: False)
5d3a0e79 477 extractor_args: A dictionary of arguments to be passed to the extractors.
478 See "EXTRACTOR ARGUMENTS" for details.
62b58c09 479 E.g. {'youtube': {'skip': ['dash', 'hls']}}
88f23a18 480 mark_watched: Mark videos watched (even with --simulate). Only for YouTube
1890fc63 481
482 The following options are deprecated and may be removed in the future:
483
fe7866d0 484 force_generic_extractor: Force downloader to use the generic extractor
485 - Use allowed_extractors = ['generic', 'default']
7e9a6125 486 playliststart: - Use playlist_items
487 Playlist item to start at.
488 playlistend: - Use playlist_items
489 Playlist item to end at.
490 playlistreverse: - Use playlist_items
491 Download playlist items in reverse order.
1890fc63 492 forceurl: - Use forceprint
493 Force printing final URL.
494 forcetitle: - Use forceprint
495 Force printing title.
496 forceid: - Use forceprint
497 Force printing ID.
498 forcethumbnail: - Use forceprint
499 Force printing thumbnail URL.
500 forcedescription: - Use forceprint
501 Force printing description.
502 forcefilename: - Use forceprint
503 Force printing final filename.
504 forceduration: - Use forceprint
505 Force printing duration.
506 allsubtitles: - Use subtitleslangs = ['all']
507 Downloads all the subtitles of the video
508 (requires writesubtitles or writeautomaticsub)
509 include_ads: - Doesn't work
510 Download ads as well
511 call_home: - Not implemented
512 Boolean, true iff we are allowed to contact the
513 yt-dlp servers for debugging.
514 post_hooks: - Register a custom postprocessor
515 A list of functions that get called as the final step
516 for each video file, after all postprocessors have been
517 called. The filename will be passed as the only argument.
518 hls_prefer_native: - Use external_downloader = {'m3u8': 'native'} or {'m3u8': 'ffmpeg'}.
519 Use the native HLS downloader instead of ffmpeg/avconv
520 if True, otherwise use ffmpeg/avconv if False, otherwise
521 use downloader suggested by extractor if None.
522 prefer_ffmpeg: - avconv support is deprecated
523 If False, use avconv instead of ffmpeg if both are available,
524 otherwise prefer ffmpeg.
525 youtube_include_dash_manifest: - Use extractor_args
5d3a0e79 526 If True (default), DASH manifests and related
62bff2c1 527 data will be downloaded and processed by extractor.
528 You can reduce network I/O by disabling it if you don't
529 care about DASH. (only for youtube)
1890fc63 530 youtube_include_hls_manifest: - Use extractor_args
5d3a0e79 531 If True (default), HLS manifests and related
62bff2c1 532 data will be downloaded and processed by extractor.
533 You can reduce network I/O by disabling it if you don't
534 care about HLS. (only for youtube)
8222d8de
JMF
535 """
536
86e5f3ed 537 _NUMERIC_FIELDS = {
b8ed0f15 538 'width', 'height', 'asr', 'audio_channels', 'fps',
539 'tbr', 'abr', 'vbr', 'filesize', 'filesize_approx',
e6f21b3d 540 'timestamp', 'release_timestamp',
c9969434
S
541 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
542 'average_rating', 'comment_count', 'age_limit',
543 'start_time', 'end_time',
544 'chapter_number', 'season_number', 'episode_number',
545 'track_number', 'disc_number', 'release_year',
86e5f3ed 546 }
c9969434 547
6db9c4d5 548 _format_fields = {
549 # NB: Keep in sync with the docstring of extractor/common.py
a44ca5a4 550 'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note',
105bfd90 551 'width', 'height', 'aspect_ratio', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr', 'audio_channels',
d5d1df8a 552 'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx', 'rows', 'columns',
6db9c4d5 553 'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start',
554 'preference', 'language', 'language_preference', 'quality', 'source_preference',
555 'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'downloader_options',
556 'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'
557 }
48ee10ee 558 _format_selection_exts = {
8dc59305 559 'audio': set(MEDIA_EXTENSIONS.common_audio),
560 'video': set(MEDIA_EXTENSIONS.common_video + ('3gp', )),
561 'storyboards': set(MEDIA_EXTENSIONS.storyboards),
48ee10ee 562 }
563
3511266b 564 def __init__(self, params=None, auto_init=True):
883d4b1e 565 """Create a FileDownloader object with the given options.
566 @param auto_init Whether to load the default extractors and print header (if verbose).
49a57e70 567 Set to 'no_verbose_header' to not print the header
883d4b1e 568 """
e9f9a10f
JMF
569 if params is None:
570 params = {}
592b7485 571 self.params = params
8b7491c8 572 self._ies = {}
56c73665 573 self._ies_instances = {}
1e43a6f7 574 self._pps = {k: [] for k in POSTPROCESS_WHEN}
b35496d8 575 self._printed_messages = set()
1cf376f5 576 self._first_webpage_request = True
ab8e5e51 577 self._post_hooks = []
933605d7 578 self._progress_hooks = []
819e0531 579 self._postprocessor_hooks = []
8222d8de
JMF
580 self._download_retcode = 0
581 self._num_downloads = 0
9c906919 582 self._num_videos = 0
592b7485 583 self._playlist_level = 0
584 self._playlist_urls = set()
a0e07d31 585 self.cache = Cache(self)
34308b30 586
819e0531 587 windows_enable_vt_mode()
591bb9d3 588 stdout = sys.stderr if self.params.get('logtostderr') else sys.stdout
589 self._out_files = Namespace(
590 out=stdout,
591 error=sys.stderr,
592 screen=sys.stderr if self.params.get('quiet') else stdout,
593 console=None if compat_os_name == 'nt' else next(
cf4f42cb 594 filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None)
591bb9d3 595 )
596 self._allow_colors = Namespace(**{
597 type_: not self.params.get('no_color') and supports_terminal_sequences(stream)
64fa820c 598 for type_, stream in self._out_files.items_ if type_ != 'console'
591bb9d3 599 })
819e0531 600
6929b41a 601 # The code is left like this to be reused for future deprecations
602 MIN_SUPPORTED, MIN_RECOMMENDED = (3, 7), (3, 7)
eff42759 603 current_version = sys.version_info[:2]
604 if current_version < MIN_RECOMMENDED:
9d339c41 605 msg = ('Support for Python version %d.%d has been deprecated. '
24093d52 606 'See https://github.com/yt-dlp/yt-dlp/issues/3764 for more details.'
c6e07cf1 607 '\n You will no longer receive updates on this version')
eff42759 608 if current_version < MIN_SUPPORTED:
609 msg = 'Python version %d.%d is no longer supported'
610 self.deprecation_warning(
611 f'{msg}! Please update to Python %d.%d or above' % (*current_version, *MIN_RECOMMENDED))
a61f4b28 612
88acdbc2 613 if self.params.get('allow_unplayable_formats'):
614 self.report_warning(
ec11a9f4 615 f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
819e0531 616 'This is a developer option intended for debugging. \n'
617 ' If you experience any issues while using this option, '
ec11a9f4 618 f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')
88acdbc2 619
497074f0 620 if self.params.get('bidi_workaround', False):
621 try:
622 import pty
623 master, slave = pty.openpty()
624 width = shutil.get_terminal_size().columns
625 width_args = [] if width is None else ['-w', str(width)]
626 sp_kwargs = {'stdin': subprocess.PIPE, 'stdout': slave, 'stderr': self._out_files.error}
627 try:
628 self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
629 except OSError:
630 self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
631 self._output_channel = os.fdopen(master, 'rb')
632 except OSError as ose:
633 if ose.errno == errno.ENOENT:
634 self.report_warning(
635 'Could not find fribidi executable, ignoring --bidi-workaround. '
636 'Make sure that fribidi is an executable file in one of the directories in your $PATH.')
637 else:
638 raise
639
640 self.params['compat_opts'] = set(self.params.get('compat_opts', ()))
641 if auto_init and auto_init != 'no_verbose_header':
642 self.print_debug_header()
643
be5df5ee
S
644 def check_deprecated(param, option, suggestion):
645 if self.params.get(param) is not None:
86e5f3ed 646 self.report_warning(f'{option} is deprecated. Use {suggestion} instead')
be5df5ee
S
647 return True
648 return False
649
650 if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
38cce791
YCH
651 if self.params.get('geo_verification_proxy') is None:
652 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
653
0d1bb027 654 check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
655 check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
53ed7066 656 check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
0d1bb027 657
49a57e70 658 for msg in self.params.get('_warnings', []):
0d1bb027 659 self.report_warning(msg)
ee8dd27a 660 for msg in self.params.get('_deprecation_warnings', []):
da4db748 661 self.deprecated_feature(msg)
0d1bb027 662
8a82af35 663 if 'list-formats' in self.params['compat_opts']:
ec11a9f4 664 self.params['listformats_table'] = False
665
b5ae35ee 666 if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
b868936c 667 # nooverwrites was unnecessarily changed to overwrites
668 # in 0c3d0f51778b153f65c21906031c2e091fcfb641
669 # This ensures compatibility with both keys
670 self.params['overwrites'] = not self.params['nooverwrites']
b5ae35ee 671 elif self.params.get('overwrites') is None:
672 self.params.pop('overwrites', None)
b868936c 673 else:
674 self.params['nooverwrites'] = not self.params['overwrites']
b9d973be 675
e4221b70 676 if self.params.get('simulate') is None and any((
677 self.params.get('list_thumbnails'),
678 self.params.get('listformats'),
679 self.params.get('listsubtitles'),
680 )):
681 self.params['simulate'] = 'list_only'
682
455a15e2 683 self.params.setdefault('forceprint', {})
684 self.params.setdefault('print_to_file', {})
bb66c247 685
686 # Compatibility with older syntax
ca30f449 687 if not isinstance(params['forceprint'], dict):
455a15e2 688 self.params['forceprint'] = {'video': params['forceprint']}
ca30f449 689
97ec5bc5 690 if auto_init:
97ec5bc5 691 self.add_default_info_extractors()
692
3089bc74
S
693 if (sys.platform != 'win32'
694 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
455a15e2 695 and not self.params.get('restrictfilenames', False)):
e9137224 696 # Unicode filesystem API will throw errors (#1474, #13027)
34308b30 697 self.report_warning(
6febd1c1 698 'Assuming --restrict-filenames since file system encoding '
1b725173 699 'cannot encode all characters. '
6febd1c1 700 'Set the LC_ALL environment variable to fix this.')
4a98cdbf 701 self.params['restrictfilenames'] = True
34308b30 702
bf1824b3 703 self._parse_outtmpl()
486dd09e 704
187986a8 705 # Creating format selector here allows us to catch syntax errors before the extraction
706 self.format_selector = (
fa9f30b8 707 self.params.get('format') if self.params.get('format') in (None, '-')
093a1710 708 else self.params['format'] if callable(self.params['format'])
187986a8 709 else self.build_format_selector(self.params['format']))
710
8b7539d2 711 # Set http_headers defaults according to std_headers
712 self.params['http_headers'] = merge_headers(std_headers, self.params.get('http_headers', {}))
713
013b50b7 714 hooks = {
715 'post_hooks': self.add_post_hook,
716 'progress_hooks': self.add_progress_hook,
717 'postprocessor_hooks': self.add_postprocessor_hook,
718 }
719 for opt, fn in hooks.items():
720 for ph in self.params.get(opt, []):
721 fn(ph)
71b640cc 722
5bfc8bee 723 for pp_def_raw in self.params.get('postprocessors', []):
724 pp_def = dict(pp_def_raw)
725 when = pp_def.pop('when', 'post_process')
726 self.add_post_processor(
f9934b96 727 get_postprocessor(pp_def.pop('key'))(self, **pp_def),
5bfc8bee 728 when=when)
729
97ec5bc5 730 self._setup_opener()
51fb4995
YCH
731 register_socks_protocols()
732
ed39cac5 733 def preload_download_archive(fn):
734 """Preload the archive, if any is specified"""
ae103564 735 archive = set()
ed39cac5 736 if fn is None:
ae103564 737 return archive
941e881e 738 elif not is_path_like(fn):
ae103564 739 return fn
740
49a57e70 741 self.write_debug(f'Loading archive file {fn!r}')
ed39cac5 742 try:
743 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
744 for line in archive_file:
ae103564 745 archive.add(line.strip())
86e5f3ed 746 except OSError as ioe:
ed39cac5 747 if ioe.errno != errno.ENOENT:
748 raise
ae103564 749 return archive
ed39cac5 750
ae103564 751 self.archive = preload_download_archive(self.params.get('download_archive'))
ed39cac5 752
7d4111ed
PH
753 def warn_if_short_id(self, argv):
754 # short YouTube ID starting with dash?
755 idxs = [
756 i for i, a in enumerate(argv)
757 if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
758 if idxs:
759 correct_argv = (
7a5c1cfe 760 ['yt-dlp']
3089bc74
S
761 + [a for i, a in enumerate(argv) if i not in idxs]
762 + ['--'] + [argv[i] for i in idxs]
7d4111ed
PH
763 )
764 self.report_warning(
765 'Long argument string detected. '
49a57e70 766 'Use -- to separate parameters and URLs, like this:\n%s' %
7d4111ed
PH
767 args_to_str(correct_argv))
768
8222d8de
JMF
769 def add_info_extractor(self, ie):
770 """Add an InfoExtractor object to the end of the list."""
8b7491c8 771 ie_key = ie.ie_key()
772 self._ies[ie_key] = ie
e52d7f85 773 if not isinstance(ie, type):
8b7491c8 774 self._ies_instances[ie_key] = ie
e52d7f85 775 ie.set_downloader(self)
8222d8de 776
56c73665
JMF
777 def get_info_extractor(self, ie_key):
778 """
779 Get an instance of an IE with name ie_key, it will try to get one from
780 the _ies list, if there's no instance it will create a new one and add
781 it to the extractor list.
782 """
783 ie = self._ies_instances.get(ie_key)
784 if ie is None:
785 ie = get_info_extractor(ie_key)()
786 self.add_info_extractor(ie)
787 return ie
788
023fa8c4
JMF
789 def add_default_info_extractors(self):
790 """
791 Add the InfoExtractors returned by gen_extractors to the end of the list
792 """
fe7866d0 793 all_ies = {ie.IE_NAME.lower(): ie for ie in gen_extractor_classes()}
794 all_ies['end'] = UnsupportedURLIE()
795 try:
796 ie_names = orderedSet_from_options(
797 self.params.get('allowed_extractors', ['default']), {
798 'all': list(all_ies),
799 'default': [name for name, ie in all_ies.items() if ie._ENABLED],
800 }, use_regex=True)
801 except re.error as e:
802 raise ValueError(f'Wrong regex for allowed_extractors: {e.pattern}')
803 for name in ie_names:
804 self.add_info_extractor(all_ies[name])
805 self.write_debug(f'Loaded {len(ie_names)} extractors')
023fa8c4 806
56d868db 807 def add_post_processor(self, pp, when='post_process'):
8222d8de 808 """Add a PostProcessor object to the end of the chain."""
8aa0e7cd 809 assert when in POSTPROCESS_WHEN, f'Invalid when={when}'
5bfa4862 810 self._pps[when].append(pp)
8222d8de
JMF
811 pp.set_downloader(self)
812
ab8e5e51
AM
813 def add_post_hook(self, ph):
814 """Add the post hook"""
815 self._post_hooks.append(ph)
816
933605d7 817 def add_progress_hook(self, ph):
819e0531 818 """Add the download progress hook"""
933605d7 819 self._progress_hooks.append(ph)
8ab470f1 820
819e0531 821 def add_postprocessor_hook(self, ph):
822 """Add the postprocessing progress hook"""
823 self._postprocessor_hooks.append(ph)
5bfc8bee 824 for pps in self._pps.values():
825 for pp in pps:
826 pp.add_progress_hook(ph)
819e0531 827
1c088fa8 828 def _bidi_workaround(self, message):
5d681e96 829 if not hasattr(self, '_output_channel'):
1c088fa8
PH
830 return message
831
5d681e96 832 assert hasattr(self, '_output_process')
14f25df2 833 assert isinstance(message, str)
6febd1c1 834 line_count = message.count('\n') + 1
0f06bcd7 835 self._output_process.stdin.write((message + '\n').encode())
5d681e96 836 self._output_process.stdin.flush()
0f06bcd7 837 res = ''.join(self._output_channel.readline().decode()
9e1a5b84 838 for _ in range(line_count))
6febd1c1 839 return res[:-len('\n')]
1c088fa8 840
b35496d8 841 def _write_string(self, message, out=None, only_once=False):
842 if only_once:
843 if message in self._printed_messages:
844 return
845 self._printed_messages.add(message)
846 write_string(message, out=out, encoding=self.params.get('encoding'))
734f90bb 847
cf4f42cb 848 def to_stdout(self, message, skip_eol=False, quiet=None):
0760b0a7 849 """Print message to stdout"""
cf4f42cb 850 if quiet is not None:
da4db748 851 self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument quiet. '
852 'Use "YoutubeDL.to_screen" instead')
8a82af35 853 if skip_eol is not False:
da4db748 854 self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument skip_eol. '
855 'Use "YoutubeDL.to_screen" instead')
0bf9dc1e 856 self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.out)
cf4f42cb 857
dfea94f8 858 def to_screen(self, message, skip_eol=False, quiet=None, only_once=False):
cf4f42cb 859 """Print message to screen if not in quiet mode"""
8bf9319e 860 if self.params.get('logger'):
43afe285 861 self.params['logger'].debug(message)
cf4f42cb 862 return
863 if (self.params.get('quiet') if quiet is None else quiet) and not self.params.get('verbose'):
864 return
865 self._write_string(
866 '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
dfea94f8 867 self._out_files.screen, only_once=only_once)
8222d8de 868
b35496d8 869 def to_stderr(self, message, only_once=False):
0760b0a7 870 """Print message to stderr"""
14f25df2 871 assert isinstance(message, str)
8bf9319e 872 if self.params.get('logger'):
43afe285
IB
873 self.params['logger'].error(message)
874 else:
5792c950 875 self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.error, only_once=only_once)
cf4f42cb 876
877 def _send_console_code(self, code):
591bb9d3 878 if compat_os_name == 'nt' or not self._out_files.console:
cf4f42cb 879 return
591bb9d3 880 self._write_string(code, self._out_files.console)
8222d8de 881
1e5b9a95
PH
882 def to_console_title(self, message):
883 if not self.params.get('consoletitle', False):
884 return
3efb96a6 885 message = remove_terminal_sequences(message)
4bede0d8
C
886 if compat_os_name == 'nt':
887 if ctypes.windll.kernel32.GetConsoleWindow():
888 # c_wchar_p() might not be necessary if `message` is
889 # already of type unicode()
890 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
cf4f42cb 891 else:
892 self._send_console_code(f'\033]0;{message}\007')
1e5b9a95 893
bdde425c 894 def save_console_title(self):
cf4f42cb 895 if not self.params.get('consoletitle') or self.params.get('simulate'):
bdde425c 896 return
592b7485 897 self._send_console_code('\033[22;0t') # Save the title on stack
bdde425c
PH
898
899 def restore_console_title(self):
cf4f42cb 900 if not self.params.get('consoletitle') or self.params.get('simulate'):
bdde425c 901 return
592b7485 902 self._send_console_code('\033[23;0t') # Restore the title from stack
bdde425c
PH
903
904 def __enter__(self):
905 self.save_console_title()
906 return self
907
908 def __exit__(self, *args):
909 self.restore_console_title()
f89197d7 910
dca08720 911 if self.params.get('cookiefile') is not None:
1bab3437 912 self.cookiejar.save(ignore_discard=True, ignore_expires=True)
bdde425c 913
fa9f30b8 914 def trouble(self, message=None, tb=None, is_error=True):
8222d8de
JMF
915 """Determine action to take when a download problem appears.
916
917 Depending on if the downloader has been configured to ignore
918 download errors or not, this method may throw an exception or
919 not when errors are found, after printing the message.
920
fa9f30b8 921 @param tb If given, is additional traceback information
922 @param is_error Whether to raise error according to ignorerrors
8222d8de
JMF
923 """
924 if message is not None:
925 self.to_stderr(message)
926 if self.params.get('verbose'):
927 if tb is None:
928 if sys.exc_info()[0]: # if .trouble has been called from an except block
6febd1c1 929 tb = ''
8222d8de 930 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
6febd1c1 931 tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
c0384f22 932 tb += encode_compat_str(traceback.format_exc())
8222d8de
JMF
933 else:
934 tb_data = traceback.format_list(traceback.extract_stack())
6febd1c1 935 tb = ''.join(tb_data)
c19bc311 936 if tb:
937 self.to_stderr(tb)
fa9f30b8 938 if not is_error:
939 return
b1940459 940 if not self.params.get('ignoreerrors'):
8222d8de
JMF
941 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
942 exc_info = sys.exc_info()[1].exc_info
943 else:
944 exc_info = sys.exc_info()
945 raise DownloadError(message, exc_info)
946 self._download_retcode = 1
947
19a03940 948 Styles = Namespace(
949 HEADERS='yellow',
950 EMPHASIS='light blue',
492272fe 951 FILENAME='green',
19a03940 952 ID='green',
953 DELIM='blue',
954 ERROR='red',
955 WARNING='yellow',
956 SUPPRESS='light black',
957 )
ec11a9f4 958
7578d77d 959 def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
e5a998f3 960 text = str(text)
ec11a9f4 961 if test_encoding:
962 original_text = text
5c104538 963 # handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711
964 encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii'
ec11a9f4 965 text = text.encode(encoding, 'ignore').decode(encoding)
966 if fallback is not None and text != original_text:
967 text = fallback
7578d77d 968 return format_text(text, f) if allow_colors else text if fallback is None else fallback
ec11a9f4 969
591bb9d3 970 def _format_out(self, *args, **kwargs):
971 return self._format_text(self._out_files.out, self._allow_colors.out, *args, **kwargs)
972
ec11a9f4 973 def _format_screen(self, *args, **kwargs):
591bb9d3 974 return self._format_text(self._out_files.screen, self._allow_colors.screen, *args, **kwargs)
ec11a9f4 975
976 def _format_err(self, *args, **kwargs):
591bb9d3 977 return self._format_text(self._out_files.error, self._allow_colors.error, *args, **kwargs)
819e0531 978
c84aeac6 979 def report_warning(self, message, only_once=False):
8222d8de
JMF
980 '''
981 Print the message to stderr, it will be prefixed with 'WARNING:'
982 If stderr is a tty file the 'WARNING:' will be colored
983 '''
6d07ce01
JMF
984 if self.params.get('logger') is not None:
985 self.params['logger'].warning(message)
8222d8de 986 else:
ad8915b7
PH
987 if self.params.get('no_warnings'):
988 return
ec11a9f4 989 self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)
8222d8de 990
da4db748 991 def deprecation_warning(self, message, *, stacklevel=0):
992 deprecation_warning(
993 message, stacklevel=stacklevel + 1, printer=self.report_error, is_error=False)
994
995 def deprecated_feature(self, message):
ee8dd27a 996 if self.params.get('logger') is not None:
da4db748 997 self.params['logger'].warning(f'Deprecated Feature: {message}')
998 self.to_stderr(f'{self._format_err("Deprecated Feature:", self.Styles.ERROR)} {message}', True)
ee8dd27a 999
fa9f30b8 1000 def report_error(self, message, *args, **kwargs):
8222d8de
JMF
1001 '''
1002 Do the same as trouble, but prefixes the message with 'ERROR:', colored
1003 in red if stderr is a tty file.
1004 '''
fa9f30b8 1005 self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)
8222d8de 1006
b35496d8 1007 def write_debug(self, message, only_once=False):
0760b0a7 1008 '''Log debug message or Print message to stderr'''
1009 if not self.params.get('verbose', False):
1010 return
8a82af35 1011 message = f'[debug] {message}'
0760b0a7 1012 if self.params.get('logger'):
1013 self.params['logger'].debug(message)
1014 else:
b35496d8 1015 self.to_stderr(message, only_once)
0760b0a7 1016
8222d8de
JMF
1017 def report_file_already_downloaded(self, file_name):
1018 """Report file has already been fully downloaded."""
1019 try:
6febd1c1 1020 self.to_screen('[download] %s has already been downloaded' % file_name)
ce02ed60 1021 except UnicodeEncodeError:
6febd1c1 1022 self.to_screen('[download] The file has already been downloaded')
8222d8de 1023
0c3d0f51 1024 def report_file_delete(self, file_name):
1025 """Report that existing file will be deleted."""
1026 try:
c25228e5 1027 self.to_screen('Deleting existing file %s' % file_name)
0c3d0f51 1028 except UnicodeEncodeError:
c25228e5 1029 self.to_screen('Deleting existing file')
0c3d0f51 1030
319b6059 1031 def raise_no_formats(self, info, forced=False, *, msg=None):
0a5a191a 1032 has_drm = info.get('_has_drm')
319b6059 1033 ignored, expected = self.params.get('ignore_no_formats_error'), bool(msg)
1034 msg = msg or has_drm and 'This video is DRM protected' or 'No video formats found!'
1035 if forced or not ignored:
1151c407 1036 raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
319b6059 1037 expected=has_drm or ignored or expected)
88acdbc2 1038 else:
1039 self.report_warning(msg)
1040
de6000d9 1041 def parse_outtmpl(self):
bf1824b3 1042 self.deprecation_warning('"YoutubeDL.parse_outtmpl" is deprecated and may be removed in a future version')
1043 self._parse_outtmpl()
1044 return self.params['outtmpl']
1045
1046 def _parse_outtmpl(self):
7b2c3f47 1047 sanitize = IDENTITY
bf1824b3 1048 if self.params.get('restrictfilenames'): # Remove spaces in the default template
71ce444a 1049 sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')
bf1824b3 1050
1051 outtmpl = self.params.setdefault('outtmpl', {})
1052 if not isinstance(outtmpl, dict):
1053 self.params['outtmpl'] = outtmpl = {'default': outtmpl}
1054 outtmpl.update({k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items() if outtmpl.get(k) is None})
de6000d9 1055
21cd8fae 1056 def get_output_path(self, dir_type='', filename=None):
1057 paths = self.params.get('paths', {})
d2c8aadf 1058 assert isinstance(paths, dict), '"paths" parameter must be a dictionary'
21cd8fae 1059 path = os.path.join(
1060 expand_path(paths.get('home', '').strip()),
1061 expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
1062 filename or '')
21cd8fae 1063 return sanitize_path(path, force=self.params.get('windowsfilenames'))
1064
76a264ac 1065 @staticmethod
901130bb 1066 def _outtmpl_expandpath(outtmpl):
1067 # expand_path translates '%%' into '%' and '$$' into '$'
1068 # correspondingly that is not what we want since we need to keep
1069 # '%%' intact for template dict substitution step. Working around
1070 # with boundary-alike separator hack.
efa944f4 1071 sep = ''.join(random.choices(ascii_letters, k=32))
86e5f3ed 1072 outtmpl = outtmpl.replace('%%', f'%{sep}%').replace('$$', f'${sep}$')
901130bb 1073
1074 # outtmpl should be expand_path'ed before template dict substitution
1075 # because meta fields may contain env variables we don't want to
62b58c09 1076 # be expanded. E.g. for outtmpl "%(title)s.%(ext)s" and
901130bb 1077 # title "Hello $PATH", we don't want `$PATH` to be expanded.
1078 return expand_path(outtmpl).replace(sep, '')
1079
1080 @staticmethod
1081 def escape_outtmpl(outtmpl):
1082 ''' Escape any remaining strings like %s, %abc% etc. '''
1083 return re.sub(
1084 STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
1085 lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
1086 outtmpl)
1087
1088 @classmethod
1089 def validate_outtmpl(cls, outtmpl):
76a264ac 1090 ''' @return None or Exception object '''
7d1eb38a 1091 outtmpl = re.sub(
47cdc68e 1092 STR_FORMAT_RE_TMPL.format('[^)]*', '[ljhqBUDS]'),
7d1eb38a 1093 lambda mobj: f'{mobj.group(0)[:-1]}s',
1094 cls._outtmpl_expandpath(outtmpl))
76a264ac 1095 try:
7d1eb38a 1096 cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
76a264ac 1097 return None
1098 except ValueError as err:
1099 return err
1100
03b4de72 1101 @staticmethod
1102 def _copy_infodict(info_dict):
1103 info_dict = dict(info_dict)
09b49e1f 1104 info_dict.pop('__postprocessors', None)
415f8d51 1105 info_dict.pop('__pending_error', None)
03b4de72 1106 return info_dict
1107
e0fd9573 1108 def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
1109 """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
1110 @param sanitize Whether to sanitize the output as a filename.
1111 For backward compatibility, a function can also be passed
1112 """
1113
6e84b215 1114 info_dict.setdefault('epoch', int(time.time())) # keep epoch consistent once set
143db31d 1115
03b4de72 1116 info_dict = self._copy_infodict(info_dict)
752cda38 1117 info_dict['duration_string'] = ( # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
53c18592 1118 formatSeconds(info_dict['duration'], '-' if sanitize else ':')
143db31d 1119 if info_dict.get('duration', None) is not None
1120 else None)
1d485a1a 1121 info_dict['autonumber'] = int(self.params.get('autonumber_start', 1) - 1 + self._num_downloads)
9c906919 1122 info_dict['video_autonumber'] = self._num_videos
752cda38 1123 if info_dict.get('resolution') is None:
1124 info_dict['resolution'] = self.format_resolution(info_dict, default=None)
143db31d 1125
e6f21b3d 1126 # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
143db31d 1127 # of %(field)s to %(field)0Nd for backward compatibility
1128 field_size_compat_map = {
0a5a191a 1129 'playlist_index': number_of_digits(info_dict.get('__last_playlist_index') or 0),
ec11a9f4 1130 'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
752cda38 1131 'autonumber': self.params.get('autonumber_size') or 5,
143db31d 1132 }
752cda38 1133
385a27fa 1134 TMPL_DICT = {}
47cdc68e 1135 EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljhqBUDS]'))
385a27fa 1136 MATH_FUNCTIONS = {
1137 '+': float.__add__,
1138 '-': float.__sub__,
1139 }
e625be0d 1140 # Field is of the form key1.key2...
07a1250e 1141 # where keys (except first) can be string, int, slice or "{field, ...}"
1142 FIELD_INNER_RE = r'(?:\w+|%(num)s|%(num)s?(?::%(num)s?){1,2})' % {'num': r'(?:-?\d+)'}
1143 FIELD_RE = r'\w*(?:\.(?:%(inner)s|{%(field)s(?:,%(field)s)*}))*' % {
1144 'inner': FIELD_INNER_RE,
1145 'field': rf'\w*(?:\.{FIELD_INNER_RE})*'
1146 }
1d485a1a 1147 MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})'
385a27fa 1148 MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
1d485a1a 1149 INTERNAL_FORMAT_RE = re.compile(rf'''(?x)
e625be0d 1150 (?P<negate>-)?
1d485a1a 1151 (?P<fields>{FIELD_RE})
1152 (?P<maths>(?:{MATH_OPERATORS_RE}{MATH_FIELD_RE})*)
e625be0d 1153 (?:>(?P<strf_format>.+?))?
34baa9fd 1154 (?P<remaining>
1155 (?P<alternate>(?<!\\),[^|&)]+)?
1156 (?:&(?P<replacement>.*?))?
1157 (?:\|(?P<default>.*?))?
1d485a1a 1158 )$''')
752cda38 1159
07a1250e 1160 def _traverse_infodict(fields):
1161 fields = [f for x in re.split(r'\.({.+?})\.?', fields)
1162 for f in ([x] if x.startswith('{') else x.split('.'))]
1163 for i in (0, -1):
1164 if fields and not fields[i]:
1165 fields.pop(i)
1166
1167 for i, f in enumerate(fields):
1168 if not f.startswith('{'):
1169 continue
1170 assert f.endswith('}'), f'No closing brace for {f} in {fields}'
1171 fields[i] = {k: k.split('.') for k in f[1:-1].split(',')}
1172
1173 return traverse_obj(info_dict, fields, is_user_input=True, traverse_string=True)
76a264ac 1174
752cda38 1175 def get_value(mdict):
1176 # Object traversal
2b8a2973 1177 value = _traverse_infodict(mdict['fields'])
752cda38 1178 # Negative
1179 if mdict['negate']:
1180 value = float_or_none(value)
1181 if value is not None:
1182 value *= -1
1183 # Do maths
385a27fa 1184 offset_key = mdict['maths']
1185 if offset_key:
752cda38 1186 value = float_or_none(value)
1187 operator = None
385a27fa 1188 while offset_key:
1189 item = re.match(
1190 MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
1191 offset_key).group(0)
1192 offset_key = offset_key[len(item):]
1193 if operator is None:
752cda38 1194 operator = MATH_FUNCTIONS[item]
385a27fa 1195 continue
1196 item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
1197 offset = float_or_none(item)
1198 if offset is None:
2b8a2973 1199 offset = float_or_none(_traverse_infodict(item))
385a27fa 1200 try:
1201 value = operator(value, multiplier * offset)
1202 except (TypeError, ZeroDivisionError):
1203 return None
1204 operator = None
752cda38 1205 # Datetime formatting
1206 if mdict['strf_format']:
7c37ff97 1207 value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))
752cda38 1208
a6bcaf71 1209 # XXX: Workaround for https://github.com/yt-dlp/yt-dlp/issues/4485
1210 if sanitize and value == '':
1211 value = None
752cda38 1212 return value
1213
b868936c 1214 na = self.params.get('outtmpl_na_placeholder', 'NA')
1215
e0fd9573 1216 def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
5c3895ff 1217 return sanitize_filename(str(value), restricted=restricted, is_id=(
1218 bool(re.search(r'(^|[_.])id(\.|$)', key))
8a82af35 1219 if 'filename-sanitization' in self.params['compat_opts']
5c3895ff 1220 else NO_DEFAULT))
e0fd9573 1221
1222 sanitizer = sanitize if callable(sanitize) else filename_sanitizer
1223 sanitize = bool(sanitize)
1224
6e84b215 1225 def _dumpjson_default(obj):
1226 if isinstance(obj, (set, LazyList)):
1227 return list(obj)
adbc4ec4 1228 return repr(obj)
6e84b215 1229
752cda38 1230 def create_key(outer_mobj):
1231 if not outer_mobj.group('has_key'):
b836dc94 1232 return outer_mobj.group(0)
752cda38 1233 key = outer_mobj.group('key')
752cda38 1234 mobj = re.match(INTERNAL_FORMAT_RE, key)
e0fd9573 1235 initial_field = mobj.group('fields') if mobj else ''
e978789f 1236 value, replacement, default = None, None, na
7c37ff97 1237 while mobj:
e625be0d 1238 mobj = mobj.groupdict()
7c37ff97 1239 default = mobj['default'] if mobj['default'] is not None else default
752cda38 1240 value = get_value(mobj)
e978789f 1241 replacement = mobj['replacement']
7c37ff97 1242 if value is None and mobj['alternate']:
34baa9fd 1243 mobj = re.match(INTERNAL_FORMAT_RE, mobj['remaining'][1:])
7c37ff97 1244 else:
1245 break
752cda38 1246
b868936c 1247 fmt = outer_mobj.group('format')
752cda38 1248 if fmt == 's' and value is not None and key in field_size_compat_map.keys():
86e5f3ed 1249 fmt = f'0{field_size_compat_map[key]:d}d'
752cda38 1250
e978789f 1251 value = default if value is None else value if replacement is None else replacement
752cda38 1252
4476d2c7 1253 flags = outer_mobj.group('conversion') or ''
7d1eb38a 1254 str_fmt = f'{fmt[:-1]}s'
524e2e4f 1255 if fmt[-1] == 'l': # list
4476d2c7 1256 delim = '\n' if '#' in flags else ', '
9e907ebd 1257 value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
524e2e4f 1258 elif fmt[-1] == 'j': # json
deae7c17 1259 value, fmt = json.dumps(
1260 value, default=_dumpjson_default,
9b9dad11 1261 indent=4 if '#' in flags else None, ensure_ascii='+' not in flags), str_fmt
47cdc68e 1262 elif fmt[-1] == 'h': # html
deae7c17 1263 value, fmt = escapeHTML(str(value)), str_fmt
524e2e4f 1264 elif fmt[-1] == 'q': # quoted
4476d2c7 1265 value = map(str, variadic(value) if '#' in flags else [value])
1266 value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
524e2e4f 1267 elif fmt[-1] == 'B': # bytes
0f06bcd7 1268 value = f'%{str_fmt}'.encode() % str(value).encode()
f5aa5cfb 1269 value, fmt = value.decode('utf-8', 'ignore'), 's'
524e2e4f 1270 elif fmt[-1] == 'U': # unicode normalized
524e2e4f 1271 value, fmt = unicodedata.normalize(
1272 # "+" = compatibility equivalence, "#" = NFD
4476d2c7 1273 'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
524e2e4f 1274 value), str_fmt
e0fd9573 1275 elif fmt[-1] == 'D': # decimal suffix
abbeeebc 1276 num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
1277 value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
1278 factor=1024 if '#' in flags else 1000)
37893bb0 1279 elif fmt[-1] == 'S': # filename sanitization
e0fd9573 1280 value, fmt = filename_sanitizer(initial_field, value, restricted='#' in flags), str_fmt
7d1eb38a 1281 elif fmt[-1] == 'c':
524e2e4f 1282 if value:
1283 value = str(value)[0]
76a264ac 1284 else:
524e2e4f 1285 fmt = str_fmt
76a264ac 1286 elif fmt[-1] not in 'rs': # numeric
a439a3a4 1287 value = float_or_none(value)
752cda38 1288 if value is None:
1289 value, fmt = default, 's'
901130bb 1290
752cda38 1291 if sanitize:
1292 if fmt[-1] == 'r':
1293 # If value is an object, sanitize might convert it to a string
1294 # So we convert it to repr first
7d1eb38a 1295 value, fmt = repr(value), str_fmt
639f1cea 1296 if fmt[-1] in 'csr':
e0fd9573 1297 value = sanitizer(initial_field, value)
901130bb 1298
b868936c 1299 key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
385a27fa 1300 TMPL_DICT[key] = value
b868936c 1301 return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))
752cda38 1302
385a27fa 1303 return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
143db31d 1304
819e0531 1305 def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
1306 outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
1307 return self.escape_outtmpl(outtmpl) % info_dict
1308
5127e92a 1309 def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None):
1310 assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive'
1311 if outtmpl is None:
bf1824b3 1312 outtmpl = self.params['outtmpl'].get(tmpl_type or 'default', self.params['outtmpl']['default'])
8222d8de 1313 try:
5127e92a 1314 outtmpl = self._outtmpl_expandpath(outtmpl)
e0fd9573 1315 filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
6a0546e3 1316 if not filename:
1317 return None
15da37c7 1318
5127e92a 1319 if tmpl_type in ('', 'temp'):
6a0546e3 1320 final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')
1321 if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):
1322 filename = replace_extension(filename, ext, final_ext)
5127e92a 1323 elif tmpl_type:
6a0546e3 1324 force_ext = OUTTMPL_TYPES[tmpl_type]
1325 if force_ext:
1326 filename = replace_extension(filename, force_ext, info_dict.get('ext'))
de6000d9 1327
bdc3fd2f
U
1328 # https://github.com/blackjack4494/youtube-dlc/issues/85
1329 trim_file_name = self.params.get('trim_file_name', False)
1330 if trim_file_name:
5c22c63d 1331 no_ext, *ext = filename.rsplit('.', 2)
1332 filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')
bdc3fd2f 1333
0202b52a 1334 return filename
8222d8de 1335 except ValueError as err:
6febd1c1 1336 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
8222d8de
JMF
1337 return None
1338
5127e92a 1339 def prepare_filename(self, info_dict, dir_type='', *, outtmpl=None, warn=False):
1340 """Generate the output filename"""
1341 if outtmpl:
1342 assert not dir_type, 'outtmpl and dir_type are mutually exclusive'
1343 dir_type = None
1344 filename = self._prepare_filename(info_dict, tmpl_type=dir_type, outtmpl=outtmpl)
80c03fa9 1345 if not filename and dir_type not in ('', 'temp'):
1346 return ''
de6000d9 1347
c84aeac6 1348 if warn:
21cd8fae 1349 if not self.params.get('paths'):
de6000d9 1350 pass
1351 elif filename == '-':
c84aeac6 1352 self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
de6000d9 1353 elif os.path.isabs(filename):
c84aeac6 1354 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
de6000d9 1355 if filename == '-' or not filename:
1356 return filename
1357
21cd8fae 1358 return self.get_output_path(dir_type, filename)
0202b52a 1359
120fe513 1360 def _match_entry(self, info_dict, incomplete=False, silent=False):
6368e2e6 1361 """Returns None if the file should be downloaded"""
d7b460d0 1362 _type = info_dict.get('_type', 'video')
1363 assert incomplete or _type == 'video', 'Only video result can be considered complete'
8222d8de 1364
3bec830a 1365 video_title = info_dict.get('title', info_dict.get('id', 'entry'))
c77495e3 1366
8b0d7497 1367 def check_filter():
d7b460d0 1368 if _type in ('playlist', 'multi_video'):
1369 return
1370 elif _type in ('url', 'url_transparent') and not try_call(
1371 lambda: self.get_info_extractor(info_dict['ie_key']).is_single_video(info_dict['url'])):
1372 return
1373
8b0d7497 1374 if 'title' in info_dict:
1375 # This can happen when we're just evaluating the playlist
1376 title = info_dict['title']
1377 matchtitle = self.params.get('matchtitle', False)
1378 if matchtitle:
1379 if not re.search(matchtitle, title, re.IGNORECASE):
1380 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1381 rejecttitle = self.params.get('rejecttitle', False)
1382 if rejecttitle:
1383 if re.search(rejecttitle, title, re.IGNORECASE):
1384 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
6368e2e6 1385
8b0d7497 1386 date = info_dict.get('upload_date')
1387 if date is not None:
1388 dateRange = self.params.get('daterange', DateRange())
1389 if date not in dateRange:
86e5f3ed 1390 return f'{date_from_str(date).isoformat()} upload date is not in range {dateRange}'
8b0d7497 1391 view_count = info_dict.get('view_count')
1392 if view_count is not None:
1393 min_views = self.params.get('min_views')
1394 if min_views is not None and view_count < min_views:
1395 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1396 max_views = self.params.get('max_views')
1397 if max_views is not None and view_count > max_views:
1398 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1399 if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1400 return 'Skipping "%s" because it is age restricted' % video_title
8b0d7497 1401
8f18aca8 1402 match_filter = self.params.get('match_filter')
1403 if match_filter is not None:
1404 try:
1405 ret = match_filter(info_dict, incomplete=incomplete)
1406 except TypeError:
1407 # For backward compatibility
1408 ret = None if incomplete else match_filter(info_dict)
492272fe 1409 if ret is NO_DEFAULT:
1410 while True:
1411 filename = self._format_screen(self.prepare_filename(info_dict), self.Styles.FILENAME)
1412 reply = input(self._format_screen(
1413 f'Download "{filename}"? (Y/n): ', self.Styles.EMPHASIS)).lower().strip()
1414 if reply in {'y', ''}:
1415 return None
1416 elif reply == 'n':
1417 return f'Skipping {video_title}'
492272fe 1418 elif ret is not None:
8f18aca8 1419 return ret
8b0d7497 1420 return None
1421
c77495e3 1422 if self.in_download_archive(info_dict):
1423 reason = '%s has already been recorded in the archive' % video_title
1424 break_opt, break_err = 'break_on_existing', ExistingVideoReached
1425 else:
1426 reason = check_filter()
1427 break_opt, break_err = 'break_on_reject', RejectedVideoReached
8b0d7497 1428 if reason is not None:
120fe513 1429 if not silent:
1430 self.to_screen('[download] ' + reason)
c77495e3 1431 if self.params.get(break_opt, False):
1432 raise break_err()
8b0d7497 1433 return reason
fe7e0c98 1434
b6c45014
JMF
1435 @staticmethod
1436 def add_extra_info(info_dict, extra_info):
1437 '''Set the keys from extra_info in info dict if they are missing'''
1438 for key, value in extra_info.items():
1439 info_dict.setdefault(key, value)
1440
409e1828 1441 def extract_info(self, url, download=True, ie_key=None, extra_info=None,
61aa5ba3 1442 process=True, force_generic_extractor=False):
41d1cca3 1443 """
17ffed18 1444 Extract and return the information dictionary of the URL
41d1cca3 1445
1446 Arguments:
17ffed18 1447 @param url URL to extract
41d1cca3 1448
1449 Keyword arguments:
17ffed18 1450 @param download Whether to download videos
1451 @param process Whether to resolve all unresolved references (URLs, playlist items).
1452 Must be True for download to work
1453 @param ie_key Use only the extractor with this key
1454
1455 @param extra_info Dictionary containing the extra values to add to the info (For internal use only)
1456 @force_generic_extractor Force using the generic extractor (Deprecated; use ie_key='Generic')
41d1cca3 1457 """
fe7e0c98 1458
409e1828 1459 if extra_info is None:
1460 extra_info = {}
1461
61aa5ba3 1462 if not ie_key and force_generic_extractor:
d22dec74
S
1463 ie_key = 'Generic'
1464
8222d8de 1465 if ie_key:
fe7866d0 1466 ies = {ie_key: self._ies[ie_key]} if ie_key in self._ies else {}
8222d8de
JMF
1467 else:
1468 ies = self._ies
1469
fe7866d0 1470 for key, ie in ies.items():
8222d8de
JMF
1471 if not ie.suitable(url):
1472 continue
1473
1474 if not ie.working():
6febd1c1
PH
1475 self.report_warning('The program functionality for this site has been marked as broken, '
1476 'and will probably not work.')
8222d8de 1477
1151c407 1478 temp_id = ie.get_temp_id(url)
fe7866d0 1479 if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': key}):
1480 self.to_screen(f'[{key}] {temp_id}: has already been recorded in the archive')
5e5be0c0 1481 if self.params.get('break_on_existing', False):
1482 raise ExistingVideoReached()
a0566bbf 1483 break
fe7866d0 1484 return self.__extract_info(url, self.get_info_extractor(key), download, extra_info, process)
a0566bbf 1485 else:
fe7866d0 1486 extractors_restricted = self.params.get('allowed_extractors') not in (None, ['default'])
1487 self.report_error(f'No suitable extractor{format_field(ie_key, None, " (%s)")} found for URL {url}',
1488 tb=False if extractors_restricted else None)
a0566bbf 1489
7e88d7d7 1490 def _handle_extraction_exceptions(func):
b5ae35ee 1491 @functools.wraps(func)
a0566bbf 1492 def wrapper(self, *args, **kwargs):
6da22e7d 1493 while True:
1494 try:
1495 return func(self, *args, **kwargs)
1496 except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
8222d8de 1497 raise
6da22e7d 1498 except ReExtractInfo as e:
1499 if e.expected:
1500 self.to_screen(f'{e}; Re-extracting data')
1501 else:
1502 self.to_stderr('\r')
1503 self.report_warning(f'{e}; Re-extracting data')
1504 continue
1505 except GeoRestrictedError as e:
1506 msg = e.msg
1507 if e.countries:
1508 msg += '\nThis video is available in %s.' % ', '.join(
1509 map(ISO3166Utils.short2full, e.countries))
1510 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1511 self.report_error(msg)
1512 except ExtractorError as e: # An error we somewhat expected
1513 self.report_error(str(e), e.format_traceback())
1514 except Exception as e:
1515 if self.params.get('ignoreerrors'):
1516 self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
1517 else:
1518 raise
1519 break
a0566bbf 1520 return wrapper
1521
693f0600 1522 def _wait_for_video(self, ie_result={}):
f2ebc5c7 1523 if (not self.params.get('wait_for_video')
1524 or ie_result.get('_type', 'video') != 'video'
1525 or ie_result.get('formats') or ie_result.get('url')):
1526 return
1527
1528 format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
1529 last_msg = ''
1530
1531 def progress(msg):
1532 nonlocal last_msg
a7dc6a89 1533 full_msg = f'{msg}\n'
1534 if not self.params.get('noprogress'):
1535 full_msg = msg + ' ' * (len(last_msg) - len(msg)) + '\r'
1536 elif last_msg:
1537 return
1538 self.to_screen(full_msg, skip_eol=True)
f2ebc5c7 1539 last_msg = msg
1540
1541 min_wait, max_wait = self.params.get('wait_for_video')
1542 diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
1543 if diff is None and ie_result.get('live_status') == 'is_upcoming':
16c620bc 1544 diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0)
f2ebc5c7 1545 self.report_warning('Release time of video is not known')
693f0600 1546 elif ie_result and (diff or 0) <= 0:
f2ebc5c7 1547 self.report_warning('Video should already be available according to extracted info')
38d79fd1 1548 diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
f2ebc5c7 1549 self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1550
1551 wait_till = time.time() + diff
1552 try:
1553 while True:
1554 diff = wait_till - time.time()
1555 if diff <= 0:
1556 progress('')
1557 raise ReExtractInfo('[wait] Wait period ended', expected=True)
1558 progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1559 time.sleep(1)
1560 except KeyboardInterrupt:
1561 progress('')
1562 raise ReExtractInfo('[wait] Interrupted by user', expected=True)
1563 except BaseException as e:
1564 if not isinstance(e, ReExtractInfo):
1565 self.to_screen('')
1566 raise
1567
7e88d7d7 1568 @_handle_extraction_exceptions
58f197b7 1569 def __extract_info(self, url, ie, download, extra_info, process):
693f0600 1570 try:
1571 ie_result = ie.extract(url)
1572 except UserNotLive as e:
1573 if process:
1574 if self.params.get('wait_for_video'):
1575 self.report_warning(e)
1576 self._wait_for_video()
1577 raise
a0566bbf 1578 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
cb794ee0 1579 self.report_warning(f'Extractor {ie.IE_NAME} returned nothing{bug_reports_message()}')
a0566bbf 1580 return
1581 if isinstance(ie_result, list):
1582 # Backwards compatibility: old IE result format
1583 ie_result = {
1584 '_type': 'compat_list',
1585 'entries': ie_result,
1586 }
e37d0efb 1587 if extra_info.get('original_url'):
1588 ie_result.setdefault('original_url', extra_info['original_url'])
a0566bbf 1589 self.add_default_extra_info(ie_result, ie, url)
1590 if process:
f2ebc5c7 1591 self._wait_for_video(ie_result)
a0566bbf 1592 return self.process_ie_result(ie_result, download, extra_info)
8222d8de 1593 else:
a0566bbf 1594 return ie_result
fe7e0c98 1595
ea38e55f 1596 def add_default_extra_info(self, ie_result, ie, url):
6033d980 1597 if url is not None:
1598 self.add_extra_info(ie_result, {
1599 'webpage_url': url,
1600 'original_url': url,
57ebfca3 1601 })
1602 webpage_url = ie_result.get('webpage_url')
1603 if webpage_url:
1604 self.add_extra_info(ie_result, {
1605 'webpage_url_basename': url_basename(webpage_url),
1606 'webpage_url_domain': get_domain(webpage_url),
6033d980 1607 })
1608 if ie is not None:
1609 self.add_extra_info(ie_result, {
1610 'extractor': ie.IE_NAME,
1611 'extractor_key': ie.ie_key(),
1612 })
ea38e55f 1613
58adec46 1614 def process_ie_result(self, ie_result, download=True, extra_info=None):
8222d8de
JMF
1615 """
1616 Take the result of the ie(may be modified) and resolve all unresolved
1617 references (URLs, playlist items).
1618
1619 It will also download the videos if 'download'.
1620 Returns the resolved ie_result.
1621 """
58adec46 1622 if extra_info is None:
1623 extra_info = {}
e8ee972c
PH
1624 result_type = ie_result.get('_type', 'video')
1625
057a5206 1626 if result_type in ('url', 'url_transparent'):
8f97a15d 1627 ie_result['url'] = sanitize_url(
1628 ie_result['url'], scheme='http' if self.params.get('prefer_insecure') else 'https')
8791e78c 1629 if ie_result.get('original_url') and not extra_info.get('original_url'):
1630 extra_info = {'original_url': ie_result['original_url'], **extra_info}
e37d0efb 1631
057a5206 1632 extract_flat = self.params.get('extract_flat', False)
3089bc74
S
1633 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1634 or extract_flat is True):
ecb54191 1635 info_copy = ie_result.copy()
6033d980 1636 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
360167b9 1637 if ie and not ie_result.get('id'):
4614bc22 1638 info_copy['id'] = ie.get_temp_id(ie_result['url'])
6033d980 1639 self.add_default_extra_info(info_copy, ie, ie_result['url'])
4614bc22 1640 self.add_extra_info(info_copy, extra_info)
b5475f11 1641 info_copy, _ = self.pre_process(info_copy)
94dc8604 1642 self._fill_common_fields(info_copy, False)
ecb54191 1643 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
415f8d51 1644 self._raise_pending_errors(info_copy)
4614bc22 1645 if self.params.get('force_write_download_archive', False):
1646 self.record_download_archive(info_copy)
e8ee972c
PH
1647 return ie_result
1648
8222d8de 1649 if result_type == 'video':
b6c45014 1650 self.add_extra_info(ie_result, extra_info)
9c2b75b5 1651 ie_result = self.process_video_result(ie_result, download=download)
415f8d51 1652 self._raise_pending_errors(ie_result)
28b0eb0f 1653 additional_urls = (ie_result or {}).get('additional_urls')
9c2b75b5 1654 if additional_urls:
e9f4ccd1 1655 # TODO: Improve MetadataParserPP to allow setting a list
14f25df2 1656 if isinstance(additional_urls, str):
9c2b75b5 1657 additional_urls = [additional_urls]
1658 self.to_screen(
1659 '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1660 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1661 ie_result['additional_entries'] = [
1662 self.extract_info(
b69fd25c 1663 url, download, extra_info=extra_info,
9c2b75b5 1664 force_generic_extractor=self.params.get('force_generic_extractor'))
1665 for url in additional_urls
1666 ]
1667 return ie_result
8222d8de
JMF
1668 elif result_type == 'url':
1669 # We have to add extra_info to the results because it may be
1670 # contained in a playlist
07cce701 1671 return self.extract_info(
1672 ie_result['url'], download,
1673 ie_key=ie_result.get('ie_key'),
1674 extra_info=extra_info)
7fc3fa05
PH
1675 elif result_type == 'url_transparent':
1676 # Use the information from the embedding page
1677 info = self.extract_info(
1678 ie_result['url'], ie_key=ie_result.get('ie_key'),
1679 extra_info=extra_info, download=False, process=False)
1680
1640eb09
S
1681 # extract_info may return None when ignoreerrors is enabled and
1682 # extraction failed with an error, don't crash and return early
1683 # in this case
1684 if not info:
1685 return info
1686
3975b4d2 1687 exempted_fields = {'_type', 'url', 'ie_key'}
1688 if not ie_result.get('section_end') and ie_result.get('section_start') is None:
1689 # For video clips, the id etc of the clip extractor should be used
1690 exempted_fields |= {'id', 'extractor', 'extractor_key'}
1691
412c617d 1692 new_result = info.copy()
3975b4d2 1693 new_result.update(filter_dict(ie_result, lambda k, v: v is not None and k not in exempted_fields))
7fc3fa05 1694
0563f7ac
S
1695 # Extracted info may not be a video result (i.e.
1696 # info.get('_type', 'video') != video) but rather an url or
1697 # url_transparent. In such cases outer metadata (from ie_result)
1698 # should be propagated to inner one (info). For this to happen
1699 # _type of info should be overridden with url_transparent. This
067aa17e 1700 # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
0563f7ac
S
1701 if new_result.get('_type') == 'url':
1702 new_result['_type'] = 'url_transparent'
7fc3fa05
PH
1703
1704 return self.process_ie_result(
1705 new_result, download=download, extra_info=extra_info)
40fcba5e 1706 elif result_type in ('playlist', 'multi_video'):
30a074c2 1707 # Protect from infinite recursion due to recursively nested playlists
1708 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
0bd5a039 1709 webpage_url = ie_result.get('webpage_url') # Playlists maynot have webpage_url
1710 if webpage_url and webpage_url in self._playlist_urls:
7e85e872 1711 self.to_screen(
30a074c2 1712 '[download] Skipping already downloaded playlist: %s'
1713 % ie_result.get('title') or ie_result.get('id'))
1714 return
7e85e872 1715
30a074c2 1716 self._playlist_level += 1
1717 self._playlist_urls.add(webpage_url)
03f83004 1718 self._fill_common_fields(ie_result, False)
bc516a3f 1719 self._sanitize_thumbnails(ie_result)
30a074c2 1720 try:
1721 return self.__process_playlist(ie_result, download)
1722 finally:
1723 self._playlist_level -= 1
1724 if not self._playlist_level:
1725 self._playlist_urls.clear()
8222d8de 1726 elif result_type == 'compat_list':
c9bf4114
PH
1727 self.report_warning(
1728 'Extractor %s returned a compat_list result. '
1729 'It needs to be updated.' % ie_result.get('extractor'))
5f6a1245 1730
8222d8de 1731 def _fixup(r):
b868936c 1732 self.add_extra_info(r, {
1733 'extractor': ie_result['extractor'],
1734 'webpage_url': ie_result['webpage_url'],
1735 'webpage_url_basename': url_basename(ie_result['webpage_url']),
0bb322b9 1736 'webpage_url_domain': get_domain(ie_result['webpage_url']),
b868936c 1737 'extractor_key': ie_result['extractor_key'],
1738 })
8222d8de
JMF
1739 return r
1740 ie_result['entries'] = [
b6c45014 1741 self.process_ie_result(_fixup(r), download, extra_info)
8222d8de
JMF
1742 for r in ie_result['entries']
1743 ]
1744 return ie_result
1745 else:
1746 raise Exception('Invalid result type: %s' % result_type)
1747
e92caff5 1748 def _ensure_dir_exists(self, path):
1749 return make_dir(path, self.report_error)
1750
3b603dbd 1751 @staticmethod
3bec830a 1752 def _playlist_infodict(ie_result, strict=False, **kwargs):
1753 info = {
1754 'playlist_count': ie_result.get('playlist_count'),
3b603dbd 1755 'playlist': ie_result.get('title') or ie_result.get('id'),
1756 'playlist_id': ie_result.get('id'),
1757 'playlist_title': ie_result.get('title'),
1758 'playlist_uploader': ie_result.get('uploader'),
1759 'playlist_uploader_id': ie_result.get('uploader_id'),
3b603dbd 1760 **kwargs,
1761 }
3bec830a 1762 if strict:
1763 return info
0bd5a039 1764 if ie_result.get('webpage_url'):
1765 info.update({
1766 'webpage_url': ie_result['webpage_url'],
1767 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1768 'webpage_url_domain': get_domain(ie_result['webpage_url']),
1769 })
3bec830a 1770 return {
1771 **info,
1772 'playlist_index': 0,
1773 '__last_playlist_index': max(ie_result['requested_entries'] or (0, 0)),
1774 'extractor': ie_result['extractor'],
3bec830a 1775 'extractor_key': ie_result['extractor_key'],
1776 }
3b603dbd 1777
30a074c2 1778 def __process_playlist(self, ie_result, download):
7e88d7d7 1779 """Process each entry in the playlist"""
f5ea4748 1780 assert ie_result['_type'] in ('playlist', 'multi_video')
1781
3bec830a 1782 common_info = self._playlist_infodict(ie_result, strict=True)
3955b207 1783 title = common_info.get('playlist') or '<Untitled>'
3bec830a 1784 if self._match_entry(common_info, incomplete=True) is not None:
1785 return
c6e07cf1 1786 self.to_screen(f'[download] Downloading {ie_result["_type"]}: {title}')
f0d785d3 1787
7e88d7d7 1788 all_entries = PlaylistEntries(self, ie_result)
7e9a6125 1789 entries = orderedSet(all_entries.get_requested_items(), lazy=True)
1790
1791 lazy = self.params.get('lazy_playlist')
1792 if lazy:
1793 resolved_entries, n_entries = [], 'N/A'
1794 ie_result['requested_entries'], ie_result['entries'] = None, None
1795 else:
1796 entries = resolved_entries = list(entries)
1797 n_entries = len(resolved_entries)
1798 ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*resolved_entries)) or ([], [])
1799 if not ie_result.get('playlist_count'):
1800 # Better to do this after potentially exhausting entries
1801 ie_result['playlist_count'] = all_entries.get_full_count()
498f5606 1802
0647d925 1803 extra = self._playlist_infodict(ie_result, n_entries=int_or_none(n_entries))
1804 ie_copy = collections.ChainMap(ie_result, extra)
3bec830a 1805
e08a85d8 1806 _infojson_written = False
0bfc53d0 1807 write_playlist_files = self.params.get('allow_playlist_files', True)
1808 if write_playlist_files and self.params.get('list_thumbnails'):
1809 self.list_thumbnails(ie_result)
1810 if write_playlist_files and not self.params.get('simulate'):
e08a85d8 1811 _infojson_written = self._write_info_json(
1812 'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
1813 if _infojson_written is None:
80c03fa9 1814 return
1815 if self._write_description('playlist', ie_result,
1816 self.prepare_filename(ie_copy, 'pl_description')) is None:
1817 return
681de68e 1818 # TODO: This should be passed to ThumbnailsConvertor if necessary
3bec830a 1819 self._write_thumbnails('playlist', ie_result, self.prepare_filename(ie_copy, 'pl_thumbnail'))
30a074c2 1820
7e9a6125 1821 if lazy:
1822 if self.params.get('playlistreverse') or self.params.get('playlistrandom'):
1823 self.report_warning('playlistreverse and playlistrandom are not supported with lazy_playlist', only_once=True)
1824 elif self.params.get('playlistreverse'):
1825 entries.reverse()
1826 elif self.params.get('playlistrandom'):
30a074c2 1827 random.shuffle(entries)
1828
bc5c2f8a 1829 self.to_screen(f'[{ie_result["extractor"]}] Playlist {title}: Downloading {n_entries} items'
7e88d7d7 1830 f'{format_field(ie_result, "playlist_count", " of %s")}')
30a074c2 1831
134c913c 1832 keep_resolved_entries = self.params.get('extract_flat') != 'discard'
1833 if self.params.get('extract_flat') == 'discard_in_playlist':
1834 keep_resolved_entries = ie_result['_type'] != 'playlist'
1835 if keep_resolved_entries:
1836 self.write_debug('The information of all playlist entries will be held in memory')
1837
26e2805c 1838 failures = 0
1839 max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
7e9a6125 1840 for i, (playlist_index, entry) in enumerate(entries):
1841 if lazy:
1842 resolved_entries.append((playlist_index, entry))
3bec830a 1843 if not entry:
7e88d7d7 1844 continue
1845
7e88d7d7 1846 entry['__x_forwarded_for_ip'] = ie_result.get('__x_forwarded_for_ip')
7e9a6125 1847 if not lazy and 'playlist-index' in self.params.get('compat_opts', []):
1848 playlist_index = ie_result['requested_entries'][i]
1849
0647d925 1850 entry_copy = collections.ChainMap(entry, {
3bec830a 1851 **common_info,
3955b207 1852 'n_entries': int_or_none(n_entries),
71729754 1853 'playlist_index': playlist_index,
7e9a6125 1854 'playlist_autonumber': i + 1,
0647d925 1855 })
3bec830a 1856
0647d925 1857 if self._match_entry(entry_copy, incomplete=True) is not None:
f0ad6f8c 1858 # For compatabilty with youtube-dl. See https://github.com/yt-dlp/yt-dlp/issues/4369
1859 resolved_entries[i] = (playlist_index, NO_DEFAULT)
3bec830a 1860 continue
1861
bc5c2f8a 1862 self.to_screen('[download] Downloading item %s of %s' % (
3bec830a 1863 self._format_screen(i + 1, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))
1864
a6ca61d4 1865 extra.update({
1866 'playlist_index': playlist_index,
1867 'playlist_autonumber': i + 1,
1868 })
3bec830a 1869 entry_result = self.__process_iterable_entry(entry, download, extra)
26e2805c 1870 if not entry_result:
1871 failures += 1
1872 if failures >= max_failures:
1873 self.report_error(
7e88d7d7 1874 f'Skipping the remaining entries in playlist "{title}" since {failures} items failed extraction')
26e2805c 1875 break
134c913c 1876 if keep_resolved_entries:
1877 resolved_entries[i] = (playlist_index, entry_result)
7e88d7d7 1878
1879 # Update with processed data
f0ad6f8c 1880 ie_result['entries'] = [e for _, e in resolved_entries if e is not NO_DEFAULT]
bc5c2f8a 1881 ie_result['requested_entries'] = [i for i, e in resolved_entries if e is not NO_DEFAULT]
1882 if ie_result['requested_entries'] == try_call(lambda: list(range(1, ie_result['playlist_count'] + 1))):
1883 # Do not set for full playlist
1884 ie_result.pop('requested_entries')
e08a85d8 1885
1886 # Write the updated info to json
cb96c5be 1887 if _infojson_written is True and self._write_info_json(
e08a85d8 1888 'updated playlist', ie_result,
1889 self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
1890 return
ca30f449 1891
ed5835b4 1892 ie_result = self.run_all_pps('playlist', ie_result)
7e88d7d7 1893 self.to_screen(f'[download] Finished downloading playlist: {title}')
30a074c2 1894 return ie_result
1895
7e88d7d7 1896 @_handle_extraction_exceptions
a0566bbf 1897 def __process_iterable_entry(self, entry, download, extra_info):
1898 return self.process_ie_result(
1899 entry, download=download, extra_info=extra_info)
1900
67134eab
JMF
1901 def _build_format_filter(self, filter_spec):
1902 " Returns a function to filter the formats according to the filter_spec "
083c9df9
PH
1903
1904 OPERATORS = {
1905 '<': operator.lt,
1906 '<=': operator.le,
1907 '>': operator.gt,
1908 '>=': operator.ge,
1909 '=': operator.eq,
1910 '!=': operator.ne,
1911 }
67134eab 1912 operator_rex = re.compile(r'''(?x)\s*
187986a8 1913 (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1914 (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1915 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
083c9df9 1916 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
187986a8 1917 m = operator_rex.fullmatch(filter_spec)
9ddb6925
S
1918 if m:
1919 try:
1920 comparison_value = int(m.group('value'))
1921 except ValueError:
1922 comparison_value = parse_filesize(m.group('value'))
1923 if comparison_value is None:
1924 comparison_value = parse_filesize(m.group('value') + 'B')
1925 if comparison_value is None:
1926 raise ValueError(
1927 'Invalid value %r in format specification %r' % (
67134eab 1928 m.group('value'), filter_spec))
9ddb6925
S
1929 op = OPERATORS[m.group('op')]
1930
083c9df9 1931 if not m:
9ddb6925
S
1932 STR_OPERATORS = {
1933 '=': operator.eq,
10d33b34
YCH
1934 '^=': lambda attr, value: attr.startswith(value),
1935 '$=': lambda attr, value: attr.endswith(value),
1936 '*=': lambda attr, value: value in attr,
1ce9a3cb 1937 '~=': lambda attr, value: value.search(attr) is not None
9ddb6925 1938 }
187986a8 1939 str_operator_rex = re.compile(r'''(?x)\s*
1940 (?P<key>[a-zA-Z0-9._-]+)\s*
1ce9a3cb
LF
1941 (?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?
1942 (?P<quote>["'])?
1943 (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
1944 (?(quote)(?P=quote))\s*
9ddb6925 1945 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
187986a8 1946 m = str_operator_rex.fullmatch(filter_spec)
9ddb6925 1947 if m:
1ce9a3cb
LF
1948 if m.group('op') == '~=':
1949 comparison_value = re.compile(m.group('value'))
1950 else:
1951 comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value'))
2cc779f4
S
1952 str_op = STR_OPERATORS[m.group('op')]
1953 if m.group('negation'):
e118a879 1954 op = lambda attr, value: not str_op(attr, value)
2cc779f4
S
1955 else:
1956 op = str_op
083c9df9 1957
9ddb6925 1958 if not m:
187986a8 1959 raise SyntaxError('Invalid filter specification %r' % filter_spec)
083c9df9
PH
1960
1961 def _filter(f):
1962 actual_value = f.get(m.group('key'))
1963 if actual_value is None:
1964 return m.group('none_inclusive')
1965 return op(actual_value, comparison_value)
67134eab
JMF
1966 return _filter
1967
9f1a1c36 1968 def _check_formats(self, formats):
1969 for f in formats:
1970 self.to_screen('[info] Testing format %s' % f['format_id'])
75689fe5 1971 path = self.get_output_path('temp')
1972 if not self._ensure_dir_exists(f'{path}/'):
1973 continue
1974 temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
9f1a1c36 1975 temp_file.close()
1976 try:
1977 success, _ = self.dl(temp_file.name, f, test=True)
8a82af35 1978 except (DownloadError, OSError, ValueError) + network_exceptions:
9f1a1c36 1979 success = False
1980 finally:
1981 if os.path.exists(temp_file.name):
1982 try:
1983 os.remove(temp_file.name)
1984 except OSError:
1985 self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1986 if success:
1987 yield f
1988 else:
1989 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1990
0017d9ad 1991 def _default_format_spec(self, info_dict, download=True):
0017d9ad 1992
af0f7428
S
1993 def can_merge():
1994 merger = FFmpegMergerPP(self)
1995 return merger.available and merger.can_merge()
1996
91ebc640 1997 prefer_best = (
b7b04c78 1998 not self.params.get('simulate')
91ebc640 1999 and download
2000 and (
2001 not can_merge()
21633673 2002 or info_dict.get('is_live') and not self.params.get('live_from_start')
bf1824b3 2003 or self.params['outtmpl']['default'] == '-'))
53ed7066 2004 compat = (
2005 prefer_best
2006 or self.params.get('allow_multiple_audio_streams', False)
8a82af35 2007 or 'format-spec' in self.params['compat_opts'])
91ebc640 2008
2009 return (
53ed7066 2010 'best/bestvideo+bestaudio' if prefer_best
2011 else 'bestvideo*+bestaudio/best' if not compat
91ebc640 2012 else 'bestvideo+bestaudio/best')
0017d9ad 2013
67134eab
JMF
2014 def build_format_selector(self, format_spec):
2015 def syntax_error(note, start):
2016 message = (
2017 'Invalid format specification: '
86e5f3ed 2018 '{}\n\t{}\n\t{}^'.format(note, format_spec, ' ' * start[1]))
67134eab
JMF
2019 return SyntaxError(message)
2020
2021 PICKFIRST = 'PICKFIRST'
2022 MERGE = 'MERGE'
2023 SINGLE = 'SINGLE'
0130afb7 2024 GROUP = 'GROUP'
67134eab
JMF
2025 FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
2026
91ebc640 2027 allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
2028 'video': self.params.get('allow_multiple_video_streams', False)}
909d24dd 2029
9f1a1c36 2030 check_formats = self.params.get('check_formats') == 'selected'
e8e73840 2031
67134eab
JMF
2032 def _parse_filter(tokens):
2033 filter_parts = []
2034 for type, string, start, _, _ in tokens:
2035 if type == tokenize.OP and string == ']':
2036 return ''.join(filter_parts)
2037 else:
2038 filter_parts.append(string)
2039
232541df 2040 def _remove_unused_ops(tokens):
62b58c09
L
2041 # Remove operators that we don't use and join them with the surrounding strings.
2042 # E.g. 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
232541df
JMF
2043 ALLOWED_OPS = ('/', '+', ',', '(', ')')
2044 last_string, last_start, last_end, last_line = None, None, None, None
2045 for type, string, start, end, line in tokens:
2046 if type == tokenize.OP and string == '[':
2047 if last_string:
2048 yield tokenize.NAME, last_string, last_start, last_end, last_line
2049 last_string = None
2050 yield type, string, start, end, line
2051 # everything inside brackets will be handled by _parse_filter
2052 for type, string, start, end, line in tokens:
2053 yield type, string, start, end, line
2054 if type == tokenize.OP and string == ']':
2055 break
2056 elif type == tokenize.OP and string in ALLOWED_OPS:
2057 if last_string:
2058 yield tokenize.NAME, last_string, last_start, last_end, last_line
2059 last_string = None
2060 yield type, string, start, end, line
2061 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
2062 if not last_string:
2063 last_string = string
2064 last_start = start
2065 last_end = end
2066 else:
2067 last_string += string
2068 if last_string:
2069 yield tokenize.NAME, last_string, last_start, last_end, last_line
2070
cf2ac6df 2071 def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
67134eab
JMF
2072 selectors = []
2073 current_selector = None
2074 for type, string, start, _, _ in tokens:
2075 # ENCODING is only defined in python 3.x
2076 if type == getattr(tokenize, 'ENCODING', None):
2077 continue
2078 elif type in [tokenize.NAME, tokenize.NUMBER]:
2079 current_selector = FormatSelector(SINGLE, string, [])
2080 elif type == tokenize.OP:
cf2ac6df
JMF
2081 if string == ')':
2082 if not inside_group:
2083 # ')' will be handled by the parentheses group
2084 tokens.restore_last_token()
67134eab 2085 break
cf2ac6df 2086 elif inside_merge and string in ['/', ',']:
0130afb7
JMF
2087 tokens.restore_last_token()
2088 break
cf2ac6df
JMF
2089 elif inside_choice and string == ',':
2090 tokens.restore_last_token()
2091 break
2092 elif string == ',':
0a31a350
JMF
2093 if not current_selector:
2094 raise syntax_error('"," must follow a format selector', start)
67134eab
JMF
2095 selectors.append(current_selector)
2096 current_selector = None
2097 elif string == '/':
d96d604e
JMF
2098 if not current_selector:
2099 raise syntax_error('"/" must follow a format selector', start)
67134eab 2100 first_choice = current_selector
cf2ac6df 2101 second_choice = _parse_format_selection(tokens, inside_choice=True)
f5f4a27a 2102 current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
67134eab
JMF
2103 elif string == '[':
2104 if not current_selector:
2105 current_selector = FormatSelector(SINGLE, 'best', [])
2106 format_filter = _parse_filter(tokens)
2107 current_selector.filters.append(format_filter)
0130afb7
JMF
2108 elif string == '(':
2109 if current_selector:
2110 raise syntax_error('Unexpected "("', start)
cf2ac6df
JMF
2111 group = _parse_format_selection(tokens, inside_group=True)
2112 current_selector = FormatSelector(GROUP, group, [])
67134eab 2113 elif string == '+':
d03cfdce 2114 if not current_selector:
2115 raise syntax_error('Unexpected "+"', start)
2116 selector_1 = current_selector
2117 selector_2 = _parse_format_selection(tokens, inside_merge=True)
2118 if not selector_2:
2119 raise syntax_error('Expected a selector', start)
2120 current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
67134eab 2121 else:
86e5f3ed 2122 raise syntax_error(f'Operator not recognized: "{string}"', start)
67134eab
JMF
2123 elif type == tokenize.ENDMARKER:
2124 break
2125 if current_selector:
2126 selectors.append(current_selector)
2127 return selectors
2128
f8d4ad9a 2129 def _merge(formats_pair):
2130 format_1, format_2 = formats_pair
2131
2132 formats_info = []
2133 formats_info.extend(format_1.get('requested_formats', (format_1,)))
2134 formats_info.extend(format_2.get('requested_formats', (format_2,)))
2135
2136 if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
551f9388 2137 get_no_more = {'video': False, 'audio': False}
f8d4ad9a 2138 for (i, fmt_info) in enumerate(formats_info):
551f9388 2139 if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
2140 formats_info.pop(i)
2141 continue
2142 for aud_vid in ['audio', 'video']:
f8d4ad9a 2143 if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
2144 if get_no_more[aud_vid]:
2145 formats_info.pop(i)
f5510afe 2146 break
f8d4ad9a 2147 get_no_more[aud_vid] = True
2148
2149 if len(formats_info) == 1:
2150 return formats_info[0]
2151
2152 video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
2153 audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
2154
2155 the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
2156 the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
2157
fc61aff4
LL
2158 output_ext = get_compatible_ext(
2159 vcodecs=[f.get('vcodec') for f in video_fmts],
2160 acodecs=[f.get('acodec') for f in audio_fmts],
2161 vexts=[f['ext'] for f in video_fmts],
2162 aexts=[f['ext'] for f in audio_fmts],
2163 preferences=(try_call(lambda: self.params['merge_output_format'].split('/'))
2164 or self.params.get('prefer_free_formats') and ('webm', 'mkv')))
f8d4ad9a 2165
975a0d0d 2166 filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
2167
f8d4ad9a 2168 new_dict = {
2169 'requested_formats': formats_info,
975a0d0d 2170 'format': '+'.join(filtered('format')),
2171 'format_id': '+'.join(filtered('format_id')),
f8d4ad9a 2172 'ext': output_ext,
975a0d0d 2173 'protocol': '+'.join(map(determine_protocol, formats_info)),
093a1710 2174 'language': '+'.join(orderedSet(filtered('language'))) or None,
2175 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
2176 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
975a0d0d 2177 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
f8d4ad9a 2178 }
2179
2180 if the_only_video:
2181 new_dict.update({
2182 'width': the_only_video.get('width'),
2183 'height': the_only_video.get('height'),
2184 'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
2185 'fps': the_only_video.get('fps'),
49a57e70 2186 'dynamic_range': the_only_video.get('dynamic_range'),
f8d4ad9a 2187 'vcodec': the_only_video.get('vcodec'),
2188 'vbr': the_only_video.get('vbr'),
2189 'stretched_ratio': the_only_video.get('stretched_ratio'),
105bfd90 2190 'aspect_ratio': the_only_video.get('aspect_ratio'),
f8d4ad9a 2191 })
2192
2193 if the_only_audio:
2194 new_dict.update({
2195 'acodec': the_only_audio.get('acodec'),
2196 'abr': the_only_audio.get('abr'),
975a0d0d 2197 'asr': the_only_audio.get('asr'),
b8ed0f15 2198 'audio_channels': the_only_audio.get('audio_channels')
f8d4ad9a 2199 })
2200
2201 return new_dict
2202
e8e73840 2203 def _check_formats(formats):
981052c9 2204 if not check_formats:
2205 yield from formats
b5ac45b1 2206 return
9f1a1c36 2207 yield from self._check_formats(formats)
e8e73840 2208
67134eab 2209 def _build_selector_function(selector):
909d24dd 2210 if isinstance(selector, list): # ,
67134eab
JMF
2211 fs = [_build_selector_function(s) for s in selector]
2212
317f7ab6 2213 def selector_function(ctx):
67134eab 2214 for f in fs:
981052c9 2215 yield from f(ctx)
67134eab 2216 return selector_function
909d24dd 2217
2218 elif selector.type == GROUP: # ()
0130afb7 2219 selector_function = _build_selector_function(selector.selector)
909d24dd 2220
2221 elif selector.type == PICKFIRST: # /
67134eab
JMF
2222 fs = [_build_selector_function(s) for s in selector.selector]
2223
317f7ab6 2224 def selector_function(ctx):
67134eab 2225 for f in fs:
317f7ab6 2226 picked_formats = list(f(ctx))
67134eab
JMF
2227 if picked_formats:
2228 return picked_formats
2229 return []
67134eab 2230
981052c9 2231 elif selector.type == MERGE: # +
2232 selector_1, selector_2 = map(_build_selector_function, selector.selector)
2233
2234 def selector_function(ctx):
adbc4ec4 2235 for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
981052c9 2236 yield _merge(pair)
2237
909d24dd 2238 elif selector.type == SINGLE: # atom
598d185d 2239 format_spec = selector.selector or 'best'
909d24dd 2240
f8d4ad9a 2241 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
909d24dd 2242 if format_spec == 'all':
2243 def selector_function(ctx):
9222c381 2244 yield from _check_formats(ctx['formats'][::-1])
f8d4ad9a 2245 elif format_spec == 'mergeall':
2246 def selector_function(ctx):
316f2650 2247 formats = list(_check_formats(
2248 f for f in ctx['formats'] if f.get('vcodec') != 'none' or f.get('acodec') != 'none'))
e01d6aa4 2249 if not formats:
2250 return
921b76ca 2251 merged_format = formats[-1]
2252 for f in formats[-2::-1]:
f8d4ad9a 2253 merged_format = _merge((merged_format, f))
2254 yield merged_format
909d24dd 2255
2256 else:
85e801a9 2257 format_fallback, seperate_fallback, format_reverse, format_idx = False, None, True, 1
eff63539 2258 mobj = re.match(
2259 r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
2260 format_spec)
2261 if mobj is not None:
2262 format_idx = int_or_none(mobj.group('n'), default=1)
e8e73840 2263 format_reverse = mobj.group('bw')[0] == 'b'
eff63539 2264 format_type = (mobj.group('type') or [None])[0]
2265 not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
2266 format_modified = mobj.group('mod') is not None
909d24dd 2267
2268 format_fallback = not format_type and not format_modified # for b, w
8326b00a 2269 _filter_f = (
eff63539 2270 (lambda f: f.get('%scodec' % format_type) != 'none')
2271 if format_type and format_modified # bv*, ba*, wv*, wa*
2272 else (lambda f: f.get('%scodec' % not_format_type) == 'none')
2273 if format_type # bv, ba, wv, wa
2274 else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
2275 if not format_modified # b, w
8326b00a 2276 else lambda f: True) # b*, w*
2277 filter_f = lambda f: _filter_f(f) and (
2278 f.get('vcodec') != 'none' or f.get('acodec') != 'none')
67134eab 2279 else:
48ee10ee 2280 if format_spec in self._format_selection_exts['audio']:
b11c04a8 2281 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
48ee10ee 2282 elif format_spec in self._format_selection_exts['video']:
b11c04a8 2283 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
85e801a9 2284 seperate_fallback = lambda f: f.get('ext') == format_spec and f.get('vcodec') != 'none'
48ee10ee 2285 elif format_spec in self._format_selection_exts['storyboards']:
b11c04a8 2286 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
2287 else:
b5ae35ee 2288 filter_f = lambda f: f.get('format_id') == format_spec # id
909d24dd 2289
2290 def selector_function(ctx):
2291 formats = list(ctx['formats'])
909d24dd 2292 matches = list(filter(filter_f, formats)) if filter_f is not None else formats
85e801a9 2293 if not matches:
2294 if format_fallback and ctx['incomplete_formats']:
2295 # for extractors with incomplete formats (audio only (soundcloud)
2296 # or video only (imgur)) best/worst will fallback to
2297 # best/worst {video,audio}-only format
2298 matches = formats
2299 elif seperate_fallback and not ctx['has_merged_format']:
2300 # for compatibility with youtube-dl when there is no pre-merged format
2301 matches = list(filter(seperate_fallback, formats))
981052c9 2302 matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
2303 try:
e8e73840 2304 yield matches[format_idx - 1]
4abea8ca 2305 except LazyList.IndexError:
981052c9 2306 return
083c9df9 2307
67134eab 2308 filters = [self._build_format_filter(f) for f in selector.filters]
083c9df9 2309
317f7ab6 2310 def final_selector(ctx):
adbc4ec4 2311 ctx_copy = dict(ctx)
67134eab 2312 for _filter in filters:
317f7ab6
S
2313 ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
2314 return selector_function(ctx_copy)
67134eab 2315 return final_selector
083c9df9 2316
0f06bcd7 2317 stream = io.BytesIO(format_spec.encode())
0130afb7 2318 try:
f9934b96 2319 tokens = list(_remove_unused_ops(tokenize.tokenize(stream.readline)))
0130afb7
JMF
2320 except tokenize.TokenError:
2321 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2322
86e5f3ed 2323 class TokenIterator:
0130afb7
JMF
2324 def __init__(self, tokens):
2325 self.tokens = tokens
2326 self.counter = 0
2327
2328 def __iter__(self):
2329 return self
2330
2331 def __next__(self):
2332 if self.counter >= len(self.tokens):
2333 raise StopIteration()
2334 value = self.tokens[self.counter]
2335 self.counter += 1
2336 return value
2337
2338 next = __next__
2339
2340 def restore_last_token(self):
2341 self.counter -= 1
2342
2343 parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
67134eab 2344 return _build_selector_function(parsed_selector)
a9c58ad9 2345
e5660ee6 2346 def _calc_headers(self, info_dict):
8b7539d2 2347 res = merge_headers(self.params['http_headers'], info_dict.get('http_headers') or {})
e5660ee6 2348
c487cf00 2349 cookies = self._calc_cookies(info_dict['url'])
e5660ee6
JMF
2350 if cookies:
2351 res['Cookie'] = cookies
2352
0016b84e
S
2353 if 'X-Forwarded-For' not in res:
2354 x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2355 if x_forwarded_for_ip:
2356 res['X-Forwarded-For'] = x_forwarded_for_ip
2357
e5660ee6
JMF
2358 return res
2359
c487cf00 2360 def _calc_cookies(self, url):
2361 pr = sanitized_Request(url)
e5660ee6 2362 self.cookiejar.add_cookie_header(pr)
662435f7 2363 return pr.get_header('Cookie')
e5660ee6 2364
9f1a1c36 2365 def _sort_thumbnails(self, thumbnails):
2366 thumbnails.sort(key=lambda t: (
2367 t.get('preference') if t.get('preference') is not None else -1,
2368 t.get('width') if t.get('width') is not None else -1,
2369 t.get('height') if t.get('height') is not None else -1,
2370 t.get('id') if t.get('id') is not None else '',
2371 t.get('url')))
2372
b0249bca 2373 def _sanitize_thumbnails(self, info_dict):
bc516a3f 2374 thumbnails = info_dict.get('thumbnails')
2375 if thumbnails is None:
2376 thumbnail = info_dict.get('thumbnail')
2377 if thumbnail:
2378 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
9f1a1c36 2379 if not thumbnails:
2380 return
2381
2382 def check_thumbnails(thumbnails):
2383 for t in thumbnails:
2384 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2385 try:
2386 self.urlopen(HEADRequest(t['url']))
2387 except network_exceptions as err:
2388 self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2389 continue
2390 yield t
2391
2392 self._sort_thumbnails(thumbnails)
2393 for i, t in enumerate(thumbnails):
2394 if t.get('id') is None:
2395 t['id'] = '%d' % i
2396 if t.get('width') and t.get('height'):
2397 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2398 t['url'] = sanitize_url(t['url'])
2399
2400 if self.params.get('check_formats') is True:
282f5709 2401 info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
9f1a1c36 2402 else:
2403 info_dict['thumbnails'] = thumbnails
bc516a3f 2404
94dc8604 2405 def _fill_common_fields(self, info_dict, final=True):
03f83004 2406 # TODO: move sanitization here
94dc8604 2407 if final:
d4736fdb 2408 title = info_dict.get('title', NO_DEFAULT)
2409 if title is NO_DEFAULT:
03f83004
LNO
2410 raise ExtractorError('Missing "title" field in extractor result',
2411 video_id=info_dict['id'], ie=info_dict['extractor'])
d4736fdb 2412 info_dict['fulltitle'] = title
2413 if not title:
2414 if title == '':
2415 self.write_debug('Extractor gave empty title. Creating a generic title')
2416 else:
2417 self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
1d485a1a 2418 info_dict['title'] = f'{info_dict["extractor"].replace(":", "-")} video #{info_dict["id"]}'
03f83004
LNO
2419
2420 if info_dict.get('duration') is not None:
2421 info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2422
2423 for ts_key, date_key in (
2424 ('timestamp', 'upload_date'),
2425 ('release_timestamp', 'release_date'),
2426 ('modified_timestamp', 'modified_date'),
2427 ):
2428 if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2429 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2430 # see http://bugs.python.org/issue1646728)
19a03940 2431 with contextlib.suppress(ValueError, OverflowError, OSError):
03f83004
LNO
2432 upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2433 info_dict[date_key] = upload_date.strftime('%Y%m%d')
03f83004
LNO
2434
2435 live_keys = ('is_live', 'was_live')
2436 live_status = info_dict.get('live_status')
2437 if live_status is None:
2438 for key in live_keys:
2439 if info_dict.get(key) is False:
2440 continue
2441 if info_dict.get(key):
2442 live_status = key
2443 break
2444 if all(info_dict.get(key) is False for key in live_keys):
2445 live_status = 'not_live'
2446 if live_status:
2447 info_dict['live_status'] = live_status
2448 for key in live_keys:
2449 if info_dict.get(key) is None:
2450 info_dict[key] = (live_status == key)
a057779d 2451 if live_status == 'post_live':
2452 info_dict['was_live'] = True
03f83004
LNO
2453
2454 # Auto generate title fields corresponding to the *_number fields when missing
2455 # in order to always have clean titles. This is very common for TV series.
2456 for field in ('chapter', 'season', 'episode'):
94dc8604 2457 if final and info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
03f83004
LNO
2458 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2459
415f8d51 2460 def _raise_pending_errors(self, info):
2461 err = info.pop('__pending_error', None)
2462 if err:
2463 self.report_error(err, tb=False)
2464
784320c9 2465 def sort_formats(self, info_dict):
2466 formats = self._get_formats(info_dict)
2467 if not formats:
2468 return
2469 # Backward compatibility with InfoExtractor._sort_formats
2470 field_preference = formats[0].pop('__sort_fields', None)
2471 if field_preference:
2472 info_dict['_format_sort_fields'] = field_preference
2473
2474 formats.sort(key=FormatSorter(
2475 self, info_dict.get('_format_sort_fields', [])).calculate_preference)
2476
dd82ffea
JMF
2477 def process_video_result(self, info_dict, download=True):
2478 assert info_dict.get('_type', 'video') == 'video'
9c906919 2479 self._num_videos += 1
dd82ffea 2480
bec1fad2 2481 if 'id' not in info_dict:
fc08bdd6 2482 raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
2483 elif not info_dict.get('id'):
2484 raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])
455a15e2 2485
c9969434
S
2486 def report_force_conversion(field, field_not, conversion):
2487 self.report_warning(
2488 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2489 % (field, field_not, conversion))
2490
2491 def sanitize_string_field(info, string_field):
2492 field = info.get(string_field)
14f25df2 2493 if field is None or isinstance(field, str):
c9969434
S
2494 return
2495 report_force_conversion(string_field, 'a string', 'string')
14f25df2 2496 info[string_field] = str(field)
c9969434
S
2497
2498 def sanitize_numeric_fields(info):
2499 for numeric_field in self._NUMERIC_FIELDS:
2500 field = info.get(numeric_field)
f9934b96 2501 if field is None or isinstance(field, (int, float)):
c9969434
S
2502 continue
2503 report_force_conversion(numeric_field, 'numeric', 'int')
2504 info[numeric_field] = int_or_none(field)
2505
2506 sanitize_string_field(info_dict, 'id')
2507 sanitize_numeric_fields(info_dict)
3975b4d2 2508 if info_dict.get('section_end') and info_dict.get('section_start') is not None:
2509 info_dict['duration'] = round(info_dict['section_end'] - info_dict['section_start'], 3)
4c3f8c3f 2510 if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):
50e93e03 2511 self.report_warning('"duration" field is negative, there is an error in extractor')
be6217b2 2512
9eef7c4e 2513 chapters = info_dict.get('chapters') or []
a3976e07 2514 if chapters and chapters[0].get('start_time'):
2515 chapters.insert(0, {'start_time': 0})
2516
9eef7c4e 2517 dummy_chapter = {'end_time': 0, 'start_time': info_dict.get('duration')}
a3976e07 2518 for idx, (prev, current, next_) in enumerate(zip(
2519 (dummy_chapter, *chapters), chapters, (*chapters[1:], dummy_chapter)), 1):
9eef7c4e 2520 if current.get('start_time') is None:
2521 current['start_time'] = prev.get('end_time')
2522 if not current.get('end_time'):
2523 current['end_time'] = next_.get('start_time')
a3976e07 2524 if not current.get('title'):
2525 current['title'] = f'<Untitled Chapter {idx}>'
9eef7c4e 2526
dd82ffea
JMF
2527 if 'playlist' not in info_dict:
2528 # It isn't part of a playlist
2529 info_dict['playlist'] = None
2530 info_dict['playlist_index'] = None
2531
bc516a3f 2532 self._sanitize_thumbnails(info_dict)
d5519808 2533
536a55da 2534 thumbnail = info_dict.get('thumbnail')
bc516a3f 2535 thumbnails = info_dict.get('thumbnails')
536a55da
S
2536 if thumbnail:
2537 info_dict['thumbnail'] = sanitize_url(thumbnail)
2538 elif thumbnails:
d5519808
PH
2539 info_dict['thumbnail'] = thumbnails[-1]['url']
2540
ae30b840 2541 if info_dict.get('display_id') is None and 'id' in info_dict:
0afef30b
PH
2542 info_dict['display_id'] = info_dict['id']
2543
03f83004 2544 self._fill_common_fields(info_dict)
33d2fc2f 2545
05108a49
S
2546 for cc_kind in ('subtitles', 'automatic_captions'):
2547 cc = info_dict.get(cc_kind)
2548 if cc:
2549 for _, subtitle in cc.items():
2550 for subtitle_format in subtitle:
2551 if subtitle_format.get('url'):
2552 subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2553 if subtitle_format.get('ext') is None:
2554 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2555
2556 automatic_captions = info_dict.get('automatic_captions')
4bba3716 2557 subtitles = info_dict.get('subtitles')
4bba3716 2558
360e1ca5 2559 info_dict['requested_subtitles'] = self.process_subtitles(
05108a49 2560 info_dict['id'], subtitles, automatic_captions)
a504ced0 2561
784320c9 2562 self.sort_formats(info_dict)
aebb4f4b 2563 formats = self._get_formats(info_dict)
dd82ffea 2564
0a5a191a 2565 # or None ensures --clean-infojson removes it
2566 info_dict['_has_drm'] = any(f.get('has_drm') for f in formats) or None
88acdbc2 2567 if not self.params.get('allow_unplayable_formats'):
2568 formats = [f for f in formats if not f.get('has_drm')]
17ffed18 2569
2570 if formats and all(f.get('acodec') == f.get('vcodec') == 'none' for f in formats):
2571 self.report_warning(
2572 f'{"This video is DRM protected and " if info_dict["_has_drm"] else ""}'
2573 'only images are available for download. Use --list-formats to see them'.capitalize())
88acdbc2 2574
319b6059 2575 get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
2576 if not get_from_start:
2577 info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
2578 if info_dict.get('is_live') and formats:
adbc4ec4 2579 formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
319b6059 2580 if get_from_start and not formats:
a44ca5a4 2581 self.raise_no_formats(info_dict, msg=(
2582 '--live-from-start is passed, but there are no formats that can be downloaded from the start. '
2583 'If you want to download from the current time, use --no-live-from-start'))
adbc4ec4 2584
73af5cc8
S
2585 def is_wellformed(f):
2586 url = f.get('url')
a5ac0c47 2587 if not url:
73af5cc8
S
2588 self.report_warning(
2589 '"url" field is missing or empty - skipping format, '
2590 'there is an error in extractor')
a5ac0c47
S
2591 return False
2592 if isinstance(url, bytes):
2593 sanitize_string_field(f, 'url')
2594 return True
73af5cc8
S
2595
2596 # Filter out malformed formats for better extraction robustness
1ac7f461 2597 formats = list(filter(is_wellformed, formats or []))
2598
2599 if not formats:
2600 self.raise_no_formats(info_dict)
73af5cc8 2601
181c7053
S
2602 formats_dict = {}
2603
dd82ffea 2604 # We check that all the formats have the format and format_id fields
db95dc13 2605 for i, format in enumerate(formats):
c9969434
S
2606 sanitize_string_field(format, 'format_id')
2607 sanitize_numeric_fields(format)
dcf77cf1 2608 format['url'] = sanitize_url(format['url'])
e74e3b63 2609 if not format.get('format_id'):
14f25df2 2610 format['format_id'] = str(i)
e2effb08
S
2611 else:
2612 # Sanitize format_id from characters used in format selector expression
ec85ded8 2613 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
181c7053
S
2614 format_id = format['format_id']
2615 if format_id not in formats_dict:
2616 formats_dict[format_id] = []
2617 formats_dict[format_id].append(format)
2618
2619 # Make sure all formats have unique format_id
03b4de72 2620 common_exts = set(itertools.chain(*self._format_selection_exts.values()))
181c7053 2621 for format_id, ambiguous_formats in formats_dict.items():
48ee10ee 2622 ambigious_id = len(ambiguous_formats) > 1
2623 for i, format in enumerate(ambiguous_formats):
2624 if ambigious_id:
181c7053 2625 format['format_id'] = '%s-%d' % (format_id, i)
48ee10ee 2626 if format.get('ext') is None:
2627 format['ext'] = determine_ext(format['url']).lower()
2628 # Ensure there is no conflict between id and ext in format selection
2629 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2630 if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
2631 format['format_id'] = 'f%s' % format['format_id']
181c7053
S
2632
2633 for i, format in enumerate(formats):
8c51aa65 2634 if format.get('format') is None:
6febd1c1 2635 format['format'] = '{id} - {res}{note}'.format(
8c51aa65
JMF
2636 id=format['format_id'],
2637 res=self.format_resolution(format),
b868936c 2638 note=format_field(format, 'format_note', ' (%s)'),
8c51aa65 2639 )
6f0be937 2640 if format.get('protocol') is None:
b5559424 2641 format['protocol'] = determine_protocol(format)
239df021 2642 if format.get('resolution') is None:
2643 format['resolution'] = self.format_resolution(format, default=None)
176f1866 2644 if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
2645 format['dynamic_range'] = 'SDR'
105bfd90 2646 if format.get('aspect_ratio') is None:
2647 format['aspect_ratio'] = try_call(lambda: round(format['width'] / format['height'], 2))
f2fe69c7 2648 if (info_dict.get('duration') and format.get('tbr')
2649 and not format.get('filesize') and not format.get('filesize_approx')):
56ba69e4 2650 format['filesize_approx'] = int(info_dict['duration'] * format['tbr'] * (1024 / 8))
f2fe69c7 2651
e5660ee6
JMF
2652 # Add HTTP headers, so that external programs can use them from the
2653 # json output
2654 full_format_info = info_dict.copy()
2655 full_format_info.update(format)
2656 format['http_headers'] = self._calc_headers(full_format_info)
0016b84e
S
2657 # Remove private housekeeping stuff
2658 if '__x_forwarded_for_ip' in info_dict:
2659 del info_dict['__x_forwarded_for_ip']
dd82ffea 2660
9f1a1c36 2661 if self.params.get('check_formats') is True:
282f5709 2662 formats = LazyList(self._check_formats(formats[::-1]), reverse=True)
9f1a1c36 2663
88acdbc2 2664 if not formats or formats[0] is not info_dict:
b3d9ef88
JMF
2665 # only set the 'formats' fields if the original info_dict list them
2666 # otherwise we end up with a circular reference, the first (and unique)
f89197d7 2667 # element in the 'formats' field in info_dict is info_dict itself,
dfb1b146 2668 # which can't be exported to json
b3d9ef88 2669 info_dict['formats'] = formats
4ec82a72 2670
2671 info_dict, _ = self.pre_process(info_dict)
2672
6db9c4d5 2673 if self._match_entry(info_dict, incomplete=self._format_fields) is not None:
09b49e1f 2674 return info_dict
2675
2676 self.post_extract(info_dict)
2677 info_dict, _ = self.pre_process(info_dict, 'after_filter')
2678
093a1710 2679 # The pre-processors may have modified the formats
aebb4f4b 2680 formats = self._get_formats(info_dict)
093a1710 2681
e4221b70 2682 list_only = self.params.get('simulate') == 'list_only'
fa9f30b8 2683 interactive_format_selection = not list_only and self.format_selector == '-'
b7b04c78 2684 if self.params.get('list_thumbnails'):
2685 self.list_thumbnails(info_dict)
b7b04c78 2686 if self.params.get('listsubtitles'):
2687 if 'automatic_captions' in info_dict:
2688 self.list_subtitles(
2689 info_dict['id'], automatic_captions, 'automatic captions')
2690 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
fa9f30b8 2691 if self.params.get('listformats') or interactive_format_selection:
b69fd25c 2692 self.list_formats(info_dict)
169dbde9 2693 if list_only:
b7b04c78 2694 # Without this printing, -F --print-json will not work
169dbde9 2695 self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
c487cf00 2696 return info_dict
bfaae0a7 2697
187986a8 2698 format_selector = self.format_selector
2699 if format_selector is None:
0017d9ad 2700 req_format = self._default_format_spec(info_dict, download=download)
0760b0a7 2701 self.write_debug('Default format spec: %s' % req_format)
187986a8 2702 format_selector = self.build_format_selector(req_format)
317f7ab6 2703
fa9f30b8 2704 while True:
2705 if interactive_format_selection:
2706 req_format = input(
2707 self._format_screen('\nEnter format selector: ', self.Styles.EMPHASIS))
2708 try:
2709 format_selector = self.build_format_selector(req_format)
2710 except SyntaxError as err:
2711 self.report_error(err, tb=False, is_error=False)
2712 continue
2713
85e801a9 2714 formats_to_download = list(format_selector({
fa9f30b8 2715 'formats': formats,
85e801a9 2716 'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
2717 'incomplete_formats': (
2718 # All formats are video-only or
2719 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
2720 # all formats are audio-only
2721 or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats)),
2722 }))
fa9f30b8 2723 if interactive_format_selection and not formats_to_download:
2724 self.report_error('Requested format is not available', tb=False, is_error=False)
2725 continue
2726 break
317f7ab6 2727
dd82ffea 2728 if not formats_to_download:
b7da73eb 2729 if not self.params.get('ignore_no_formats_error'):
c0b6e5c7 2730 raise ExtractorError(
2731 'Requested format is not available. Use --list-formats for a list of available formats',
2732 expected=True, video_id=info_dict['id'], ie=info_dict['extractor'])
b62fa6d7 2733 self.report_warning('Requested format is not available')
2734 # Process what we can, even without any available formats.
2735 formats_to_download = [{}]
a13e6848 2736
0500ee3d 2737 requested_ranges = tuple(self.params.get('download_ranges', lambda *_: [{}])(info_dict, self))
5ec1b6b7 2738 best_format, downloaded_formats = formats_to_download[-1], []
b62fa6d7 2739 if download:
0500ee3d 2740 if best_format and requested_ranges:
5ec1b6b7 2741 def to_screen(*msg):
2742 self.to_screen(f'[info] {info_dict["id"]}: {" ".join(", ".join(variadic(m)) for m in msg)}')
2743
2744 to_screen(f'Downloading {len(formats_to_download)} format(s):',
2745 (f['format_id'] for f in formats_to_download))
0500ee3d 2746 if requested_ranges != ({}, ):
5ec1b6b7 2747 to_screen(f'Downloading {len(requested_ranges)} time ranges:',
fc2ba496 2748 (f'{c["start_time"]:.1f}-{c["end_time"]:.1f}' for c in requested_ranges))
a13e6848 2749 max_downloads_reached = False
5ec1b6b7 2750
0500ee3d 2751 for fmt, chapter in itertools.product(formats_to_download, requested_ranges):
5ec1b6b7 2752 new_info = self._copy_infodict(info_dict)
b7da73eb 2753 new_info.update(fmt)
3975b4d2 2754 offset, duration = info_dict.get('section_start') or 0, info_dict.get('duration') or float('inf')
fc2ba496 2755 end_time = offset + min(chapter.get('end_time', duration), duration)
3975b4d2 2756 if chapter or offset:
5ec1b6b7 2757 new_info.update({
3975b4d2 2758 'section_start': offset + chapter.get('start_time', 0),
2576d53a 2759 # duration may not be accurate. So allow deviations <1sec
2760 'section_end': end_time if end_time <= offset + duration + 1 else None,
5ec1b6b7 2761 'section_title': chapter.get('title'),
2762 'section_number': chapter.get('index'),
2763 })
2764 downloaded_formats.append(new_info)
a13e6848 2765 try:
2766 self.process_info(new_info)
2767 except MaxDownloadsReached:
2768 max_downloads_reached = True
415f8d51 2769 self._raise_pending_errors(new_info)
f46e2f9d 2770 # Remove copied info
2771 for key, val in tuple(new_info.items()):
2772 if info_dict.get(key) == val:
2773 new_info.pop(key)
a13e6848 2774 if max_downloads_reached:
2775 break
ebed8b37 2776
5ec1b6b7 2777 write_archive = {f.get('__write_download_archive', False) for f in downloaded_formats}
a13e6848 2778 assert write_archive.issubset({True, False, 'ignore'})
2779 if True in write_archive and False not in write_archive:
2780 self.record_download_archive(info_dict)
be72c624 2781
5ec1b6b7 2782 info_dict['requested_downloads'] = downloaded_formats
ed5835b4 2783 info_dict = self.run_all_pps('after_video', info_dict)
a13e6848 2784 if max_downloads_reached:
2785 raise MaxDownloadsReached()
ebed8b37 2786
49a57e70 2787 # We update the info dict with the selected best quality format (backwards compatibility)
be72c624 2788 info_dict.update(best_format)
dd82ffea
JMF
2789 return info_dict
2790
98c70d6f 2791 def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
a504ced0 2792 """Select the requested subtitles and their format"""
d8a58ddc 2793 available_subs, normal_sub_langs = {}, []
98c70d6f
JMF
2794 if normal_subtitles and self.params.get('writesubtitles'):
2795 available_subs.update(normal_subtitles)
d8a58ddc 2796 normal_sub_langs = tuple(normal_subtitles.keys())
98c70d6f
JMF
2797 if automatic_captions and self.params.get('writeautomaticsub'):
2798 for lang, cap_info in automatic_captions.items():
360e1ca5
JMF
2799 if lang not in available_subs:
2800 available_subs[lang] = cap_info
2801
d2c8aadf 2802 if not available_subs or (
2803 not self.params.get('writesubtitles')
2804 and not self.params.get('writeautomaticsub')):
4d171848 2805 return None
a504ced0 2806
d8a58ddc 2807 all_sub_langs = tuple(available_subs.keys())
a504ced0 2808 if self.params.get('allsubtitles', False):
c32b0aab 2809 requested_langs = all_sub_langs
2810 elif self.params.get('subtitleslangs', False):
5314b521 2811 try:
2812 requested_langs = orderedSet_from_options(
2813 self.params.get('subtitleslangs'), {'all': all_sub_langs}, use_regex=True)
2814 except re.error as e:
2815 raise ValueError(f'Wrong regex for subtitlelangs: {e.pattern}')
d8a58ddc 2816 elif normal_sub_langs:
2817 requested_langs = ['en'] if 'en' in normal_sub_langs else normal_sub_langs[:1]
a504ced0 2818 else:
d8a58ddc 2819 requested_langs = ['en'] if 'en' in all_sub_langs else all_sub_langs[:1]
ad3dc496 2820 if requested_langs:
d2c8aadf 2821 self.to_screen(f'[info] {video_id}: Downloading subtitles: {", ".join(requested_langs)}')
a504ced0
JMF
2822
2823 formats_query = self.params.get('subtitlesformat', 'best')
2824 formats_preference = formats_query.split('/') if formats_query else []
2825 subs = {}
2826 for lang in requested_langs:
2827 formats = available_subs.get(lang)
2828 if formats is None:
86e5f3ed 2829 self.report_warning(f'{lang} subtitles not available for {video_id}')
a504ced0 2830 continue
a504ced0
JMF
2831 for ext in formats_preference:
2832 if ext == 'best':
2833 f = formats[-1]
2834 break
2835 matches = list(filter(lambda f: f['ext'] == ext, formats))
2836 if matches:
2837 f = matches[-1]
2838 break
2839 else:
2840 f = formats[-1]
2841 self.report_warning(
2842 'No subtitle format found matching "%s" for language %s, '
2843 'using %s' % (formats_query, lang, f['ext']))
2844 subs[lang] = f
2845 return subs
2846
bb66c247 2847 def _forceprint(self, key, info_dict):
2848 if info_dict is None:
2849 return
2850 info_copy = info_dict.copy()
2851 info_copy['formats_table'] = self.render_formats_table(info_dict)
2852 info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)
2853 info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
2854 info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))
2855
2856 def format_tmpl(tmpl):
48c8424b 2857 mobj = re.fullmatch(r'([\w.:,]|-\d|(?P<dict>{([\w.:,]|-\d)+}))+=?', tmpl)
07a1250e 2858 if not mobj:
2859 return tmpl
48c8424b 2860
2861 fmt = '%({})s'
2862 if tmpl.startswith('{'):
2863 tmpl = f'.{tmpl}'
2864 if tmpl.endswith('='):
2865 tmpl, fmt = tmpl[:-1], '{0} = %({0})#j'
2866 return '\n'.join(map(fmt.format, [tmpl] if mobj.group('dict') else tmpl.split(',')))
8130779d 2867
bb66c247 2868 for tmpl in self.params['forceprint'].get(key, []):
2869 self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))
2870
2871 for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):
5127e92a 2872 filename = self.prepare_filename(info_dict, outtmpl=file_tmpl)
bb66c247 2873 tmpl = format_tmpl(tmpl)
2874 self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
8d93e69d 2875 if self._ensure_dir_exists(filename):
86e5f3ed 2876 with open(filename, 'a', encoding='utf-8') as f:
8d93e69d 2877 f.write(self.evaluate_outtmpl(tmpl, info_copy) + '\n')
ca30f449 2878
d06daf23 2879 def __forced_printings(self, info_dict, filename, incomplete):
53c18592 2880 def print_mandatory(field, actual_field=None):
2881 if actual_field is None:
2882 actual_field = field
d06daf23 2883 if (self.params.get('force%s' % field, False)
53c18592 2884 and (not incomplete or info_dict.get(actual_field) is not None)):
2885 self.to_stdout(info_dict[actual_field])
d06daf23
S
2886
2887 def print_optional(field):
2888 if (self.params.get('force%s' % field, False)
2889 and info_dict.get(field) is not None):
2890 self.to_stdout(info_dict[field])
2891
53c18592 2892 info_dict = info_dict.copy()
2893 if filename is not None:
2894 info_dict['filename'] = filename
2895 if info_dict.get('requested_formats') is not None:
2896 # For RTMP URLs, also include the playpath
2897 info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
10331a26 2898 elif info_dict.get('url'):
53c18592 2899 info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2900
bb66c247 2901 if (self.params.get('forcejson')
2902 or self.params['forceprint'].get('video')
2903 or self.params['print_to_file'].get('video')):
2b8a2973 2904 self.post_extract(info_dict)
bb66c247 2905 self._forceprint('video', info_dict)
53c18592 2906
d06daf23
S
2907 print_mandatory('title')
2908 print_mandatory('id')
53c18592 2909 print_mandatory('url', 'urls')
d06daf23
S
2910 print_optional('thumbnail')
2911 print_optional('description')
53c18592 2912 print_optional('filename')
b868936c 2913 if self.params.get('forceduration') and info_dict.get('duration') is not None:
d06daf23
S
2914 self.to_stdout(formatSeconds(info_dict['duration']))
2915 print_mandatory('format')
53c18592 2916
2b8a2973 2917 if self.params.get('forcejson'):
6e84b215 2918 self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
d06daf23 2919
e8e73840 2920 def dl(self, name, info, subtitle=False, test=False):
88acdbc2 2921 if not info.get('url'):
1151c407 2922 self.raise_no_formats(info, True)
e8e73840 2923
2924 if test:
2925 verbose = self.params.get('verbose')
2926 params = {
2927 'test': True,
a169858f 2928 'quiet': self.params.get('quiet') or not verbose,
e8e73840 2929 'verbose': verbose,
2930 'noprogress': not verbose,
2931 'nopart': True,
2932 'skip_unavailable_fragments': False,
2933 'keep_fragments': False,
2934 'overwrites': True,
2935 '_no_ytdl_file': True,
2936 }
2937 else:
2938 params = self.params
96fccc10 2939 fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
e8e73840 2940 if not test:
2941 for ph in self._progress_hooks:
2942 fd.add_progress_hook(ph)
42676437
M
2943 urls = '", "'.join(
2944 (f['url'].split(',')[0] + ',<data>' if f['url'].startswith('data:') else f['url'])
2945 for f in info.get('requested_formats', []) or [info])
3a408f9d 2946 self.write_debug(f'Invoking {fd.FD_NAME} downloader on "{urls}"')
03b4de72 2947
adbc4ec4
THD
2948 # Note: Ideally info should be a deep-copied so that hooks cannot modify it.
2949 # But it may contain objects that are not deep-copyable
2950 new_info = self._copy_infodict(info)
e8e73840 2951 if new_info.get('http_headers') is None:
2952 new_info['http_headers'] = self._calc_headers(new_info)
2953 return fd.download(name, new_info, subtitle)
2954
e04938ab 2955 def existing_file(self, filepaths, *, default_overwrite=True):
2956 existing_files = list(filter(os.path.exists, orderedSet(filepaths)))
2957 if existing_files and not self.params.get('overwrites', default_overwrite):
2958 return existing_files[0]
2959
2960 for file in existing_files:
2961 self.report_file_delete(file)
2962 os.remove(file)
2963 return None
2964
8222d8de 2965 def process_info(self, info_dict):
09b49e1f 2966 """Process a single resolved IE result. (Modifies it in-place)"""
8222d8de
JMF
2967
2968 assert info_dict.get('_type', 'video') == 'video'
f46e2f9d 2969 original_infodict = info_dict
fd288278 2970
4513a41a 2971 if 'format' not in info_dict and 'ext' in info_dict:
8222d8de
JMF
2972 info_dict['format'] = info_dict['ext']
2973
c77495e3 2974 if self._match_entry(info_dict) is not None:
9e907ebd 2975 info_dict['__write_download_archive'] = 'ignore'
8222d8de
JMF
2976 return
2977
09b49e1f 2978 # Does nothing under normal operation - for backward compatibility of process_info
277d6ff5 2979 self.post_extract(info_dict)
119e40ef 2980
2981 def replace_info_dict(new_info):
2982 nonlocal info_dict
2983 if new_info == info_dict:
2984 return
2985 info_dict.clear()
2986 info_dict.update(new_info)
2987
2988 new_info, _ = self.pre_process(info_dict, 'video')
2989 replace_info_dict(new_info)
0c14d66a 2990 self._num_downloads += 1
8222d8de 2991
dcf64d43 2992 # info_dict['_filename'] needs to be set for backward compatibility
de6000d9 2993 info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2994 temp_filename = self.prepare_filename(info_dict, 'temp')
0202b52a 2995 files_to_move = {}
8222d8de
JMF
2996
2997 # Forced printings
4513a41a 2998 self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
8222d8de 2999
ca6d59d2 3000 def check_max_downloads():
3001 if self._num_downloads >= float(self.params.get('max_downloads') or 'inf'):
3002 raise MaxDownloadsReached()
3003
b7b04c78 3004 if self.params.get('simulate'):
9e907ebd 3005 info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
ca6d59d2 3006 check_max_downloads()
8222d8de
JMF
3007 return
3008
de6000d9 3009 if full_filename is None:
8222d8de 3010 return
e92caff5 3011 if not self._ensure_dir_exists(encodeFilename(full_filename)):
0202b52a 3012 return
e92caff5 3013 if not self._ensure_dir_exists(encodeFilename(temp_filename)):
8222d8de
JMF
3014 return
3015
80c03fa9 3016 if self._write_description('video', info_dict,
3017 self.prepare_filename(info_dict, 'description')) is None:
3018 return
3019
3020 sub_files = self._write_subtitles(info_dict, temp_filename)
3021 if sub_files is None:
3022 return
3023 files_to_move.update(dict(sub_files))
3024
3025 thumb_files = self._write_thumbnails(
3026 'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
3027 if thumb_files is None:
3028 return
3029 files_to_move.update(dict(thumb_files))
8222d8de 3030
80c03fa9 3031 infofn = self.prepare_filename(info_dict, 'infojson')
3032 _infojson_written = self._write_info_json('video', info_dict, infofn)
3033 if _infojson_written:
dac5df5a 3034 info_dict['infojson_filename'] = infofn
e75bb0d6 3035 # For backward compatibility, even though it was a private field
80c03fa9 3036 info_dict['__infojson_filename'] = infofn
3037 elif _infojson_written is None:
3038 return
3039
3040 # Note: Annotations are deprecated
3041 annofn = None
1fb07d10 3042 if self.params.get('writeannotations', False):
de6000d9 3043 annofn = self.prepare_filename(info_dict, 'annotation')
80c03fa9 3044 if annofn:
e92caff5 3045 if not self._ensure_dir_exists(encodeFilename(annofn)):
0202b52a 3046 return
0c3d0f51 3047 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
6febd1c1 3048 self.to_screen('[info] Video annotations are already present')
ffddb112
RA
3049 elif not info_dict.get('annotations'):
3050 self.report_warning('There are no annotations to write.')
7b6fefc9
PH
3051 else:
3052 try:
6febd1c1 3053 self.to_screen('[info] Writing video annotations to: ' + annofn)
86e5f3ed 3054 with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
7b6fefc9
PH
3055 annofile.write(info_dict['annotations'])
3056 except (KeyError, TypeError):
6febd1c1 3057 self.report_warning('There are no annotations to write.')
86e5f3ed 3058 except OSError:
6febd1c1 3059 self.report_error('Cannot write annotations file: ' + annofn)
7b6fefc9 3060 return
1fb07d10 3061
732044af 3062 # Write internet shortcut files
08438d2c 3063 def _write_link_file(link_type):
60f3e995 3064 url = try_get(info_dict['webpage_url'], iri_to_uri)
3065 if not url:
3066 self.report_warning(
3067 f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
3068 return True
08438d2c 3069 linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
0e6b018a
Z
3070 if not self._ensure_dir_exists(encodeFilename(linkfn)):
3071 return False
10e3742e 3072 if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
08438d2c 3073 self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
3074 return True
3075 try:
3076 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
86e5f3ed 3077 with open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
3078 newline='\r\n' if link_type == 'url' else '\n') as linkfile:
60f3e995 3079 template_vars = {'url': url}
08438d2c 3080 if link_type == 'desktop':
3081 template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
3082 linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
86e5f3ed 3083 except OSError:
08438d2c 3084 self.report_error(f'Cannot write internet shortcut {linkfn}')
3085 return False
732044af 3086 return True
3087
08438d2c 3088 write_links = {
3089 'url': self.params.get('writeurllink'),
3090 'webloc': self.params.get('writewebloclink'),
3091 'desktop': self.params.get('writedesktoplink'),
3092 }
3093 if self.params.get('writelink'):
3094 link_type = ('webloc' if sys.platform == 'darwin'
3095 else 'desktop' if sys.platform.startswith('linux')
3096 else 'url')
3097 write_links[link_type] = True
3098
3099 if any(should_write and not _write_link_file(link_type)
3100 for link_type, should_write in write_links.items()):
3101 return
732044af 3102
415f8d51 3103 new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
3104 replace_info_dict(new_info)
56d868db 3105
a13e6848 3106 if self.params.get('skip_download'):
56d868db 3107 info_dict['filepath'] = temp_filename
3108 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
3109 info_dict['__files_to_move'] = files_to_move
f46e2f9d 3110 replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
9e907ebd 3111 info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
56d868db 3112 else:
3113 # Download
b868936c 3114 info_dict.setdefault('__postprocessors', [])
4340deca 3115 try:
0202b52a 3116
e04938ab 3117 def existing_video_file(*filepaths):
6b591b29 3118 ext = info_dict.get('ext')
e04938ab 3119 converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
3120 file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
3121 default_overwrite=False)
3122 if file:
3123 info_dict['ext'] = os.path.splitext(file)[1][1:]
3124 return file
0202b52a 3125
7b2c3f47 3126 fd, success = None, True
fccf90e7 3127 if info_dict.get('protocol') or info_dict.get('url'):
56ba69e4 3128 fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
71df9b7f 3129 if fd is not FFmpegFD and 'no-direct-merge' not in self.params['compat_opts'] and (
56ba69e4 3130 info_dict.get('section_start') or info_dict.get('section_end')):
7b2c3f47 3131 msg = ('This format cannot be partially downloaded' if FFmpegFD.available()
56ba69e4 3132 else 'You have requested downloading the video partially, but ffmpeg is not installed')
3133 self.report_error(f'{msg}. Aborting')
5ec1b6b7 3134 return
5ec1b6b7 3135
4340deca 3136 if info_dict.get('requested_formats') is not None:
81cd954a 3137 requested_formats = info_dict['requested_formats']
0202b52a 3138 old_ext = info_dict['ext']
4e3b637d 3139 if self.params.get('merge_output_format') is None:
4e3b637d 3140 if (info_dict['ext'] == 'webm'
3141 and info_dict.get('thumbnails')
3142 # check with type instead of pp_key, __name__, or isinstance
3143 # since we dont want any custom PPs to trigger this
c487cf00 3144 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])): # noqa: E721
4e3b637d 3145 info_dict['ext'] = 'mkv'
3146 self.report_warning(
3147 'webm doesn\'t support embedding a thumbnail, mkv will be used')
124bc071 3148 new_ext = info_dict['ext']
0202b52a 3149
124bc071 3150 def correct_ext(filename, ext=new_ext):
96fccc10 3151 if filename == '-':
3152 return filename
0202b52a 3153 filename_real_ext = os.path.splitext(filename)[1][1:]
3154 filename_wo_ext = (
3155 os.path.splitext(filename)[0]
124bc071 3156 if filename_real_ext in (old_ext, new_ext)
0202b52a 3157 else filename)
86e5f3ed 3158 return f'{filename_wo_ext}.{ext}'
0202b52a 3159
38c6902b 3160 # Ensure filename always has a correct extension for successful merge
0202b52a 3161 full_filename = correct_ext(full_filename)
3162 temp_filename = correct_ext(temp_filename)
e04938ab 3163 dl_filename = existing_video_file(full_filename, temp_filename)
1ea24129 3164 info_dict['__real_download'] = False
18e674b4 3165
7b2c3f47 3166 merger = FFmpegMergerPP(self)
adbc4ec4 3167 downloaded = []
dbf5416a 3168 if dl_filename is not None:
6c7274ec 3169 self.report_file_already_downloaded(dl_filename)
adbc4ec4
THD
3170 elif fd:
3171 for f in requested_formats if fd != FFmpegFD else []:
3172 f['filepath'] = fname = prepend_extension(
3173 correct_ext(temp_filename, info_dict['ext']),
3174 'f%s' % f['format_id'], info_dict['ext'])
3175 downloaded.append(fname)
dbf5416a 3176 info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
3177 success, real_download = self.dl(temp_filename, info_dict)
3178 info_dict['__real_download'] = real_download
18e674b4 3179 else:
18e674b4 3180 if self.params.get('allow_unplayable_formats'):
3181 self.report_warning(
3182 'You have requested merging of multiple formats '
3183 'while also allowing unplayable formats to be downloaded. '
3184 'The formats won\'t be merged to prevent data corruption.')
3185 elif not merger.available:
e8969bda 3186 msg = 'You have requested merging of multiple formats but ffmpeg is not installed'
3187 if not self.params.get('ignoreerrors'):
3188 self.report_error(f'{msg}. Aborting due to --abort-on-error')
3189 return
3190 self.report_warning(f'{msg}. The formats won\'t be merged')
18e674b4 3191
96fccc10 3192 if temp_filename == '-':
adbc4ec4 3193 reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
96fccc10 3194 else 'but the formats are incompatible for simultaneous download' if merger.available
3195 else 'but ffmpeg is not installed')
3196 self.report_warning(
3197 f'You have requested downloading multiple formats to stdout {reason}. '
3198 'The formats will be streamed one after the other')
3199 fname = temp_filename
dbf5416a 3200 for f in requested_formats:
3201 new_info = dict(info_dict)
3202 del new_info['requested_formats']
3203 new_info.update(f)
96fccc10 3204 if temp_filename != '-':
124bc071 3205 fname = prepend_extension(
3206 correct_ext(temp_filename, new_info['ext']),
3207 'f%s' % f['format_id'], new_info['ext'])
96fccc10 3208 if not self._ensure_dir_exists(fname):
3209 return
a21e0ab1 3210 f['filepath'] = fname
96fccc10 3211 downloaded.append(fname)
dbf5416a 3212 partial_success, real_download = self.dl(fname, new_info)
3213 info_dict['__real_download'] = info_dict['__real_download'] or real_download
3214 success = success and partial_success
adbc4ec4
THD
3215
3216 if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
3217 info_dict['__postprocessors'].append(merger)
3218 info_dict['__files_to_merge'] = downloaded
3219 # Even if there were no downloads, it is being merged only now
3220 info_dict['__real_download'] = True
3221 else:
3222 for file in downloaded:
3223 files_to_move[file] = None
4340deca
P
3224 else:
3225 # Just a single file
e04938ab 3226 dl_filename = existing_video_file(full_filename, temp_filename)
6c7274ec 3227 if dl_filename is None or dl_filename == temp_filename:
3228 # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
3229 # So we should try to resume the download
e8e73840 3230 success, real_download = self.dl(temp_filename, info_dict)
0202b52a 3231 info_dict['__real_download'] = real_download
6c7274ec 3232 else:
3233 self.report_file_already_downloaded(dl_filename)
0202b52a 3234
0202b52a 3235 dl_filename = dl_filename or temp_filename
c571435f 3236 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
0202b52a 3237
3158150c 3238 except network_exceptions as err:
7960b056 3239 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
4340deca 3240 return
86e5f3ed 3241 except OSError as err:
4340deca
P
3242 raise UnavailableVideoError(err)
3243 except (ContentTooShortError, ) as err:
86e5f3ed 3244 self.report_error(f'content too short (expected {err.expected} bytes and served {err.downloaded})')
4340deca 3245 return
8222d8de 3246
415f8d51 3247 self._raise_pending_errors(info_dict)
de6000d9 3248 if success and full_filename != '-':
f17f8651 3249
fd7cfb64 3250 def fixup():
3251 do_fixup = True
3252 fixup_policy = self.params.get('fixup')
3253 vid = info_dict['id']
3254
3255 if fixup_policy in ('ignore', 'never'):
3256 return
3257 elif fixup_policy == 'warn':
3fe75fdc 3258 do_fixup = 'warn'
f89b3e2d 3259 elif fixup_policy != 'force':
3260 assert fixup_policy in ('detect_or_warn', None)
3261 if not info_dict.get('__real_download'):
3262 do_fixup = False
fd7cfb64 3263
3264 def ffmpeg_fixup(cndn, msg, cls):
3fe75fdc 3265 if not (do_fixup and cndn):
fd7cfb64 3266 return
3fe75fdc 3267 elif do_fixup == 'warn':
fd7cfb64 3268 self.report_warning(f'{vid}: {msg}')
3269 return
3270 pp = cls(self)
3271 if pp.available:
3272 info_dict['__postprocessors'].append(pp)
3273 else:
3274 self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
3275
3276 stretched_ratio = info_dict.get('stretched_ratio')
ca9def71
LNO
3277 ffmpeg_fixup(stretched_ratio not in (1, None),
3278 f'Non-uniform pixel ratio {stretched_ratio}',
3279 FFmpegFixupStretchedPP)
fd7cfb64 3280
993191c0 3281 downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
24146491 3282 downloader = downloader.FD_NAME if downloader else None
adbc4ec4 3283
ca9def71
LNO
3284 ext = info_dict.get('ext')
3285 postprocessed_by_ffmpeg = info_dict.get('requested_formats') or any((
3286 isinstance(pp, FFmpegVideoConvertorPP)
3287 and resolve_recode_mapping(ext, pp.mapping)[0] not in (ext, None)
3288 ) for pp in self._pps['post_process'])
3289
3290 if not postprocessed_by_ffmpeg:
3291 ffmpeg_fixup(ext == 'm4a' and info_dict.get('container') == 'm4a_dash',
f2df4071 3292 'writing DASH m4a. Only some players support this container',
3293 FFmpegFixupM4aPP)
24146491 3294 ffmpeg_fixup(downloader == 'hlsnative' and not self.params.get('hls_use_mpegts')
494f5230 3295 or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None,
adbc4ec4
THD
3296 'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
3297 FFmpegFixupM3u8PP)
3298 ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD',
3299 'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
3300
24146491 3301 ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
3302 ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed duration detected', FFmpegFixupDurationPP)
fd7cfb64 3303
3304 fixup()
8222d8de 3305 try:
f46e2f9d 3306 replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
af819c21 3307 except PostProcessingError as err:
3308 self.report_error('Postprocessing: %s' % str(err))
8222d8de 3309 return
ab8e5e51
AM
3310 try:
3311 for ph in self._post_hooks:
23c1a667 3312 ph(info_dict['filepath'])
ab8e5e51
AM
3313 except Exception as err:
3314 self.report_error('post hooks: %s' % str(err))
3315 return
9e907ebd 3316 info_dict['__write_download_archive'] = True
2d30509f 3317
c487cf00 3318 assert info_dict is original_infodict # Make sure the info_dict was modified in-place
a13e6848 3319 if self.params.get('force_write_download_archive'):
9e907ebd 3320 info_dict['__write_download_archive'] = True
ca6d59d2 3321 check_max_downloads()
8222d8de 3322
aa9369a2 3323 def __download_wrapper(self, func):
3324 @functools.wraps(func)
3325 def wrapper(*args, **kwargs):
3326 try:
3327 res = func(*args, **kwargs)
3328 except UnavailableVideoError as e:
3329 self.report_error(e)
b222c271 3330 except DownloadCancelled as e:
3331 self.to_screen(f'[info] {e}')
3332 if not self.params.get('break_per_url'):
3333 raise
fd404bec 3334 self._num_downloads = 0
aa9369a2 3335 else:
3336 if self.params.get('dump_single_json', False):
3337 self.post_extract(res)
3338 self.to_stdout(json.dumps(self.sanitize_info(res)))
3339 return wrapper
3340
8222d8de
JMF
3341 def download(self, url_list):
3342 """Download a given list of URLs."""
aa9369a2 3343 url_list = variadic(url_list) # Passing a single URL is a common mistake
bf1824b3 3344 outtmpl = self.params['outtmpl']['default']
3089bc74
S
3345 if (len(url_list) > 1
3346 and outtmpl != '-'
3347 and '%' not in outtmpl
3348 and self.params.get('max_downloads') != 1):
acd69589 3349 raise SameFileError(outtmpl)
8222d8de
JMF
3350
3351 for url in url_list:
aa9369a2 3352 self.__download_wrapper(self.extract_info)(
3353 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
8222d8de
JMF
3354
3355 return self._download_retcode
3356
1dcc4c0c 3357 def download_with_info_file(self, info_filename):
31bd3925
JMF
3358 with contextlib.closing(fileinput.FileInput(
3359 [info_filename], mode='r',
3360 openhook=fileinput.hook_encoded('utf-8'))) as f:
3361 # FileInput doesn't have a read method, we can't call json.load
8012d892 3362 info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
d4943898 3363 try:
aa9369a2 3364 self.__download_wrapper(self.process_ie_result)(info, download=True)
f2ebc5c7 3365 except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
bf5f605e 3366 if not isinstance(e, EntryNotInPlaylist):
3367 self.to_stderr('\r')
d4943898
JMF
3368 webpage_url = info.get('webpage_url')
3369 if webpage_url is not None:
aa9369a2 3370 self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
d4943898
JMF
3371 return self.download([webpage_url])
3372 else:
3373 raise
3374 return self._download_retcode
1dcc4c0c 3375
cb202fd2 3376 @staticmethod
8012d892 3377 def sanitize_info(info_dict, remove_private_keys=False):
3378 ''' Sanitize the infodict for converting to json '''
3ad56b42 3379 if info_dict is None:
3380 return info_dict
6e84b215 3381 info_dict.setdefault('epoch', int(time.time()))
6a5a30f9 3382 info_dict.setdefault('_type', 'video')
b5e7a2e6 3383 info_dict.setdefault('_version', {
3384 'version': __version__,
3385 'current_git_head': current_git_head(),
3386 'release_git_head': RELEASE_GIT_HEAD,
3387 'repository': REPOSITORY,
3388 })
09b49e1f 3389
8012d892 3390 if remove_private_keys:
0a5a191a 3391 reject = lambda k, v: v is None or k.startswith('__') or k in {
f46e2f9d 3392 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
0a5a191a 3393 'entries', 'filepath', '_filename', 'infojson_filename', 'original_url', 'playlist_autonumber',
6e84b215 3394 }
ae8f99e6 3395 else:
09b49e1f 3396 reject = lambda k, v: False
adbc4ec4
THD
3397
3398 def filter_fn(obj):
3399 if isinstance(obj, dict):
3400 return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
3401 elif isinstance(obj, (list, tuple, set, LazyList)):
3402 return list(map(filter_fn, obj))
3403 elif obj is None or isinstance(obj, (str, int, float, bool)):
3404 return obj
3405 else:
3406 return repr(obj)
3407
5226731e 3408 return filter_fn(info_dict)
cb202fd2 3409
8012d892 3410 @staticmethod
3411 def filter_requested_info(info_dict, actually_filter=True):
3412 ''' Alias of sanitize_info for backward compatibility '''
3413 return YoutubeDL.sanitize_info(info_dict, actually_filter)
3414
43d7f5a5 3415 def _delete_downloaded_files(self, *files_to_delete, info={}, msg=None):
3416 for filename in set(filter(None, files_to_delete)):
3417 if msg:
3418 self.to_screen(msg % filename)
3419 try:
3420 os.remove(filename)
3421 except OSError:
3422 self.report_warning(f'Unable to delete file {filename}')
3423 if filename in info.get('__files_to_move', []): # NB: Delete even if None
3424 del info['__files_to_move'][filename]
3425
ed5835b4 3426 @staticmethod
3427 def post_extract(info_dict):
3428 def actual_post_extract(info_dict):
3429 if info_dict.get('_type') in ('playlist', 'multi_video'):
3430 for video_dict in info_dict.get('entries', {}):
3431 actual_post_extract(video_dict or {})
3432 return
3433
09b49e1f 3434 post_extractor = info_dict.pop('__post_extractor', None) or (lambda: {})
3435 info_dict.update(post_extractor())
ed5835b4 3436
3437 actual_post_extract(info_dict or {})
3438
dcf64d43 3439 def run_pp(self, pp, infodict):
5bfa4862 3440 files_to_delete = []
dcf64d43 3441 if '__files_to_move' not in infodict:
3442 infodict['__files_to_move'] = {}
b1940459 3443 try:
3444 files_to_delete, infodict = pp.run(infodict)
3445 except PostProcessingError as e:
3446 # Must be True and not 'only_download'
3447 if self.params.get('ignoreerrors') is True:
3448 self.report_error(e)
3449 return infodict
3450 raise
3451
5bfa4862 3452 if not files_to_delete:
dcf64d43 3453 return infodict
5bfa4862 3454 if self.params.get('keepvideo', False):
3455 for f in files_to_delete:
dcf64d43 3456 infodict['__files_to_move'].setdefault(f, '')
5bfa4862 3457 else:
43d7f5a5 3458 self._delete_downloaded_files(
3459 *files_to_delete, info=infodict, msg='Deleting original file %s (pass -k to keep)')
dcf64d43 3460 return infodict
5bfa4862 3461
ed5835b4 3462 def run_all_pps(self, key, info, *, additional_pps=None):
bb66c247 3463 self._forceprint(key, info)
ed5835b4 3464 for pp in (additional_pps or []) + self._pps[key]:
dc5f409c 3465 info = self.run_pp(pp, info)
ed5835b4 3466 return info
277d6ff5 3467
56d868db 3468 def pre_process(self, ie_info, key='pre_process', files_to_move=None):
5bfa4862 3469 info = dict(ie_info)
56d868db 3470 info['__files_to_move'] = files_to_move or {}
415f8d51 3471 try:
3472 info = self.run_all_pps(key, info)
3473 except PostProcessingError as err:
3474 msg = f'Preprocessing: {err}'
3475 info.setdefault('__pending_error', msg)
3476 self.report_error(msg, is_error=False)
56d868db 3477 return info, info.pop('__files_to_move', None)
5bfa4862 3478
f46e2f9d 3479 def post_process(self, filename, info, files_to_move=None):
8222d8de 3480 """Run all the postprocessors on the given file."""
8222d8de 3481 info['filepath'] = filename
dcf64d43 3482 info['__files_to_move'] = files_to_move or {}
ed5835b4 3483 info = self.run_all_pps('post_process', info, additional_pps=info.get('__postprocessors'))
dcf64d43 3484 info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3485 del info['__files_to_move']
ed5835b4 3486 return self.run_all_pps('after_move', info)
c1c9a79c 3487
5db07df6 3488 def _make_archive_id(self, info_dict):
e9fef7ee
S
3489 video_id = info_dict.get('id')
3490 if not video_id:
3491 return
5db07df6
PH
3492 # Future-proof against any change in case
3493 # and backwards compatibility with prior versions
e9fef7ee 3494 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
7012b23c 3495 if extractor is None:
1211bb6d
S
3496 url = str_or_none(info_dict.get('url'))
3497 if not url:
3498 return
e9fef7ee 3499 # Try to find matching extractor for the URL and take its ie_key
8b7491c8 3500 for ie_key, ie in self._ies.items():
1211bb6d 3501 if ie.suitable(url):
8b7491c8 3502 extractor = ie_key
e9fef7ee
S
3503 break
3504 else:
3505 return
0647d925 3506 return make_archive_id(extractor, video_id)
5db07df6
PH
3507
3508 def in_download_archive(self, info_dict):
ae103564 3509 if not self.archive:
5db07df6
PH
3510 return False
3511
1e8fe57e 3512 vid_ids = [self._make_archive_id(info_dict)]
c200096c 3513 vid_ids.extend(info_dict.get('_old_archive_ids') or [])
1e8fe57e 3514 return any(id_ in self.archive for id_ in vid_ids)
c1c9a79c
PH
3515
3516 def record_download_archive(self, info_dict):
3517 fn = self.params.get('download_archive')
3518 if fn is None:
3519 return
5db07df6
PH
3520 vid_id = self._make_archive_id(info_dict)
3521 assert vid_id
ae103564 3522
a13e6848 3523 self.write_debug(f'Adding to archive: {vid_id}')
9c935fbc 3524 if is_path_like(fn):
ae103564 3525 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3526 archive_file.write(vid_id + '\n')
a45e8619 3527 self.archive.add(vid_id)
dd82ffea 3528
8c51aa65 3529 @staticmethod
8abeeb94 3530 def format_resolution(format, default='unknown'):
9359f3d4 3531 if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
fb04e403 3532 return 'audio only'
f49d89ee
PH
3533 if format.get('resolution') is not None:
3534 return format['resolution']
35615307 3535 if format.get('width') and format.get('height'):
ff51ed58 3536 return '%dx%d' % (format['width'], format['height'])
35615307 3537 elif format.get('height'):
ff51ed58 3538 return '%sp' % format['height']
35615307 3539 elif format.get('width'):
ff51ed58 3540 return '%dx?' % format['width']
3541 return default
8c51aa65 3542
8130779d 3543 def _list_format_headers(self, *headers):
3544 if self.params.get('listformats_table', True) is not False:
591bb9d3 3545 return [self._format_out(header, self.Styles.HEADERS) for header in headers]
8130779d 3546 return headers
3547
c57f7757
PH
3548 def _format_note(self, fdict):
3549 res = ''
3550 if fdict.get('ext') in ['f4f', 'f4m']:
f304da8a 3551 res += '(unsupported)'
32f90364
PH
3552 if fdict.get('language'):
3553 if res:
3554 res += ' '
f304da8a 3555 res += '[%s]' % fdict['language']
c57f7757 3556 if fdict.get('format_note') is not None:
f304da8a 3557 if res:
3558 res += ' '
3559 res += fdict['format_note']
c57f7757 3560 if fdict.get('tbr') is not None:
f304da8a 3561 if res:
3562 res += ', '
3563 res += '%4dk' % fdict['tbr']
c57f7757
PH
3564 if fdict.get('container') is not None:
3565 if res:
3566 res += ', '
3567 res += '%s container' % fdict['container']
3089bc74
S
3568 if (fdict.get('vcodec') is not None
3569 and fdict.get('vcodec') != 'none'):
c57f7757
PH
3570 if res:
3571 res += ', '
3572 res += fdict['vcodec']
91c7271a 3573 if fdict.get('vbr') is not None:
c57f7757
PH
3574 res += '@'
3575 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3576 res += 'video@'
3577 if fdict.get('vbr') is not None:
3578 res += '%4dk' % fdict['vbr']
fbb21cf5 3579 if fdict.get('fps') is not None:
5d583bdf
S
3580 if res:
3581 res += ', '
3582 res += '%sfps' % fdict['fps']
c57f7757
PH
3583 if fdict.get('acodec') is not None:
3584 if res:
3585 res += ', '
3586 if fdict['acodec'] == 'none':
3587 res += 'video only'
3588 else:
3589 res += '%-5s' % fdict['acodec']
3590 elif fdict.get('abr') is not None:
3591 if res:
3592 res += ', '
3593 res += 'audio'
3594 if fdict.get('abr') is not None:
3595 res += '@%3dk' % fdict['abr']
3596 if fdict.get('asr') is not None:
3597 res += ' (%5dHz)' % fdict['asr']
3598 if fdict.get('filesize') is not None:
3599 if res:
3600 res += ', '
3601 res += format_bytes(fdict['filesize'])
9732d77e
PH
3602 elif fdict.get('filesize_approx') is not None:
3603 if res:
3604 res += ', '
3605 res += '~' + format_bytes(fdict['filesize_approx'])
c57f7757 3606 return res
91c7271a 3607
aebb4f4b 3608 def _get_formats(self, info_dict):
3609 if info_dict.get('formats') is None:
3610 if info_dict.get('url') and info_dict.get('_type', 'video') == 'video':
3611 return [info_dict]
3612 return []
3613 return info_dict['formats']
b69fd25c 3614
aebb4f4b 3615 def render_formats_table(self, info_dict):
3616 formats = self._get_formats(info_dict)
3617 if not formats:
3618 return
8130779d 3619 if not self.params.get('listformats_table', True) is not False:
76d321f6 3620 table = [
3621 [
3622 format_field(f, 'format_id'),
3623 format_field(f, 'ext'),
3624 self.format_resolution(f),
8130779d 3625 self._format_note(f)
d5d1df8a 3626 ] for f in formats if (f.get('preference') or 0) >= -1000]
8130779d 3627 return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)
3628
d816f61f 3629 def simplified_codec(f, field):
3630 assert field in ('acodec', 'vcodec')
3631 codec = f.get(field, 'unknown')
f5ea4748 3632 if not codec:
3633 return 'unknown'
3634 elif codec != 'none':
d816f61f 3635 return '.'.join(codec.split('.')[:4])
3636
3637 if field == 'vcodec' and f.get('acodec') == 'none':
3638 return 'images'
3639 elif field == 'acodec' and f.get('vcodec') == 'none':
3640 return ''
3641 return self._format_out('audio only' if field == 'vcodec' else 'video only',
3642 self.Styles.SUPPRESS)
3643
591bb9d3 3644 delim = self._format_out('\u2502', self.Styles.DELIM, '|', test_encoding=True)
8130779d 3645 table = [
3646 [
591bb9d3 3647 self._format_out(format_field(f, 'format_id'), self.Styles.ID),
8130779d 3648 format_field(f, 'ext'),
3649 format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
563e0bf8 3650 format_field(f, 'fps', '\t%d', func=round),
8130779d 3651 format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
b8ed0f15 3652 format_field(f, 'audio_channels', '\t%s'),
8130779d 3653 delim,
3654 format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
563e0bf8 3655 format_field(f, 'tbr', '\t%dk', func=round),
8130779d 3656 shorten_protocol_name(f.get('protocol', '')),
3657 delim,
d816f61f 3658 simplified_codec(f, 'vcodec'),
563e0bf8 3659 format_field(f, 'vbr', '\t%dk', func=round),
d816f61f 3660 simplified_codec(f, 'acodec'),
563e0bf8 3661 format_field(f, 'abr', '\t%dk', func=round),
ae61d108 3662 format_field(f, 'asr', '\t%s', func=format_decimal_suffix),
8130779d 3663 join_nonempty(
591bb9d3 3664 self._format_out('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
8130779d 3665 format_field(f, 'language', '[%s]'),
3666 join_nonempty(format_field(f, 'format_note'),
3667 format_field(f, 'container', ignore=(None, f.get('ext'))),
3668 delim=', '),
3669 delim=' '),
3670 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3671 header_line = self._list_format_headers(
b8ed0f15 3672 'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', 'CH', delim, '\tFILESIZE', '\tTBR', 'PROTO',
8130779d 3673 delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
3674
3675 return render_table(
3676 header_line, table, hide_empty=True,
591bb9d3 3677 delim=self._format_out('\u2500', self.Styles.DELIM, '-', test_encoding=True))
8130779d 3678
3679 def render_thumbnails_table(self, info_dict):
88f23a18 3680 thumbnails = list(info_dict.get('thumbnails') or [])
cfb56d1a 3681 if not thumbnails:
8130779d 3682 return None
3683 return render_table(
ec11a9f4 3684 self._list_format_headers('ID', 'Width', 'Height', 'URL'),
177662e0 3685 [[t.get('id'), t.get('width') or 'unknown', t.get('height') or 'unknown', t['url']] for t in thumbnails])
2412044c 3686
8130779d 3687 def render_subtitles_table(self, video_id, subtitles):
2412044c 3688 def _row(lang, formats):
49c258e1 3689 exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
2412044c 3690 if len(set(names)) == 1:
7aee40c1 3691 names = [] if names[0] == 'unknown' else names[:1]
2412044c 3692 return [lang, ', '.join(names), ', '.join(exts)]
3693
8130779d 3694 if not subtitles:
3695 return None
3696 return render_table(
ec11a9f4 3697 self._list_format_headers('Language', 'Name', 'Formats'),
2412044c 3698 [_row(lang, formats) for lang, formats in subtitles.items()],
8130779d 3699 hide_empty=True)
3700
3701 def __list_table(self, video_id, name, func, *args):
3702 table = func(*args)
3703 if not table:
3704 self.to_screen(f'{video_id} has no {name}')
3705 return
3706 self.to_screen(f'[info] Available {name} for {video_id}:')
3707 self.to_stdout(table)
3708
3709 def list_formats(self, info_dict):
3710 self.__list_table(info_dict['id'], 'formats', self.render_formats_table, info_dict)
3711
3712 def list_thumbnails(self, info_dict):
3713 self.__list_table(info_dict['id'], 'thumbnails', self.render_thumbnails_table, info_dict)
3714
3715 def list_subtitles(self, video_id, subtitles, name='subtitles'):
3716 self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles)
a504ced0 3717
dca08720
PH
3718 def urlopen(self, req):
3719 """ Start an HTTP download """
f9934b96 3720 if isinstance(req, str):
67dda517 3721 req = sanitized_Request(req)
19a41fc6 3722 return self._opener.open(req, timeout=self._socket_timeout)
dca08720
PH
3723
3724 def print_debug_header(self):
3725 if not self.params.get('verbose'):
3726 return
49a57e70 3727
a057779d 3728 from . import _IN_CLI # Must be delayed import
3729
560738f3 3730 # These imports can be slow. So import them only as needed
3731 from .extractor.extractors import _LAZY_LOADER
3732 from .extractor.extractors import _PLUGIN_CLASSES as plugin_extractors
3733
49a57e70 3734 def get_encoding(stream):
2a938746 3735 ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
49a57e70 3736 if not supports_terminal_sequences(stream):
53973b4d 3737 from .utils import WINDOWS_VT_MODE # Must be imported locally
e3c7d495 3738 ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
49a57e70 3739 return ret
3740
591bb9d3 3741 encoding_str = 'Encodings: locale %s, fs %s, pref %s, %s' % (
49a57e70 3742 locale.getpreferredencoding(),
3743 sys.getfilesystemencoding(),
591bb9d3 3744 self.get_encoding(),
3745 ', '.join(
64fa820c 3746 f'{key} {get_encoding(stream)}' for key, stream in self._out_files.items_
591bb9d3 3747 if stream is not None and key != 'console')
3748 )
883d4b1e 3749
3750 logger = self.params.get('logger')
3751 if logger:
3752 write_debug = lambda msg: logger.debug(f'[debug] {msg}')
3753 write_debug(encoding_str)
3754 else:
96565c7e 3755 write_string(f'[debug] {encoding_str}\n', encoding=None)
49a57e70 3756 write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')
734f90bb 3757
4c88ff87 3758 source = detect_variant()
70b23409 3759 if VARIANT not in (None, 'pip'):
3760 source += '*'
36eaf303 3761 write_debug(join_nonempty(
b5e7a2e6 3762 f'{"yt-dlp" if REPOSITORY == "yt-dlp/yt-dlp" else REPOSITORY} version',
3763 __version__,
36eaf303 3764 f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
3765 '' if source == 'unknown' else f'({source})',
a057779d 3766 '' if _IN_CLI else 'API',
36eaf303 3767 delim=' '))
497074f0 3768
3769 if not _IN_CLI:
3770 write_debug(f'params: {self.params}')
3771
6e21fdd2 3772 if not _LAZY_LOADER:
3773 if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
49a57e70 3774 write_debug('Lazy loading extractors is forcibly disabled')
6e21fdd2 3775 else:
49a57e70 3776 write_debug('Lazy loading extractors is disabled')
3ae5e797 3777 if plugin_extractors or plugin_postprocessors:
49a57e70 3778 write_debug('Plugins: %s' % [
3ae5e797 3779 '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
3780 for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
8a82af35 3781 if self.params['compat_opts']:
3782 write_debug('Compatibility options: %s' % ', '.join(self.params['compat_opts']))
36eaf303 3783
b5e7a2e6 3784 if current_git_head():
3785 write_debug(f'Git HEAD: {current_git_head()}')
b1f94422 3786 write_debug(system_identifier())
d28b5171 3787
8913ef74 3788 exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
3789 ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
3790 if ffmpeg_features:
19a03940 3791 exe_versions['ffmpeg'] += ' (%s)' % ','.join(sorted(ffmpeg_features))
8913ef74 3792
4c83c967 3793 exe_versions['rtmpdump'] = rtmpdump_version()
feee8d32 3794 exe_versions['phantomjs'] = PhantomJSwrapper._version()
d28b5171 3795 exe_str = ', '.join(
2831b468 3796 f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
3797 ) or 'none'
49a57e70 3798 write_debug('exe versions: %s' % exe_str)
dca08720 3799
1d485a1a 3800 from .compat.compat_utils import get_package_info
9b8ee23b 3801 from .dependencies import available_dependencies
3802
3803 write_debug('Optional libraries: %s' % (', '.join(sorted({
1d485a1a 3804 join_nonempty(*get_package_info(m)) for m in available_dependencies.values()
9b8ee23b 3805 })) or 'none'))
2831b468 3806
97ec5bc5 3807 self._setup_opener()
dca08720
PH
3808 proxy_map = {}
3809 for handler in self._opener.handlers:
3810 if hasattr(handler, 'proxies'):
3811 proxy_map.update(handler.proxies)
49a57e70 3812 write_debug(f'Proxy map: {proxy_map}')
dca08720 3813
49a57e70 3814 # Not implemented
3815 if False and self.params.get('call_home'):
0f06bcd7 3816 ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode()
49a57e70 3817 write_debug('Public IP address: %s' % ipaddr)
58b1f00d 3818 latest_version = self.urlopen(
0f06bcd7 3819 'https://yt-dl.org/latest/version').read().decode()
58b1f00d
PH
3820 if version_tuple(latest_version) > version_tuple(__version__):
3821 self.report_warning(
3822 'You are using an outdated version (newest version: %s)! '
3823 'See https://yt-dl.org/update if you need help updating.' %
3824 latest_version)
3825
e344693b 3826 def _setup_opener(self):
97ec5bc5 3827 if hasattr(self, '_opener'):
3828 return
6ad14cab 3829 timeout_val = self.params.get('socket_timeout')
17bddf3e 3830 self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
6ad14cab 3831
982ee69a 3832 opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
dca08720
PH
3833 opts_cookiefile = self.params.get('cookiefile')
3834 opts_proxy = self.params.get('proxy')
3835
982ee69a 3836 self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
dca08720 3837
6a3f4c3f 3838 cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
dca08720
PH
3839 if opts_proxy is not None:
3840 if opts_proxy == '':
3841 proxies = {}
3842 else:
3843 proxies = {'http': opts_proxy, 'https': opts_proxy}
3844 else:
ac668111 3845 proxies = urllib.request.getproxies()
067aa17e 3846 # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
dca08720
PH
3847 if 'http' in proxies and 'https' not in proxies:
3848 proxies['https'] = proxies['http']
91410c9b 3849 proxy_handler = PerRequestProxyHandler(proxies)
a0ddb8a2
PH
3850
3851 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
be4a824d
PH
3852 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3853 ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
fca6dba8 3854 redirect_handler = YoutubeDLRedirectHandler()
f9934b96 3855 data_handler = urllib.request.DataHandler()
6240b0a2
JMF
3856
3857 # When passing our own FileHandler instance, build_opener won't add the
3858 # default FileHandler and allows us to disable the file protocol, which
3859 # can be used for malicious purposes (see
067aa17e 3860 # https://github.com/ytdl-org/youtube-dl/issues/8227)
ac668111 3861 file_handler = urllib.request.FileHandler()
6240b0a2
JMF
3862
3863 def file_open(*args, **kwargs):
ac668111 3864 raise urllib.error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
6240b0a2
JMF
3865 file_handler.file_open = file_open
3866
ac668111 3867 opener = urllib.request.build_opener(
fca6dba8 3868 proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
2461f79d 3869
dca08720
PH
3870 # Delete the default user-agent header, which would otherwise apply in
3871 # cases where our custom HTTP handler doesn't come into play
067aa17e 3872 # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
dca08720
PH
3873 opener.addheaders = []
3874 self._opener = opener
62fec3b2
PH
3875
3876 def encode(self, s):
3877 if isinstance(s, bytes):
3878 return s # Already encoded
3879
3880 try:
3881 return s.encode(self.get_encoding())
3882 except UnicodeEncodeError as err:
3883 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3884 raise
3885
3886 def get_encoding(self):
3887 encoding = self.params.get('encoding')
3888 if encoding is None:
3889 encoding = preferredencoding()
3890 return encoding
ec82d85a 3891
e08a85d8 3892 def _write_info_json(self, label, ie_result, infofn, overwrite=None):
cb96c5be 3893 ''' Write infojson and returns True = written, 'exists' = Already exists, False = skip, None = error '''
e08a85d8 3894 if overwrite is None:
3895 overwrite = self.params.get('overwrites', True)
80c03fa9 3896 if not self.params.get('writeinfojson'):
3897 return False
3898 elif not infofn:
3899 self.write_debug(f'Skipping writing {label} infojson')
3900 return False
3901 elif not self._ensure_dir_exists(infofn):
3902 return None
e08a85d8 3903 elif not overwrite and os.path.exists(infofn):
80c03fa9 3904 self.to_screen(f'[info] {label.title()} metadata is already present')
cb96c5be 3905 return 'exists'
3906
3907 self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3908 try:
3909 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3910 return True
86e5f3ed 3911 except OSError:
cb96c5be 3912 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3913 return None
80c03fa9 3914
3915 def _write_description(self, label, ie_result, descfn):
3916 ''' Write description and returns True = written, False = skip, None = error '''
3917 if not self.params.get('writedescription'):
3918 return False
3919 elif not descfn:
3920 self.write_debug(f'Skipping writing {label} description')
3921 return False
3922 elif not self._ensure_dir_exists(descfn):
3923 return None
3924 elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3925 self.to_screen(f'[info] {label.title()} description is already present')
3926 elif ie_result.get('description') is None:
3927 self.report_warning(f'There\'s no {label} description to write')
3928 return False
3929 else:
3930 try:
3931 self.to_screen(f'[info] Writing {label} description to: {descfn}')
86e5f3ed 3932 with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
80c03fa9 3933 descfile.write(ie_result['description'])
86e5f3ed 3934 except OSError:
80c03fa9 3935 self.report_error(f'Cannot write {label} description file {descfn}')
3936 return None
3937 return True
3938
3939 def _write_subtitles(self, info_dict, filename):
3940 ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
3941 ret = []
3942 subtitles = info_dict.get('requested_subtitles')
3943 if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
3944 # subtitles download errors are already managed as troubles in relevant IE
3945 # that way it will silently go on when used with unsupporting IE
3946 return ret
3947
3948 sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
3949 if not sub_filename_base:
3950 self.to_screen('[info] Skipping writing video subtitles')
3951 return ret
3952 for sub_lang, sub_info in subtitles.items():
3953 sub_format = sub_info['ext']
3954 sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
3955 sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
e04938ab 3956 existing_sub = self.existing_file((sub_filename_final, sub_filename))
3957 if existing_sub:
80c03fa9 3958 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
e04938ab 3959 sub_info['filepath'] = existing_sub
3960 ret.append((existing_sub, sub_filename_final))
80c03fa9 3961 continue
3962
3963 self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
3964 if sub_info.get('data') is not None:
3965 try:
3966 # Use newline='' to prevent conversion of newline characters
3967 # See https://github.com/ytdl-org/youtube-dl/issues/10268
86e5f3ed 3968 with open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
80c03fa9 3969 subfile.write(sub_info['data'])
3970 sub_info['filepath'] = sub_filename
3971 ret.append((sub_filename, sub_filename_final))
3972 continue
86e5f3ed 3973 except OSError:
80c03fa9 3974 self.report_error(f'Cannot write video subtitles file {sub_filename}')
3975 return None
3976
3977 try:
3978 sub_copy = sub_info.copy()
3979 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
3980 self.dl(sub_filename, sub_copy, subtitle=True)
3981 sub_info['filepath'] = sub_filename
3982 ret.append((sub_filename, sub_filename_final))
6020e05d 3983 except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
c70c418d 3984 msg = f'Unable to download video subtitles for {sub_lang!r}: {err}'
6020e05d 3985 if self.params.get('ignoreerrors') is not True: # False or 'only_download'
c70c418d 3986 if not self.params.get('ignoreerrors'):
3987 self.report_error(msg)
3988 raise DownloadError(msg)
3989 self.report_warning(msg)
519804a9 3990 return ret
80c03fa9 3991
3992 def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
3993 ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
6c4fd172 3994 write_all = self.params.get('write_all_thumbnails', False)
80c03fa9 3995 thumbnails, ret = [], []
6c4fd172 3996 if write_all or self.params.get('writethumbnail', False):
0202b52a 3997 thumbnails = info_dict.get('thumbnails') or []
6c4fd172 3998 multiple = write_all and len(thumbnails) > 1
ec82d85a 3999
80c03fa9 4000 if thumb_filename_base is None:
4001 thumb_filename_base = filename
4002 if thumbnails and not thumb_filename_base:
4003 self.write_debug(f'Skipping writing {label} thumbnail')
4004 return ret
4005
dd0228ce 4006 for idx, t in list(enumerate(thumbnails))[::-1]:
80c03fa9 4007 thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
aa9369a2 4008 thumb_display_id = f'{label} thumbnail {t["id"]}'
80c03fa9 4009 thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
4010 thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
ec82d85a 4011
e04938ab 4012 existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
4013 if existing_thumb:
aa9369a2 4014 self.to_screen('[info] %s is already present' % (
4015 thumb_display_id if multiple else f'{label} thumbnail').capitalize())
e04938ab 4016 t['filepath'] = existing_thumb
4017 ret.append((existing_thumb, thumb_filename_final))
ec82d85a 4018 else:
80c03fa9 4019 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
ec82d85a 4020 try:
297e9952 4021 uf = self.urlopen(sanitized_Request(t['url'], headers=t.get('http_headers', {})))
80c03fa9 4022 self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
d3d89c32 4023 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
ec82d85a 4024 shutil.copyfileobj(uf, thumbf)
80c03fa9 4025 ret.append((thumb_filename, thumb_filename_final))
885cc0b7 4026 t['filepath'] = thumb_filename
3158150c 4027 except network_exceptions as err:
dd0228ce 4028 thumbnails.pop(idx)
80c03fa9 4029 self.report_warning(f'Unable to download {thumb_display_id}: {err}')
6c4fd172 4030 if ret and not write_all:
4031 break
0202b52a 4032 return ret