26e63931 1 import collections
31bd3925 2 import contextlib
9d2ecdbc 3 import datetime
c1c9a79c 4 import errno
31bd3925 5 import fileinput
b5ae35ee 6 import functools
8222d8de 7 import io
b82f815f 8 import itertools
8694c600 9 import json
62fec3b2 10 import locale
083c9df9 11 import operator
8222d8de 12 import os
f8271158 13 import random
8222d8de 14 import re
15 import shutil
dca08720 16 import subprocess
8222d8de 17 import sys
21cd8fae 18 import tempfile
8222d8de 19 import time
67134eab 20 import tokenize
8222d8de 21 import traceback
524e2e4f 22 import unicodedata
f9934b96 23 import urllib.request
961ea474 24 from string import ascii_letters
25
f8271158 26 from .cache import Cache
14f25df2 27 from .compat import compat_os_name, compat_shlex_quote
982ee69a 28 from .cookies import load_cookies
f8271158 29 from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
30 from .downloader.rtmp import rtmpdump_version
f8271158 31 from .extractor import gen_extractor_classes, get_info_extractor
fe7866d0 32 from .extractor.common import UnsupportedURLIE
f8271158 33 from .extractor.openload import PhantomJSwrapper
34 from .minicurses import format_text
35 from .postprocessor import _PLUGIN_CLASSES as plugin_postprocessors
36 from .postprocessor import (
37 EmbedThumbnailPP,
38 FFmpegFixupDuplicateMoovPP,
39 FFmpegFixupDurationPP,
40 FFmpegFixupM3u8PP,
41 FFmpegFixupM4aPP,
42 FFmpegFixupStretchedPP,
43 FFmpegFixupTimestampPP,
44 FFmpegMergerPP,
45 FFmpegPostProcessor,
ca9def71 46 FFmpegVideoConvertorPP,
f8271158 47 MoveFilesAfterDownloadPP,
48 get_postprocessor,
49)
ca9def71 50 from .postprocessor.ffmpeg import resolve_mapping as resolve_recode_mapping
b5e7a2e6 51 from .update import REPOSITORY, current_git_head, detect_variant
8c25f81b 52 from .utils import (
f8271158 53 DEFAULT_OUTTMPL,
7b2c3f47 54 IDENTITY,
f8271158 55 LINK_TEMPLATES,
8dc59305 56 MEDIA_EXTENSIONS,
f8271158 57 NO_DEFAULT,
1d485a1a 58 NUMBER_RE,
f8271158 59 OUTTMPL_TYPES,
60 POSTPROCESS_WHEN,
61 STR_FORMAT_RE_TMPL,
62 STR_FORMAT_TYPES,
63 ContentTooShortError,
64 DateRange,
65 DownloadCancelled,
66 DownloadError,
67 EntryNotInPlaylist,
68 ExistingVideoReached,
69 ExtractorError,
784320c9 70 FormatSorter,
f8271158 71 GeoRestrictedError,
72 HEADRequest,
f8271158 73 ISO3166Utils,
74 LazyList,
75 MaxDownloadsReached,
19a03940 76 Namespace,
f8271158 77 PagedList,
78 PerRequestProxyHandler,
7e88d7d7 79 PlaylistEntries,
f8271158 80 Popen,
81 PostProcessingError,
82 ReExtractInfo,
83 RejectedVideoReached,
84 SameFileError,
85 UnavailableVideoError,
693f0600 86 UserNotLive,
f8271158 87 YoutubeDLCookieProcessor,
88 YoutubeDLHandler,
89 YoutubeDLRedirectHandler,
eedb7ba5 90 age_restricted,
91 args_to_str,
cb794ee0 92 bug_reports_message,
ce02ed60 93 date_from_str,
da4db748 94 deprecation_warning,
ce02ed60 95 determine_ext,
b5559424 96 determine_protocol,
c0384f22 97 encode_compat_str,
ce02ed60 98 encodeFilename,
a06916d9 99 error_to_compat_str,
47cdc68e 100 escapeHTML,
590bc6f6 101 expand_path,
90137ca4 102 filter_dict,
e29663c6 103 float_or_none,
02dbf93f 104 format_bytes,
e0fd9573 105 format_decimal_suffix,
f8271158 106 format_field,
525ef922 107 formatSeconds,
fc61aff4 108 get_compatible_ext,
0bb322b9 109 get_domain,
c9969434 110 int_or_none,
732044af 111 iri_to_uri,
941e881e 112 is_path_like,
34921b43 113 join_nonempty,
ce02ed60 114 locked_file,
0647d925 115 make_archive_id,
0202b52a 116 make_dir,
dca08720 117 make_HTTPS_handler,
8b7539d2 118 merge_headers,
3158150c 119 network_exceptions,
ec11a9f4 120 number_of_digits,
cd6fc19e 121 orderedSet,
5314b521 122 orderedSet_from_options,
083c9df9 123 parse_filesize,
ce02ed60 124 preferredencoding,
eedb7ba5 125 prepend_extension,
51fb4995 126 register_socks_protocols,
3efb96a6 127 remove_terminal_sequences,
cfb56d1a 128 render_table,
eedb7ba5 129 replace_extension,
ce02ed60 130 sanitize_filename,
1bb5c511 131 sanitize_path,
dcf77cf1 132 sanitize_url,
67dda517 133 sanitized_Request,
e5660ee6 134 std_headers,
1211bb6d 135 str_or_none,
e29663c6 136 strftime_or_none,
ce02ed60 137 subtitles_filename,
819e0531 138 supports_terminal_sequences,
b1f94422 139 system_identifier,
f2ebc5c7 140 timetuple_from_msec,
732044af 141 to_high_limit_path,
324ad820 142 traverse_obj,
fc61aff4 143 try_call,
6033d980 144 try_get,
29eb5174 145 url_basename,
7d1eb38a 146 variadic,
58b1f00d 147 version_tuple,
53973b4d 148 windows_enable_vt_mode,
ce02ed60 149 write_json_file,
150 write_string,
4f026faf 151 )
70b23409 152 from .version import RELEASE_GIT_HEAD, VARIANT, __version__
8222d8de 153
e9c0cdd3 154 if compat_os_name == 'nt':
155 import ctypes
156
2459b6e1 157
86e5f3ed 158 class YoutubeDL:
8222d8de 159 """YoutubeDL class.
160
161 YoutubeDL objects are the ones responsible for downloading the
162 actual video file and writing it to disk if the user has requested
163 it, among some other tasks. In most cases there should be one per
164 program. Since, given a video URL, the downloader doesn't know how to
165 extract all the needed information (a task that InfoExtractors do), it
166 has to pass the URL to one of them.
167
168 For this, YoutubeDL objects have a method that allows
169 InfoExtractors to be registered in a given order. When it is passed
170 a URL, the YoutubeDL object hands it to the first InfoExtractor it
171 finds that reports being able to handle it. The InfoExtractor extracts
172 all the information about the video or videos the URL refers to, and
173 YoutubeDL processes the extracted information, possibly using a File
174 Downloader to download the video.
175
176 YoutubeDL objects accept a lot of parameters. In order not to saturate
177 the object constructor with arguments, it receives a dictionary of
178 options instead. These options are available through the params
179 attribute for the InfoExtractors to use. The YoutubeDL also
180 registers itself as the downloader in charge for the InfoExtractors
181 that are added to it, so this is a "mutual registration".
182
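 A minimal usage sketch (illustrative only; the option value and URL below
 are placeholders, and any of the options documented under "Available
 options" may be passed in the same way):

     from yt_dlp import YoutubeDL

     with YoutubeDL({'format': 'best'}) as ydl:
         ydl.download(['https://www.example.com/some/video'])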
183 Available options:
184
185 username: Username for authentication purposes.
186 password: Password for authentication purposes.
180940e0 187 videopassword: Password for accessing a video.
1da50aa3 188 ap_mso: Adobe Pass multiple-system operator identifier.
189 ap_username: Multiple-system operator account username.
190 ap_password: Multiple-system operator account password.
8222d8de 191 usenetrc: Use netrc for authentication instead.
192 verbose: Print additional info to stdout.
193 quiet: Do not print messages to stdout.
ad8915b7 194 no_warnings: Do not print out anything for warnings.
bb66c247 195 forceprint: A dict with keys WHEN mapped to a list of templates to
196 print to stdout. The allowed keys are video or any of the
197 items in utils.POSTPROCESS_WHEN.
ca30f449 198 For compatibility, a single list is also accepted
bb66c247 199 print_to_file: A dict with keys WHEN (same as forceprint) mapped to
200 a list of tuples with (template, filename)
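 E.g. (a sketch; the WHEN keys, templates and filename are placeholders):
     forceprint={'video': ['%(title)s by %(uploader)s']}
     print_to_file={'after_move': [('%(filepath)s', 'downloaded.txt')]}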
8694c600 201 forcejson: Force printing info_dict as JSON.
63e0be34 202 dump_single_json: Force printing the info_dict of the whole playlist
203 (or video) as a single JSON line.
c25228e5 204 force_write_download_archive: Force writing download archive regardless
205 of 'skip_download' or 'simulate'.
b7b04c78 206 simulate: Do not download the video files. If unset (or None),
207 simulate only if listsubtitles, listformats or list_thumbnails is used
eb8a4433 208 format: Video format code. see "FORMAT SELECTION" for more details.
093a1710 209 You can also pass a function. The function takes 'ctx' as
210 argument and returns the formats to download.
211 See "build_format_selector" for an implementation
63ad4d43 212 allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
b7da73eb 213 ignore_no_formats_error: Ignore "No video formats" error. Useful for
214 extracting metadata even if the video is not actually
215 available for download (experimental)
0930b11f 216 format_sort: A list of fields by which to sort the video formats.
217 See "Sorting Formats" for more details.
c25228e5 218 format_sort_force: Force the given format_sort. see "Sorting Formats"
219 for more details.
08d30158 220 prefer_free_formats: Whether to prefer video formats with free containers
221 over non-free ones of same quality.
c25228e5 222 allow_multiple_video_streams: Allow multiple video streams to be merged
223 into a single file
224 allow_multiple_audio_streams: Allow multiple audio streams to be merged
225 into a single file
0ba692ac 226 check_formats: Whether to test if the formats are downloadable.
9f1a1c36 227 Can be True (check all), False (check none),
228 'selected' (check selected formats),
0ba692ac 229 or None (check only if requested by extractor)
4524baf0 230 paths: Dictionary of output paths. The allowed keys are 'home',
231 'temp' and the keys of OUTTMPL_TYPES (in utils.py)
de6000d9 232 outtmpl: Dictionary of templates for output names. Allowed keys
4524baf0 233 are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
34488702 234 For compatibility with youtube-dl, a single string can also be used
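 E.g. (a sketch; the directories and template are placeholders):
     paths={'home': '~/Videos', 'temp': '/tmp/yt-dlp'}
     outtmpl={'default': '%(title)s [%(id)s].%(ext)s'}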
a820dc72 235 outtmpl_na_placeholder: Placeholder for unavailable meta fields.
236 restrictfilenames: Do not allow "&" and spaces in file names
237 trim_file_name: Limit length of filename (extension excluded)
4524baf0 238 windowsfilenames: Force the filenames to be windows compatible
b1940459 239 ignoreerrors: Do not stop on download/postprocessing errors.
240 Can be 'only_download' to ignore only download errors.
241 Default is 'only_download' for CLI, but False for API
26e2805c 242 skip_playlist_after_errors: Number of allowed failures until the rest of
243 the playlist is skipped
fe7866d0 244 allowed_extractors: List of regexes to match against extractor names that are allowed
0c3d0f51 245 overwrites: Overwrite all video and metadata files if True,
246 overwrite only non-video files if None
247 and don't overwrite any file if False
34488702 248 For compatibility with youtube-dl,
249 "nooverwrites" may also be used instead
c14e88f0 250 playlist_items: Specific indices of playlist to download.
75822ca7 251 playlistrandom: Download playlist items in random order.
7e9a6125 252 lazy_playlist: Process playlist entries as they are received.
8222d8de 253 matchtitle: Download only matching titles.
254 rejecttitle: Reject downloads for matching titles.
8bf9319e 255 logger: Log messages to a logging.Logger instance.
17ffed18 256 logtostderr: Print everything to stderr instead of stdout.
257 consoletitle: Display progress in console window's titlebar.
8222d8de 258 writedescription: Write the video description to a .description file
259 writeinfojson: Write the video description to a .info.json file
75d43ca0 260 clean_infojson: Remove private fields from the infojson
34488702 261 getcomments: Extract video comments. This will not be written to disk
06167fbb 262 unless writeinfojson is also given
1fb07d10 263 writeannotations: Write the video annotations to a .annotations.xml file
8222d8de 264 writethumbnail: Write the thumbnail image to a file
c25228e5 265 allow_playlist_files: Whether to write playlists' description, infojson etc
266 also to disk when using the 'write*' options
ec82d85a 267 write_all_thumbnails: Write all thumbnail formats to files
732044af 268 writelink: Write an internet shortcut file, depending on the
269 current platform (.url/.webloc/.desktop)
270 writeurllink: Write a Windows internet shortcut file (.url)
271 writewebloclink: Write a macOS internet shortcut file (.webloc)
272 writedesktoplink: Write a Linux internet shortcut file (.desktop)
8222d8de 273 writesubtitles: Write the video subtitles to a file
741dd8ea 274 writeautomaticsub: Write the automatically generated subtitles to a file
8222d8de 275 listsubtitles: Lists all available subtitles for the video
a504ced0 276 subtitlesformat: The format code for subtitles
c32b0aab 277 subtitleslangs: List of languages of the subtitles to download (can be regex).
278 The list may contain "all" to refer to all the available
279 subtitles. The language can be prefixed with a "-" to
62b58c09 280 exclude it from the requested languages, e.g. ['all', '-live_chat']
8222d8de 281 keepvideo: Keep the video file after post-processing
282 daterange: A DateRange object, download only if the upload_date is in the range.
283 skip_download: Skip the actual download of the video file
c35f9e72 284 cachedir: Location of the cache files in the filesystem.
a0e07d31 285 False to disable filesystem cache.
47192f92 286 noplaylist: Download single video instead of a playlist if in doubt.
8dbe9899 287 age_limit: An integer representing the user's age in years.
288 Unsuitable videos for the given age are skipped.
5fe18bdb 289 min_views: An integer representing the minimum view count the video
290 must have in order to not be skipped.
291 Videos without view count information are always
292 downloaded. None for no limit.
293 max_views: An integer representing the maximum view count.
294 Videos that are more popular than that are not
295 downloaded.
296 Videos without view count information are always
297 downloaded. None for no limit.
ae103564 298 download_archive: A set, or the name of a file where all downloads are recorded.
299 Videos already present in the file are not downloaded again.
8a51f564 300 break_on_existing: Stop the download process after attempting to download a
301 file that is in the archive.
302 break_on_reject: Stop the download process when encountering a video that
303 has been filtered out.
b222c271 304 break_per_url: Whether break_on_reject and break_on_existing
305 should act on each input URL as opposed to for the entire queue
d76fa1f3 306 cookiefile: File name or text stream from where cookies should be read and dumped to
f59f5ef8 307 cookiesfrombrowser: A tuple containing the name of the browser, the profile
9bd13fe5 308 name/path from where cookies are loaded, the name of the keyring,
309 and the container name, e.g. ('chrome', ) or
310 ('vivaldi', 'default', 'BASICTEXT') or ('firefox', 'default', None, 'Meta')
f81c62a6 311 legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
312 support RFC 5746 secure renegotiation
f59f5ef8 313 nocheckcertificate: Do not verify SSL certificates
bb58c9ed 314 client_certificate: Path to client certificate file in PEM format. May include the private key
315 client_certificate_key: Path to private key file for client certificate
316 client_certificate_password: Password for client certificate private key, if encrypted.
317 If not provided and the key is encrypted, yt-dlp will ask interactively
7e8c0af0 318 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
c6e07cf1 319 (Only supported by some extractors)
8b7539d2 320 http_headers: A dictionary of custom headers to be used for all requests
a1ee09e8 321 proxy: URL of the proxy server to use
38cce791 322 geo_verification_proxy: URL of the proxy to use for IP address verification
504f20dd 323 on geo-restricted sites.
e344693b 324 socket_timeout: Time to wait for unresponsive hosts, in seconds
0783b09b 325 bidi_workaround: Work around buggy terminals without bidirectional text
326 support, using fribidi
a0ddb8a2 327 debug_printtraffic:Print out sent and received HTTP traffic
04b4d394 328 default_search: Prepend this string if an input url is not valid.
329 'auto' for elaborate guessing
62fec3b2 330 encoding: Use this encoding instead of the system-specified.
134c913c 331 extract_flat: Whether to resolve and process url_results further
332 * False: Always process (default)
333 * True: Never process
334 * 'in_playlist': Do not process inside playlist/multi_video
335 * 'discard': Always process, but don't return the result
336 from inside playlist/multi_video
337 * 'discard_in_playlist': Same as "discard", but only for
338 playlists (not multi_video)
f2ebc5c7 339 wait_for_video: If given, wait for scheduled streams to become available.
340 The value should be a tuple containing the range
341 (min_secs, max_secs) to wait between retries
4f026faf 342 postprocessors: A list of dictionaries, each with an entry
71b640cc 343 * key: The name of the postprocessor. See
7a5c1cfe 344 yt_dlp/postprocessor/__init__.py for a list.
bb66c247 345 * when: When to run the postprocessor. Allowed values are
346 the entries of utils.POSTPROCESS_WHEN
56d868db 347 Assumed to be 'post_process' if not given
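 E.g. a single entry that converts the audio after download (a sketch;
 the FFmpegExtractAudio key and its preferredcodec argument are assumed
 to match the registry in yt_dlp/postprocessor/__init__.py):
     postprocessors=[{
         'key': 'FFmpegExtractAudio',
         'preferredcodec': 'mp3',
         'when': 'post_process',
     }]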
71b640cc 348 progress_hooks: A list of functions that get called on download
349 progress, with a dictionary with the entries
5cda4eda 350 * status: One of "downloading", "error", or "finished".
ee69b99a 351 Check this first and ignore unknown values.
3ba7740d 352 * info_dict: The extracted info_dict
71b640cc 353
5cda4eda 354 If status is one of "downloading", or "finished", the
ee69b99a 355 following properties may also be present:
356 * filename: The final filename (always present)
5cda4eda 357 * tmpfilename: The filename we're currently writing to
71b640cc 358 * downloaded_bytes: Bytes on disk
359 * total_bytes: Size of the whole file, None if unknown
5cda4eda 360 * total_bytes_estimate: Guess of the eventual file size,
361 None if unavailable.
362 * elapsed: The number of seconds since download started.
71b640cc 363 * eta: The estimated time in seconds, None if unknown
364 * speed: The download speed in bytes/second, None if
365 unknown
5cda4eda 366 * fragment_index: The counter of the currently
367 downloaded video fragment.
368 * fragment_count: The number of fragments (= individual
369 files that will be merged)
71b640cc 370
371 Progress hooks are guaranteed to be called at least once
372 (with status "finished") if the download is successful.
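 A sketch of such a hook (field names as documented above):

     def my_progress_hook(d):
         if d['status'] == 'finished':
             print('Finished downloading', d.get('filename'))

 and then pass progress_hooks=[my_progress_hook] in the params.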
819e0531 373 postprocessor_hooks: A list of functions that get called on postprocessing
374 progress, with a dictionary with the entries
375 * status: One of "started", "processing", or "finished".
376 Check this first and ignore unknown values.
377 * postprocessor: Name of the postprocessor
378 * info_dict: The extracted info_dict
379
380 Progress hooks are guaranteed to be called at least twice
381 (with status "started" and "finished") if the processing is successful.
fc61aff4 382 merge_output_format: "/" separated list of extensions to use when merging formats.
6b591b29 383 final_ext: Expected final extension; used to detect when the file was
59a7a13e 384 already downloaded and converted
6271f1ca 385 fixup: Automatically correct known faults of the file.
386 One of:
387 - "never": do nothing
388 - "warn": only emit a warning
389 - "detect_or_warn": check whether we can do anything
62cd676c 390 about it, warn otherwise (default)
504f20dd 391 source_address: Client-side IP address to bind to.
1cf376f5 392 sleep_interval_requests: Number of seconds to sleep between requests
393 during extraction
7aa589a5 394 sleep_interval: Number of seconds to sleep before each download when
395 used alone or a lower bound of a range for randomized
396 sleep before each download (minimum possible number
397 of seconds to sleep) when used along with
398 max_sleep_interval.
399 max_sleep_interval:Upper bound of a range for randomized sleep before each
400 download (maximum possible number of seconds to sleep).
401 Must only be used along with sleep_interval.
402 Actual sleep time will be a random float from range
403 [sleep_interval; max_sleep_interval].
1cf376f5 404 sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
cfb56d1a 405 listformats: Print an overview of available video formats and exit.
406 list_thumbnails: Print a table of all thumbnails and exit.
0a41f331 407 match_filter: A function that gets called for every video with the signature
408 (info_dict, *, incomplete: bool) -> Optional[str]
409 For backward compatibility with youtube-dl, the signature
410 (info_dict) -> Optional[str] is also allowed.
411 - If it returns a message, the video is ignored.
412 - If it returns None, the video is downloaded.
413 - If it returns utils.NO_DEFAULT, the user is interactively
414 asked whether to download the video.
347de493 415 match_filter_func in utils.py is one example for this.
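 A sketch of such a function (the 10-minute threshold is arbitrary):

     def skip_short_videos(info_dict, *, incomplete=False):
         duration = info_dict.get('duration')
         if duration and duration < 600:
             return 'Skipping: video shorter than 10 minutes'
         return None  # None means "download it"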
7e5db8c9 416 no_color: Do not emit color codes in output.
0a840f58 417 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
504f20dd 418 HTTP header
0a840f58 419 geo_bypass_country:
773f291d 420 Two-letter ISO 3166-2 country code that will be used for
421 explicit geographic restriction bypassing via faking
504f20dd 422 X-Forwarded-For HTTP header
5f95927a 423 geo_bypass_ip_block:
424 IP range in CIDR notation that will be used similarly to
504f20dd 425 geo_bypass_country
52a8a1e1 426 external_downloader: A dictionary of protocol keys and the executable of the
427 external downloader to use for it. The allowed protocols
428 are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
429 Set the value to 'native' to use the native downloader
53ed7066 430 compat_opts: Compatibility options. See "Differences in default behavior".
3acf6d38 431 The following options do not work when used through the API:
b5ae35ee 432 filename, abort-on-error, multistreams, no-live-chat, format-sort
dac5df5a 433 no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
e4f02757 434 Refer to __init__.py for their implementation
819e0531 435 progress_template: Dictionary of templates for progress outputs.
436 Allowed keys are 'download', 'postprocess',
437 'download-title' (console title) and 'postprocess-title'.
438 The template is mapped on a dictionary with keys 'progress' and 'info'
23326151 439 retry_sleep_functions: Dictionary of functions that takes the number of attempts
440 as argument and returns the time to sleep in seconds.
441 Allowed keys are 'http', 'fragment', 'file_access'
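 E.g. (a sketch with an arbitrary linear backoff):
     retry_sleep_functions={'http': lambda n: 5 * (n + 1)}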
0f446365 442 download_ranges: A callback function that gets called for every video with
443 the signature (info_dict, ydl) -> Iterable[Section].
444 Only the returned sections will be downloaded.
445 Each Section is a dict with the following keys:
5ec1b6b7 446 * start_time: Start time of the section in seconds
447 * end_time: End time of the section in seconds
448 * title: Section title (Optional)
449 * index: Section number (Optional)
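 A sketch of such a callback (the 30-second cut-off is a placeholder):

     def first_30_seconds(info_dict, ydl):
         yield {'start_time': 0, 'end_time': 30}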
0f446365 450 force_keyframes_at_cuts: Re-encode the video when downloading ranges to get precise cuts
a7dc6a89 451 noprogress: Do not print the progress bar
a831c2ea 452 live_from_start: Whether to download livestream videos from the start
fe7e0c98 453
8222d8de 454 The following parameters are not used by YoutubeDL itself, they are used by
7a5c1cfe 455 the downloader (see yt_dlp/downloader/common.py):
51d9739f 456 nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
205a0654 457 max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
a7dc6a89 458 continuedl, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
59a7a13e 459 external_downloader_args, concurrent_fragment_downloads.
76b1bd67 460
461 The following options are used by the post processors:
c0b7d117 462 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
463 to the binary or its containing directory.
43820c03 464 postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
34488702 465 and a list of additional command-line arguments for the
466 postprocessor/executable. The dict can also have "PP+EXE" keys
467 which are used when the given exe is used by the given PP.
468 Use 'default' as the name for arguments to be passed to all PP
469 For compatibility with youtube-dl, a single list of args
470 can also be used
e409895f 471
472 The following options are used by the extractors:
62bff2c1 473 extractor_retries: Number of times to retry for known errors
474 dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
e409895f 475 hls_split_discontinuity: Split HLS playlists to different formats at
62bff2c1 476 discontinuities such as ad breaks (default: False)
5d3a0e79 477 extractor_args: A dictionary of arguments to be passed to the extractors.
478 See "EXTRACTOR ARGUMENTS" for details.
62b58c09 479 E.g. {'youtube': {'skip': ['dash', 'hls']}}
88f23a18 480 mark_watched: Mark videos watched (even with --simulate). Only for YouTube
1890fc63 481
482 The following options are deprecated and may be removed in the future:
483
fe7866d0 484 force_generic_extractor: Force downloader to use the generic extractor
485 - Use allowed_extractors = ['generic', 'default']
7e9a6125 486 playliststart: - Use playlist_items
487 Playlist item to start at.
488 playlistend: - Use playlist_items
489 Playlist item to end at.
490 playlistreverse: - Use playlist_items
491 Download playlist items in reverse order.
1890fc63 492 forceurl: - Use forceprint
493 Force printing final URL.
494 forcetitle: - Use forceprint
495 Force printing title.
496 forceid: - Use forceprint
497 Force printing ID.
498 forcethumbnail: - Use forceprint
499 Force printing thumbnail URL.
500 forcedescription: - Use forceprint
501 Force printing description.
502 forcefilename: - Use forceprint
503 Force printing final filename.
504 forceduration: - Use forceprint
505 Force printing duration.
506 allsubtitles: - Use subtitleslangs = ['all']
507 Downloads all the subtitles of the video
508 (requires writesubtitles or writeautomaticsub)
509 include_ads: - Doesn't work
510 Download ads as well
511 call_home: - Not implemented
512 Boolean, true iff we are allowed to contact the
513 yt-dlp servers for debugging.
514 post_hooks: - Register a custom postprocessor
515 A list of functions that get called as the final step
516 for each video file, after all postprocessors have been
517 called. The filename will be passed as the only argument.
518 hls_prefer_native: - Use external_downloader = {'m3u8': 'native'} or {'m3u8': 'ffmpeg'}.
519 Use the native HLS downloader instead of ffmpeg/avconv
520 if True, otherwise use ffmpeg/avconv if False, otherwise
521 use downloader suggested by extractor if None.
522 prefer_ffmpeg: - avconv support is deprecated
523 If False, use avconv instead of ffmpeg if both are available,
524 otherwise prefer ffmpeg.
525 youtube_include_dash_manifest: - Use extractor_args
5d3a0e79 526 If True (default), DASH manifests and related
62bff2c1 527 data will be downloaded and processed by extractor.
528 You can reduce network I/O by disabling it if you don't
529 care about DASH. (only for youtube)
1890fc63 530 youtube_include_hls_manifest: - Use extractor_args
5d3a0e79 531 If True (default), HLS manifests and related
62bff2c1 532 data will be downloaded and processed by extractor.
533 You can reduce network I/O by disabling it if you don't
534 care about HLS. (only for youtube)
8222d8de 535 """
536
86e5f3ed 537 _NUMERIC_FIELDS = {
b8ed0f15 538 'width', 'height', 'asr', 'audio_channels', 'fps',
539 'tbr', 'abr', 'vbr', 'filesize', 'filesize_approx',
e6f21b3d 540 'timestamp', 'release_timestamp',
c9969434 541 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
542 'average_rating', 'comment_count', 'age_limit',
543 'start_time', 'end_time',
544 'chapter_number', 'season_number', 'episode_number',
545 'track_number', 'disc_number', 'release_year',
86e5f3ed 546 }
c9969434 547
6db9c4d5 548 _format_fields = {
549 # NB: Keep in sync with the docstring of extractor/common.py
a44ca5a4 550 'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note',
105bfd90 551 'width', 'height', 'aspect_ratio', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr', 'audio_channels',
d5d1df8a 552 'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx', 'rows', 'columns',
6db9c4d5 553 'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start',
554 'preference', 'language', 'language_preference', 'quality', 'source_preference',
555 'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'downloader_options',
556 'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'
557 }
48ee10ee 558 _format_selection_exts = {
8dc59305 559 'audio': set(MEDIA_EXTENSIONS.common_audio),
560 'video': set(MEDIA_EXTENSIONS.common_video + ('3gp', )),
561 'storyboards': set(MEDIA_EXTENSIONS.storyboards),
48ee10ee 562 }
563
3511266b 564 def __init__(self, params=None, auto_init=True):
883d4b1e 565 """Create a FileDownloader object with the given options.
566 @param auto_init Whether to load the default extractors and print header (if verbose).
49a57e70 567 Set to 'no_verbose_header' to not print the header
883d4b1e 568 """
e9f9a10f 569 if params is None:
570 params = {}
592b7485 571 self.params = params
8b7491c8 572 self._ies = {}
56c73665 573 self._ies_instances = {}
1e43a6f7 574 self._pps = {k: [] for k in POSTPROCESS_WHEN}
b35496d8 575 self._printed_messages = set()
1cf376f5 576 self._first_webpage_request = True
ab8e5e51 577 self._post_hooks = []
933605d7 578 self._progress_hooks = []
819e0531 579 self._postprocessor_hooks = []
8222d8de 580 self._download_retcode = 0
581 self._num_downloads = 0
9c906919 582 self._num_videos = 0
592b7485 583 self._playlist_level = 0
584 self._playlist_urls = set()
a0e07d31 585 self.cache = Cache(self)
34308b30 586
819e0531 587 windows_enable_vt_mode()
591bb9d3 588 stdout = sys.stderr if self.params.get('logtostderr') else sys.stdout
589 self._out_files = Namespace(
590 out=stdout,
591 error=sys.stderr,
592 screen=sys.stderr if self.params.get('quiet') else stdout,
593 console=None if compat_os_name == 'nt' else next(
cf4f42cb 594 filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None)
591bb9d3 595 )
596 self._allow_colors = Namespace(**{
597 type_: not self.params.get('no_color') and supports_terminal_sequences(stream)
64fa820c 598 for type_, stream in self._out_files.items_ if type_ != 'console'
591bb9d3 599 })
819e0531 600
6929b41a 601 # The code is left like this to be reused for future deprecations
602 MIN_SUPPORTED, MIN_RECOMMENDED = (3, 7), (3, 7)
eff42759 603 current_version = sys.version_info[:2]
604 if current_version < MIN_RECOMMENDED:
9d339c41 605 msg = ('Support for Python version %d.%d has been deprecated. '
24093d52 606 'See https://github.com/yt-dlp/yt-dlp/issues/3764 for more details.'
c6e07cf1 607 '\n You will no longer receive updates on this version')
eff42759 608 if current_version < MIN_SUPPORTED:
609 msg = 'Python version %d.%d is no longer supported'
610 self.deprecation_warning(
611 f'{msg}! Please update to Python %d.%d or above' % (*current_version, *MIN_RECOMMENDED))
a61f4b28 612
88acdbc2 613 if self.params.get('allow_unplayable_formats'):
614 self.report_warning(
ec11a9f4 615 f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
819e0531 616 'This is a developer option intended for debugging. \n'
617 ' If you experience any issues while using this option, '
ec11a9f4 618 f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')
88acdbc2 619
497074f0 620 if self.params.get('bidi_workaround', False):
621 try:
622 import pty
623 master, slave = pty.openpty()
624 width = shutil.get_terminal_size().columns
625 width_args = [] if width is None else ['-w', str(width)]
626 sp_kwargs = {'stdin': subprocess.PIPE, 'stdout': slave, 'stderr': self._out_files.error}
627 try:
628 self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
629 except OSError:
630 self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
631 self._output_channel = os.fdopen(master, 'rb')
632 except OSError as ose:
633 if ose.errno == errno.ENOENT:
634 self.report_warning(
635 'Could not find fribidi executable, ignoring --bidi-workaround. '
636 'Make sure that fribidi is an executable file in one of the directories in your $PATH.')
637 else:
638 raise
639
640 self.params['compat_opts'] = set(self.params.get('compat_opts', ()))
641 if auto_init and auto_init != 'no_verbose_header':
642 self.print_debug_header()
643
be5df5ee 644 def check_deprecated(param, option, suggestion):
645 if self.params.get(param) is not None:
86e5f3ed 646 self.report_warning(f'{option} is deprecated. Use {suggestion} instead')
be5df5ee 647 return True
648 return False
649
650 if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
38cce791 651 if self.params.get('geo_verification_proxy') is None:
652 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
653
0d1bb027 654 check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
655 check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
53ed7066 656 check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
0d1bb027 657
49a57e70 658 for msg in self.params.get('_warnings', []):
0d1bb027 659 self.report_warning(msg)
ee8dd27a 660 for msg in self.params.get('_deprecation_warnings', []):
da4db748 661 self.deprecated_feature(msg)
0d1bb027 662
8a82af35 663 if 'list-formats' in self.params['compat_opts']:
ec11a9f4 664 self.params['listformats_table'] = False
665
b5ae35ee 666 if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
b868936c 667 # nooverwrites was unnecessarily changed to overwrites
668 # in 0c3d0f51778b153f65c21906031c2e091fcfb641
669 # This ensures compatibility with both keys
670 self.params['overwrites'] = not self.params['nooverwrites']
b5ae35ee 671 elif self.params.get('overwrites') is None:
672 self.params.pop('overwrites', None)
b868936c 673 else:
674 self.params['nooverwrites'] = not self.params['overwrites']
b9d973be 675
e4221b70 676 if self.params.get('simulate') is None and any((
677 self.params.get('list_thumbnails'),
678 self.params.get('listformats'),
679 self.params.get('listsubtitles'),
680 )):
681 self.params['simulate'] = 'list_only'
682
455a15e2 683 self.params.setdefault('forceprint', {})
684 self.params.setdefault('print_to_file', {})
bb66c247 685
686 # Compatibility with older syntax
ca30f449 687 if not isinstance(params['forceprint'], dict):
455a15e2 688 self.params['forceprint'] = {'video': params['forceprint']}
ca30f449 689
97ec5bc5 690 if auto_init:
97ec5bc5 691 self.add_default_info_extractors()
692
3089bc74 693 if (sys.platform != 'win32'
694 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
455a15e2 695 and not self.params.get('restrictfilenames', False)):
e9137224 696 # Unicode filesystem API will throw errors (#1474, #13027)
34308b30 697 self.report_warning(
6febd1c1 698 'Assuming --restrict-filenames since file system encoding '
1b725173 699 'cannot encode all characters. '
6febd1c1 700 'Set the LC_ALL environment variable to fix this.')
4a98cdbf 701 self.params['restrictfilenames'] = True
34308b30 702
bf1824b3 703 self._parse_outtmpl()
486dd09e 704
187986a8 705 # Creating format selector here allows us to catch syntax errors before the extraction
706 self.format_selector = (
fa9f30b8 707 self.params.get('format') if self.params.get('format') in (None, '-')
093a1710 708 else self.params['format'] if callable(self.params['format'])
187986a8 709 else self.build_format_selector(self.params['format']))
710
8b7539d2 711 # Set http_headers defaults according to std_headers
712 self.params['http_headers'] = merge_headers(std_headers, self.params.get('http_headers', {}))
713
013b50b7 714 hooks = {
715 'post_hooks': self.add_post_hook,
716 'progress_hooks': self.add_progress_hook,
717 'postprocessor_hooks': self.add_postprocessor_hook,
718 }
719 for opt, fn in hooks.items():
720 for ph in self.params.get(opt, []):
721 fn(ph)
71b640cc 722
5bfc8bee 723 for pp_def_raw in self.params.get('postprocessors', []):
724 pp_def = dict(pp_def_raw)
725 when = pp_def.pop('when', 'post_process')
726 self.add_post_processor(
f9934b96 727 get_postprocessor(pp_def.pop('key'))(self, **pp_def),
5bfc8bee 728 when=when)
729
97ec5bc5 730 self._setup_opener()
51fb4995 731 register_socks_protocols()
732
ed39cac5 733 def preload_download_archive(fn):
734 """Preload the archive, if any is specified"""
ae103564 735 archive = set()
ed39cac5 736 if fn is None:
ae103564 737 return archive
941e881e 738 elif not is_path_like(fn):
ae103564 739 return fn
740
49a57e70 741 self.write_debug(f'Loading archive file {fn!r}')
ed39cac5 742 try:
743 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
744 for line in archive_file:
ae103564 745 archive.add(line.strip())
86e5f3ed 746 except OSError as ioe:
ed39cac5 747 if ioe.errno != errno.ENOENT:
748 raise
ae103564 749 return archive
ed39cac5 750
ae103564 751 self.archive = preload_download_archive(self.params.get('download_archive'))
ed39cac5 752
7d4111ed 753 def warn_if_short_id(self, argv):
754 # short YouTube ID starting with dash?
755 idxs = [
756 i for i, a in enumerate(argv)
757 if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
758 if idxs:
759 correct_argv = (
7a5c1cfe 760 ['yt-dlp']
3089bc74 761 + [a for i, a in enumerate(argv) if i not in idxs]
762 + ['--'] + [argv[i] for i in idxs]
7d4111ed 763 )
764 self.report_warning(
765 'Long argument string detected. '
49a57e70 766 'Use -- to separate parameters and URLs, like this:\n%s' %
7d4111ed 767 args_to_str(correct_argv))
768
8222d8de 769 def add_info_extractor(self, ie):
770 """Add an InfoExtractor object to the end of the list."""
8b7491c8 771 ie_key = ie.ie_key()
772 self._ies[ie_key] = ie
e52d7f85 773 if not isinstance(ie, type):
8b7491c8 774 self._ies_instances[ie_key] = ie
e52d7f85 775 ie.set_downloader(self)
8222d8de 776
56c73665 777 def get_info_extractor(self, ie_key):
778 """
779 Get an instance of an IE with name ie_key. It will try to get one from
780 the _ies list; if there is no instance, it will create a new one and add
781 it to the extractor list.
782 """
783 ie = self._ies_instances.get(ie_key)
784 if ie is None:
785 ie = get_info_extractor(ie_key)()
786 self.add_info_extractor(ie)
787 return ie
788
023fa8c4 789 def add_default_info_extractors(self):
790 """
791 Add the InfoExtractors returned by gen_extractors to the end of the list
792 """
fe7866d0 793 all_ies = {ie.IE_NAME.lower(): ie for ie in gen_extractor_classes()}
794 all_ies['end'] = UnsupportedURLIE()
795 try:
796 ie_names = orderedSet_from_options(
797 self.params.get('allowed_extractors', ['default']), {
798 'all': list(all_ies),
799 'default': [name for name, ie in all_ies.items() if ie._ENABLED],
800 }, use_regex=True)
801 except re.error as e:
802 raise ValueError(f'Wrong regex for allowed_extractors: {e.pattern}')
803 for name in ie_names:
804 self.add_info_extractor(all_ies[name])
805 self.write_debug(f'Loaded {len(ie_names)} extractors')
023fa8c4 806
56d868db 807 def add_post_processor(self, pp, when='post_process'):
8222d8de 808 """Add a PostProcessor object to the end of the chain."""
8aa0e7cd 809 assert when in POSTPROCESS_WHEN, f'Invalid when={when}'
5bfa4862 810 self._pps[when].append(pp)
8222d8de 811 pp.set_downloader(self)
812
ab8e5e51 813 def add_post_hook(self, ph):
814 """Add the post hook"""
815 self._post_hooks.append(ph)
816
933605d7 817 def add_progress_hook(self, ph):
819e0531 818 """Add the download progress hook"""
933605d7 819 self._progress_hooks.append(ph)
8ab470f1 820
819e0531 821 def add_postprocessor_hook(self, ph):
822 """Add the postprocessing progress hook"""
823 self._postprocessor_hooks.append(ph)
5bfc8bee 824 for pps in self._pps.values():
825 for pp in pps:
826 pp.add_progress_hook(ph)
819e0531 827
1c088fa8 828 def _bidi_workaround(self, message):
5d681e96 829 if not hasattr(self, '_output_channel'):
1c088fa8 830 return message
831
5d681e96 832 assert hasattr(self, '_output_process')
14f25df2 833 assert isinstance(message, str)
6febd1c1 834 line_count = message.count('\n') + 1
0f06bcd7 835 self._output_process.stdin.write((message + '\n').encode())
5d681e96 836 self._output_process.stdin.flush()
0f06bcd7 837 res = ''.join(self._output_channel.readline().decode()
9e1a5b84 838 for _ in range(line_count))
6febd1c1 839 return res[:-len('\n')]
1c088fa8 840
b35496d8 841 def _write_string(self, message, out=None, only_once=False):
842 if only_once:
843 if message in self._printed_messages:
844 return
845 self._printed_messages.add(message)
846 write_string(message, out=out, encoding=self.params.get('encoding'))
734f90bb 847
cf4f42cb 848 def to_stdout(self, message, skip_eol=False, quiet=None):
0760b0a7 849 """Print message to stdout"""
cf4f42cb 850 if quiet is not None:
da4db748 851 self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument quiet. '
852 'Use "YoutubeDL.to_screen" instead')
8a82af35 853 if skip_eol is not False:
da4db748 854 self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument skip_eol. '
855 'Use "YoutubeDL.to_screen" instead')
0bf9dc1e 856 self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.out)
cf4f42cb 857
dfea94f8 858 def to_screen(self, message, skip_eol=False, quiet=None, only_once=False):
cf4f42cb 859 """Print message to screen if not in quiet mode"""
8bf9319e 860 if self.params.get('logger'):
43afe285 861 self.params['logger'].debug(message)
cf4f42cb 862 return
863 if (self.params.get('quiet') if quiet is None else quiet) and not self.params.get('verbose'):
864 return
865 self._write_string(
866 '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
dfea94f8 867 self._out_files.screen, only_once=only_once)
8222d8de 868
b35496d8 869 def to_stderr(self, message, only_once=False):
0760b0a7 870 """Print message to stderr"""
14f25df2 871 assert isinstance(message, str)
8bf9319e 872 if self.params.get('logger'):
43afe285 873 self.params['logger'].error(message)
874 else:
5792c950 875 self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.error, only_once=only_once)
cf4f42cb 876
877 def _send_console_code(self, code):
591bb9d3 878 if compat_os_name == 'nt' or not self._out_files.console:
cf4f42cb 879 return
591bb9d3 880 self._write_string(code, self._out_files.console)
8222d8de 881
1e5b9a95 882 def to_console_title(self, message):
883 if not self.params.get('consoletitle', False):
884 return
3efb96a6 885 message = remove_terminal_sequences(message)
4bede0d8 886 if compat_os_name == 'nt':
887 if ctypes.windll.kernel32.GetConsoleWindow():
888 # c_wchar_p() might not be necessary if `message` is
889 # already of type unicode()
890 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
cf4f42cb 891 else:
892 self._send_console_code(f'\033]0;{message}\007')
1e5b9a95 893
bdde425c 894 def save_console_title(self):
cf4f42cb 895 if not self.params.get('consoletitle') or self.params.get('simulate'):
bdde425c 896 return
592b7485 897 self._send_console_code('\033[22;0t') # Save the title on stack
bdde425c 898
899 def restore_console_title(self):
cf4f42cb 900 if not self.params.get('consoletitle') or self.params.get('simulate'):
bdde425c 901 return
592b7485 902 self._send_console_code('\033[23;0t') # Restore the title from stack
bdde425c 903
904 def __enter__(self):
905 self.save_console_title()
906 return self
907
908 def __exit__(self, *args):
909 self.restore_console_title()
f89197d7 910
dca08720 911 if self.params.get('cookiefile') is not None:
1bab3437 912 self.cookiejar.save(ignore_discard=True, ignore_expires=True)
bdde425c 913
fa9f30b8 914 def trouble(self, message=None, tb=None, is_error=True):
8222d8de 915 """Determine action to take when a download problem appears.
916
917 Depending on whether the downloader has been configured to ignore
918 download errors or not, this method may or may not throw an exception
919 when errors are found, after printing the message.
920
fa9f30b8 921 @param tb If given, is additional traceback information
922 @param is_error Whether to raise error according to ignorerrors
8222d8de
JMF
923 """
924 if message is not None:
925 self.to_stderr(message)
926 if self.params.get('verbose'):
927 if tb is None:
928 if sys.exc_info()[0]: # if .trouble has been called from an except block
6febd1c1 929 tb = ''
8222d8de 930 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
6febd1c1 931 tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
c0384f22 932 tb += encode_compat_str(traceback.format_exc())
8222d8de 933 else:
934 tb_data = traceback.format_list(traceback.extract_stack())
6febd1c1 935 tb = ''.join(tb_data)
c19bc311 936 if tb:
937 self.to_stderr(tb)
fa9f30b8 938 if not is_error:
939 return
b1940459 940 if not self.params.get('ignoreerrors'):
8222d8de 941 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
942 exc_info = sys.exc_info()[1].exc_info
943 else:
944 exc_info = sys.exc_info()
945 raise DownloadError(message, exc_info)
946 self._download_retcode = 1
947
19a03940 948 Styles = Namespace(
949 HEADERS='yellow',
950 EMPHASIS='light blue',
492272fe 951 FILENAME='green',
19a03940 952 ID='green',
953 DELIM='blue',
954 ERROR='red',
955 WARNING='yellow',
956 SUPPRESS='light black',
957 )
ec11a9f4 958
7578d77d 959 def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
e5a998f3 960 text = str(text)
ec11a9f4 961 if test_encoding:
962 original_text = text
5c104538 963 # handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711
964 encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii'
ec11a9f4 965 text = text.encode(encoding, 'ignore').decode(encoding)
966 if fallback is not None and text != original_text:
967 text = fallback
7578d77d 968 return format_text(text, f) if allow_colors else text if fallback is None else fallback
ec11a9f4 969
591bb9d3 970 def _format_out(self, *args, **kwargs):
971 return self._format_text(self._out_files.out, self._allow_colors.out, *args, **kwargs)
972
ec11a9f4 973 def _format_screen(self, *args, **kwargs):
591bb9d3 974 return self._format_text(self._out_files.screen, self._allow_colors.screen, *args, **kwargs)
ec11a9f4 975
976 def _format_err(self, *args, **kwargs):
591bb9d3 977 return self._format_text(self._out_files.error, self._allow_colors.error, *args, **kwargs)
819e0531 978
c84aeac6 979 def report_warning(self, message, only_once=False):
8222d8de 980 '''
981 Print the message to stderr, it will be prefixed with 'WARNING:'
982 If stderr is a tty file the 'WARNING:' will be colored
983 '''
6d07ce01 984 if self.params.get('logger') is not None:
985 self.params['logger'].warning(message)
8222d8de 986 else:
ad8915b7 987 if self.params.get('no_warnings'):
988 return
ec11a9f4 989 self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)
8222d8de 990
da4db748 991 def deprecation_warning(self, message, *, stacklevel=0):
992 deprecation_warning(
993 message, stacklevel=stacklevel + 1, printer=self.report_error, is_error=False)
994
995 def deprecated_feature(self, message):
ee8dd27a 996 if self.params.get('logger') is not None:
da4db748 997 self.params['logger'].warning(f'Deprecated Feature: {message}')
998 self.to_stderr(f'{self._format_err("Deprecated Feature:", self.Styles.ERROR)} {message}', True)
ee8dd27a 999
fa9f30b8 1000 def report_error(self, message, *args, **kwargs):
8222d8de 1001 '''
1002 Do the same as trouble, but prefixes the message with 'ERROR:', colored
1003 in red if stderr is a tty file.
1004 '''
fa9f30b8 1005 self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)
8222d8de 1006
b35496d8 1007 def write_debug(self, message, only_once=False):
0760b0a7 1008 '''Log debug message or Print message to stderr'''
1009 if not self.params.get('verbose', False):
1010 return
8a82af35 1011 message = f'[debug] {message}'
0760b0a7 1012 if self.params.get('logger'):
1013 self.params['logger'].debug(message)
1014 else:
b35496d8 1015 self.to_stderr(message, only_once)
0760b0a7 1016
8222d8de 1017 def report_file_already_downloaded(self, file_name):
1018 """Report file has already been fully downloaded."""
1019 try:
6febd1c1 1020 self.to_screen('[download] %s has already been downloaded' % file_name)
ce02ed60 1021 except UnicodeEncodeError:
6febd1c1 1022 self.to_screen('[download] The file has already been downloaded')
8222d8de 1023
0c3d0f51 1024 def report_file_delete(self, file_name):
1025 """Report that existing file will be deleted."""
1026 try:
c25228e5 1027 self.to_screen('Deleting existing file %s' % file_name)
0c3d0f51 1028 except UnicodeEncodeError:
c25228e5 1029 self.to_screen('Deleting existing file')
0c3d0f51 1030
319b6059 1031 def raise_no_formats(self, info, forced=False, *, msg=None):
0a5a191a 1032 has_drm = info.get('_has_drm')
319b6059 1033 ignored, expected = self.params.get('ignore_no_formats_error'), bool(msg)
1034 msg = msg or has_drm and 'This video is DRM protected' or 'No video formats found!'
1035 if forced or not ignored:
1151c407 1036 raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
319b6059 1037 expected=has_drm or ignored or expected)
88acdbc2 1038 else:
1039 self.report_warning(msg)
1040
de6000d9 1041 def parse_outtmpl(self):
bf1824b3 1042 self.deprecation_warning('"YoutubeDL.parse_outtmpl" is deprecated and may be removed in a future version')
1043 self._parse_outtmpl()
1044 return self.params['outtmpl']
1045
1046 def _parse_outtmpl(self):
7b2c3f47 1047 sanitize = IDENTITY
bf1824b3 1048 if self.params.get('restrictfilenames'): # Remove spaces in the default template
71ce444a 1049 sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')
bf1824b3 1050
1051 outtmpl = self.params.setdefault('outtmpl', {})
1052 if not isinstance(outtmpl, dict):
1053 self.params['outtmpl'] = outtmpl = {'default': outtmpl}
1054 outtmpl.update({k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items() if outtmpl.get(k) is None})
de6000d9 1055
21cd8fae 1056 def get_output_path(self, dir_type='', filename=None):
1057 paths = self.params.get('paths', {})
d2c8aadf 1058 assert isinstance(paths, dict), '"paths" parameter must be a dictionary'
21cd8fae 1059 path = os.path.join(
1060 expand_path(paths.get('home', '').strip()),
1061 expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
1062 filename or '')
21cd8fae 1063 return sanitize_path(path, force=self.params.get('windowsfilenames'))
1064
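    # Illustrative example (not from the original source): assuming
    # params['paths'] == {'home': '/media', 'subtitle': 'subs'}, then
    # self.get_output_path('subtitle', 'video.en.srt') would return
    # '/media/subs/video.en.srt' (after sanitize_path is applied).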
76a264ac 1065 @staticmethod
901130bb 1066 def _outtmpl_expandpath(outtmpl):
1067 # expand_path translates '%%' into '%' and '$$' into '$'
1068 # correspondingly that is not what we want since we need to keep
1069 # '%%' intact for template dict substitution step. Working around
1070 # with boundary-alike separator hack.
efa944f4 1071 sep = ''.join(random.choices(ascii_letters, k=32))
86e5f3ed 1072 outtmpl = outtmpl.replace('%%', f'%{sep}%').replace('$$', f'${sep}$')
901130bb 1073
1074 # outtmpl should be expand_path'ed before template dict substitution
1075 # because meta fields may contain env variables we don't want to
62b58c09 1076 # be expanded. E.g. for outtmpl "%(title)s.%(ext)s" and
901130bb 1077 # title "Hello $PATH", we don't want `$PATH` to be expanded.
1078 return expand_path(outtmpl).replace(sep, '')
1079
1080 @staticmethod
1081 def escape_outtmpl(outtmpl):
1082 ''' Escape any remaining strings like %s, %abc% etc. '''
1083 return re.sub(
1084 STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
1085 lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
1086 outtmpl)
1087
1088 @classmethod
1089 def validate_outtmpl(cls, outtmpl):
76a264ac 1090 ''' @return None or Exception object '''
7d1eb38a 1091 outtmpl = re.sub(
47cdc68e 1092 STR_FORMAT_RE_TMPL.format('[^)]*', '[ljhqBUDS]'),
7d1eb38a 1093 lambda mobj: f'{mobj.group(0)[:-1]}s',
1094 cls._outtmpl_expandpath(outtmpl))
76a264ac 1095 try:
7d1eb38a 1096 cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
76a264ac 1097 return None
1098 except ValueError as err:
1099 return err
1100
03b4de72 1101 @staticmethod
1102 def _copy_infodict(info_dict):
1103 info_dict = dict(info_dict)
09b49e1f 1104 info_dict.pop('__postprocessors', None)
415f8d51 1105 info_dict.pop('__pending_error', None)
03b4de72 1106 return info_dict
1107
e0fd9573 1108 def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
1109 """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
1110 @param sanitize Whether to sanitize the output as a filename.
1111 For backward compatibility, a function can also be passed
1112 """
1113
6e84b215 1114 info_dict.setdefault('epoch', int(time.time())) # keep epoch consistent once set
143db31d 1115
03b4de72 1116 info_dict = self._copy_infodict(info_dict)
752cda38 1117 info_dict['duration_string'] = ( # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
53c18592 1118 formatSeconds(info_dict['duration'], '-' if sanitize else ':')
143db31d 1119 if info_dict.get('duration', None) is not None
1120 else None)
1d485a1a 1121 info_dict['autonumber'] = int(self.params.get('autonumber_start', 1) - 1 + self._num_downloads)
9c906919 1122 info_dict['video_autonumber'] = self._num_videos
752cda38 1123 if info_dict.get('resolution') is None:
1124 info_dict['resolution'] = self.format_resolution(info_dict, default=None)
143db31d 1125
e6f21b3d 1126 # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
143db31d 1127 # of %(field)s to %(field)0Nd for backward compatibility
1128 field_size_compat_map = {
0a5a191a 1129 'playlist_index': number_of_digits(info_dict.get('__last_playlist_index') or 0),
ec11a9f4 1130 'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
752cda38 1131 'autonumber': self.params.get('autonumber_size') or 5,
143db31d 1132 }
752cda38 1133
385a27fa 1134 TMPL_DICT = {}
47cdc68e 1135 EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljhqBUDS]'))
385a27fa 1136 MATH_FUNCTIONS = {
1137 '+': float.__add__,
1138 '-': float.__sub__,
1139 }
e625be0d 1140 # Field is of the form key1.key2...
07a1250e 1141 # where keys (except first) can be string, int, slice or "{field, ...}"
1142 FIELD_INNER_RE = r'(?:\w+|%(num)s|%(num)s?(?::%(num)s?){1,2})' % {'num': r'(?:-?\d+)'}
1143 FIELD_RE = r'\w*(?:\.(?:%(inner)s|{%(field)s(?:,%(field)s)*}))*' % {
1144 'inner': FIELD_INNER_RE,
1145 'field': rf'\w*(?:\.{FIELD_INNER_RE})*'
1146 }
1d485a1a 1147 MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})'
385a27fa 1148 MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
1d485a1a 1149 INTERNAL_FORMAT_RE = re.compile(rf'''(?x)
e625be0d 1150 (?P<negate>-)?
1d485a1a 1151 (?P<fields>{FIELD_RE})
1152 (?P<maths>(?:{MATH_OPERATORS_RE}{MATH_FIELD_RE})*)
e625be0d 1153 (?:>(?P<strf_format>.+?))?
34baa9fd 1154 (?P<remaining>
1155 (?P<alternate>(?<!\\),[^|&)]+)?
1156 (?:&(?P<replacement>.*?))?
1157 (?:\|(?P<default>.*?))?
1d485a1a 1158 )$''')
752cda38 1159
07a1250e 1160 def _traverse_infodict(fields):
1161 fields = [f for x in re.split(r'\.({.+?})\.?', fields)
1162 for f in ([x] if x.startswith('{') else x.split('.'))]
1163 for i in (0, -1):
1164 if fields and not fields[i]:
1165 fields.pop(i)
1166
1167 for i, f in enumerate(fields):
1168 if not f.startswith('{'):
1169 continue
1170 assert f.endswith('}'), f'No closing brace for {f} in {fields}'
1171 fields[i] = {k: k.split('.') for k in f[1:-1].split(',')}
1172
1173 return traverse_obj(info_dict, fields, is_user_input=True, traverse_string=True)
76a264ac 1174
752cda38 1175 def get_value(mdict):
1176 # Object traversal
2b8a2973 1177 value = _traverse_infodict(mdict['fields'])
752cda38 1178 # Negative
1179 if mdict['negate']:
1180 value = float_or_none(value)
1181 if value is not None:
1182 value *= -1
1183 # Do maths
385a27fa 1184 offset_key = mdict['maths']
1185 if offset_key:
752cda38 1186 value = float_or_none(value)
1187 operator = None
385a27fa 1188 while offset_key:
1189 item = re.match(
1190 MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
1191 offset_key).group(0)
1192 offset_key = offset_key[len(item):]
1193 if operator is None:
752cda38 1194 operator = MATH_FUNCTIONS[item]
385a27fa 1195 continue
1196 item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
1197 offset = float_or_none(item)
1198 if offset is None:
2b8a2973 1199 offset = float_or_none(_traverse_infodict(item))
385a27fa 1200 try:
1201 value = operator(value, multiplier * offset)
1202 except (TypeError, ZeroDivisionError):
1203 return None
1204 operator = None
752cda38 1205 # Datetime formatting
1206 if mdict['strf_format']:
7c37ff97 1207 value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))
752cda38 1208
a6bcaf71 1209 # XXX: Workaround for https://github.com/yt-dlp/yt-dlp/issues/4485
1210 if sanitize and value == '':
1211 value = None
752cda38 1212 return value
1213
b868936c 1214 na = self.params.get('outtmpl_na_placeholder', 'NA')
1215
e0fd9573 1216 def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
5c3895ff 1217 return sanitize_filename(str(value), restricted=restricted, is_id=(
1218 bool(re.search(r'(^|[_.])id(\.|$)', key))
8a82af35 1219 if 'filename-sanitization' in self.params['compat_opts']
5c3895ff 1220 else NO_DEFAULT))
e0fd9573 1221
1222 sanitizer = sanitize if callable(sanitize) else filename_sanitizer
1223 sanitize = bool(sanitize)
1224
6e84b215 1225 def _dumpjson_default(obj):
1226 if isinstance(obj, (set, LazyList)):
1227 return list(obj)
adbc4ec4 1228 return repr(obj)
6e84b215 1229
752cda38 1230 def create_key(outer_mobj):
1231 if not outer_mobj.group('has_key'):
b836dc94 1232 return outer_mobj.group(0)
752cda38 1233 key = outer_mobj.group('key')
752cda38 1234 mobj = re.match(INTERNAL_FORMAT_RE, key)
e0fd9573 1235 initial_field = mobj.group('fields') if mobj else ''
e978789f 1236 value, replacement, default = None, None, na
7c37ff97 1237 while mobj:
e625be0d 1238 mobj = mobj.groupdict()
7c37ff97 1239 default = mobj['default'] if mobj['default'] is not None else default
752cda38 1240 value = get_value(mobj)
e978789f 1241 replacement = mobj['replacement']
7c37ff97 1242 if value is None and mobj['alternate']:
34baa9fd 1243 mobj = re.match(INTERNAL_FORMAT_RE, mobj['remaining'][1:])
7c37ff97 1244 else:
1245 break
752cda38 1246
b868936c 1247 fmt = outer_mobj.group('format')
752cda38 1248 if fmt == 's' and value is not None and key in field_size_compat_map.keys():
86e5f3ed 1249 fmt = f'0{field_size_compat_map[key]:d}d'
752cda38 1250
e978789f 1251 value = default if value is None else value if replacement is None else replacement
752cda38 1252
4476d2c7 1253 flags = outer_mobj.group('conversion') or ''
7d1eb38a 1254 str_fmt = f'{fmt[:-1]}s'
524e2e4f 1255 if fmt[-1] == 'l': # list
4476d2c7 1256 delim = '\n' if '#' in flags else ', '
9e907ebd 1257 value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
524e2e4f 1258 elif fmt[-1] == 'j': # json
deae7c17 1259 value, fmt = json.dumps(
1260 value, default=_dumpjson_default,
9b9dad11 1261 indent=4 if '#' in flags else None, ensure_ascii='+' not in flags), str_fmt
47cdc68e 1262 elif fmt[-1] == 'h': # html
deae7c17 1263 value, fmt = escapeHTML(str(value)), str_fmt
524e2e4f 1264 elif fmt[-1] == 'q': # quoted
4476d2c7 1265 value = map(str, variadic(value) if '#' in flags else [value])
1266 value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
524e2e4f 1267 elif fmt[-1] == 'B': # bytes
0f06bcd7 1268 value = f'%{str_fmt}'.encode() % str(value).encode()
f5aa5cfb 1269 value, fmt = value.decode('utf-8', 'ignore'), 's'
524e2e4f 1270 elif fmt[-1] == 'U': # unicode normalized
524e2e4f 1271 value, fmt = unicodedata.normalize(
1272 # "+" = compatibility equivalence, "#" = NFD
4476d2c7 1273 'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
524e2e4f 1274 value), str_fmt
e0fd9573 1275 elif fmt[-1] == 'D': # decimal suffix
abbeeebc 1276 num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
1277 value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
1278 factor=1024 if '#' in flags else 1000)
37893bb0 1279 elif fmt[-1] == 'S': # filename sanitization
e0fd9573 1280 value, fmt = filename_sanitizer(initial_field, value, restricted='#' in flags), str_fmt
7d1eb38a 1281 elif fmt[-1] == 'c':
524e2e4f 1282 if value:
1283 value = str(value)[0]
76a264ac 1284 else:
524e2e4f 1285 fmt = str_fmt
76a264ac 1286 elif fmt[-1] not in 'rs': # numeric
a439a3a4 1287 value = float_or_none(value)
752cda38 1288 if value is None:
1289 value, fmt = default, 's'
901130bb 1290
752cda38 1291 if sanitize:
1292 if fmt[-1] == 'r':
1293 # If value is an object, sanitize might convert it to a string
1294 # So we convert it to repr first
7d1eb38a 1295 value, fmt = repr(value), str_fmt
639f1cea 1296 if fmt[-1] in 'csr':
e0fd9573 1297 value = sanitizer(initial_field, value)
901130bb 1298
b868936c 1299 key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
385a27fa 1300 TMPL_DICT[key] = value
b868936c 1301 return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))
752cda38 1302
385a27fa 1303 return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
143db31d 1304
819e0531 1305 def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
1306 outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
1307 return self.escape_outtmpl(outtmpl) % info_dict
1308
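# A rough sketch of the outtmpl mini-language expanded above; field names and
# values here are illustrative only, not an exhaustive spec:
#   '%(title)s'                                  plain field lookup
#   '%(formats.0.format_id)s'                    object traversal via "."
#   '%(epoch-3600>%H-%M-%S)s'                    maths, then strftime via ">"
#   '%(release_date>%Y,upload_date>%Y|Unknown)s' "," alternates, "|" default
#   '%(formats)#j'                               "j" conversion dumps the field as JSON
# e.g., assuming an already extracted info dict `info`:
#   self.evaluate_outtmpl('%(title).50s [%(id)s].%(ext)s', info)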
5127e92a 1309 def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None):
1310 assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive'
1311 if outtmpl is None:
bf1824b3 1312 outtmpl = self.params['outtmpl'].get(tmpl_type or 'default', self.params['outtmpl']['default'])
8222d8de 1313 try:
5127e92a 1314 outtmpl = self._outtmpl_expandpath(outtmpl)
e0fd9573 1315 filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
6a0546e3 1316 if not filename:
1317 return None
15da37c7 1318
5127e92a 1319 if tmpl_type in ('', 'temp'):
6a0546e3 1320 final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')
1321 if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):
1322 filename = replace_extension(filename, ext, final_ext)
5127e92a 1323 elif tmpl_type:
6a0546e3 1324 force_ext = OUTTMPL_TYPES[tmpl_type]
1325 if force_ext:
1326 filename = replace_extension(filename, force_ext, info_dict.get('ext'))
de6000d9 1327
bdc3fd2f
U
1328 # https://github.com/blackjack4494/youtube-dlc/issues/85
1329 trim_file_name = self.params.get('trim_file_name', False)
1330 if trim_file_name:
5c22c63d 1331 no_ext, *ext = filename.rsplit('.', 2)
1332 filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')
bdc3fd2f 1333
0202b52a 1334 return filename
8222d8de 1335 except ValueError as err:
6febd1c1 1336 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
8222d8de
JMF
1337 return None
1338
5127e92a 1339 def prepare_filename(self, info_dict, dir_type='', *, outtmpl=None, warn=False):
1340 """Generate the output filename"""
1341 if outtmpl:
1342 assert not dir_type, 'outtmpl and dir_type are mutually exclusive'
1343 dir_type = None
1344 filename = self._prepare_filename(info_dict, tmpl_type=dir_type, outtmpl=outtmpl)
80c03fa9 1345 if not filename and dir_type not in ('', 'temp'):
1346 return ''
de6000d9 1347
c84aeac6 1348 if warn:
21cd8fae 1349 if not self.params.get('paths'):
de6000d9 1350 pass
1351 elif filename == '-':
c84aeac6 1352 self.report_warning('--paths is ignored when outputting to stdout', only_once=True)
de6000d9 1353 elif os.path.isabs(filename):
c84aeac6 1354 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
de6000d9 1355 if filename == '-' or not filename:
1356 return filename
1357
21cd8fae 1358 return self.get_output_path(dir_type, filename)
0202b52a 1359
120fe513 1360 def _match_entry(self, info_dict, incomplete=False, silent=False):
6368e2e6 1361 """Returns None if the file should be downloaded"""
d7b460d0 1362 _type = info_dict.get('_type', 'video')
1363 assert incomplete or _type == 'video', 'Only video result can be considered complete'
8222d8de 1364
3bec830a 1365 video_title = info_dict.get('title', info_dict.get('id', 'entry'))
c77495e3 1366
8b0d7497 1367 def check_filter():
d7b460d0 1368 if _type in ('playlist', 'multi_video'):
1369 return
1370 elif _type in ('url', 'url_transparent') and not try_call(
1371 lambda: self.get_info_extractor(info_dict['ie_key']).is_single_video(info_dict['url'])):
1372 return
1373
8b0d7497 1374 if 'title' in info_dict:
1375 # This can happen when we're just evaluating the playlist
1376 title = info_dict['title']
1377 matchtitle = self.params.get('matchtitle', False)
1378 if matchtitle:
1379 if not re.search(matchtitle, title, re.IGNORECASE):
1380 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1381 rejecttitle = self.params.get('rejecttitle', False)
1382 if rejecttitle:
1383 if re.search(rejecttitle, title, re.IGNORECASE):
1384 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
6368e2e6 1385
8b0d7497 1386 date = info_dict.get('upload_date')
1387 if date is not None:
1388 dateRange = self.params.get('daterange', DateRange())
1389 if date not in dateRange:
86e5f3ed 1390 return f'{date_from_str(date).isoformat()} upload date is not in range {dateRange}'
8b0d7497 1391 view_count = info_dict.get('view_count')
1392 if view_count is not None:
1393 min_views = self.params.get('min_views')
1394 if min_views is not None and view_count < min_views:
1395 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1396 max_views = self.params.get('max_views')
1397 if max_views is not None and view_count > max_views:
1398 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1399 if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1400 return 'Skipping "%s" because it is age restricted' % video_title
8b0d7497 1401
8f18aca8 1402 match_filter = self.params.get('match_filter')
1403 if match_filter is not None:
1404 try:
1405 ret = match_filter(info_dict, incomplete=incomplete)
1406 except TypeError:
1407 # For backward compatibility
1408 ret = None if incomplete else match_filter(info_dict)
492272fe 1409 if ret is NO_DEFAULT:
1410 while True:
1411 filename = self._format_screen(self.prepare_filename(info_dict), self.Styles.FILENAME)
1412 reply = input(self._format_screen(
1413 f'Download "{filename}"? (Y/n): ', self.Styles.EMPHASIS)).lower().strip()
1414 if reply in {'y', ''}:
1415 return None
1416 elif reply == 'n':
1417 return f'Skipping {video_title}'
492272fe 1418 elif ret is not None:
8f18aca8 1419 return ret
8b0d7497 1420 return None
1421
c77495e3 1422 if self.in_download_archive(info_dict):
1423 reason = '%s has already been recorded in the archive' % video_title
1424 break_opt, break_err = 'break_on_existing', ExistingVideoReached
1425 else:
1426 reason = check_filter()
1427 break_opt, break_err = 'break_on_reject', RejectedVideoReached
8b0d7497 1428 if reason is not None:
120fe513 1429 if not silent:
1430 self.to_screen('[download] ' + reason)
c77495e3 1431 if self.params.get(break_opt, False):
1432 raise break_err()
8b0d7497 1433 return reason
fe7e0c98 1434
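# params['match_filter'] consulted above may be any callable of this shape;
# the function below is only a hypothetical example, not part of the API:
#   def longer_than_a_minute(info_dict, *, incomplete):
#       if not incomplete and (info_dict.get('duration') or 0) < 60:
#           return 'Skipping short video'   # a string is the reason to skip
#       return None                         # None means download
#   ydl_opts = {'match_filter': longer_than_a_minute}
# yt_dlp.utils.match_filter_func() builds comparable callables from
# --match-filter style strings.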
b6c45014
JMF
1435 @staticmethod
1436 def add_extra_info(info_dict, extra_info):
1437 '''Set the keys from extra_info in info dict if they are missing'''
1438 for key, value in extra_info.items():
1439 info_dict.setdefault(key, value)
1440
409e1828 1441 def extract_info(self, url, download=True, ie_key=None, extra_info=None,
61aa5ba3 1442 process=True, force_generic_extractor=False):
41d1cca3 1443 """
17ffed18 1444 Extract and return the information dictionary of the URL
41d1cca3 1445
1446 Arguments:
17ffed18 1447 @param url URL to extract
41d1cca3 1448
1449 Keyword arguments:
17ffed18 1450 @param download Whether to download videos
1451 @param process Whether to resolve all unresolved references (URLs, playlist items).
1452 Must be True for download to work
1453 @param ie_key Use only the extractor with this key
1454
1455 @param extra_info Dictionary containing the extra values to add to the info (For internal use only)
1456 @force_generic_extractor Force using the generic extractor (Deprecated; use ie_key='Generic')
41d1cca3 1457 """
fe7e0c98 1458
409e1828 1459 if extra_info is None:
1460 extra_info = {}
1461
61aa5ba3 1462 if not ie_key and force_generic_extractor:
d22dec74
S
1463 ie_key = 'Generic'
1464
8222d8de 1465 if ie_key:
fe7866d0 1466 ies = {ie_key: self._ies[ie_key]} if ie_key in self._ies else {}
8222d8de
JMF
1467 else:
1468 ies = self._ies
1469
fe7866d0 1470 for key, ie in ies.items():
8222d8de
JMF
1471 if not ie.suitable(url):
1472 continue
1473
1474 if not ie.working():
6febd1c1
PH
1475 self.report_warning('The program functionality for this site has been marked as broken, '
1476 'and will probably not work.')
8222d8de 1477
1151c407 1478 temp_id = ie.get_temp_id(url)
fe7866d0 1479 if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': key}):
1480 self.to_screen(f'[{key}] {temp_id}: has already been recorded in the archive')
5e5be0c0 1481 if self.params.get('break_on_existing', False):
1482 raise ExistingVideoReached()
a0566bbf 1483 break
fe7866d0 1484 return self.__extract_info(url, self.get_info_extractor(key), download, extra_info, process)
a0566bbf 1485 else:
fe7866d0 1486 extractors_restricted = self.params.get('allowed_extractors') not in (None, ['default'])
1487 self.report_error(f'No suitable extractor{format_field(ie_key, None, " (%s)")} found for URL {url}',
1488 tb=False if extractors_restricted else None)
a0566bbf 1489
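# A minimal embedding sketch (the options shown are assumptions, not defaults):
#   from yt_dlp import YoutubeDL
#   with YoutubeDL({'quiet': True}) as ydl:
#       info = ydl.extract_info('https://example.com/video', download=False)
# With process=True (the default) all references are resolved; with
# download=True the media is also downloaded as a side effect.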
7e88d7d7 1490 def _handle_extraction_exceptions(func):
b5ae35ee 1491 @functools.wraps(func)
a0566bbf 1492 def wrapper(self, *args, **kwargs):
6da22e7d 1493 while True:
1494 try:
1495 return func(self, *args, **kwargs)
1496 except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
8222d8de 1497 raise
6da22e7d 1498 except ReExtractInfo as e:
1499 if e.expected:
1500 self.to_screen(f'{e}; Re-extracting data')
1501 else:
1502 self.to_stderr('\r')
1503 self.report_warning(f'{e}; Re-extracting data')
1504 continue
1505 except GeoRestrictedError as e:
1506 msg = e.msg
1507 if e.countries:
1508 msg += '\nThis video is available in %s.' % ', '.join(
1509 map(ISO3166Utils.short2full, e.countries))
1510 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to work around this.'
1511 self.report_error(msg)
1512 except ExtractorError as e: # An error we somewhat expected
1513 self.report_error(str(e), e.format_traceback())
1514 except Exception as e:
1515 if self.params.get('ignoreerrors'):
1516 self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
1517 else:
1518 raise
1519 break
a0566bbf 1520 return wrapper
1521
693f0600 1522 def _wait_for_video(self, ie_result={}):
f2ebc5c7 1523 if (not self.params.get('wait_for_video')
1524 or ie_result.get('_type', 'video') != 'video'
1525 or ie_result.get('formats') or ie_result.get('url')):
1526 return
1527
1528 format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
1529 last_msg = ''
1530
1531 def progress(msg):
1532 nonlocal last_msg
a7dc6a89 1533 full_msg = f'{msg}\n'
1534 if not self.params.get('noprogress'):
1535 full_msg = msg + ' ' * (len(last_msg) - len(msg)) + '\r'
1536 elif last_msg:
1537 return
1538 self.to_screen(full_msg, skip_eol=True)
f2ebc5c7 1539 last_msg = msg
1540
1541 min_wait, max_wait = self.params.get('wait_for_video')
1542 diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
1543 if diff is None and ie_result.get('live_status') == 'is_upcoming':
16c620bc 1544 diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0)
f2ebc5c7 1545 self.report_warning('Release time of video is not known')
693f0600 1546 elif ie_result and (diff or 0) <= 0:
f2ebc5c7 1547 self.report_warning('Video should already be available according to extracted info')
38d79fd1 1548 diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
f2ebc5c7 1549 self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1550
1551 wait_till = time.time() + diff
1552 try:
1553 while True:
1554 diff = wait_till - time.time()
1555 if diff <= 0:
1556 progress('')
1557 raise ReExtractInfo('[wait] Wait period ended', expected=True)
1558 progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1559 time.sleep(1)
1560 except KeyboardInterrupt:
1561 progress('')
1562 raise ReExtractInfo('[wait] Interrupted by user', expected=True)
1563 except BaseException as e:
1564 if not isinstance(e, ReExtractInfo):
1565 self.to_screen('')
1566 raise
1567
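# 'wait_for_video' is a (min_seconds, max_seconds) tuple, as unpacked above.
# e.g. {'wait_for_video': (60, 600)} waits between 1 and 10 minutes (or until
# the extracted release time, when known) before re-extracting; values illustrative.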
7e88d7d7 1568 @_handle_extraction_exceptions
58f197b7 1569 def __extract_info(self, url, ie, download, extra_info, process):
693f0600 1570 try:
1571 ie_result = ie.extract(url)
1572 except UserNotLive as e:
1573 if process:
1574 if self.params.get('wait_for_video'):
1575 self.report_warning(e)
1576 self._wait_for_video()
1577 raise
a0566bbf 1578 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
cb794ee0 1579 self.report_warning(f'Extractor {ie.IE_NAME} returned nothing{bug_reports_message()}')
a0566bbf 1580 return
1581 if isinstance(ie_result, list):
1582 # Backwards compatibility: old IE result format
1583 ie_result = {
1584 '_type': 'compat_list',
1585 'entries': ie_result,
1586 }
e37d0efb 1587 if extra_info.get('original_url'):
1588 ie_result.setdefault('original_url', extra_info['original_url'])
a0566bbf 1589 self.add_default_extra_info(ie_result, ie, url)
1590 if process:
f2ebc5c7 1591 self._wait_for_video(ie_result)
a0566bbf 1592 return self.process_ie_result(ie_result, download, extra_info)
8222d8de 1593 else:
a0566bbf 1594 return ie_result
fe7e0c98 1595
ea38e55f 1596 def add_default_extra_info(self, ie_result, ie, url):
6033d980 1597 if url is not None:
1598 self.add_extra_info(ie_result, {
1599 'webpage_url': url,
1600 'original_url': url,
57ebfca3 1601 })
1602 webpage_url = ie_result.get('webpage_url')
1603 if webpage_url:
1604 self.add_extra_info(ie_result, {
1605 'webpage_url_basename': url_basename(webpage_url),
1606 'webpage_url_domain': get_domain(webpage_url),
6033d980 1607 })
1608 if ie is not None:
1609 self.add_extra_info(ie_result, {
1610 'extractor': ie.IE_NAME,
1611 'extractor_key': ie.ie_key(),
1612 })
ea38e55f 1613
58adec46 1614 def process_ie_result(self, ie_result, download=True, extra_info=None):
8222d8de
JMF
1615 """
1616 Take the result of the ie (may be modified) and resolve all unresolved
1617 references (URLs, playlist items).
1618
1619 It will also download the videos if 'download' is True.
1620 Returns the resolved ie_result.
1621 """
58adec46 1622 if extra_info is None:
1623 extra_info = {}
e8ee972c
PH
1624 result_type = ie_result.get('_type', 'video')
1625
057a5206 1626 if result_type in ('url', 'url_transparent'):
8f97a15d 1627 ie_result['url'] = sanitize_url(
1628 ie_result['url'], scheme='http' if self.params.get('prefer_insecure') else 'https')
8791e78c 1629 if ie_result.get('original_url') and not extra_info.get('original_url'):
1630 extra_info = {'original_url': ie_result['original_url'], **extra_info}
e37d0efb 1631
057a5206 1632 extract_flat = self.params.get('extract_flat', False)
3089bc74
S
1633 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1634 or extract_flat is True):
ecb54191 1635 info_copy = ie_result.copy()
6033d980 1636 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
360167b9 1637 if ie and not ie_result.get('id'):
4614bc22 1638 info_copy['id'] = ie.get_temp_id(ie_result['url'])
6033d980 1639 self.add_default_extra_info(info_copy, ie, ie_result['url'])
4614bc22 1640 self.add_extra_info(info_copy, extra_info)
b5475f11 1641 info_copy, _ = self.pre_process(info_copy)
94dc8604 1642 self._fill_common_fields(info_copy, False)
ecb54191 1643 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
415f8d51 1644 self._raise_pending_errors(info_copy)
4614bc22 1645 if self.params.get('force_write_download_archive', False):
1646 self.record_download_archive(info_copy)
e8ee972c
PH
1647 return ie_result
1648
8222d8de 1649 if result_type == 'video':
b6c45014 1650 self.add_extra_info(ie_result, extra_info)
9c2b75b5 1651 ie_result = self.process_video_result(ie_result, download=download)
415f8d51 1652 self._raise_pending_errors(ie_result)
28b0eb0f 1653 additional_urls = (ie_result or {}).get('additional_urls')
9c2b75b5 1654 if additional_urls:
e9f4ccd1 1655 # TODO: Improve MetadataParserPP to allow setting a list
14f25df2 1656 if isinstance(additional_urls, str):
9c2b75b5 1657 additional_urls = [additional_urls]
1658 self.to_screen(
1659 '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1660 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1661 ie_result['additional_entries'] = [
1662 self.extract_info(
b69fd25c 1663 url, download, extra_info=extra_info,
9c2b75b5 1664 force_generic_extractor=self.params.get('force_generic_extractor'))
1665 for url in additional_urls
1666 ]
1667 return ie_result
8222d8de
JMF
1668 elif result_type == 'url':
1669 # We have to add extra_info to the results because it may be
1670 # contained in a playlist
07cce701 1671 return self.extract_info(
1672 ie_result['url'], download,
1673 ie_key=ie_result.get('ie_key'),
1674 extra_info=extra_info)
7fc3fa05
PH
1675 elif result_type == 'url_transparent':
1676 # Use the information from the embedding page
1677 info = self.extract_info(
1678 ie_result['url'], ie_key=ie_result.get('ie_key'),
1679 extra_info=extra_info, download=False, process=False)
1680
1640eb09
S
1681 # extract_info may return None when ignoreerrors is enabled and
1682 # extraction failed with an error, don't crash and return early
1683 # in this case
1684 if not info:
1685 return info
1686
3975b4d2 1687 exempted_fields = {'_type', 'url', 'ie_key'}
1688 if not ie_result.get('section_end') and ie_result.get('section_start') is None:
1689 # For video clips, the id etc of the clip extractor should be used
1690 exempted_fields |= {'id', 'extractor', 'extractor_key'}
1691
412c617d 1692 new_result = info.copy()
3975b4d2 1693 new_result.update(filter_dict(ie_result, lambda k, v: v is not None and k not in exempted_fields))
7fc3fa05 1694
0563f7ac
S
1695 # Extracted info may not be a video result (i.e.
1696 # info.get('_type', 'video') != video) but rather an url or
1697 # url_transparent. In such cases outer metadata (from ie_result)
1698 # should be propagated to inner one (info). For this to happen
1699 # _type of info should be overridden with url_transparent. This
067aa17e 1700 # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
0563f7ac
S
1701 if new_result.get('_type') == 'url':
1702 new_result['_type'] = 'url_transparent'
7fc3fa05
PH
1703
1704 return self.process_ie_result(
1705 new_result, download=download, extra_info=extra_info)
40fcba5e 1706 elif result_type in ('playlist', 'multi_video'):
30a074c2 1707 # Protect from infinite recursion due to recursively nested playlists
1708 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
0bd5a039 1709 webpage_url = ie_result.get('webpage_url') # Playlists may not have webpage_url
1710 if webpage_url and webpage_url in self._playlist_urls:
7e85e872 1711 self.to_screen(
30a074c2 1712 '[download] Skipping already downloaded playlist: %s'
1713 % (ie_result.get('title') or ie_result.get('id')))
1714 return
7e85e872 1715
30a074c2 1716 self._playlist_level += 1
1717 self._playlist_urls.add(webpage_url)
03f83004 1718 self._fill_common_fields(ie_result, False)
bc516a3f 1719 self._sanitize_thumbnails(ie_result)
30a074c2 1720 try:
1721 return self.__process_playlist(ie_result, download)
1722 finally:
1723 self._playlist_level -= 1
1724 if not self._playlist_level:
1725 self._playlist_urls.clear()
8222d8de 1726 elif result_type == 'compat_list':
c9bf4114
PH
1727 self.report_warning(
1728 'Extractor %s returned a compat_list result. '
1729 'It needs to be updated.' % ie_result.get('extractor'))
5f6a1245 1730
8222d8de 1731 def _fixup(r):
b868936c 1732 self.add_extra_info(r, {
1733 'extractor': ie_result['extractor'],
1734 'webpage_url': ie_result['webpage_url'],
1735 'webpage_url_basename': url_basename(ie_result['webpage_url']),
0bb322b9 1736 'webpage_url_domain': get_domain(ie_result['webpage_url']),
b868936c 1737 'extractor_key': ie_result['extractor_key'],
1738 })
8222d8de
JMF
1739 return r
1740 ie_result['entries'] = [
b6c45014 1741 self.process_ie_result(_fixup(r), download, extra_info)
8222d8de
JMF
1742 for r in ie_result['entries']
1743 ]
1744 return ie_result
1745 else:
1746 raise Exception('Invalid result type: %s' % result_type)
1747
e92caff5 1748 def _ensure_dir_exists(self, path):
1749 return make_dir(path, self.report_error)
1750
3b603dbd 1751 @staticmethod
3bec830a 1752 def _playlist_infodict(ie_result, strict=False, **kwargs):
1753 info = {
1754 'playlist_count': ie_result.get('playlist_count'),
3b603dbd 1755 'playlist': ie_result.get('title') or ie_result.get('id'),
1756 'playlist_id': ie_result.get('id'),
1757 'playlist_title': ie_result.get('title'),
1758 'playlist_uploader': ie_result.get('uploader'),
1759 'playlist_uploader_id': ie_result.get('uploader_id'),
3b603dbd 1760 **kwargs,
1761 }
3bec830a 1762 if strict:
1763 return info
0bd5a039 1764 if ie_result.get('webpage_url'):
1765 info.update({
1766 'webpage_url': ie_result['webpage_url'],
1767 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1768 'webpage_url_domain': get_domain(ie_result['webpage_url']),
1769 })
3bec830a 1770 return {
1771 **info,
1772 'playlist_index': 0,
1773 '__last_playlist_index': max(ie_result['requested_entries'] or (0, 0)),
1774 'extractor': ie_result['extractor'],
3bec830a 1775 'extractor_key': ie_result['extractor_key'],
1776 }
3b603dbd 1777
30a074c2 1778 def __process_playlist(self, ie_result, download):
7e88d7d7 1779 """Process each entry in the playlist"""
f5ea4748 1780 assert ie_result['_type'] in ('playlist', 'multi_video')
1781
3bec830a 1782 common_info = self._playlist_infodict(ie_result, strict=True)
3955b207 1783 title = common_info.get('playlist') or '<Untitled>'
3bec830a 1784 if self._match_entry(common_info, incomplete=True) is not None:
1785 return
c6e07cf1 1786 self.to_screen(f'[download] Downloading {ie_result["_type"]}: {title}')
f0d785d3 1787
7e88d7d7 1788 all_entries = PlaylistEntries(self, ie_result)
7e9a6125 1789 entries = orderedSet(all_entries.get_requested_items(), lazy=True)
1790
1791 lazy = self.params.get('lazy_playlist')
1792 if lazy:
1793 resolved_entries, n_entries = [], 'N/A'
1794 ie_result['requested_entries'], ie_result['entries'] = None, None
1795 else:
1796 entries = resolved_entries = list(entries)
1797 n_entries = len(resolved_entries)
1798 ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*resolved_entries)) or ([], [])
1799 if not ie_result.get('playlist_count'):
1800 # Better to do this after potentially exhausting entries
1801 ie_result['playlist_count'] = all_entries.get_full_count()
498f5606 1802
0647d925 1803 extra = self._playlist_infodict(ie_result, n_entries=int_or_none(n_entries))
1804 ie_copy = collections.ChainMap(ie_result, extra)
3bec830a 1805
e08a85d8 1806 _infojson_written = False
0bfc53d0 1807 write_playlist_files = self.params.get('allow_playlist_files', True)
1808 if write_playlist_files and self.params.get('list_thumbnails'):
1809 self.list_thumbnails(ie_result)
1810 if write_playlist_files and not self.params.get('simulate'):
e08a85d8 1811 _infojson_written = self._write_info_json(
1812 'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
1813 if _infojson_written is None:
80c03fa9 1814 return
1815 if self._write_description('playlist', ie_result,
1816 self.prepare_filename(ie_copy, 'pl_description')) is None:
1817 return
681de68e 1818 # TODO: This should be passed to ThumbnailsConvertor if necessary
3bec830a 1819 self._write_thumbnails('playlist', ie_result, self.prepare_filename(ie_copy, 'pl_thumbnail'))
30a074c2 1820
7e9a6125 1821 if lazy:
1822 if self.params.get('playlistreverse') or self.params.get('playlistrandom'):
1823 self.report_warning('playlistreverse and playlistrandom are not supported with lazy_playlist', only_once=True)
1824 elif self.params.get('playlistreverse'):
1825 entries.reverse()
1826 elif self.params.get('playlistrandom'):
30a074c2 1827 random.shuffle(entries)
1828
bc5c2f8a 1829 self.to_screen(f'[{ie_result["extractor"]}] Playlist {title}: Downloading {n_entries} items'
7e88d7d7 1830 f'{format_field(ie_result, "playlist_count", " of %s")}')
30a074c2 1831
134c913c 1832 keep_resolved_entries = self.params.get('extract_flat') != 'discard'
1833 if self.params.get('extract_flat') == 'discard_in_playlist':
1834 keep_resolved_entries = ie_result['_type'] != 'playlist'
1835 if keep_resolved_entries:
1836 self.write_debug('The information of all playlist entries will be held in memory')
1837
26e2805c 1838 failures = 0
1839 max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
7e9a6125 1840 for i, (playlist_index, entry) in enumerate(entries):
1841 if lazy:
1842 resolved_entries.append((playlist_index, entry))
3bec830a 1843 if not entry:
7e88d7d7 1844 continue
1845
7e88d7d7 1846 entry['__x_forwarded_for_ip'] = ie_result.get('__x_forwarded_for_ip')
7e9a6125 1847 if not lazy and 'playlist-index' in self.params.get('compat_opts', []):
1848 playlist_index = ie_result['requested_entries'][i]
1849
0647d925 1850 entry_copy = collections.ChainMap(entry, {
3bec830a 1851 **common_info,
3955b207 1852 'n_entries': int_or_none(n_entries),
71729754 1853 'playlist_index': playlist_index,
7e9a6125 1854 'playlist_autonumber': i + 1,
0647d925 1855 })
3bec830a 1856
0647d925 1857 if self._match_entry(entry_copy, incomplete=True) is not None:
f0ad6f8c 1858 # For compatibility with youtube-dl. See https://github.com/yt-dlp/yt-dlp/issues/4369
1859 resolved_entries[i] = (playlist_index, NO_DEFAULT)
3bec830a 1860 continue
1861
bc5c2f8a 1862 self.to_screen('[download] Downloading item %s of %s' % (
3bec830a 1863 self._format_screen(i + 1, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))
1864
ec54bd43 1865 entry_result = self.__process_iterable_entry(entry, download, collections.ChainMap({
a6ca61d4 1866 'playlist_index': playlist_index,
1867 'playlist_autonumber': i + 1,
ec54bd43 1868 }, extra))
26e2805c 1869 if not entry_result:
1870 failures += 1
1871 if failures >= max_failures:
1872 self.report_error(
7e88d7d7 1873 f'Skipping the remaining entries in playlist "{title}" since {failures} items failed extraction')
26e2805c 1874 break
134c913c 1875 if keep_resolved_entries:
1876 resolved_entries[i] = (playlist_index, entry_result)
7e88d7d7 1877
1878 # Update with processed data
f0ad6f8c 1879 ie_result['entries'] = [e for _, e in resolved_entries if e is not NO_DEFAULT]
bc5c2f8a 1880 ie_result['requested_entries'] = [i for i, e in resolved_entries if e is not NO_DEFAULT]
1881 if ie_result['requested_entries'] == try_call(lambda: list(range(1, ie_result['playlist_count'] + 1))):
1882 # Do not set for full playlist
1883 ie_result.pop('requested_entries')
e08a85d8 1884
1885 # Write the updated info to json
cb96c5be 1886 if _infojson_written is True and self._write_info_json(
e08a85d8 1887 'updated playlist', ie_result,
1888 self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
1889 return
ca30f449 1890
ed5835b4 1891 ie_result = self.run_all_pps('playlist', ie_result)
7e88d7d7 1892 self.to_screen(f'[download] Finished downloading playlist: {title}')
30a074c2 1893 return ie_result
1894
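# The playlist handling above is driven by params referenced in the code,
# for example (values illustrative only):
#   {'lazy_playlist': True,                  # resolve entries on demand
#    'playlistreverse': False,               # not supported together with lazy_playlist
#    'skip_playlist_after_errors': 3,        # max_failures in the loop above
#    'extract_flat': 'discard_in_playlist'}  # do not keep resolved entries in memory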
7e88d7d7 1895 @_handle_extraction_exceptions
a0566bbf 1896 def __process_iterable_entry(self, entry, download, extra_info):
1897 return self.process_ie_result(
1898 entry, download=download, extra_info=extra_info)
1899
67134eab
JMF
1900 def _build_format_filter(self, filter_spec):
1901 " Returns a function to filter the formats according to the filter_spec "
083c9df9
PH
1902
1903 OPERATORS = {
1904 '<': operator.lt,
1905 '<=': operator.le,
1906 '>': operator.gt,
1907 '>=': operator.ge,
1908 '=': operator.eq,
1909 '!=': operator.ne,
1910 }
67134eab 1911 operator_rex = re.compile(r'''(?x)\s*
187986a8 1912 (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1913 (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1914 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
083c9df9 1915 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
187986a8 1916 m = operator_rex.fullmatch(filter_spec)
9ddb6925
S
1917 if m:
1918 try:
1919 comparison_value = int(m.group('value'))
1920 except ValueError:
1921 comparison_value = parse_filesize(m.group('value'))
1922 if comparison_value is None:
1923 comparison_value = parse_filesize(m.group('value') + 'B')
1924 if comparison_value is None:
1925 raise ValueError(
1926 'Invalid value %r in format specification %r' % (
67134eab 1927 m.group('value'), filter_spec))
9ddb6925
S
1928 op = OPERATORS[m.group('op')]
1929
083c9df9 1930 if not m:
9ddb6925
S
1931 STR_OPERATORS = {
1932 '=': operator.eq,
10d33b34
YCH
1933 '^=': lambda attr, value: attr.startswith(value),
1934 '$=': lambda attr, value: attr.endswith(value),
1935 '*=': lambda attr, value: value in attr,
1ce9a3cb 1936 '~=': lambda attr, value: value.search(attr) is not None
9ddb6925 1937 }
187986a8 1938 str_operator_rex = re.compile(r'''(?x)\s*
1939 (?P<key>[a-zA-Z0-9._-]+)\s*
1ce9a3cb
LF
1940 (?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?
1941 (?P<quote>["'])?
1942 (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
1943 (?(quote)(?P=quote))\s*
9ddb6925 1944 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
187986a8 1945 m = str_operator_rex.fullmatch(filter_spec)
9ddb6925 1946 if m:
1ce9a3cb
LF
1947 if m.group('op') == '~=':
1948 comparison_value = re.compile(m.group('value'))
1949 else:
1950 comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value'))
2cc779f4
S
1951 str_op = STR_OPERATORS[m.group('op')]
1952 if m.group('negation'):
e118a879 1953 op = lambda attr, value: not str_op(attr, value)
2cc779f4
S
1954 else:
1955 op = str_op
083c9df9 1956
9ddb6925 1957 if not m:
187986a8 1958 raise SyntaxError('Invalid filter specification %r' % filter_spec)
083c9df9
PH
1959
1960 def _filter(f):
1961 actual_value = f.get(m.group('key'))
1962 if actual_value is None:
1963 return m.group('none_inclusive')
1964 return op(actual_value, comparison_value)
67134eab
JMF
1965 return _filter
1966
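# The filter specs parsed above are the bracketed parts of a format selector;
# illustrative examples:
#   'height<=720'       numeric comparison; 500K/1.5M style suffixes are parsed
#   'filesize>?100M'    "?" after the operator also accepts formats missing the field
#   'vcodec^=avc1'      string ops =, ^=, $=, *=, ~= (regex), negated with "!"
# e.g. list(filter(self._build_format_filter('height<=720'), formats))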
9f1a1c36 1967 def _check_formats(self, formats):
1968 for f in formats:
1969 self.to_screen('[info] Testing format %s' % f['format_id'])
75689fe5 1970 path = self.get_output_path('temp')
1971 if not self._ensure_dir_exists(f'{path}/'):
1972 continue
1973 temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
9f1a1c36 1974 temp_file.close()
1975 try:
1976 success, _ = self.dl(temp_file.name, f, test=True)
8a82af35 1977 except (DownloadError, OSError, ValueError) + network_exceptions:
9f1a1c36 1978 success = False
1979 finally:
1980 if os.path.exists(temp_file.name):
1981 try:
1982 os.remove(temp_file.name)
1983 except OSError:
1984 self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1985 if success:
1986 yield f
1987 else:
1988 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1989
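# _check_formats above is only invoked when params['check_formats'] is set;
# e.g. 'check_formats': 'selected' (see build_format_selector below) tests just
# the formats that end up being chosen, via the download test dl(..., test=True).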
0017d9ad 1990 def _default_format_spec(self, info_dict, download=True):
0017d9ad 1991
af0f7428
S
1992 def can_merge():
1993 merger = FFmpegMergerPP(self)
1994 return merger.available and merger.can_merge()
1995
91ebc640 1996 prefer_best = (
b7b04c78 1997 not self.params.get('simulate')
91ebc640 1998 and download
1999 and (
2000 not can_merge()
21633673 2001 or info_dict.get('is_live') and not self.params.get('live_from_start')
bf1824b3 2002 or self.params['outtmpl']['default'] == '-'))
53ed7066 2003 compat = (
2004 prefer_best
2005 or self.params.get('allow_multiple_audio_streams', False)
8a82af35 2006 or 'format-spec' in self.params['compat_opts'])
91ebc640 2007
2008 return (
53ed7066 2009 'best/bestvideo+bestaudio' if prefer_best
2010 else 'bestvideo*+bestaudio/best' if not compat
91ebc640 2011 else 'bestvideo+bestaudio/best')
0017d9ad 2012
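# In effect: with a usable ffmpeg merger and a normal file target this yields
# 'bestvideo*+bestaudio/best'; without a merger, for lives not downloaded from
# the start, or when writing to '-', it prefers 'best/bestvideo+bestaudio';
# with 'format-spec' compat or multiple audio streams it is 'bestvideo+bestaudio/best'.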
67134eab
JMF
2013 def build_format_selector(self, format_spec):
2014 def syntax_error(note, start):
2015 message = (
2016 'Invalid format specification: '
86e5f3ed 2017 '{}\n\t{}\n\t{}^'.format(note, format_spec, ' ' * start[1]))
67134eab
JMF
2018 return SyntaxError(message)
2019
2020 PICKFIRST = 'PICKFIRST'
2021 MERGE = 'MERGE'
2022 SINGLE = 'SINGLE'
0130afb7 2023 GROUP = 'GROUP'
67134eab
JMF
2024 FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
2025
91ebc640 2026 allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
2027 'video': self.params.get('allow_multiple_video_streams', False)}
909d24dd 2028
9f1a1c36 2029 check_formats = self.params.get('check_formats') == 'selected'
e8e73840 2030
67134eab
JMF
2031 def _parse_filter(tokens):
2032 filter_parts = []
2033 for type, string, start, _, _ in tokens:
2034 if type == tokenize.OP and string == ']':
2035 return ''.join(filter_parts)
2036 else:
2037 filter_parts.append(string)
2038
232541df 2039 def _remove_unused_ops(tokens):
62b58c09
L
2040 # Remove operators that we don't use and join them with the surrounding strings.
2041 # E.g. 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
232541df
JMF
2042 ALLOWED_OPS = ('/', '+', ',', '(', ')')
2043 last_string, last_start, last_end, last_line = None, None, None, None
2044 for type, string, start, end, line in tokens:
2045 if type == tokenize.OP and string == '[':
2046 if last_string:
2047 yield tokenize.NAME, last_string, last_start, last_end, last_line
2048 last_string = None
2049 yield type, string, start, end, line
2050 # everything inside brackets will be handled by _parse_filter
2051 for type, string, start, end, line in tokens:
2052 yield type, string, start, end, line
2053 if type == tokenize.OP and string == ']':
2054 break
2055 elif type == tokenize.OP and string in ALLOWED_OPS:
2056 if last_string:
2057 yield tokenize.NAME, last_string, last_start, last_end, last_line
2058 last_string = None
2059 yield type, string, start, end, line
2060 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
2061 if not last_string:
2062 last_string = string
2063 last_start = start
2064 last_end = end
2065 else:
2066 last_string += string
2067 if last_string:
2068 yield tokenize.NAME, last_string, last_start, last_end, last_line
2069
cf2ac6df 2070 def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
67134eab
JMF
2071 selectors = []
2072 current_selector = None
2073 for type, string, start, _, _ in tokens:
2074 # ENCODING is only defined in python 3.x
2075 if type == getattr(tokenize, 'ENCODING', None):
2076 continue
2077 elif type in [tokenize.NAME, tokenize.NUMBER]:
2078 current_selector = FormatSelector(SINGLE, string, [])
2079 elif type == tokenize.OP:
cf2ac6df
JMF
2080 if string == ')':
2081 if not inside_group:
2082 # ')' will be handled by the parentheses group
2083 tokens.restore_last_token()
67134eab 2084 break
cf2ac6df 2085 elif inside_merge and string in ['/', ',']:
0130afb7
JMF
2086 tokens.restore_last_token()
2087 break
cf2ac6df
JMF
2088 elif inside_choice and string == ',':
2089 tokens.restore_last_token()
2090 break
2091 elif string == ',':
0a31a350
JMF
2092 if not current_selector:
2093 raise syntax_error('"," must follow a format selector', start)
67134eab
JMF
2094 selectors.append(current_selector)
2095 current_selector = None
2096 elif string == '/':
d96d604e
JMF
2097 if not current_selector:
2098 raise syntax_error('"/" must follow a format selector', start)
67134eab 2099 first_choice = current_selector
cf2ac6df 2100 second_choice = _parse_format_selection(tokens, inside_choice=True)
f5f4a27a 2101 current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
67134eab
JMF
2102 elif string == '[':
2103 if not current_selector:
2104 current_selector = FormatSelector(SINGLE, 'best', [])
2105 format_filter = _parse_filter(tokens)
2106 current_selector.filters.append(format_filter)
0130afb7
JMF
2107 elif string == '(':
2108 if current_selector:
2109 raise syntax_error('Unexpected "("', start)
cf2ac6df
JMF
2110 group = _parse_format_selection(tokens, inside_group=True)
2111 current_selector = FormatSelector(GROUP, group, [])
67134eab 2112 elif string == '+':
d03cfdce 2113 if not current_selector:
2114 raise syntax_error('Unexpected "+"', start)
2115 selector_1 = current_selector
2116 selector_2 = _parse_format_selection(tokens, inside_merge=True)
2117 if not selector_2:
2118 raise syntax_error('Expected a selector', start)
2119 current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
67134eab 2120 else:
86e5f3ed 2121 raise syntax_error(f'Operator not recognized: "{string}"', start)
67134eab
JMF
2122 elif type == tokenize.ENDMARKER:
2123 break
2124 if current_selector:
2125 selectors.append(current_selector)
2126 return selectors
2127
f8d4ad9a 2128 def _merge(formats_pair):
2129 format_1, format_2 = formats_pair
2130
2131 formats_info = []
2132 formats_info.extend(format_1.get('requested_formats', (format_1,)))
2133 formats_info.extend(format_2.get('requested_formats', (format_2,)))
2134
2135 if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
551f9388 2136 get_no_more = {'video': False, 'audio': False}
f8d4ad9a 2137 for (i, fmt_info) in enumerate(formats_info):
551f9388 2138 if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
2139 formats_info.pop(i)
2140 continue
2141 for aud_vid in ['audio', 'video']:
f8d4ad9a 2142 if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
2143 if get_no_more[aud_vid]:
2144 formats_info.pop(i)
f5510afe 2145 break
f8d4ad9a 2146 get_no_more[aud_vid] = True
2147
2148 if len(formats_info) == 1:
2149 return formats_info[0]
2150
2151 video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
2152 audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
2153
2154 the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
2155 the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
2156
fc61aff4
LL
2157 output_ext = get_compatible_ext(
2158 vcodecs=[f.get('vcodec') for f in video_fmts],
2159 acodecs=[f.get('acodec') for f in audio_fmts],
2160 vexts=[f['ext'] for f in video_fmts],
2161 aexts=[f['ext'] for f in audio_fmts],
2162 preferences=(try_call(lambda: self.params['merge_output_format'].split('/'))
2163 or self.params.get('prefer_free_formats') and ('webm', 'mkv')))
f8d4ad9a 2164
975a0d0d 2165 filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
2166
f8d4ad9a 2167 new_dict = {
2168 'requested_formats': formats_info,
975a0d0d 2169 'format': '+'.join(filtered('format')),
2170 'format_id': '+'.join(filtered('format_id')),
f8d4ad9a 2171 'ext': output_ext,
975a0d0d 2172 'protocol': '+'.join(map(determine_protocol, formats_info)),
093a1710 2173 'language': '+'.join(orderedSet(filtered('language'))) or None,
2174 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
2175 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
975a0d0d 2176 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
f8d4ad9a 2177 }
2178
2179 if the_only_video:
2180 new_dict.update({
2181 'width': the_only_video.get('width'),
2182 'height': the_only_video.get('height'),
2183 'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
2184 'fps': the_only_video.get('fps'),
49a57e70 2185 'dynamic_range': the_only_video.get('dynamic_range'),
f8d4ad9a 2186 'vcodec': the_only_video.get('vcodec'),
2187 'vbr': the_only_video.get('vbr'),
2188 'stretched_ratio': the_only_video.get('stretched_ratio'),
105bfd90 2189 'aspect_ratio': the_only_video.get('aspect_ratio'),
f8d4ad9a 2190 })
2191
2192 if the_only_audio:
2193 new_dict.update({
2194 'acodec': the_only_audio.get('acodec'),
2195 'abr': the_only_audio.get('abr'),
975a0d0d 2196 'asr': the_only_audio.get('asr'),
b8ed0f15 2197 'audio_channels': the_only_audio.get('audio_channels')
f8d4ad9a 2198 })
2199
2200 return new_dict
2201
e8e73840 2202 def _check_formats(formats):
981052c9 2203 if not check_formats:
2204 yield from formats
b5ac45b1 2205 return
9f1a1c36 2206 yield from self._check_formats(formats)
e8e73840 2207
67134eab 2208 def _build_selector_function(selector):
909d24dd 2209 if isinstance(selector, list): # ,
67134eab
JMF
2210 fs = [_build_selector_function(s) for s in selector]
2211
317f7ab6 2212 def selector_function(ctx):
67134eab 2213 for f in fs:
981052c9 2214 yield from f(ctx)
67134eab 2215 return selector_function
909d24dd 2216
2217 elif selector.type == GROUP: # ()
0130afb7 2218 selector_function = _build_selector_function(selector.selector)
909d24dd 2219
2220 elif selector.type == PICKFIRST: # /
67134eab
JMF
2221 fs = [_build_selector_function(s) for s in selector.selector]
2222
317f7ab6 2223 def selector_function(ctx):
67134eab 2224 for f in fs:
317f7ab6 2225 picked_formats = list(f(ctx))
67134eab
JMF
2226 if picked_formats:
2227 return picked_formats
2228 return []
67134eab 2229
981052c9 2230 elif selector.type == MERGE: # +
2231 selector_1, selector_2 = map(_build_selector_function, selector.selector)
2232
2233 def selector_function(ctx):
adbc4ec4 2234 for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
981052c9 2235 yield _merge(pair)
2236
909d24dd 2237 elif selector.type == SINGLE: # atom
598d185d 2238 format_spec = selector.selector or 'best'
909d24dd 2239
f8d4ad9a 2240 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
909d24dd 2241 if format_spec == 'all':
2242 def selector_function(ctx):
9222c381 2243 yield from _check_formats(ctx['formats'][::-1])
f8d4ad9a 2244 elif format_spec == 'mergeall':
2245 def selector_function(ctx):
316f2650 2246 formats = list(_check_formats(
2247 f for f in ctx['formats'] if f.get('vcodec') != 'none' or f.get('acodec') != 'none'))
e01d6aa4 2248 if not formats:
2249 return
921b76ca 2250 merged_format = formats[-1]
2251 for f in formats[-2::-1]:
f8d4ad9a 2252 merged_format = _merge((merged_format, f))
2253 yield merged_format
909d24dd 2254
2255 else:
85e801a9 2256 format_fallback, seperate_fallback, format_reverse, format_idx = False, None, True, 1
eff63539 2257 mobj = re.match(
2258 r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
2259 format_spec)
2260 if mobj is not None:
2261 format_idx = int_or_none(mobj.group('n'), default=1)
e8e73840 2262 format_reverse = mobj.group('bw')[0] == 'b'
eff63539 2263 format_type = (mobj.group('type') or [None])[0]
2264 not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
2265 format_modified = mobj.group('mod') is not None
909d24dd 2266
2267 format_fallback = not format_type and not format_modified # for b, w
8326b00a 2268 _filter_f = (
eff63539 2269 (lambda f: f.get('%scodec' % format_type) != 'none')
2270 if format_type and format_modified # bv*, ba*, wv*, wa*
2271 else (lambda f: f.get('%scodec' % not_format_type) == 'none')
2272 if format_type # bv, ba, wv, wa
2273 else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
2274 if not format_modified # b, w
8326b00a 2275 else lambda f: True) # b*, w*
2276 filter_f = lambda f: _filter_f(f) and (
2277 f.get('vcodec') != 'none' or f.get('acodec') != 'none')
67134eab 2278 else:
48ee10ee 2279 if format_spec in self._format_selection_exts['audio']:
b11c04a8 2280 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
48ee10ee 2281 elif format_spec in self._format_selection_exts['video']:
b11c04a8 2282 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
85e801a9 2283 seperate_fallback = lambda f: f.get('ext') == format_spec and f.get('vcodec') != 'none'
48ee10ee 2284 elif format_spec in self._format_selection_exts['storyboards']:
b11c04a8 2285 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
2286 else:
b5ae35ee 2287 filter_f = lambda f: f.get('format_id') == format_spec # id
909d24dd 2288
2289 def selector_function(ctx):
2290 formats = list(ctx['formats'])
909d24dd 2291 matches = list(filter(filter_f, formats)) if filter_f is not None else formats
85e801a9 2292 if not matches:
2293 if format_fallback and ctx['incomplete_formats']:
2294 # for extractors with incomplete formats (audio only (soundcloud)
2295 # or video only (imgur)) best/worst will fall back to
2296 # best/worst {video,audio}-only format
2297 matches = formats
2298 elif seperate_fallback and not ctx['has_merged_format']:
2299 # for compatibility with youtube-dl when there is no pre-merged format
2300 matches = list(filter(seperate_fallback, formats))
981052c9 2301 matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
2302 try:
e8e73840 2303 yield matches[format_idx - 1]
4abea8ca 2304 except LazyList.IndexError:
981052c9 2305 return
083c9df9 2306
67134eab 2307 filters = [self._build_format_filter(f) for f in selector.filters]
083c9df9 2308
317f7ab6 2309 def final_selector(ctx):
adbc4ec4 2310 ctx_copy = dict(ctx)
67134eab 2311 for _filter in filters:
317f7ab6
S
2312 ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
2313 return selector_function(ctx_copy)
67134eab 2314 return final_selector
083c9df9 2315
0f06bcd7 2316 stream = io.BytesIO(format_spec.encode())
0130afb7 2317 try:
f9934b96 2318 tokens = list(_remove_unused_ops(tokenize.tokenize(stream.readline)))
0130afb7
JMF
2319 except tokenize.TokenError:
2320 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2321
86e5f3ed 2322 class TokenIterator:
0130afb7
JMF
2323 def __init__(self, tokens):
2324 self.tokens = tokens
2325 self.counter = 0
2326
2327 def __iter__(self):
2328 return self
2329
2330 def __next__(self):
2331 if self.counter >= len(self.tokens):
2332 raise StopIteration()
2333 value = self.tokens[self.counter]
2334 self.counter += 1
2335 return value
2336
2337 next = __next__
2338
2339 def restore_last_token(self):
2340 self.counter -= 1
2341
2342 parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
67134eab 2343 return _build_selector_function(parsed_selector)
a9c58ad9 2344
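# Format selection strings parsed above follow the -f/--format syntax; some
# illustrative specs:
#   'bestvideo*+bestaudio/best'     merge with "+", fall back with "/"
#   'bv[height<=?1080]+ba'          bracketed filters (see _build_format_filter)
#   '(bv+ba/b)[protocol^=http]'     parenthesised group sharing one filter
#   'best.2'                        the second best format
# The returned selector maps a ctx dict ({'formats': ..., 'incomplete_formats': ...,
# 'has_merged_format': ...}) to the chosen format dicts.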
e5660ee6 2345 def _calc_headers(self, info_dict):
8b7539d2 2346 res = merge_headers(self.params['http_headers'], info_dict.get('http_headers') or {})
e5660ee6 2347
c487cf00 2348 cookies = self._calc_cookies(info_dict['url'])
e5660ee6
JMF
2349 if cookies:
2350 res['Cookie'] = cookies
2351
0016b84e
S
2352 if 'X-Forwarded-For' not in res:
2353 x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2354 if x_forwarded_for_ip:
2355 res['X-Forwarded-For'] = x_forwarded_for_ip
2356
e5660ee6
JMF
2357 return res
2358
c487cf00 2359 def _calc_cookies(self, url):
2360 pr = sanitized_Request(url)
e5660ee6 2361 self.cookiejar.add_cookie_header(pr)
662435f7 2362 return pr.get_header('Cookie')
e5660ee6 2363
9f1a1c36 2364 def _sort_thumbnails(self, thumbnails):
2365 thumbnails.sort(key=lambda t: (
2366 t.get('preference') if t.get('preference') is not None else -1,
2367 t.get('width') if t.get('width') is not None else -1,
2368 t.get('height') if t.get('height') is not None else -1,
2369 t.get('id') if t.get('id') is not None else '',
2370 t.get('url')))
2371
b0249bca 2372 def _sanitize_thumbnails(self, info_dict):
bc516a3f 2373 thumbnails = info_dict.get('thumbnails')
2374 if thumbnails is None:
2375 thumbnail = info_dict.get('thumbnail')
2376 if thumbnail:
2377 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
9f1a1c36 2378 if not thumbnails:
2379 return
2380
2381 def check_thumbnails(thumbnails):
2382 for t in thumbnails:
2383 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2384 try:
2385 self.urlopen(HEADRequest(t['url']))
2386 except network_exceptions as err:
2387 self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2388 continue
2389 yield t
2390
2391 self._sort_thumbnails(thumbnails)
2392 for i, t in enumerate(thumbnails):
2393 if t.get('id') is None:
2394 t['id'] = '%d' % i
2395 if t.get('width') and t.get('height'):
2396 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2397 t['url'] = sanitize_url(t['url'])
2398
2399 if self.params.get('check_formats') is True:
282f5709 2400 info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
9f1a1c36 2401 else:
2402 info_dict['thumbnails'] = thumbnails
bc516a3f 2403
94dc8604 2404 def _fill_common_fields(self, info_dict, final=True):
03f83004 2405 # TODO: move sanitization here
94dc8604 2406 if final:
d4736fdb 2407 title = info_dict.get('title', NO_DEFAULT)
2408 if title is NO_DEFAULT:
03f83004
LNO
2409 raise ExtractorError('Missing "title" field in extractor result',
2410 video_id=info_dict['id'], ie=info_dict['extractor'])
d4736fdb 2411 info_dict['fulltitle'] = title
2412 if not title:
2413 if title == '':
2414 self.write_debug('Extractor gave empty title. Creating a generic title')
2415 else:
2416 self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
1d485a1a 2417 info_dict['title'] = f'{info_dict["extractor"].replace(":", "-")} video #{info_dict["id"]}'
03f83004
LNO
2418
2419 if info_dict.get('duration') is not None:
2420 info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2421
2422 for ts_key, date_key in (
2423 ('timestamp', 'upload_date'),
2424 ('release_timestamp', 'release_date'),
2425 ('modified_timestamp', 'modified_date'),
2426 ):
2427 if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2428 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2429 # see http://bugs.python.org/issue1646728)
19a03940 2430 with contextlib.suppress(ValueError, OverflowError, OSError):
03f83004
LNO
2431 upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2432 info_dict[date_key] = upload_date.strftime('%Y%m%d')
03f83004
LNO
2433
2434 live_keys = ('is_live', 'was_live')
2435 live_status = info_dict.get('live_status')
2436 if live_status is None:
2437 for key in live_keys:
2438 if info_dict.get(key) is False:
2439 continue
2440 if info_dict.get(key):
2441 live_status = key
2442 break
2443 if all(info_dict.get(key) is False for key in live_keys):
2444 live_status = 'not_live'
2445 if live_status:
2446 info_dict['live_status'] = live_status
2447 for key in live_keys:
2448 if info_dict.get(key) is None:
2449 info_dict[key] = (live_status == key)
a057779d 2450 if live_status == 'post_live':
2451 info_dict['was_live'] = True
03f83004
LNO
2452
2453 # Auto generate title fields corresponding to the *_number fields when missing
2454 # in order to always have clean titles. This is very common for TV series.
2455 for field in ('chapter', 'season', 'episode'):
94dc8604 2456 if final and info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
03f83004
LNO
2457 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2458
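# The timestamp -> date derivation above boils down to (sketch):
#   datetime.datetime.utcfromtimestamp(info_dict['release_timestamp']).strftime('%Y%m%d')
# e.g. a release_timestamp of 1577836800 produces release_date '20200101'.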
415f8d51 2459 def _raise_pending_errors(self, info):
2460 err = info.pop('__pending_error', None)
2461 if err:
2462 self.report_error(err, tb=False)
2463
784320c9 2464 def sort_formats(self, info_dict):
2465 formats = self._get_formats(info_dict)
2466 if not formats:
2467 return
2468 # Backward compatibility with InfoExtractor._sort_formats
2469 field_preference = formats[0].pop('__sort_fields', None)
2470 if field_preference:
2471 info_dict['_format_sort_fields'] = field_preference
2472
2473 formats.sort(key=FormatSorter(
2474 self, info_dict.get('_format_sort_fields', [])).calculate_preference)
2475
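# FormatSorter consumes -S/--format-sort style field lists; e.g. an extractor
# can request a preference via formats[0]['__sort_fields'] = ('res', 'fps')
# (popped above into '_format_sort_fields'), while user preferences come from
# the --format-sort option. Field names here are illustrative.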
dd82ffea
JMF
2476 def process_video_result(self, info_dict, download=True):
2477 assert info_dict.get('_type', 'video') == 'video'
9c906919 2478 self._num_videos += 1
dd82ffea 2479
bec1fad2 2480 if 'id' not in info_dict:
fc08bdd6 2481 raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
2482 elif not info_dict.get('id'):
2483 raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])
455a15e2 2484
c9969434
S
2485 def report_force_conversion(field, field_not, conversion):
2486 self.report_warning(
2487 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2488 % (field, field_not, conversion))
2489
2490 def sanitize_string_field(info, string_field):
2491 field = info.get(string_field)
14f25df2 2492 if field is None or isinstance(field, str):
c9969434
S
2493 return
2494 report_force_conversion(string_field, 'a string', 'string')
14f25df2 2495 info[string_field] = str(field)
c9969434
S
2496
2497 def sanitize_numeric_fields(info):
2498 for numeric_field in self._NUMERIC_FIELDS:
2499 field = info.get(numeric_field)
f9934b96 2500 if field is None or isinstance(field, (int, float)):
c9969434
S
2501 continue
2502 report_force_conversion(numeric_field, 'numeric', 'int')
2503 info[numeric_field] = int_or_none(field)
2504
2505 sanitize_string_field(info_dict, 'id')
2506 sanitize_numeric_fields(info_dict)
3975b4d2 2507 if info_dict.get('section_end') and info_dict.get('section_start') is not None:
2508 info_dict['duration'] = round(info_dict['section_end'] - info_dict['section_start'], 3)
4c3f8c3f 2509 if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):
50e93e03 2510 self.report_warning('"duration" field is negative, there is an error in extractor')
be6217b2 2511
9eef7c4e 2512 chapters = info_dict.get('chapters') or []
a3976e07 2513 if chapters and chapters[0].get('start_time'):
2514 chapters.insert(0, {'start_time': 0})
2515
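# Fill in missing chapter start/end times from the neighbouring chapters
# and give untitled chapters a placeholder title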
9eef7c4e 2516 dummy_chapter = {'end_time': 0, 'start_time': info_dict.get('duration')}
a3976e07 2517 for idx, (prev, current, next_) in enumerate(zip(
2518 (dummy_chapter, *chapters), chapters, (*chapters[1:], dummy_chapter)), 1):
9eef7c4e 2519 if current.get('start_time') is None:
2520 current['start_time'] = prev.get('end_time')
2521 if not current.get('end_time'):
2522 current['end_time'] = next_.get('start_time')
a3976e07 2523 if not current.get('title'):
2524 current['title'] = f'<Untitled Chapter {idx}>'
9eef7c4e 2525
dd82ffea
JMF
2526 if 'playlist' not in info_dict:
2527 # It isn't part of a playlist
2528 info_dict['playlist'] = None
2529 info_dict['playlist_index'] = None
2530
bc516a3f 2531 self._sanitize_thumbnails(info_dict)
d5519808 2532
536a55da 2533 thumbnail = info_dict.get('thumbnail')
bc516a3f 2534 thumbnails = info_dict.get('thumbnails')
536a55da
S
2535 if thumbnail:
2536 info_dict['thumbnail'] = sanitize_url(thumbnail)
2537 elif thumbnails:
d5519808
PH
2538 info_dict['thumbnail'] = thumbnails[-1]['url']
2539
ae30b840 2540 if info_dict.get('display_id') is None and 'id' in info_dict:
0afef30b
PH
2541 info_dict['display_id'] = info_dict['id']
2542
03f83004 2543 self._fill_common_fields(info_dict)
33d2fc2f 2544
05108a49
S
2545 for cc_kind in ('subtitles', 'automatic_captions'):
2546 cc = info_dict.get(cc_kind)
2547 if cc:
2548 for _, subtitle in cc.items():
2549 for subtitle_format in subtitle:
2550 if subtitle_format.get('url'):
2551 subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2552 if subtitle_format.get('ext') is None:
2553 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2554
2555 automatic_captions = info_dict.get('automatic_captions')
4bba3716 2556 subtitles = info_dict.get('subtitles')
4bba3716 2557
360e1ca5 2558 info_dict['requested_subtitles'] = self.process_subtitles(
05108a49 2559 info_dict['id'], subtitles, automatic_captions)
a504ced0 2560
784320c9 2561 self.sort_formats(info_dict)
aebb4f4b 2562 formats = self._get_formats(info_dict)
dd82ffea 2563
0a5a191a 2564 # or None ensures --clean-infojson removes it
2565 info_dict['_has_drm'] = any(f.get('has_drm') for f in formats) or None
88acdbc2 2566 if not self.params.get('allow_unplayable_formats'):
2567 formats = [f for f in formats if not f.get('has_drm')]
17ffed18 2568
2569 if formats and all(f.get('acodec') == f.get('vcodec') == 'none' for f in formats):
2570 self.report_warning(
2571 f'{"This video is DRM protected and " if info_dict["_has_drm"] else ""}'
2572 'only images are available for download. Use --list-formats to see them'.capitalize())
88acdbc2 2573
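# For live streams, keep only the formats matching the --live-from-start choice;
# when downloading from the current time, tag the title with the current timestamp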
319b6059 2574 get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
2575 if not get_from_start:
2576 info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
2577 if info_dict.get('is_live') and formats:
adbc4ec4 2578 formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
319b6059 2579 if get_from_start and not formats:
a44ca5a4 2580 self.raise_no_formats(info_dict, msg=(
2581 '--live-from-start is passed, but there are no formats that can be downloaded from the start. '
2582 'If you want to download from the current time, use --no-live-from-start'))
adbc4ec4 2583
73af5cc8
S
2584 def is_wellformed(f):
2585 url = f.get('url')
a5ac0c47 2586 if not url:
73af5cc8
S
2587 self.report_warning(
2588 '"url" field is missing or empty - skipping format, '
2589 'there is an error in extractor')
a5ac0c47
S
2590 return False
2591 if isinstance(url, bytes):
2592 sanitize_string_field(f, 'url')
2593 return True
73af5cc8
S
2594
2595 # Filter out malformed formats for better extraction robustness
1ac7f461 2596 formats = list(filter(is_wellformed, formats or []))
2597
2598 if not formats:
2599 self.raise_no_formats(info_dict)
73af5cc8 2600
181c7053
S
2601 formats_dict = {}
2602
dd82ffea 2603 # We check that all the formats have the format and format_id fields
db95dc13 2604 for i, format in enumerate(formats):
c9969434
S
2605 sanitize_string_field(format, 'format_id')
2606 sanitize_numeric_fields(format)
dcf77cf1 2607 format['url'] = sanitize_url(format['url'])
e74e3b63 2608 if not format.get('format_id'):
14f25df2 2609 format['format_id'] = str(i)
e2effb08
S
2610 else:
2611 # Sanitize format_id from characters used in format selector expression
ec85ded8 2612 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
181c7053
S
2613 format_id = format['format_id']
2614 if format_id not in formats_dict:
2615 formats_dict[format_id] = []
2616 formats_dict[format_id].append(format)
2617
2618 # Make sure all formats have unique format_id
03b4de72 2619 common_exts = set(itertools.chain(*self._format_selection_exts.values()))
181c7053 2620 for format_id, ambiguous_formats in formats_dict.items():
48ee10ee 2621 ambiguous_id = len(ambiguous_formats) > 1
 2622 for i, format in enumerate(ambiguous_formats):
 2623 if ambiguous_id:
181c7053 2624 format['format_id'] = '%s-%d' % (format_id, i)
48ee10ee 2625 if format.get('ext') is None:
2626 format['ext'] = determine_ext(format['url']).lower()
2627 # Ensure there is no conflict between id and ext in format selection
2628 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2629 if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
2630 format['format_id'] = 'f%s' % format['format_id']
181c7053
S
2631
2632 for i, format in enumerate(formats):
8c51aa65 2633 if format.get('format') is None:
6febd1c1 2634 format['format'] = '{id} - {res}{note}'.format(
8c51aa65
JMF
2635 id=format['format_id'],
2636 res=self.format_resolution(format),
b868936c 2637 note=format_field(format, 'format_note', ' (%s)'),
8c51aa65 2638 )
6f0be937 2639 if format.get('protocol') is None:
b5559424 2640 format['protocol'] = determine_protocol(format)
239df021 2641 if format.get('resolution') is None:
2642 format['resolution'] = self.format_resolution(format, default=None)
176f1866 2643 if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
2644 format['dynamic_range'] = 'SDR'
105bfd90 2645 if format.get('aspect_ratio') is None:
2646 format['aspect_ratio'] = try_call(lambda: round(format['width'] / format['height'], 2))
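# Estimate an approximate filesize from duration and total bitrate
# (tbr is in KBit/s, so duration * tbr * 1024/8 gives bytes)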
f2fe69c7 2647 if (info_dict.get('duration') and format.get('tbr')
2648 and not format.get('filesize') and not format.get('filesize_approx')):
56ba69e4 2649 format['filesize_approx'] = int(info_dict['duration'] * format['tbr'] * (1024 / 8))
f2fe69c7 2650
e5660ee6
JMF
2651 # Add HTTP headers, so that external programs can use them from the
2652 # json output
2653 full_format_info = info_dict.copy()
2654 full_format_info.update(format)
2655 format['http_headers'] = self._calc_headers(full_format_info)
0016b84e
S
2656 # Remove private housekeeping stuff
2657 if '__x_forwarded_for_ip' in info_dict:
2658 del info_dict['__x_forwarded_for_ip']
dd82ffea 2659
9f1a1c36 2660 if self.params.get('check_formats') is True:
282f5709 2661 formats = LazyList(self._check_formats(formats[::-1]), reverse=True)
9f1a1c36 2662
88acdbc2 2663 if not formats or formats[0] is not info_dict:
b3d9ef88
JMF
 2664 # Only set the 'formats' field if the original info_dict lists them;
 2665 # otherwise we end up with a circular reference: the first (and only)
f89197d7 2666 # element of the 'formats' field in info_dict is info_dict itself,
dfb1b146 2667 # which can't be exported to JSON
b3d9ef88 2668 info_dict['formats'] = formats
4ec82a72 2669
2670 info_dict, _ = self.pre_process(info_dict)
2671
6db9c4d5 2672 if self._match_entry(info_dict, incomplete=self._format_fields) is not None:
09b49e1f 2673 return info_dict
2674
2675 self.post_extract(info_dict)
2676 info_dict, _ = self.pre_process(info_dict, 'after_filter')
2677
093a1710 2678 # The pre-processors may have modified the formats
aebb4f4b 2679 formats = self._get_formats(info_dict)
093a1710 2680
e4221b70 2681 list_only = self.params.get('simulate') == 'list_only'
fa9f30b8 2682 interactive_format_selection = not list_only and self.format_selector == '-'
b7b04c78 2683 if self.params.get('list_thumbnails'):
2684 self.list_thumbnails(info_dict)
b7b04c78 2685 if self.params.get('listsubtitles'):
2686 if 'automatic_captions' in info_dict:
2687 self.list_subtitles(
2688 info_dict['id'], automatic_captions, 'automatic captions')
2689 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
fa9f30b8 2690 if self.params.get('listformats') or interactive_format_selection:
b69fd25c 2691 self.list_formats(info_dict)
169dbde9 2692 if list_only:
b7b04c78 2693 # Without this printing, -F --print-json will not work
169dbde9 2694 self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
c487cf00 2695 return info_dict
bfaae0a7 2696
187986a8 2697 format_selector = self.format_selector
2698 if format_selector is None:
0017d9ad 2699 req_format = self._default_format_spec(info_dict, download=download)
0760b0a7 2700 self.write_debug('Default format spec: %s' % req_format)
187986a8 2701 format_selector = self.build_format_selector(req_format)
317f7ab6 2702
fa9f30b8 2703 while True:
2704 if interactive_format_selection:
2705 req_format = input(
2706 self._format_screen('\nEnter format selector: ', self.Styles.EMPHASIS))
2707 try:
2708 format_selector = self.build_format_selector(req_format)
2709 except SyntaxError as err:
2710 self.report_error(err, tb=False, is_error=False)
2711 continue
2712
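# Besides the formats themselves, the selector is given hints on whether a
# pre-merged (audio+video) format exists and whether only one stream type
# (video-only or audio-only) is available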
85e801a9 2713 formats_to_download = list(format_selector({
fa9f30b8 2714 'formats': formats,
85e801a9 2715 'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
2716 'incomplete_formats': (
2717 # All formats are video-only or
2718 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
2719 # all formats are audio-only
2720 or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats)),
2721 }))
fa9f30b8 2722 if interactive_format_selection and not formats_to_download:
2723 self.report_error('Requested format is not available', tb=False, is_error=False)
2724 continue
2725 break
317f7ab6 2726
dd82ffea 2727 if not formats_to_download:
b7da73eb 2728 if not self.params.get('ignore_no_formats_error'):
c0b6e5c7 2729 raise ExtractorError(
2730 'Requested format is not available. Use --list-formats for a list of available formats',
2731 expected=True, video_id=info_dict['id'], ie=info_dict['extractor'])
b62fa6d7 2732 self.report_warning('Requested format is not available')
2733 # Process what we can, even without any available formats.
2734 formats_to_download = [{}]
a13e6848 2735
0500ee3d 2736 requested_ranges = tuple(self.params.get('download_ranges', lambda *_: [{}])(info_dict, self))
5ec1b6b7 2737 best_format, downloaded_formats = formats_to_download[-1], []
b62fa6d7 2738 if download:
0500ee3d 2739 if best_format and requested_ranges:
5ec1b6b7 2740 def to_screen(*msg):
2741 self.to_screen(f'[info] {info_dict["id"]}: {" ".join(", ".join(variadic(m)) for m in msg)}')
2742
2743 to_screen(f'Downloading {len(formats_to_download)} format(s):',
2744 (f['format_id'] for f in formats_to_download))
0500ee3d 2745 if requested_ranges != ({}, ):
5ec1b6b7 2746 to_screen(f'Downloading {len(requested_ranges)} time ranges:',
fc2ba496 2747 (f'{c["start_time"]:.1f}-{c["end_time"]:.1f}' for c in requested_ranges))
a13e6848 2748 max_downloads_reached = False
5ec1b6b7 2749
0500ee3d 2750 for fmt, chapter in itertools.product(formats_to_download, requested_ranges):
5ec1b6b7 2751 new_info = self._copy_infodict(info_dict)
b7da73eb 2752 new_info.update(fmt)
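# Translate the requested time range/chapter into absolute section_start/section_end,
# offset by any section_start already present in the info dict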
3975b4d2 2753 offset, duration = info_dict.get('section_start') or 0, info_dict.get('duration') or float('inf')
fc2ba496 2754 end_time = offset + min(chapter.get('end_time', duration), duration)
3975b4d2 2755 if chapter or offset:
5ec1b6b7 2756 new_info.update({
3975b4d2 2757 'section_start': offset + chapter.get('start_time', 0),
2576d53a 2758 # duration may not be accurate. So allow deviations <1sec
2759 'section_end': end_time if end_time <= offset + duration + 1 else None,
5ec1b6b7 2760 'section_title': chapter.get('title'),
2761 'section_number': chapter.get('index'),
2762 })
2763 downloaded_formats.append(new_info)
a13e6848 2764 try:
2765 self.process_info(new_info)
2766 except MaxDownloadsReached:
2767 max_downloads_reached = True
415f8d51 2768 self._raise_pending_errors(new_info)
f46e2f9d 2769 # Remove copied info
2770 for key, val in tuple(new_info.items()):
2771 if info_dict.get(key) == val:
2772 new_info.pop(key)
a13e6848 2773 if max_downloads_reached:
2774 break
ebed8b37 2775
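# Record the video in the download archive only if at least one downloaded format
# requested it and none explicitly opted out ('ignore' entries are neutral)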
5ec1b6b7 2776 write_archive = {f.get('__write_download_archive', False) for f in downloaded_formats}
a13e6848 2777 assert write_archive.issubset({True, False, 'ignore'})
2778 if True in write_archive and False not in write_archive:
2779 self.record_download_archive(info_dict)
be72c624 2780
5ec1b6b7 2781 info_dict['requested_downloads'] = downloaded_formats
ed5835b4 2782 info_dict = self.run_all_pps('after_video', info_dict)
a13e6848 2783 if max_downloads_reached:
2784 raise MaxDownloadsReached()
ebed8b37 2785
49a57e70 2786 # We update the info dict with the selected best quality format (backwards compatibility)
be72c624 2787 info_dict.update(best_format)
dd82ffea
JMF
2788 return info_dict
2789
98c70d6f 2790 def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
a504ced0 2791 """Select the requested subtitles and their format"""
d8a58ddc 2792 available_subs, normal_sub_langs = {}, []
98c70d6f
JMF
2793 if normal_subtitles and self.params.get('writesubtitles'):
2794 available_subs.update(normal_subtitles)
d8a58ddc 2795 normal_sub_langs = tuple(normal_subtitles.keys())
98c70d6f
JMF
2796 if automatic_captions and self.params.get('writeautomaticsub'):
2797 for lang, cap_info in automatic_captions.items():
360e1ca5
JMF
2798 if lang not in available_subs:
2799 available_subs[lang] = cap_info
2800
d2c8aadf 2801 if not available_subs or (
2802 not self.params.get('writesubtitles')
2803 and not self.params.get('writeautomaticsub')):
4d171848 2804 return None
a504ced0 2805
d8a58ddc 2806 all_sub_langs = tuple(available_subs.keys())
a504ced0 2807 if self.params.get('allsubtitles', False):
c32b0aab 2808 requested_langs = all_sub_langs
2809 elif self.params.get('subtitleslangs', False):
5314b521 2810 try:
2811 requested_langs = orderedSet_from_options(
2812 self.params.get('subtitleslangs'), {'all': all_sub_langs}, use_regex=True)
2813 except re.error as e:
 2814 raise ValueError(f'Wrong regex for subtitleslangs: {e.pattern}')
d8a58ddc 2815 elif normal_sub_langs:
2816 requested_langs = ['en'] if 'en' in normal_sub_langs else normal_sub_langs[:1]
a504ced0 2817 else:
d8a58ddc 2818 requested_langs = ['en'] if 'en' in all_sub_langs else all_sub_langs[:1]
ad3dc496 2819 if requested_langs:
d2c8aadf 2820 self.to_screen(f'[info] {video_id}: Downloading subtitles: {", ".join(requested_langs)}')
a504ced0
JMF
2821
2822 formats_query = self.params.get('subtitlesformat', 'best')
2823 formats_preference = formats_query.split('/') if formats_query else []
2824 subs = {}
2825 for lang in requested_langs:
2826 formats = available_subs.get(lang)
2827 if formats is None:
86e5f3ed 2828 self.report_warning(f'{lang} subtitles not available for {video_id}')
a504ced0 2829 continue
a504ced0
JMF
2830 for ext in formats_preference:
2831 if ext == 'best':
2832 f = formats[-1]
2833 break
2834 matches = list(filter(lambda f: f['ext'] == ext, formats))
2835 if matches:
2836 f = matches[-1]
2837 break
2838 else:
2839 f = formats[-1]
2840 self.report_warning(
2841 'No subtitle format found matching "%s" for language %s, '
2842 'using %s' % (formats_query, lang, f['ext']))
2843 subs[lang] = f
2844 return subs
2845
bb66c247 2846 def _forceprint(self, key, info_dict):
2847 if info_dict is None:
2848 return
2849 info_copy = info_dict.copy()
2850 info_copy['formats_table'] = self.render_formats_table(info_dict)
2851 info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)
2852 info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
2853 info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))
2854
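# Expand shorthand --print arguments: bare (comma-separated) field names are
# wrapped into '%(field)s' templates and a trailing '=' prints 'field = <JSON value>';
# anything else is treated as a regular output template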
2855 def format_tmpl(tmpl):
48c8424b 2856 mobj = re.fullmatch(r'([\w.:,]|-\d|(?P<dict>{([\w.:,]|-\d)+}))+=?', tmpl)
07a1250e 2857 if not mobj:
2858 return tmpl
48c8424b 2859
2860 fmt = '%({})s'
2861 if tmpl.startswith('{'):
2862 tmpl = f'.{tmpl}'
2863 if tmpl.endswith('='):
2864 tmpl, fmt = tmpl[:-1], '{0} = %({0})#j'
2865 return '\n'.join(map(fmt.format, [tmpl] if mobj.group('dict') else tmpl.split(',')))
8130779d 2866
bb66c247 2867 for tmpl in self.params['forceprint'].get(key, []):
2868 self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))
2869
2870 for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):
5127e92a 2871 filename = self.prepare_filename(info_dict, outtmpl=file_tmpl)
bb66c247 2872 tmpl = format_tmpl(tmpl)
2873 self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
8d93e69d 2874 if self._ensure_dir_exists(filename):
86e5f3ed 2875 with open(filename, 'a', encoding='utf-8') as f:
8d93e69d 2876 f.write(self.evaluate_outtmpl(tmpl, info_copy) + '\n')
ca30f449 2877
d06daf23 2878 def __forced_printings(self, info_dict, filename, incomplete):
53c18592 2879 def print_mandatory(field, actual_field=None):
2880 if actual_field is None:
2881 actual_field = field
d06daf23 2882 if (self.params.get('force%s' % field, False)
53c18592 2883 and (not incomplete or info_dict.get(actual_field) is not None)):
2884 self.to_stdout(info_dict[actual_field])
d06daf23
S
2885
2886 def print_optional(field):
2887 if (self.params.get('force%s' % field, False)
2888 and info_dict.get(field) is not None):
2889 self.to_stdout(info_dict[field])
2890
53c18592 2891 info_dict = info_dict.copy()
2892 if filename is not None:
2893 info_dict['filename'] = filename
2894 if info_dict.get('requested_formats') is not None:
2895 # For RTMP URLs, also include the playpath
2896 info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
10331a26 2897 elif info_dict.get('url'):
53c18592 2898 info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2899
bb66c247 2900 if (self.params.get('forcejson')
2901 or self.params['forceprint'].get('video')
2902 or self.params['print_to_file'].get('video')):
2b8a2973 2903 self.post_extract(info_dict)
bb66c247 2904 self._forceprint('video', info_dict)
53c18592 2905
d06daf23
S
2906 print_mandatory('title')
2907 print_mandatory('id')
53c18592 2908 print_mandatory('url', 'urls')
d06daf23
S
2909 print_optional('thumbnail')
2910 print_optional('description')
53c18592 2911 print_optional('filename')
b868936c 2912 if self.params.get('forceduration') and info_dict.get('duration') is not None:
d06daf23
S
2913 self.to_stdout(formatSeconds(info_dict['duration']))
2914 print_mandatory('format')
53c18592 2915
2b8a2973 2916 if self.params.get('forcejson'):
6e84b215 2917 self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
d06daf23 2918
e8e73840 2919 def dl(self, name, info, subtitle=False, test=False):
88acdbc2 2920 if not info.get('url'):
1151c407 2921 self.raise_no_formats(info, True)
e8e73840 2922
2923 if test:
2924 verbose = self.params.get('verbose')
2925 params = {
2926 'test': True,
a169858f 2927 'quiet': self.params.get('quiet') or not verbose,
e8e73840 2928 'verbose': verbose,
2929 'noprogress': not verbose,
2930 'nopart': True,
2931 'skip_unavailable_fragments': False,
2932 'keep_fragments': False,
2933 'overwrites': True,
2934 '_no_ytdl_file': True,
2935 }
2936 else:
2937 params = self.params
96fccc10 2938 fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
e8e73840 2939 if not test:
2940 for ph in self._progress_hooks:
2941 fd.add_progress_hook(ph)
42676437
M
2942 urls = '", "'.join(
2943 (f['url'].split(',')[0] + ',<data>' if f['url'].startswith('data:') else f['url'])
2944 for f in info.get('requested_formats', []) or [info])
3a408f9d 2945 self.write_debug(f'Invoking {fd.FD_NAME} downloader on "{urls}"')
03b4de72 2946
adbc4ec4
THD
 2947 # Note: Ideally, info should be deep-copied so that hooks cannot modify it,
 2948 # but it may contain objects that are not deep-copyable
2949 new_info = self._copy_infodict(info)
e8e73840 2950 if new_info.get('http_headers') is None:
2951 new_info['http_headers'] = self._calc_headers(new_info)
2952 return fd.download(name, new_info, subtitle)
2953
e04938ab 2954 def existing_file(self, filepaths, *, default_overwrite=True):
2955 existing_files = list(filter(os.path.exists, orderedSet(filepaths)))
2956 if existing_files and not self.params.get('overwrites', default_overwrite):
2957 return existing_files[0]
2958
2959 for file in existing_files:
2960 self.report_file_delete(file)
2961 os.remove(file)
2962 return None
2963
8222d8de 2964 def process_info(self, info_dict):
09b49e1f 2965 """Process a single resolved IE result. (Modifies it in-place)"""
8222d8de
JMF
2966
2967 assert info_dict.get('_type', 'video') == 'video'
f46e2f9d 2968 original_infodict = info_dict
fd288278 2969
4513a41a 2970 if 'format' not in info_dict and 'ext' in info_dict:
8222d8de
JMF
2971 info_dict['format'] = info_dict['ext']
2972
c77495e3 2973 if self._match_entry(info_dict) is not None:
9e907ebd 2974 info_dict['__write_download_archive'] = 'ignore'
8222d8de
JMF
2975 return
2976
09b49e1f 2977 # Does nothing under normal operation - for backward compatibility of process_info
277d6ff5 2978 self.post_extract(info_dict)
119e40ef 2979
2980 def replace_info_dict(new_info):
2981 nonlocal info_dict
2982 if new_info == info_dict:
2983 return
2984 info_dict.clear()
2985 info_dict.update(new_info)
2986
2987 new_info, _ = self.pre_process(info_dict, 'video')
2988 replace_info_dict(new_info)
0c14d66a 2989 self._num_downloads += 1
8222d8de 2990
dcf64d43 2991 # info_dict['_filename'] needs to be set for backward compatibility
de6000d9 2992 info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2993 temp_filename = self.prepare_filename(info_dict, 'temp')
0202b52a 2994 files_to_move = {}
8222d8de
JMF
2995
2996 # Forced printings
4513a41a 2997 self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
8222d8de 2998
ca6d59d2 2999 def check_max_downloads():
3000 if self._num_downloads >= float(self.params.get('max_downloads') or 'inf'):
3001 raise MaxDownloadsReached()
3002
b7b04c78 3003 if self.params.get('simulate'):
9e907ebd 3004 info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
ca6d59d2 3005 check_max_downloads()
8222d8de
JMF
3006 return
3007
de6000d9 3008 if full_filename is None:
8222d8de 3009 return
e92caff5 3010 if not self._ensure_dir_exists(encodeFilename(full_filename)):
0202b52a 3011 return
e92caff5 3012 if not self._ensure_dir_exists(encodeFilename(temp_filename)):
8222d8de
JMF
3013 return
3014
80c03fa9 3015 if self._write_description('video', info_dict,
3016 self.prepare_filename(info_dict, 'description')) is None:
3017 return
3018
3019 sub_files = self._write_subtitles(info_dict, temp_filename)
3020 if sub_files is None:
3021 return
3022 files_to_move.update(dict(sub_files))
3023
3024 thumb_files = self._write_thumbnails(
3025 'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
3026 if thumb_files is None:
3027 return
3028 files_to_move.update(dict(thumb_files))
8222d8de 3029
80c03fa9 3030 infofn = self.prepare_filename(info_dict, 'infojson')
3031 _infojson_written = self._write_info_json('video', info_dict, infofn)
3032 if _infojson_written:
dac5df5a 3033 info_dict['infojson_filename'] = infofn
e75bb0d6 3034 # For backward compatibility, even though it was a private field
80c03fa9 3035 info_dict['__infojson_filename'] = infofn
3036 elif _infojson_written is None:
3037 return
3038
3039 # Note: Annotations are deprecated
3040 annofn = None
1fb07d10 3041 if self.params.get('writeannotations', False):
de6000d9 3042 annofn = self.prepare_filename(info_dict, 'annotation')
80c03fa9 3043 if annofn:
e92caff5 3044 if not self._ensure_dir_exists(encodeFilename(annofn)):
0202b52a 3045 return
0c3d0f51 3046 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
6febd1c1 3047 self.to_screen('[info] Video annotations are already present')
ffddb112
RA
3048 elif not info_dict.get('annotations'):
3049 self.report_warning('There are no annotations to write.')
7b6fefc9
PH
3050 else:
3051 try:
6febd1c1 3052 self.to_screen('[info] Writing video annotations to: ' + annofn)
86e5f3ed 3053 with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
7b6fefc9
PH
3054 annofile.write(info_dict['annotations'])
3055 except (KeyError, TypeError):
6febd1c1 3056 self.report_warning('There are no annotations to write.')
86e5f3ed 3057 except OSError:
6febd1c1 3058 self.report_error('Cannot write annotations file: ' + annofn)
7b6fefc9 3059 return
1fb07d10 3060
732044af 3061 # Write internet shortcut files
08438d2c 3062 def _write_link_file(link_type):
60f3e995 3063 url = try_get(info_dict['webpage_url'], iri_to_uri)
3064 if not url:
3065 self.report_warning(
3066 f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
3067 return True
08438d2c 3068 linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
0e6b018a
Z
3069 if not self._ensure_dir_exists(encodeFilename(linkfn)):
3070 return False
10e3742e 3071 if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
08438d2c 3072 self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
3073 return True
3074 try:
3075 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
86e5f3ed 3076 with open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
3077 newline='\r\n' if link_type == 'url' else '\n') as linkfile:
60f3e995 3078 template_vars = {'url': url}
08438d2c 3079 if link_type == 'desktop':
3080 template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
3081 linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
86e5f3ed 3082 except OSError:
08438d2c 3083 self.report_error(f'Cannot write internet shortcut {linkfn}')
3084 return False
732044af 3085 return True
3086
08438d2c 3087 write_links = {
3088 'url': self.params.get('writeurllink'),
3089 'webloc': self.params.get('writewebloclink'),
3090 'desktop': self.params.get('writedesktoplink'),
3091 }
3092 if self.params.get('writelink'):
3093 link_type = ('webloc' if sys.platform == 'darwin'
3094 else 'desktop' if sys.platform.startswith('linux')
3095 else 'url')
3096 write_links[link_type] = True
3097
3098 if any(should_write and not _write_link_file(link_type)
3099 for link_type, should_write in write_links.items()):
3100 return
732044af 3101
415f8d51 3102 new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
3103 replace_info_dict(new_info)
56d868db 3104
a13e6848 3105 if self.params.get('skip_download'):
56d868db 3106 info_dict['filepath'] = temp_filename
3107 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
3108 info_dict['__files_to_move'] = files_to_move
f46e2f9d 3109 replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
9e907ebd 3110 info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
56d868db 3111 else:
3112 # Download
b868936c 3113 info_dict.setdefault('__postprocessors', [])
4340deca 3114 try:
0202b52a 3115
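# Reuse an already-downloaded file if present, checking both the
# post-conversion (final_ext) name and the original extension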
e04938ab 3116 def existing_video_file(*filepaths):
6b591b29 3117 ext = info_dict.get('ext')
e04938ab 3118 converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
3119 file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
3120 default_overwrite=False)
3121 if file:
3122 info_dict['ext'] = os.path.splitext(file)[1][1:]
3123 return file
0202b52a 3124
7b2c3f47 3125 fd, success = None, True
fccf90e7 3126 if info_dict.get('protocol') or info_dict.get('url'):
56ba69e4 3127 fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
71df9b7f 3128 if fd is not FFmpegFD and 'no-direct-merge' not in self.params['compat_opts'] and (
56ba69e4 3129 info_dict.get('section_start') or info_dict.get('section_end')):
7b2c3f47 3130 msg = ('This format cannot be partially downloaded' if FFmpegFD.available()
56ba69e4 3131 else 'You have requested downloading the video partially, but ffmpeg is not installed')
3132 self.report_error(f'{msg}. Aborting')
5ec1b6b7 3133 return
5ec1b6b7 3134
4340deca 3135 if info_dict.get('requested_formats') is not None:
81cd954a 3136 requested_formats = info_dict['requested_formats']
0202b52a 3137 old_ext = info_dict['ext']
4e3b637d 3138 if self.params.get('merge_output_format') is None:
4e3b637d 3139 if (info_dict['ext'] == 'webm'
3140 and info_dict.get('thumbnails')
3141 # check with type instead of pp_key, __name__, or isinstance
 3142 # since we don't want any custom PPs to trigger this
c487cf00 3143 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])): # noqa: E721
4e3b637d 3144 info_dict['ext'] = 'mkv'
3145 self.report_warning(
3146 'webm doesn\'t support embedding a thumbnail, mkv will be used')
124bc071 3147 new_ext = info_dict['ext']
0202b52a 3148
124bc071 3149 def correct_ext(filename, ext=new_ext):
96fccc10 3150 if filename == '-':
3151 return filename
0202b52a 3152 filename_real_ext = os.path.splitext(filename)[1][1:]
3153 filename_wo_ext = (
3154 os.path.splitext(filename)[0]
124bc071 3155 if filename_real_ext in (old_ext, new_ext)
0202b52a 3156 else filename)
86e5f3ed 3157 return f'{filename_wo_ext}.{ext}'
0202b52a 3158
38c6902b 3159 # Ensure filename always has a correct extension for successful merge
0202b52a 3160 full_filename = correct_ext(full_filename)
3161 temp_filename = correct_ext(temp_filename)
e04938ab 3162 dl_filename = existing_video_file(full_filename, temp_filename)
1ea24129 3163 info_dict['__real_download'] = False
18e674b4 3164
7b2c3f47 3165 merger = FFmpegMergerPP(self)
adbc4ec4 3166 downloaded = []
dbf5416a 3167 if dl_filename is not None:
6c7274ec 3168 self.report_file_already_downloaded(dl_filename)
adbc4ec4
THD
3169 elif fd:
3170 for f in requested_formats if fd != FFmpegFD else []:
3171 f['filepath'] = fname = prepend_extension(
3172 correct_ext(temp_filename, info_dict['ext']),
3173 'f%s' % f['format_id'], info_dict['ext'])
3174 downloaded.append(fname)
dbf5416a 3175 info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
3176 success, real_download = self.dl(temp_filename, info_dict)
3177 info_dict['__real_download'] = real_download
18e674b4 3178 else:
18e674b4 3179 if self.params.get('allow_unplayable_formats'):
3180 self.report_warning(
3181 'You have requested merging of multiple formats '
3182 'while also allowing unplayable formats to be downloaded. '
3183 'The formats won\'t be merged to prevent data corruption.')
3184 elif not merger.available:
e8969bda 3185 msg = 'You have requested merging of multiple formats but ffmpeg is not installed'
3186 if not self.params.get('ignoreerrors'):
3187 self.report_error(f'{msg}. Aborting due to --abort-on-error')
3188 return
3189 self.report_warning(f'{msg}. The formats won\'t be merged')
18e674b4 3190
96fccc10 3191 if temp_filename == '-':
adbc4ec4 3192 reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
96fccc10 3193 else 'but the formats are incompatible for simultaneous download' if merger.available
3194 else 'but ffmpeg is not installed')
3195 self.report_warning(
3196 f'You have requested downloading multiple formats to stdout {reason}. '
3197 'The formats will be streamed one after the other')
3198 fname = temp_filename
dbf5416a 3199 for f in requested_formats:
3200 new_info = dict(info_dict)
3201 del new_info['requested_formats']
3202 new_info.update(f)
96fccc10 3203 if temp_filename != '-':
124bc071 3204 fname = prepend_extension(
3205 correct_ext(temp_filename, new_info['ext']),
3206 'f%s' % f['format_id'], new_info['ext'])
96fccc10 3207 if not self._ensure_dir_exists(fname):
3208 return
a21e0ab1 3209 f['filepath'] = fname
96fccc10 3210 downloaded.append(fname)
dbf5416a 3211 partial_success, real_download = self.dl(fname, new_info)
3212 info_dict['__real_download'] = info_dict['__real_download'] or real_download
3213 success = success and partial_success
adbc4ec4
THD
3214
3215 if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
3216 info_dict['__postprocessors'].append(merger)
3217 info_dict['__files_to_merge'] = downloaded
3218 # Even if there were no downloads, it is being merged only now
3219 info_dict['__real_download'] = True
3220 else:
3221 for file in downloaded:
3222 files_to_move[file] = None
4340deca
P
3223 else:
3224 # Just a single file
e04938ab 3225 dl_filename = existing_video_file(full_filename, temp_filename)
6c7274ec 3226 if dl_filename is None or dl_filename == temp_filename:
3227 # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
3228 # So we should try to resume the download
e8e73840 3229 success, real_download = self.dl(temp_filename, info_dict)
0202b52a 3230 info_dict['__real_download'] = real_download
6c7274ec 3231 else:
3232 self.report_file_already_downloaded(dl_filename)
0202b52a 3233
0202b52a 3234 dl_filename = dl_filename or temp_filename
c571435f 3235 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
0202b52a 3236
3158150c 3237 except network_exceptions as err:
7960b056 3238 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
4340deca 3239 return
86e5f3ed 3240 except OSError as err:
4340deca
P
3241 raise UnavailableVideoError(err)
3242 except (ContentTooShortError, ) as err:
86e5f3ed 3243 self.report_error(f'content too short (expected {err.expected} bytes and served {err.downloaded})')
4340deca 3244 return
8222d8de 3245
415f8d51 3246 self._raise_pending_errors(info_dict)
de6000d9 3247 if success and full_filename != '-':
f17f8651 3248
fd7cfb64 3249 def fixup():
3250 do_fixup = True
3251 fixup_policy = self.params.get('fixup')
3252 vid = info_dict['id']
3253
3254 if fixup_policy in ('ignore', 'never'):
3255 return
3256 elif fixup_policy == 'warn':
3fe75fdc 3257 do_fixup = 'warn'
f89b3e2d 3258 elif fixup_policy != 'force':
3259 assert fixup_policy in ('detect_or_warn', None)
3260 if not info_dict.get('__real_download'):
3261 do_fixup = False
fd7cfb64 3262
3263 def ffmpeg_fixup(cndn, msg, cls):
3fe75fdc 3264 if not (do_fixup and cndn):
fd7cfb64 3265 return
3fe75fdc 3266 elif do_fixup == 'warn':
fd7cfb64 3267 self.report_warning(f'{vid}: {msg}')
3268 return
3269 pp = cls(self)
3270 if pp.available:
3271 info_dict['__postprocessors'].append(pp)
3272 else:
3273 self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
3274
3275 stretched_ratio = info_dict.get('stretched_ratio')
ca9def71
LNO
3276 ffmpeg_fixup(stretched_ratio not in (1, None),
3277 f'Non-uniform pixel ratio {stretched_ratio}',
3278 FFmpegFixupStretchedPP)
fd7cfb64 3279
993191c0 3280 downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
24146491 3281 downloader = downloader.FD_NAME if downloader else None
adbc4ec4 3282
ca9def71
LNO
3283 ext = info_dict.get('ext')
3284 postprocessed_by_ffmpeg = info_dict.get('requested_formats') or any((
3285 isinstance(pp, FFmpegVideoConvertorPP)
3286 and resolve_recode_mapping(ext, pp.mapping)[0] not in (ext, None)
3287 ) for pp in self._pps['post_process'])
3288
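# The DASH-m4a and MPEG-TS/MOOV fixups below are unnecessary when ffmpeg
# will already remux or convert the file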
3289 if not postprocessed_by_ffmpeg:
3290 ffmpeg_fixup(ext == 'm4a' and info_dict.get('container') == 'm4a_dash',
f2df4071 3291 'writing DASH m4a. Only some players support this container',
3292 FFmpegFixupM4aPP)
24146491 3293 ffmpeg_fixup(downloader == 'hlsnative' and not self.params.get('hls_use_mpegts')
494f5230 3294 or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None,
adbc4ec4
THD
3295 'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
3296 FFmpegFixupM3u8PP)
3297 ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD',
3298 'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
3299
24146491 3300 ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
3301 ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed duration detected', FFmpegFixupDurationPP)
fd7cfb64 3302
3303 fixup()
8222d8de 3304 try:
f46e2f9d 3305 replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
af819c21 3306 except PostProcessingError as err:
3307 self.report_error('Postprocessing: %s' % str(err))
8222d8de 3308 return
ab8e5e51
AM
3309 try:
3310 for ph in self._post_hooks:
23c1a667 3311 ph(info_dict['filepath'])
ab8e5e51
AM
3312 except Exception as err:
3313 self.report_error('post hooks: %s' % str(err))
3314 return
9e907ebd 3315 info_dict['__write_download_archive'] = True
2d30509f 3316
c487cf00 3317 assert info_dict is original_infodict # Make sure the info_dict was modified in-place
a13e6848 3318 if self.params.get('force_write_download_archive'):
9e907ebd 3319 info_dict['__write_download_archive'] = True
ca6d59d2 3320 check_max_downloads()
8222d8de 3321
aa9369a2 3322 def __download_wrapper(self, func):
3323 @functools.wraps(func)
3324 def wrapper(*args, **kwargs):
3325 try:
3326 res = func(*args, **kwargs)
3327 except UnavailableVideoError as e:
3328 self.report_error(e)
b222c271 3329 except DownloadCancelled as e:
3330 self.to_screen(f'[info] {e}')
3331 if not self.params.get('break_per_url'):
3332 raise
fd404bec 3333 self._num_downloads = 0
aa9369a2 3334 else:
3335 if self.params.get('dump_single_json', False):
3336 self.post_extract(res)
3337 self.to_stdout(json.dumps(self.sanitize_info(res)))
3338 return wrapper
3339
8222d8de
JMF
3340 def download(self, url_list):
3341 """Download a given list of URLs."""
aa9369a2 3342 url_list = variadic(url_list) # Passing a single URL is a common mistake
bf1824b3 3343 outtmpl = self.params['outtmpl']['default']
3089bc74
S
3344 if (len(url_list) > 1
3345 and outtmpl != '-'
3346 and '%' not in outtmpl
3347 and self.params.get('max_downloads') != 1):
acd69589 3348 raise SameFileError(outtmpl)
8222d8de
JMF
3349
3350 for url in url_list:
aa9369a2 3351 self.__download_wrapper(self.extract_info)(
3352 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
8222d8de
JMF
3353
3354 return self._download_retcode
3355
1dcc4c0c 3356 def download_with_info_file(self, info_filename):
31bd3925
JMF
3357 with contextlib.closing(fileinput.FileInput(
3358 [info_filename], mode='r',
3359 openhook=fileinput.hook_encoded('utf-8'))) as f:
 3360 # FileInput doesn't have a read method, so we can't call json.load
8012d892 3361 info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
d4943898 3362 try:
aa9369a2 3363 self.__download_wrapper(self.process_ie_result)(info, download=True)
f2ebc5c7 3364 except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
bf5f605e 3365 if not isinstance(e, EntryNotInPlaylist):
3366 self.to_stderr('\r')
d4943898
JMF
3367 webpage_url = info.get('webpage_url')
3368 if webpage_url is not None:
aa9369a2 3369 self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
d4943898
JMF
3370 return self.download([webpage_url])
3371 else:
3372 raise
3373 return self._download_retcode
1dcc4c0c 3374
cb202fd2 3375 @staticmethod
8012d892 3376 def sanitize_info(info_dict, remove_private_keys=False):
3377 ''' Sanitize the infodict for converting to json '''
3ad56b42 3378 if info_dict is None:
3379 return info_dict
6e84b215 3380 info_dict.setdefault('epoch', int(time.time()))
6a5a30f9 3381 info_dict.setdefault('_type', 'video')
b5e7a2e6 3382 info_dict.setdefault('_version', {
3383 'version': __version__,
3384 'current_git_head': current_git_head(),
3385 'release_git_head': RELEASE_GIT_HEAD,
3386 'repository': REPOSITORY,
3387 })
09b49e1f 3388
8012d892 3389 if remove_private_keys:
0a5a191a 3390 reject = lambda k, v: v is None or k.startswith('__') or k in {
f46e2f9d 3391 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
0a5a191a 3392 'entries', 'filepath', '_filename', 'infojson_filename', 'original_url', 'playlist_autonumber',
6e84b215 3393 }
ae8f99e6 3394 else:
09b49e1f 3395 reject = lambda k, v: False
adbc4ec4
THD
3396
3397 def filter_fn(obj):
3398 if isinstance(obj, dict):
3399 return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
3400 elif isinstance(obj, (list, tuple, set, LazyList)):
3401 return list(map(filter_fn, obj))
3402 elif obj is None or isinstance(obj, (str, int, float, bool)):
3403 return obj
3404 else:
3405 return repr(obj)
3406
5226731e 3407 return filter_fn(info_dict)
cb202fd2 3408
8012d892 3409 @staticmethod
3410 def filter_requested_info(info_dict, actually_filter=True):
3411 ''' Alias of sanitize_info for backward compatibility '''
3412 return YoutubeDL.sanitize_info(info_dict, actually_filter)
3413
43d7f5a5 3414 def _delete_downloaded_files(self, *files_to_delete, info={}, msg=None):
3415 for filename in set(filter(None, files_to_delete)):
3416 if msg:
3417 self.to_screen(msg % filename)
3418 try:
3419 os.remove(filename)
3420 except OSError:
3421 self.report_warning(f'Unable to delete file {filename}')
3422 if filename in info.get('__files_to_move', []): # NB: Delete even if None
3423 del info['__files_to_move'][filename]
3424
ed5835b4 3425 @staticmethod
3426 def post_extract(info_dict):
3427 def actual_post_extract(info_dict):
3428 if info_dict.get('_type') in ('playlist', 'multi_video'):
3429 for video_dict in info_dict.get('entries', {}):
3430 actual_post_extract(video_dict or {})
3431 return
3432
09b49e1f 3433 post_extractor = info_dict.pop('__post_extractor', None) or (lambda: {})
3434 info_dict.update(post_extractor())
ed5835b4 3435
3436 actual_post_extract(info_dict or {})
3437
dcf64d43 3438 def run_pp(self, pp, infodict):
5bfa4862 3439 files_to_delete = []
dcf64d43 3440 if '__files_to_move' not in infodict:
3441 infodict['__files_to_move'] = {}
b1940459 3442 try:
3443 files_to_delete, infodict = pp.run(infodict)
3444 except PostProcessingError as e:
3445 # Must be True and not 'only_download'
3446 if self.params.get('ignoreerrors') is True:
3447 self.report_error(e)
3448 return infodict
3449 raise
3450
5bfa4862 3451 if not files_to_delete:
dcf64d43 3452 return infodict
5bfa4862 3453 if self.params.get('keepvideo', False):
3454 for f in files_to_delete:
dcf64d43 3455 infodict['__files_to_move'].setdefault(f, '')
5bfa4862 3456 else:
43d7f5a5 3457 self._delete_downloaded_files(
3458 *files_to_delete, info=infodict, msg='Deleting original file %s (pass -k to keep)')
dcf64d43 3459 return infodict
5bfa4862 3460
ed5835b4 3461 def run_all_pps(self, key, info, *, additional_pps=None):
bb66c247 3462 self._forceprint(key, info)
ed5835b4 3463 for pp in (additional_pps or []) + self._pps[key]:
dc5f409c 3464 info = self.run_pp(pp, info)
ed5835b4 3465 return info
277d6ff5 3466
56d868db 3467 def pre_process(self, ie_info, key='pre_process', files_to_move=None):
5bfa4862 3468 info = dict(ie_info)
56d868db 3469 info['__files_to_move'] = files_to_move or {}
415f8d51 3470 try:
3471 info = self.run_all_pps(key, info)
3472 except PostProcessingError as err:
3473 msg = f'Preprocessing: {err}'
3474 info.setdefault('__pending_error', msg)
3475 self.report_error(msg, is_error=False)
56d868db 3476 return info, info.pop('__files_to_move', None)
5bfa4862 3477
f46e2f9d 3478 def post_process(self, filename, info, files_to_move=None):
8222d8de 3479 """Run all the postprocessors on the given file."""
8222d8de 3480 info['filepath'] = filename
dcf64d43 3481 info['__files_to_move'] = files_to_move or {}
ed5835b4 3482 info = self.run_all_pps('post_process', info, additional_pps=info.get('__postprocessors'))
dcf64d43 3483 info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3484 del info['__files_to_move']
ed5835b4 3485 return self.run_all_pps('after_move', info)
c1c9a79c 3486
5db07df6 3487 def _make_archive_id(self, info_dict):
e9fef7ee
S
3488 video_id = info_dict.get('id')
3489 if not video_id:
3490 return
5db07df6
PH
 3491 # Future-proof against any change in case
 3492 # and keep backwards compatibility with prior versions
e9fef7ee 3493 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
7012b23c 3494 if extractor is None:
1211bb6d
S
3495 url = str_or_none(info_dict.get('url'))
3496 if not url:
3497 return
e9fef7ee 3498 # Try to find matching extractor for the URL and take its ie_key
8b7491c8 3499 for ie_key, ie in self._ies.items():
1211bb6d 3500 if ie.suitable(url):
8b7491c8 3501 extractor = ie_key
e9fef7ee
S
3502 break
3503 else:
3504 return
0647d925 3505 return make_archive_id(extractor, video_id)
5db07df6
PH
3506
3507 def in_download_archive(self, info_dict):
ae103564 3508 if not self.archive:
5db07df6
PH
3509 return False
3510
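# Check the current archive id as well as any legacy ids
# provided by the extractor via _old_archive_ids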
1e8fe57e 3511 vid_ids = [self._make_archive_id(info_dict)]
c200096c 3512 vid_ids.extend(info_dict.get('_old_archive_ids') or [])
1e8fe57e 3513 return any(id_ in self.archive for id_ in vid_ids)
c1c9a79c
PH
3514
3515 def record_download_archive(self, info_dict):
3516 fn = self.params.get('download_archive')
3517 if fn is None:
3518 return
5db07df6
PH
3519 vid_id = self._make_archive_id(info_dict)
3520 assert vid_id
ae103564 3521
a13e6848 3522 self.write_debug(f'Adding to archive: {vid_id}')
9c935fbc 3523 if is_path_like(fn):
ae103564 3524 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3525 archive_file.write(vid_id + '\n')
a45e8619 3526 self.archive.add(vid_id)
dd82ffea 3527
8c51aa65 3528 @staticmethod
8abeeb94 3529 def format_resolution(format, default='unknown'):
9359f3d4 3530 if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
fb04e403 3531 return 'audio only'
f49d89ee
PH
3532 if format.get('resolution') is not None:
3533 return format['resolution']
35615307 3534 if format.get('width') and format.get('height'):
ff51ed58 3535 return '%dx%d' % (format['width'], format['height'])
35615307 3536 elif format.get('height'):
ff51ed58 3537 return '%sp' % format['height']
35615307 3538 elif format.get('width'):
ff51ed58 3539 return '%dx?' % format['width']
3540 return default
8c51aa65 3541
8130779d 3542 def _list_format_headers(self, *headers):
3543 if self.params.get('listformats_table', True) is not False:
591bb9d3 3544 return [self._format_out(header, self.Styles.HEADERS) for header in headers]
8130779d 3545 return headers
3546
c57f7757
PH
3547 def _format_note(self, fdict):
3548 res = ''
3549 if fdict.get('ext') in ['f4f', 'f4m']:
f304da8a 3550 res += '(unsupported)'
32f90364
PH
3551 if fdict.get('language'):
3552 if res:
3553 res += ' '
f304da8a 3554 res += '[%s]' % fdict['language']
c57f7757 3555 if fdict.get('format_note') is not None:
f304da8a 3556 if res:
3557 res += ' '
3558 res += fdict['format_note']
c57f7757 3559 if fdict.get('tbr') is not None:
f304da8a 3560 if res:
3561 res += ', '
3562 res += '%4dk' % fdict['tbr']
c57f7757
PH
3563 if fdict.get('container') is not None:
3564 if res:
3565 res += ', '
3566 res += '%s container' % fdict['container']
3089bc74
S
3567 if (fdict.get('vcodec') is not None
3568 and fdict.get('vcodec') != 'none'):
c57f7757
PH
3569 if res:
3570 res += ', '
3571 res += fdict['vcodec']
91c7271a 3572 if fdict.get('vbr') is not None:
c57f7757
PH
3573 res += '@'
3574 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3575 res += 'video@'
3576 if fdict.get('vbr') is not None:
3577 res += '%4dk' % fdict['vbr']
fbb21cf5 3578 if fdict.get('fps') is not None:
5d583bdf
S
3579 if res:
3580 res += ', '
3581 res += '%sfps' % fdict['fps']
c57f7757
PH
3582 if fdict.get('acodec') is not None:
3583 if res:
3584 res += ', '
3585 if fdict['acodec'] == 'none':
3586 res += 'video only'
3587 else:
3588 res += '%-5s' % fdict['acodec']
3589 elif fdict.get('abr') is not None:
3590 if res:
3591 res += ', '
3592 res += 'audio'
3593 if fdict.get('abr') is not None:
3594 res += '@%3dk' % fdict['abr']
3595 if fdict.get('asr') is not None:
3596 res += ' (%5dHz)' % fdict['asr']
3597 if fdict.get('filesize') is not None:
3598 if res:
3599 res += ', '
3600 res += format_bytes(fdict['filesize'])
9732d77e
PH
3601 elif fdict.get('filesize_approx') is not None:
3602 if res:
3603 res += ', '
3604 res += '~' + format_bytes(fdict['filesize_approx'])
c57f7757 3605 return res
91c7271a 3606
aebb4f4b 3607 def _get_formats(self, info_dict):
3608 if info_dict.get('formats') is None:
3609 if info_dict.get('url') and info_dict.get('_type', 'video') == 'video':
3610 return [info_dict]
3611 return []
3612 return info_dict['formats']
b69fd25c 3613
aebb4f4b 3614 def render_formats_table(self, info_dict):
3615 formats = self._get_formats(info_dict)
3616 if not formats:
3617 return
8130779d 3618 if self.params.get('listformats_table', True) is False:
76d321f6 3619 table = [
3620 [
3621 format_field(f, 'format_id'),
3622 format_field(f, 'ext'),
3623 self.format_resolution(f),
8130779d 3624 self._format_note(f)
d5d1df8a 3625 ] for f in formats if (f.get('preference') or 0) >= -1000]
8130779d 3626 return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)
3627
d816f61f 3628 def simplified_codec(f, field):
3629 assert field in ('acodec', 'vcodec')
3630 codec = f.get(field, 'unknown')
f5ea4748 3631 if not codec:
3632 return 'unknown'
3633 elif codec != 'none':
d816f61f 3634 return '.'.join(codec.split('.')[:4])
3635
3636 if field == 'vcodec' and f.get('acodec') == 'none':
3637 return 'images'
3638 elif field == 'acodec' and f.get('vcodec') == 'none':
3639 return ''
3640 return self._format_out('audio only' if field == 'vcodec' else 'video only',
3641 self.Styles.SUPPRESS)
3642
591bb9d3 3643 delim = self._format_out('\u2502', self.Styles.DELIM, '|', test_encoding=True)
8130779d 3644 table = [
3645 [
591bb9d3 3646 self._format_out(format_field(f, 'format_id'), self.Styles.ID),
8130779d 3647 format_field(f, 'ext'),
3648 format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
563e0bf8 3649 format_field(f, 'fps', '\t%d', func=round),
8130779d 3650 format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
b8ed0f15 3651 format_field(f, 'audio_channels', '\t%s'),
8130779d 3652 delim,
3653 format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
563e0bf8 3654 format_field(f, 'tbr', '\t%dk', func=round),
8130779d 3655 shorten_protocol_name(f.get('protocol', '')),
3656 delim,
d816f61f 3657 simplified_codec(f, 'vcodec'),
563e0bf8 3658 format_field(f, 'vbr', '\t%dk', func=round),
d816f61f 3659 simplified_codec(f, 'acodec'),
563e0bf8 3660 format_field(f, 'abr', '\t%dk', func=round),
ae61d108 3661 format_field(f, 'asr', '\t%s', func=format_decimal_suffix),
8130779d 3662 join_nonempty(
591bb9d3 3663 self._format_out('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
8130779d 3664 format_field(f, 'language', '[%s]'),
3665 join_nonempty(format_field(f, 'format_note'),
3666 format_field(f, 'container', ignore=(None, f.get('ext'))),
3667 delim=', '),
3668 delim=' '),
3669 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3670 header_line = self._list_format_headers(
b8ed0f15 3671 'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', 'CH', delim, '\tFILESIZE', '\tTBR', 'PROTO',
8130779d 3672 delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
3673
3674 return render_table(
3675 header_line, table, hide_empty=True,
591bb9d3 3676 delim=self._format_out('\u2500', self.Styles.DELIM, '-', test_encoding=True))
8130779d 3677
3678 def render_thumbnails_table(self, info_dict):
88f23a18 3679 thumbnails = list(info_dict.get('thumbnails') or [])
cfb56d1a 3680 if not thumbnails:
8130779d 3681 return None
3682 return render_table(
ec11a9f4 3683 self._list_format_headers('ID', 'Width', 'Height', 'URL'),
177662e0 3684 [[t.get('id'), t.get('width') or 'unknown', t.get('height') or 'unknown', t['url']] for t in thumbnails])
2412044c 3685
8130779d 3686 def render_subtitles_table(self, video_id, subtitles):
2412044c 3687 def _row(lang, formats):
49c258e1 3688 exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
2412044c 3689 if len(set(names)) == 1:
7aee40c1 3690 names = [] if names[0] == 'unknown' else names[:1]
2412044c 3691 return [lang, ', '.join(names), ', '.join(exts)]
3692
8130779d 3693 if not subtitles:
3694 return None
3695 return render_table(
ec11a9f4 3696 self._list_format_headers('Language', 'Name', 'Formats'),
2412044c 3697 [_row(lang, formats) for lang, formats in subtitles.items()],
8130779d 3698 hide_empty=True)
3699
3700 def __list_table(self, video_id, name, func, *args):
3701 table = func(*args)
3702 if not table:
3703 self.to_screen(f'{video_id} has no {name}')
3704 return
3705 self.to_screen(f'[info] Available {name} for {video_id}:')
3706 self.to_stdout(table)
3707
3708 def list_formats(self, info_dict):
3709 self.__list_table(info_dict['id'], 'formats', self.render_formats_table, info_dict)
3710
3711 def list_thumbnails(self, info_dict):
3712 self.__list_table(info_dict['id'], 'thumbnails', self.render_thumbnails_table, info_dict)
3713
3714 def list_subtitles(self, video_id, subtitles, name='subtitles'):
3715 self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles)
a504ced0 3716
dca08720
PH
3717 def urlopen(self, req):
3718 """ Start an HTTP download """
f9934b96 3719 if isinstance(req, str):
67dda517 3720 req = sanitized_Request(req)
19a41fc6 3721 return self._opener.open(req, timeout=self._socket_timeout)
dca08720
PH
3722
3723 def print_debug_header(self):
3724 if not self.params.get('verbose'):
3725 return
49a57e70 3726
a057779d 3727 from . import _IN_CLI # Must be delayed import
3728
560738f3 3729 # These imports can be slow. So import them only as needed
3730 from .extractor.extractors import _LAZY_LOADER
3731 from .extractor.extractors import _PLUGIN_CLASSES as plugin_extractors
3732
49a57e70 3733 def get_encoding(stream):
2a938746 3734 ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
49a57e70 3735 if not supports_terminal_sequences(stream):
53973b4d 3736 from .utils import WINDOWS_VT_MODE # Must be imported locally
e3c7d495 3737 ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
49a57e70 3738 return ret
3739
591bb9d3 3740 encoding_str = 'Encodings: locale %s, fs %s, pref %s, %s' % (
49a57e70 3741 locale.getpreferredencoding(),
3742 sys.getfilesystemencoding(),
591bb9d3 3743 self.get_encoding(),
3744 ', '.join(
64fa820c 3745 f'{key} {get_encoding(stream)}' for key, stream in self._out_files.items_
591bb9d3 3746 if stream is not None and key != 'console')
3747 )
883d4b1e 3748
3749 logger = self.params.get('logger')
3750 if logger:
3751 write_debug = lambda msg: logger.debug(f'[debug] {msg}')
3752 write_debug(encoding_str)
3753 else:
96565c7e 3754 write_string(f'[debug] {encoding_str}\n', encoding=None)
49a57e70 3755 write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')
734f90bb 3756
4c88ff87 3757 source = detect_variant()
70b23409 3758 if VARIANT not in (None, 'pip'):
3759 source += '*'
36eaf303 3760 write_debug(join_nonempty(
b5e7a2e6 3761 f'{"yt-dlp" if REPOSITORY == "yt-dlp/yt-dlp" else REPOSITORY} version',
3762 __version__,
36eaf303 3763 f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
3764 '' if source == 'unknown' else f'({source})',
a057779d 3765 '' if _IN_CLI else 'API',
36eaf303 3766 delim=' '))
497074f0 3767
3768 if not _IN_CLI:
3769 write_debug(f'params: {self.params}')
3770
6e21fdd2 3771 if not _LAZY_LOADER:
3772 if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
49a57e70 3773 write_debug('Lazy loading extractors is forcibly disabled')
6e21fdd2 3774 else:
49a57e70 3775 write_debug('Lazy loading extractors is disabled')
3ae5e797 3776 if plugin_extractors or plugin_postprocessors:
49a57e70 3777 write_debug('Plugins: %s' % [
3ae5e797 3778 '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
3779 for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
8a82af35 3780 if self.params['compat_opts']:
3781 write_debug('Compatibility options: %s' % ', '.join(self.params['compat_opts']))
36eaf303 3782
b5e7a2e6 3783 if current_git_head():
3784 write_debug(f'Git HEAD: {current_git_head()}')
b1f94422 3785 write_debug(system_identifier())
d28b5171 3786
8913ef74 3787 exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
3788 ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
3789 if ffmpeg_features:
19a03940 3790 exe_versions['ffmpeg'] += ' (%s)' % ','.join(sorted(ffmpeg_features))
8913ef74 3791
4c83c967 3792 exe_versions['rtmpdump'] = rtmpdump_version()
feee8d32 3793 exe_versions['phantomjs'] = PhantomJSwrapper._version()
d28b5171 3794 exe_str = ', '.join(
2831b468 3795 f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
3796 ) or 'none'
49a57e70 3797 write_debug('exe versions: %s' % exe_str)
dca08720 3798
1d485a1a 3799 from .compat.compat_utils import get_package_info
9b8ee23b 3800 from .dependencies import available_dependencies
3801
3802 write_debug('Optional libraries: %s' % (', '.join(sorted({
1d485a1a 3803 join_nonempty(*get_package_info(m)) for m in available_dependencies.values()
9b8ee23b 3804 })) or 'none'))
2831b468 3805
97ec5bc5 3806 self._setup_opener()
dca08720
PH
3807 proxy_map = {}
3808 for handler in self._opener.handlers:
3809 if hasattr(handler, 'proxies'):
3810 proxy_map.update(handler.proxies)
49a57e70 3811 write_debug(f'Proxy map: {proxy_map}')
dca08720 3812
49a57e70 3813 # Not implemented
3814 if False and self.params.get('call_home'):
0f06bcd7 3815 ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode()
49a57e70 3816 write_debug('Public IP address: %s' % ipaddr)
58b1f00d 3817 latest_version = self.urlopen(
0f06bcd7 3818 'https://yt-dl.org/latest/version').read().decode()
58b1f00d
PH
3819 if version_tuple(latest_version) > version_tuple(__version__):
3820 self.report_warning(
3821 'You are using an outdated version (newest version: %s)! '
3822 'See https://yt-dl.org/update if you need help updating.' %
3823 latest_version)
3824
e344693b 3825 def _setup_opener(self):
97ec5bc5 3826 if hasattr(self, '_opener'):
3827 return
6ad14cab 3828 timeout_val = self.params.get('socket_timeout')
17bddf3e 3829 self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
6ad14cab 3830
982ee69a 3831 opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
dca08720
PH
3832 opts_cookiefile = self.params.get('cookiefile')
3833 opts_proxy = self.params.get('proxy')
3834
982ee69a 3835 self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
dca08720 3836
6a3f4c3f 3837 cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
dca08720
PH
3838 if opts_proxy is not None:
3839 if opts_proxy == '':
3840 proxies = {}
3841 else:
3842 proxies = {'http': opts_proxy, 'https': opts_proxy}
3843 else:
ac668111 3844 proxies = urllib.request.getproxies()
067aa17e 3845 # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
dca08720
PH
3846 if 'http' in proxies and 'https' not in proxies:
3847 proxies['https'] = proxies['http']
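# Illustration only (address is hypothetical): an environment that defines only an
# HTTP proxy, e.g. {'http': 'http://127.0.0.1:3128'}, is extended here to
# {'http': 'http://127.0.0.1:3128', 'https': 'http://127.0.0.1:3128'}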
91410c9b 3848 proxy_handler = PerRequestProxyHandler(proxies)
a0ddb8a2
PH
3849
3850 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
be4a824d
PH
3851 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3852 ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
fca6dba8 3853 redirect_handler = YoutubeDLRedirectHandler()
f9934b96 3854 data_handler = urllib.request.DataHandler()
6240b0a2
JMF
3855
3856 # When we pass our own FileHandler instance, build_opener won't add the
3857 # default FileHandler; this lets us disable the file:// protocol, which
3858 # could otherwise be abused for malicious purposes (see
067aa17e 3859 # https://github.com/ytdl-org/youtube-dl/issues/8227)
ac668111 3860 file_handler = urllib.request.FileHandler()
6240b0a2
JMF
3861
3862 def file_open(*args, **kwargs):
ac668111 3863 raise urllib.error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
6240b0a2
JMF
3864 file_handler.file_open = file_open
3865
ac668111 3866 opener = urllib.request.build_opener(
fca6dba8 3867 proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
2461f79d 3868
dca08720
PH
3869 # Delete the default user-agent header, which would otherwise apply in
3870 # cases where our custom HTTP handler doesn't come into play
067aa17e 3871 # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
dca08720
PH
3872 opener.addheaders = []
3873 self._opener = opener
62fec3b2
PH
3874
3875 def encode(self, s):
3876 if isinstance(s, bytes):
3877 return s # Already encoded
3878
3879 try:
3880 return s.encode(self.get_encoding())
3881 except UnicodeEncodeError as err:
3882 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3883 raise
3884
3885 def get_encoding(self):
3886 encoding = self.params.get('encoding')
3887 if encoding is None:
3888 encoding = preferredencoding()
3889 return encoding
ec82d85a 3890
e08a85d8 3891 def _write_info_json(self, label, ie_result, infofn, overwrite=None):
cb96c5be 3892 ''' Write infojson and return True = written, 'exists' = already exists, False = skipped, None = error '''
e08a85d8 3893 if overwrite is None:
3894 overwrite = self.params.get('overwrites', True)
80c03fa9 3895 if not self.params.get('writeinfojson'):
3896 return False
3897 elif not infofn:
3898 self.write_debug(f'Skipping writing {label} infojson')
3899 return False
3900 elif not self._ensure_dir_exists(infofn):
3901 return None
e08a85d8 3902 elif not overwrite and os.path.exists(infofn):
80c03fa9 3903 self.to_screen(f'[info] {label.title()} metadata is already present')
cb96c5be 3904 return 'exists'
3905
3906 self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3907 try:
3908 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3909 return True
86e5f3ed 3910 except OSError:
cb96c5be 3911 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3912 return None
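# Hypothetical usage sketch (not part of the original file): callers can treat the
# return value as a tri-state result, e.g.
#   written = self._write_info_json('video', info_dict, infofn)
#   if written is None:
#       ...  # error was already reported; abort this step
#   elif written:  # True or 'exists'
#       ...  # the .info.json file is available on disk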
80c03fa9 3913
3914 def _write_description(self, label, ie_result, descfn):
3915 ''' Write description and return True = written, False = skipped, None = error '''
3916 if not self.params.get('writedescription'):
3917 return False
3918 elif not descfn:
3919 self.write_debug(f'Skipping writing {label} description')
3920 return False
3921 elif not self._ensure_dir_exists(descfn):
3922 return None
3923 elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3924 self.to_screen(f'[info] {label.title()} description is already present')
3925 elif ie_result.get('description') is None:
3926 self.report_warning(f'There\'s no {label} description to write')
3927 return False
3928 else:
3929 try:
3930 self.to_screen(f'[info] Writing {label} description to: {descfn}')
86e5f3ed 3931 with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
80c03fa9 3932 descfile.write(ie_result['description'])
86e5f3ed 3933 except OSError:
80c03fa9 3934 self.report_error(f'Cannot write {label} description file {descfn}')
3935 return None
3936 return True
3937
3938 def _write_subtitles(self, info_dict, filename):
3939 ''' Write subtitles to file and return a list of (sub_filename, final_sub_filename), or None on error '''
3940 ret = []
3941 subtitles = info_dict.get('requested_subtitles')
3942 if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
3943 # Subtitle download errors are already handled by the relevant IE,
3944 # so processing silently continues when used with an IE that does not support subtitles
3945 return ret
3946
3947 sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
3948 if not sub_filename_base:
3949 self.to_screen('[info] Skipping writing video subtitles')
3950 return ret
3951 for sub_lang, sub_info in subtitles.items():
3952 sub_format = sub_info['ext']
3953 sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
3954 sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
e04938ab 3955 existing_sub = self.existing_file((sub_filename_final, sub_filename))
3956 if existing_sub:
80c03fa9 3957 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
e04938ab 3958 sub_info['filepath'] = existing_sub
3959 ret.append((existing_sub, sub_filename_final))
80c03fa9 3960 continue
3961
3962 self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
3963 if sub_info.get('data') is not None:
3964 try:
3965 # Use newline='' to prevent conversion of newline characters
3966 # See https://github.com/ytdl-org/youtube-dl/issues/10268
86e5f3ed 3967 with open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
80c03fa9 3968 subfile.write(sub_info['data'])
3969 sub_info['filepath'] = sub_filename
3970 ret.append((sub_filename, sub_filename_final))
3971 continue
86e5f3ed 3972 except OSError:
80c03fa9 3973 self.report_error(f'Cannot write video subtitles file {sub_filename}')
3974 return None
3975
3976 try:
3977 sub_copy = sub_info.copy()
3978 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
3979 self.dl(sub_filename, sub_copy, subtitle=True)
3980 sub_info['filepath'] = sub_filename
3981 ret.append((sub_filename, sub_filename_final))
6020e05d 3982 except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
c70c418d 3983 msg = f'Unable to download video subtitles for {sub_lang!r}: {err}'
6020e05d 3984 if self.params.get('ignoreerrors') is not True: # False or 'only_download'
c70c418d 3985 if not self.params.get('ignoreerrors'):
3986 self.report_error(msg)
3987 raise DownloadError(msg)
3988 self.report_warning(msg)
519804a9 3989 return ret
80c03fa9 3990
3991 def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
3992 ''' Write thumbnails to file and return a list of (thumb_filename, final_thumb_filename) '''
6c4fd172 3993 write_all = self.params.get('write_all_thumbnails', False)
80c03fa9 3994 thumbnails, ret = [], []
6c4fd172 3995 if write_all or self.params.get('writethumbnail', False):
0202b52a 3996 thumbnails = info_dict.get('thumbnails') or []
6c4fd172 3997 multiple = write_all and len(thumbnails) > 1
ec82d85a 3998
80c03fa9 3999 if thumb_filename_base is None:
4000 thumb_filename_base = filename
4001 if thumbnails and not thumb_filename_base:
4002 self.write_debug(f'Skipping writing {label} thumbnail')
4003 return ret
4004
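# Thumbnails are tried from the last entry backwards (typically the most-preferred
# candidate first); entries that fail to download are dropped from the list, and
# unless write_all is set the loop stops after the first successful write.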
dd0228ce 4005 for idx, t in list(enumerate(thumbnails))[::-1]:
80c03fa9 4006 thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
aa9369a2 4007 thumb_display_id = f'{label} thumbnail {t["id"]}'
80c03fa9 4008 thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
4009 thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
ec82d85a 4010
e04938ab 4011 existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
4012 if existing_thumb:
aa9369a2 4013 self.to_screen('[info] %s is already present' % (
4014 thumb_display_id if multiple else f'{label} thumbnail').capitalize())
e04938ab 4015 t['filepath'] = existing_thumb
4016 ret.append((existing_thumb, thumb_filename_final))
ec82d85a 4017 else:
80c03fa9 4018 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
ec82d85a 4019 try:
297e9952 4020 uf = self.urlopen(sanitized_Request(t['url'], headers=t.get('http_headers', {})))
80c03fa9 4021 self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
d3d89c32 4022 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
ec82d85a 4023 shutil.copyfileobj(uf, thumbf)
80c03fa9 4024 ret.append((thumb_filename, thumb_filename_final))
885cc0b7 4025 t['filepath'] = thumb_filename
3158150c 4026 except network_exceptions as err:
dd0228ce 4027 thumbnails.pop(idx)
80c03fa9 4028 self.report_warning(f'Unable to download {thumb_display_id}: {err}')
6c4fd172 4029 if ret and not write_all:
4030 break
0202b52a 4031 return ret