from .cache import Cache
from .compat import functools, urllib  # isort: split
from .compat import compat_os_name, compat_shlex_quote, urllib_req_to_req
from .cookies import LenientSimpleCookie, load_cookies
from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
from .downloader.rtmp import rtmpdump_version
from .extractor import gen_extractor_classes, get_info_extractor
from .extractor.common import UnsupportedURLIE
from .extractor.openload import PhantomJSwrapper
from .minicurses import format_text
from .networking import HEADRequest, Request, RequestDirector
from .networking.common import _REQUEST_HANDLERS, _RH_PREFERENCES
from .networking.exceptions import (
    HTTPError,
    NoSupportingHandlers,
    RequestError,
    SSLError,
    network_exceptions,
)
from .plugins import directories as plugin_directories
from .postprocessor import _PLUGIN_CLASSES as plugin_pps
from .postprocessor import (
    EmbedThumbnailPP,
    FFmpegFixupDuplicateMoovPP,
    FFmpegFixupDurationPP,
    FFmpegFixupM3u8PP,
    FFmpegFixupM4aPP,
    FFmpegFixupStretchedPP,
    FFmpegFixupTimestampPP,
    FFmpegMergerPP,
    FFmpegPostProcessor,
    FFmpegVideoConvertorPP,
    MoveFilesAfterDownloadPP,
    get_postprocessor,
)
from .postprocessor.ffmpeg import resolve_mapping as resolve_recode_mapping
from .update import REPOSITORY, _get_system_deprecation, _make_label, current_git_head, detect_variant
from .utils import (
    UnavailableVideoError,
    format_decimal_suffix,
    orderedSet_from_options,
    remove_terminal_sequences,
    supports_terminal_sequences,
    windows_enable_vt_mode,
)
from .utils._utils import _YDLLogger
from .utils.networking import (
    HTTPHeaderDict,
    clean_headers,
    clean_proxies,
    std_headers,
)
from .version import CHANNEL, ORIGIN, RELEASE_GIT_HEAD, VARIANT, __version__

if compat_os_name == 'nt':
    import ctypes

class YoutubeDL:
    """YoutubeDL class.

    YoutubeDL objects are responsible for downloading the actual video file
    and writing it to disk if the user has requested it, among some other
    tasks. In most cases there should be one per program. Since, given a
    video URL, the downloader does not know how to extract all the needed
    information (a task that InfoExtractors perform), it has to pass the URL
    to one of them.

    For this, YoutubeDL objects have a method that allows InfoExtractors to
    be registered in a given order. When it is passed a URL, the YoutubeDL
    object hands it to the first InfoExtractor it finds that reports being
    able to handle it. The InfoExtractor extracts all the information about
    the video or videos the URL refers to, and YoutubeDL processes the
    extracted information, possibly using a File Downloader to download the
    video.

    YoutubeDL objects accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The YoutubeDL also registers
    itself as the downloader in charge of the InfoExtractors that are added
    to it, so this is a "mutual registration".
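
    A minimal usage sketch (the URL and the options shown here are placeholders):

        import yt_dlp

        with yt_dlp.YoutubeDL({'format': 'best'}) as ydl:
            info = ydl.extract_info('https://example.com/watch?v=xxxx', download=True)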
194 username: Username for authentication purposes.
195 password: Password for authentication purposes.
196 videopassword: Password for accessing a video.
197 ap_mso: Adobe Pass multiple-system operator identifier.
198 ap_username: Multiple-system operator account username.
199 ap_password: Multiple-system operator account password.
200 usenetrc: Use netrc for authentication instead.
201 netrc_location: Location of the netrc file. Defaults to ~/.netrc.
202 netrc_cmd: Use a shell command to get credentials
203 verbose: Print additional info to stdout.
204 quiet: Do not print messages to stdout.
205 no_warnings: Do not print out anything for warnings.
206 forceprint: A dict with keys WHEN mapped to a list of templates to
207 print to stdout. The allowed keys are video or any of the
208 items in utils.POSTPROCESS_WHEN.
209 For compatibility, a single list is also accepted
210 print_to_file: A dict with keys WHEN (same as forceprint) mapped to
211 a list of tuples with (template, filename)
212 forcejson: Force printing info_dict as JSON.
213 dump_single_json: Force printing the info_dict of the whole playlist
214 (or video) as a single JSON line.
215 force_write_download_archive: Force writing download archive regardless
216 of 'skip_download' or 'simulate'.
217 simulate: Do not download the video files. If unset (or None),
218 simulate only if listsubtitles, listformats or list_thumbnails is used
219 format: Video format code. see "FORMAT SELECTION" for more details.
220 You can also pass a function. The function takes 'ctx' as
221 argument and returns the formats to download.
222 See "build_format_selector" for an implementation
    allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
    ignore_no_formats_error: Ignore "No video formats" error. Useful for
                       extracting metadata even if the video is not actually
                       available for download (experimental)
    format_sort:       A list of fields by which to sort the video formats.
                       See "Sorting Formats" for more details.
    format_sort_force: Force the given format_sort. See "Sorting Formats"
                       for more details.
    prefer_free_formats: Whether to prefer video formats with free containers
                       over non-free ones of the same quality.
    allow_multiple_video_streams: Allow multiple video streams to be merged
                       into a single file
    allow_multiple_audio_streams: Allow multiple audio streams to be merged
                       into a single file
    check_formats:     Whether to test if the formats are downloadable.
                       Can be True (check all), False (check none),
                       'selected' (check selected formats),
                       or None (check only if requested by extractor)
    paths:             Dictionary of output paths. The allowed keys are 'home',
                       'temp' and the keys of OUTTMPL_TYPES (in utils/_utils.py)
    outtmpl:           Dictionary of templates for output names. Allowed keys
                       are 'default' and the keys of OUTTMPL_TYPES (in utils/_utils.py).
                       For compatibility with youtube-dl, a single string can also be used.
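                       E.g. {'default': '%(title)s [%(id)s].%(ext)s'}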
    outtmpl_na_placeholder: Placeholder for unavailable meta fields.
    restrictfilenames: Do not allow "&" and spaces in file names
    trim_file_name:    Limit length of filename (extension excluded)
    windowsfilenames:  Force the filenames to be windows compatible
    ignoreerrors:      Do not stop on download/postprocessing errors.
                       Can be 'only_download' to ignore only download errors.
                       Default is 'only_download' for CLI, but False for API
    skip_playlist_after_errors: Number of allowed failures until the rest of
                       the playlist is skipped
    allowed_extractors: List of regexes to match against extractor names that are allowed
    overwrites:        Overwrite all video and metadata files if True,
                       overwrite only non-video files if None
                       and don't overwrite any file if False
    playlist_items:    Specific indices of playlist to download.
    playlistrandom:    Download playlist items in random order.
    lazy_playlist:     Process playlist entries as they are received.
    matchtitle:        Download only matching titles.
    rejecttitle:       Reject downloads for matching titles.
    logger:            Log messages to a logging.Logger instance.
    logtostderr:       Print everything to stderr instead of stdout.
    consoletitle:      Display progress in the console window's titlebar.
    writedescription:  Write the video description to a .description file
    writeinfojson:     Write the video metadata to a .info.json file
    clean_infojson:    Remove internal metadata from the infojson
    getcomments:       Extract video comments. The comments will not be written
                       to disk unless writeinfojson is also given
    writeannotations:  Write the video annotations to a .annotations.xml file
    writethumbnail:    Write the thumbnail image to a file
    allow_playlist_files: Whether to write playlists' description, infojson etc.
                       also to disk when using the 'write*' options
    write_all_thumbnails: Write all thumbnail formats to files
    writelink:         Write an internet shortcut file, depending on the
                       current platform (.url/.webloc/.desktop)
    writeurllink:      Write a Windows internet shortcut file (.url)
    writewebloclink:   Write a macOS internet shortcut file (.webloc)
    writedesktoplink:  Write a Linux internet shortcut file (.desktop)
    writesubtitles:    Write the video subtitles to a file
    writeautomaticsub: Write the automatically generated subtitles to a file
    listsubtitles:     Lists all available subtitles for the video
    subtitlesformat:   The format code for subtitles
    subtitleslangs:    List of languages of the subtitles to download (can be regex).
                       The list may contain "all" to refer to all the available
                       subtitles. The language can be prefixed with a "-" to
                       exclude it from the requested languages, e.g. ['all', '-live_chat']
    keepvideo:         Keep the video file after post-processing
    daterange:         A utils.DateRange object; download only if the upload_date is in the range.
    skip_download:     Skip the actual download of the video file
    cachedir:          Location of the cache files in the filesystem.
                       False to disable filesystem cache.
    noplaylist:        Download single video instead of a playlist if in doubt.
    age_limit:         An integer representing the user's age in years.
                       Unsuitable videos for the given age are skipped.
    min_views:         An integer representing the minimum view count the video
                       must have in order to not be skipped.
                       Videos without view count information are always
                       downloaded. None for no limit.
    max_views:         An integer representing the maximum view count.
                       Videos that are more popular than that are not
                       downloaded.
                       Videos without view count information are always
                       downloaded. None for no limit.
    download_archive:  A set, or the name of a file where all downloads are recorded.
                       Videos already present in the file are not downloaded again.
    break_on_existing: Stop the download process after attempting to download a
                       file that is in the archive.
    break_per_url:     Whether break_on_reject and break_on_existing
                       should act on each input URL as opposed to for the entire queue
    cookiefile:        File name or text stream from where cookies should be read and dumped to
    cookiesfrombrowser: A tuple containing the name of the browser, the profile
                       name/path from where cookies are loaded, the name of the keyring,
                       and the container name, e.g. ('chrome', ) or
                       ('vivaldi', 'default', 'BASICTEXT') or ('firefox', 'default', None, 'Meta')
    legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
                       support RFC 5746 secure renegotiation
    nocheckcertificate: Do not verify SSL certificates
    client_certificate: Path to client certificate file in PEM format. May include the private key
    client_certificate_key: Path to private key file for client certificate
    client_certificate_password: Password for client certificate private key, if encrypted.
                       If not provided and the key is encrypted, yt-dlp will ask interactively
    prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
                       (Only supported by some extractors)
    enable_file_urls:  Enable file:// URLs. This is disabled by default for security reasons.
    http_headers:      A dictionary of custom headers to be used for all requests
    proxy:             URL of the proxy server to use
    geo_verification_proxy: URL of the proxy to use for IP address verification
                       on geo-restricted sites.
    socket_timeout:    Time to wait for unresponsive hosts, in seconds
    bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
    debug_printtraffic: Print out sent and received HTTP traffic
    default_search:    Prepend this string if an input URL is not valid.
                       'auto' for elaborate guessing
    encoding:          Use this encoding instead of the system-specified.
    extract_flat:      Whether to resolve and process url_results further
                       * False: Always process. Default for API
                       * True: Never process
                       * 'in_playlist': Do not process inside playlist/multi_video
                       * 'discard': Always process, but don't return the result
                         from inside playlist/multi_video
                       * 'discard_in_playlist': Same as "discard", but only for
                         playlists (not multi_video). Default for CLI
    wait_for_video:    If given, wait for scheduled streams to become available.
                       The value should be a tuple containing the range
                       (min_secs, max_secs) to wait between retries
    postprocessors:    A list of dictionaries, each with an entry
                       * key: The name of the postprocessor. See
                         yt_dlp/postprocessor/__init__.py for a list.
                       * when: When to run the postprocessor. Allowed values are
                         the entries of utils.POSTPROCESS_WHEN
                         Assumed to be 'post_process' if not given
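                       E.g. (a sketch of such a list):
                       [{'key': 'FFmpegExtractAudio', 'preferredcodec': 'mp3'},
                        {'key': 'EmbedThumbnail', 'when': 'post_process'}]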
    progress_hooks:    A list of functions that get called on download
                       progress, with a dictionary with the entries
                       * status: One of "downloading", "error", or "finished".
                         Check this first and ignore unknown values.
                       * info_dict: The extracted info_dict

                       If status is one of "downloading" or "finished", the
                       following properties may also be present:
                       * filename: The final filename (always present)
                       * tmpfilename: The filename we're currently writing to
                       * downloaded_bytes: Bytes on disk
                       * total_bytes: Size of the whole file, None if unknown
                       * total_bytes_estimate: Guess of the eventual file size,
                         None if unavailable.
                       * elapsed: The number of seconds since download started.
                       * eta: The estimated time in seconds, None if unknown
                       * speed: The download speed in bytes/second, None if
                         unknown
                       * fragment_index: The counter of the currently
                         downloaded video fragment.
                       * fragment_count: The number of fragments (= individual
                         files that will be merged)

                       Progress hooks are guaranteed to be called at least once
                       (with status "finished") if the download is successful.
    postprocessor_hooks: A list of functions that get called on postprocessing
                       progress, with a dictionary with the entries
                       * status: One of "started", "processing", or "finished".
                         Check this first and ignore unknown values.
                       * postprocessor: Name of the postprocessor
                       * info_dict: The extracted info_dict

                       Progress hooks are guaranteed to be called at least twice
                       (with status "started" and "finished") if the processing is successful.
    merge_output_format: "/" separated list of extensions to use when merging formats.
    final_ext:         Expected final extension; used to detect when the file was
                       already downloaded and converted
    fixup:             Automatically correct known faults of the file.
                       One of:
                       - "never": do nothing
                       - "warn": only emit a warning
                       - "detect_or_warn": check whether we can do anything
                         about it, warn otherwise (default)
    source_address:    Client-side IP address to bind to.
    sleep_interval_requests: Number of seconds to sleep between requests
                       during extraction
    sleep_interval:    Number of seconds to sleep before each download when
                       used alone or a lower bound of a range for randomized
                       sleep before each download (minimum possible number
                       of seconds to sleep) when used along with
                       max_sleep_interval.
    max_sleep_interval: Upper bound of a range for randomized sleep before each
                       download (maximum possible number of seconds to sleep).
                       Must only be used along with sleep_interval.
                       Actual sleep time will be a random float from range
                       [sleep_interval; max_sleep_interval].
    sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
    listformats:       Print an overview of available video formats and exit.
    list_thumbnails:   Print a table of all thumbnails and exit.
    match_filter:      A function that gets called for every video with the signature
                       (info_dict, *, incomplete: bool) -> Optional[str]
                       For backward compatibility with youtube-dl, the signature
                       (info_dict) -> Optional[str] is also allowed.
                       - If it returns a message, the video is ignored.
                       - If it returns None, the video is downloaded.
                       - If it returns utils.NO_DEFAULT, the user is interactively
                         asked whether to download the video.
                       - Raise utils.DownloadCancelled(msg) to abort remaining
                         downloads when a video is rejected.
                       match_filter_func in utils/_utils.py is one example for this.
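                       A sketch of such a filter, using the newer signature:

                       def shorter_than_ten_minutes(info_dict, *, incomplete):
                           if not incomplete and (info_dict.get('duration') or 0) > 600:
                               return 'Skipping: longer than ten minutes'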
    color:             A dictionary with output stream names as keys
                       and their respective color policy as values.
                       Can also just be a single color policy,
                       in which case it applies to all outputs.
                       Valid stream names are 'stdout' and 'stderr'.
                       Valid color policies are one of 'always', 'auto', 'no_color' or 'never'.
    geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
                       HTTP header
    geo_bypass_country: Two-letter ISO 3166-1 alpha-2 country code that will be used for
                       explicit geographic restriction bypassing via faking
                       X-Forwarded-For HTTP header
    geo_bypass_ip_block: IP range in CIDR notation that will be used similarly to
                       geo_bypass_country
    external_downloader: A dictionary of protocol keys and the executable of the
                       external downloader to use for it. The allowed protocols
                       are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
                       Set the value to 'native' to use the native downloader
    compat_opts:       Compatibility options. See "Differences in default behavior".
                       The following options do not work when used through the API:
                       filename, abort-on-error, multistreams, no-live-chat, format-sort,
                       no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
                       Refer to __init__.py for their implementation
    progress_template: Dictionary of templates for progress outputs.
                       Allowed keys are 'download', 'postprocess',
                       'download-title' (console title) and 'postprocess-title'.
                       The template is mapped on a dictionary with keys 'progress' and 'info'
    retry_sleep_functions: Dictionary of functions that take the number of attempts
                       as argument and return the time to sleep in seconds.
                       Allowed keys are 'http', 'fragment', 'file_access'
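                       E.g. (a sketch of exponential backoff):
                       {'http': lambda attempt: 2 ** attempt}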
    download_ranges:   A callback function that gets called for every video with
                       the signature (info_dict, ydl) -> Iterable[Section].
                       Only the returned sections will be downloaded.
                       Each Section is a dict with the following keys:
                       * start_time: Start time of the section in seconds
                       * end_time: End time of the section in seconds
                       * title: Section title (Optional)
                       * index: Section number (Optional)
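                       A sketch of such a callback that keeps only the first
                       minute of every video:

                       def first_minute(info_dict, ydl):
                           yield {'start_time': 0, 'end_time': 60}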
    force_keyframes_at_cuts: Re-encode the video when downloading ranges to get precise cuts
    noprogress:        Do not print the progress bar
    live_from_start:   Whether to download livestream videos from the start

    The following parameters are not used by YoutubeDL itself, they are used by
    the downloader (see yt_dlp/downloader/common.py):
    nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
    max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
    continuedl, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
    external_downloader_args, concurrent_fragment_downloads.

    The following options are used by the post processors:
    ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
                       to the binary or its containing directory.
    postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
                       and a list of additional command-line arguments for the
                       postprocessor/executable. The dict can also have "PP+EXE" keys
                       which are used when the given exe is used by the given PP.
                       Use 'default' as the name for arguments to be passed to all PP.
                       For compatibility with youtube-dl, a single list of args
                       can also be used

    The following options are used by the extractors:
    extractor_retries: Number of times to retry for known errors (default: 3)
    dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
    hls_split_discontinuity: Split HLS playlists to different formats at
                       discontinuities such as ad breaks (default: False)
    extractor_args:    A dictionary of arguments to be passed to the extractors.
                       See "EXTRACTOR ARGUMENTS" for details.
                       E.g. {'youtube': {'skip': ['dash', 'hls']}}
    mark_watched:      Mark videos watched (even with --simulate). Only for YouTube

    The following options are deprecated and may be removed in the future:

    break_on_reject:   Stop the download process when encountering a video that
                       has been filtered out.
                       - `raise DownloadCancelled(msg)` in match_filter instead
    force_generic_extractor: Force downloader to use the generic extractor
                       - Use allowed_extractors = ['generic', 'default']
    playliststart:     - Use playlist_items
                       Playlist item to start at.
    playlistend:       - Use playlist_items
                       Playlist item to end at.
    playlistreverse:   - Use playlist_items
                       Download playlist items in reverse order.
    forceurl:          - Use forceprint
                       Force printing final URL.
    forcetitle:        - Use forceprint
                       Force printing title.
    forceid:           - Use forceprint
                       Force printing ID.
    forcethumbnail:    - Use forceprint
                       Force printing thumbnail URL.
    forcedescription:  - Use forceprint
                       Force printing description.
    forcefilename:     - Use forceprint
                       Force printing final filename.
    forceduration:     - Use forceprint
                       Force printing duration.
    allsubtitles:      - Use subtitleslangs = ['all']
                       Downloads all the subtitles of the video
                       (requires writesubtitles or writeautomaticsub)
    include_ads:       - Doesn't work
    call_home:         - Not implemented
                       Boolean, true iff we are allowed to contact the
                       yt-dlp servers for debugging.
    post_hooks:        - Register a custom postprocessor
                       A list of functions that get called as the final step
                       for each video file, after all postprocessors have been
                       called. The filename will be passed as the only argument.
    hls_prefer_native: - Use external_downloader = {'m3u8': 'native'} or {'m3u8': 'ffmpeg'}.
                       Use the native HLS downloader instead of ffmpeg/avconv
                       if True, otherwise use ffmpeg/avconv if False, otherwise
                       use downloader suggested by extractor if None.
    prefer_ffmpeg:     - avconv support is deprecated
                       If False, use avconv instead of ffmpeg if both are available,
                       otherwise prefer ffmpeg.
    youtube_include_dash_manifest: - Use extractor_args
                       If True (default), DASH manifests and related
                       data will be downloaded and processed by the extractor.
                       You can reduce network I/O by disabling it if you don't
                       care about DASH. (Only for youtube)
    youtube_include_hls_manifest: - Use extractor_args
                       If True (default), HLS manifests and related
                       data will be downloaded and processed by the extractor.
                       You can reduce network I/O by disabling it if you don't
                       care about HLS. (Only for youtube)
    no_color:          Same as `color='no_color'`
    no_overwrites:     Same as `overwrites=False`
    """
    _NUMERIC_FIELDS = {
        'width', 'height', 'asr', 'audio_channels', 'fps',
        'tbr', 'abr', 'vbr', 'filesize', 'filesize_approx',
        'timestamp', 'release_timestamp',
        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
        'average_rating', 'comment_count', 'age_limit',
        'start_time', 'end_time',
        'chapter_number', 'season_number', 'episode_number',
        'track_number', 'disc_number', 'release_year',
    }

    _format_fields = {
        # NB: Keep in sync with the docstring of extractor/common.py
        'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note',
        'width', 'height', 'aspect_ratio', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr', 'audio_channels',
        'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx', 'rows', 'columns',
        'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start',
        'preference', 'language', 'language_preference', 'quality', 'source_preference', 'cookies',
        'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'hls_aes', 'downloader_options',
        'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time',
    }

    _format_selection_exts = {
        'audio': set(MEDIA_EXTENSIONS.common_audio),
        'video': set(MEDIA_EXTENSIONS.common_video + ('3gp', )),
        'storyboards': set(MEDIA_EXTENSIONS.storyboards),
    }
    def __init__(self, params=None, auto_init=True):
        """Create a YoutubeDL object with the given options.
        @param auto_init    Whether to load the default extractors and print header (if verbose).
                            Set to 'no_verbose_header' to not print the header
        """
        if params is None:
            params = {}
        self.params = params
        self._ies = {}
        self._ies_instances = {}
        self._pps = {k: [] for k in POSTPROCESS_WHEN}
        self._printed_messages = set()
        self._first_webpage_request = True
        self._post_hooks = []
        self._progress_hooks = []
        self._postprocessor_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        self._num_videos = 0
        self._playlist_level = 0
        self._playlist_urls = set()
        self.cache = Cache(self)
        self.__header_cookies = []
        stdout = sys.stderr if self.params.get('logtostderr') else sys.stdout
        self._out_files = Namespace(
            out=stdout,
            error=sys.stderr,
            screen=sys.stderr if self.params.get('quiet') else stdout,
            console=None if compat_os_name == 'nt' else next(
                filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None),
        )
        try:
            windows_enable_vt_mode()
        except Exception as e:
            self.write_debug(f'Failed to enable VT mode: {e}')

        if self.params.get('no_color'):
            if self.params.get('color') is not None:
                self.params.setdefault('_warnings', []).append(
                    'Overwriting params from "color" with "no_color"')
            self.params['color'] = 'no_color'

        term_allow_color = os.getenv('TERM', '').lower() != 'dumb'
        no_color = bool(os.getenv('NO_COLOR'))

        def process_color_policy(stream):
            stream_name = {sys.stdout: 'stdout', sys.stderr: 'stderr'}[stream]
            policy = traverse_obj(self.params, ('color', (stream_name, None), {str}), get_all=False)
            if policy in ('auto', None):
                if term_allow_color and supports_terminal_sequences(stream):
                    return 'no_color' if no_color else True
                return False
            assert policy in ('always', 'never', 'no_color'), policy
            return {'always': True, 'never': False}.get(policy, policy)

        self._allow_colors = Namespace(**{
            name: process_color_policy(stream)
            for name, stream in self._out_files.items_ if name != 'console'
        })
        system_deprecation = _get_system_deprecation()
        if system_deprecation:
            self.deprecated_feature(system_deprecation.replace('\n', '\n                    '))

        if self.params.get('allow_unplayable_formats'):
            self.report_warning(
                f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
                'This is a developer option intended for debugging. \n'
                '         If you experience any issues while using this option, '
                f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')
        if self.params.get('bidi_workaround', False):
            try:
                import pty
                master, slave = pty.openpty()
                width = shutil.get_terminal_size().columns
                width_args = [] if width is None else ['-w', str(width)]
                sp_kwargs = {'stdin': subprocess.PIPE, 'stdout': slave, 'stderr': self._out_files.error}
                try:
                    self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
                except OSError:
                    self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == errno.ENOENT:
                    self.report_warning(
                        'Could not find fribidi executable, ignoring --bidi-workaround. '
                        'Make sure that fribidi is an executable file in one of the directories in your $PATH.')
                else:
                    raise
        self.params['compat_opts'] = set(self.params.get('compat_opts', ()))
        self.params['http_headers'] = HTTPHeaderDict(std_headers, self.params.get('http_headers'))
        self._load_cookies(self.params['http_headers'].get('Cookie'))  # compat
        self.params['http_headers'].pop('Cookie', None)
        self._request_director = self.build_request_director(_REQUEST_HANDLERS.values(), _RH_PREFERENCES)

        if auto_init and auto_init != 'no_verbose_header':
            self.print_debug_header()
        def check_deprecated(param, option, suggestion):
            if self.params.get(param) is not None:
                self.report_warning(f'{option} is deprecated. Use {suggestion} instead')
                return True
            return False

        if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
        check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
        check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

        for msg in self.params.get('_warnings', []):
            self.report_warning(msg)
        for msg in self.params.get('_deprecation_warnings', []):
            self.deprecated_feature(msg)

        if 'list-formats' in self.params['compat_opts']:
            self.params['listformats_table'] = False
        if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
            # nooverwrites was unnecessarily changed to overwrites
            # in 0c3d0f51778b153f65c21906031c2e091fcfb641
            # This ensures compatibility with both keys
            self.params['overwrites'] = not self.params['nooverwrites']
        elif self.params.get('overwrites') is None:
            self.params.pop('overwrites', None)
        else:
            self.params['nooverwrites'] = not self.params['overwrites']

        if self.params.get('simulate') is None and any((
            self.params.get('list_thumbnails'),
            self.params.get('listformats'),
            self.params.get('listsubtitles'),
        )):
            self.params['simulate'] = 'list_only'

        self.params.setdefault('forceprint', {})
        self.params.setdefault('print_to_file', {})

        # Compatibility with older syntax
        if not isinstance(params['forceprint'], dict):
            self.params['forceprint'] = {'video': params['forceprint']}

        if auto_init:
            self.add_default_info_extractors()
        if (sys.platform != 'win32'
                and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not self.params.get('restrictfilenames', False)):
            # Unicode filesystem API will throw errors (#1474, #13027)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        self._parse_outtmpl()
        # Creating format selector here allows us to catch syntax errors before the extraction
        self.format_selector = (
            self.params.get('format') if self.params.get('format') in (None, '-')
            else self.params['format'] if callable(self.params['format'])
            else self.build_format_selector(self.params['format']))
        hooks = {
            'post_hooks': self.add_post_hook,
            'progress_hooks': self.add_progress_hook,
            'postprocessor_hooks': self.add_postprocessor_hook,
        }
        for opt, fn in hooks.items():
            for ph in self.params.get(opt, []):
                fn(ph)

        for pp_def_raw in self.params.get('postprocessors', []):
            pp_def = dict(pp_def_raw)
            when = pp_def.pop('when', 'post_process')
            self.add_post_processor(
                get_postprocessor(pp_def.pop('key'))(self, **pp_def),
                when=when)
):
770 """Preload the archive, if any is specified"""
774 elif not is_path_like(fn
):
777 self
.write_debug(f
'Loading archive file {fn!r}')
779 with locked_file(fn
, 'r', encoding
='utf-8') as archive_file
:
780 for line
in archive_file
:
781 archive
.add(line
.strip())
782 except OSError as ioe
:
783 if ioe
.errno
!= errno
.ENOENT
:
787 self
.archive
= preload_download_archive(self
.params
.get('download_archive'))
    def warn_if_short_id(self, argv):
        # short YouTube ID starting with dash?
        idxs = [
            i for i, a in enumerate(argv)
            if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
        if idxs:
            correct_argv = (
                ['yt-dlp']
                + [a for i, a in enumerate(argv) if i not in idxs]
                + ['--'] + [argv[i] for i in idxs])
            self.report_warning(
                'Long argument string detected. '
                'Use -- to separate parameters and URLs, like this:\n%s' %
                args_to_str(correct_argv))
    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        ie_key = ie.ie_key()
        self._ies[ie_key] = ie
        if not isinstance(ie, type):
            self._ies_instances[ie_key] = ie
            ie.set_downloader(self)

    def get_info_extractor(self, ie_key):
        """
        Get an instance of an IE with name ie_key. It will try to get one from
        the _ies list; if there is no instance, it will create a new one and add
        it to the extractor list.
        """
        ie = self._ies_instances.get(ie_key)
        if ie is None:
            ie = get_info_extractor(ie_key)()
            self.add_info_extractor(ie)
        return ie
):
827 Add the InfoExtractors returned by gen_extractors to the end of the list
829 all_ies
= {ie.IE_NAME.lower(): ie for ie in gen_extractor_classes()}
830 all_ies
['end'] = UnsupportedURLIE()
832 ie_names
= orderedSet_from_options(
833 self
.params
.get('allowed_extractors', ['default']), {
834 'all': list(all_ies
),
835 'default': [name
for name
, ie
in all_ies
.items() if ie
._ENABLED
],
837 except re
.error
as e
:
838 raise ValueError(f
'Wrong regex for allowed_extractors: {e.pattern}')
839 for name
in ie_names
:
840 self
.add_info_extractor(all_ies
[name
])
841 self
.write_debug(f
'Loaded {len(ie_names)} extractors')
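    # E.g. (a sketch): constructing YoutubeDL({'allowed_extractors': ['youtube.*', 'default']})
    # registers only the extractors whose names match those patterns (plus the
    # default set), as resolved by the orderedSet_from_options() call above.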
    def add_post_processor(self, pp, when='post_process'):
        """Add a PostProcessor object to the end of the chain."""
        assert when in POSTPROCESS_WHEN, f'Invalid when={when}'
        self._pps[when].append(pp)
        pp.set_downloader(self)

    def add_post_hook(self, ph):
        """Add the post hook"""
        self._post_hooks.append(ph)

    def add_progress_hook(self, ph):
        """Add the download progress hook"""
        self._progress_hooks.append(ph)

    def add_postprocessor_hook(self, ph):
        """Add the postprocessing progress hook"""
        self._postprocessor_hooks.append(ph)
        for pps in self._pps.values():
            for pp in pps:
                pp.add_progress_hook(ph)
    def _bidi_workaround(self, message):
        if not hasattr(self, '_output_channel'):
            return message

        assert hasattr(self, '_output_process')
        assert isinstance(message, str)
        line_count = message.count('\n') + 1
        self._output_process.stdin.write((message + '\n').encode())
        self._output_process.stdin.flush()
        res = ''.join(self._output_channel.readline().decode()
                      for _ in range(line_count))
        return res[:-len('\n')]
    def _write_string(self, message, out=None, only_once=False):
        if only_once:
            if message in self._printed_messages:
                return
            self._printed_messages.add(message)
        write_string(message, out=out, encoding=self.params.get('encoding'))
    def to_stdout(self, message, skip_eol=False, quiet=None):
        """Print message to stdout"""
        if quiet is not None:
            self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument quiet. '
                                     'Use "YoutubeDL.to_screen" instead')
        if skip_eol is not False:
            self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument skip_eol. '
                                     'Use "YoutubeDL.to_screen" instead')
        self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.out)

    def to_screen(self, message, skip_eol=False, quiet=None, only_once=False):
        """Print message to screen if not in quiet mode"""
        if self.params.get('logger'):
            self.params['logger'].debug(message)
            return
        if (self.params.get('quiet') if quiet is None else quiet) and not self.params.get('verbose'):
            return
        self._write_string(
            '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
            self._out_files.screen, only_once=only_once)
    def to_stderr(self, message, only_once=False):
        """Print message to stderr"""
        assert isinstance(message, str)
        if self.params.get('logger'):
            self.params['logger'].error(message)
        else:
            self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.error, only_once=only_once)

    def _send_console_code(self, code):
        if compat_os_name == 'nt' or not self._out_files.console:
            return
        self._write_string(code, self._out_files.console)
    def to_console_title(self, message):
        if not self.params.get('consoletitle', False):
            return
        message = remove_terminal_sequences(message)
        if compat_os_name == 'nt':
            if ctypes.windll.kernel32.GetConsoleWindow():
                # c_wchar_p() might not be necessary if `message` is
                # already of type unicode()
                ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        else:
            self._send_console_code(f'\033]0;{message}\007')

    def save_console_title(self):
        if not self.params.get('consoletitle') or self.params.get('simulate'):
            return
        self._send_console_code('\033[22;0t')  # Save the title on stack

    def restore_console_title(self):
        if not self.params.get('consoletitle') or self.params.get('simulate'):
            return
        self._send_console_code('\033[23;0t')  # Restore the title from stack

    def __enter__(self):
        self.save_console_title()
        return self
    def save_cookies(self):
        if self.params.get('cookiefile') is not None:
            self.cookiejar.save()

    def __exit__(self, *args):
        self.restore_console_title()
        self.close()

    def close(self):
        self.save_cookies()
        self._request_director.close()
    def trouble(self, message=None, tb=None, is_error=True):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        @param tb          If given, is additional traceback information
        @param is_error    Whether to raise error according to ignoreerrors
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += encode_compat_str(traceback.format_exc())
                else:
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            if tb:
                self.to_stderr(tb)
        if not is_error:
            return
        if not self.params.get('ignoreerrors'):
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1
    Styles = Namespace(
        EMPHASIS='light blue',
        BAD_FORMAT='light red',
        SUPPRESS='light black',
    )
    def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
        text = str(text)
        if test_encoding:
            original_text = text
            # handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711
            encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii'
            text = text.encode(encoding, 'ignore').decode(encoding)
            if fallback is not None and text != original_text:
                text = fallback
        return format_text(text, f) if allow_colors is True else text if fallback is None else fallback
, *args
, **kwargs
):
1014 return self
._format
_text
(self
._out
_files
.out
, self
._allow
_colors
.out
, *args
, **kwargs
)
1016 def _format_screen(self
, *args
, **kwargs
):
1017 return self
._format
_text
(self
._out
_files
.screen
, self
._allow
_colors
.screen
, *args
, **kwargs
)
1019 def _format_err(self
, *args
, **kwargs
):
1020 return self
._format
_text
(self
._out
_files
.error
, self
._allow
_colors
.error
, *args
, **kwargs
)
    def report_warning(self, message, only_once=False):
        '''
        Print the message to stderr; it will be prefixed with 'WARNING:'.
        If stderr is a tty file, the 'WARNING:' will be colored
        '''
        if self.params.get('logger') is not None:
            self.params['logger'].warning(message)
        else:
            if self.params.get('no_warnings'):
                return
            self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)
    def deprecation_warning(self, message, *, stacklevel=0):
        deprecation_warning(
            message, stacklevel=stacklevel + 1, printer=self.report_error, is_error=False)

    def deprecated_feature(self, message):
        if self.params.get('logger') is not None:
            self.params['logger'].warning(f'Deprecated Feature: {message}')
        self.to_stderr(f'{self._format_err("Deprecated Feature:", self.Styles.ERROR)} {message}', True)
    def report_error(self, message, *args, **kwargs):
        '''
        Do the same as trouble, but prefixes the message with 'ERROR:', colored
        in red if stderr is a tty file.
        '''
        self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)

    def write_debug(self, message, only_once=False):
        '''Log debug message or print message to stderr'''
        if not self.params.get('verbose', False):
            return
        message = f'[debug] {message}'
        if self.params.get('logger'):
            self.params['logger'].debug(message)
        else:
            self.to_stderr(message, only_once)
    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        try:
            self.to_screen('[download] %s has already been downloaded' % file_name)
        except UnicodeEncodeError:
            self.to_screen('[download] The file has already been downloaded')

    def report_file_delete(self, file_name):
        """Report that existing file will be deleted."""
        try:
            self.to_screen('Deleting existing file %s' % file_name)
        except UnicodeEncodeError:
            self.to_screen('Deleting existing file')
    def raise_no_formats(self, info, forced=False, *, msg=None):
        has_drm = info.get('_has_drm')
        ignored, expected = self.params.get('ignore_no_formats_error'), bool(msg)
        msg = msg or has_drm and 'This video is DRM protected' or 'No video formats found!'
        if forced or not ignored:
            raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
                                 expected=has_drm or ignored or expected)
        else:
            self.report_warning(msg)
    def parse_outtmpl(self):
        self.deprecation_warning('"YoutubeDL.parse_outtmpl" is deprecated and may be removed in a future version')
        self._parse_outtmpl()
        return self.params['outtmpl']

    def _parse_outtmpl(self):
        sanitize = IDENTITY
        if self.params.get('restrictfilenames'):  # Remove spaces in the default template
            sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')

        outtmpl = self.params.setdefault('outtmpl', {})
        if not isinstance(outtmpl, dict):
            self.params['outtmpl'] = outtmpl = {'default': outtmpl}
        outtmpl.update({k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items() if outtmpl.get(k) is None})
    def get_output_path(self, dir_type='', filename=None):
        paths = self.params.get('paths', {})
        assert isinstance(paths, dict), '"paths" parameter must be a dictionary'
        path = os.path.join(
            expand_path(paths.get('home', '').strip()),
            expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
            filename or '')
        return sanitize_path(path, force=self.params.get('windowsfilenames'))
    @staticmethod
    def _outtmpl_expandpath(outtmpl):
        # expand_path translates '%%' into '%' and '$$' into '$'
        # correspondingly that is not what we want since we need to keep
        # '%%' intact for template dict substitution step. Working around
        # with boundary-alike separator hack.
        sep = ''.join(random.choices(string.ascii_letters, k=32))
        outtmpl = outtmpl.replace('%%', f'%{sep}%').replace('$$', f'${sep}$')

        # outtmpl should be expand_path'ed before template dict substitution
        # because meta fields may contain env variables we don't want to
        # be expanded. E.g. for outtmpl "%(title)s.%(ext)s" and
        # title "Hello $PATH", we don't want `$PATH` to be expanded.
        return expand_path(outtmpl).replace(sep, '')

    @staticmethod
    def escape_outtmpl(outtmpl):
        ''' Escape any remaining strings like %s, %abc% etc. '''
        return re.sub(
            STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
            lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
            outtmpl)

    @classmethod
    def validate_outtmpl(cls, outtmpl):
        ''' @return None or Exception object '''
        outtmpl = re.sub(
            STR_FORMAT_RE_TMPL.format('[^)]*', '[ljhqBUDS]'),
            lambda mobj: f'{mobj.group(0)[:-1]}s',
            cls._outtmpl_expandpath(outtmpl))
        try:
            cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
            return None
        except ValueError as err:
            return err
):
1146 info_dict
= dict(info_dict
)
1147 info_dict
.pop('__postprocessors', None)
1148 info_dict
.pop('__pending_error', None)
1151 def prepare_outtmpl(self
, outtmpl
, info_dict
, sanitize
=False):
1152 """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
1153 @param sanitize Whether to sanitize the output as a filename.
1154 For backward compatibility, a function can also be passed
1157 info_dict
.setdefault('epoch', int(time
.time())) # keep epoch consistent once set
1159 info_dict
= self
._copy
_infodict
(info_dict
)
1160 info_dict
['duration_string'] = ( # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
1161 formatSeconds(info_dict
['duration'], '-' if sanitize
else ':')
1162 if info_dict
.get('duration', None) is not None
1164 info_dict
['autonumber'] = int(self
.params
.get('autonumber_start', 1) - 1 + self
._num
_downloads
)
1165 info_dict
['video_autonumber'] = self
._num
_videos
1166 if info_dict
.get('resolution') is None:
1167 info_dict
['resolution'] = self
.format_resolution(info_dict
, default
=None)
1169 # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
1170 # of %(field)s to %(field)0Nd for backward compatibility
1171 field_size_compat_map
= {
1172 'playlist_index': number_of_digits(info_dict
.get('__last_playlist_index') or 0),
1173 'playlist_autonumber': number_of_digits(info_dict
.get('n_entries') or 0),
1174 'autonumber': self
.params
.get('autonumber_size') or 5,
1178 EXTERNAL_FORMAT_RE
= re
.compile(STR_FORMAT_RE_TMPL
.format('[^)]*', f
'[{STR_FORMAT_TYPES}ljhqBUDS]'))
        # Field is of the form key1.key2...
        # where keys (except first) can be string, int, slice or "{field, ...}"
        FIELD_INNER_RE = r'(?:\w+|%(num)s|%(num)s?(?::%(num)s?){1,2})' % {'num': r'(?:-?\d+)'}
        FIELD_RE = r'\w*(?:\.(?:%(inner)s|{%(field)s(?:,%(field)s)*}))*' % {
            'inner': FIELD_INNER_RE,
            'field': rf'\w*(?:\.{FIELD_INNER_RE})*',
        }
        MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})'
        MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
        INTERNAL_FORMAT_RE = re.compile(rf'''(?xs)
            (?P<negate>-)?
            (?P<fields>{FIELD_RE})
            (?P<maths>(?:{MATH_OPERATORS_RE}{MATH_FIELD_RE})*)
            (?:>(?P<strf_format>.+?))?
            (?P<remaining>
                (?P<alternate>(?<!\\),[^|&)]+)?
                (?:&(?P<replacement>.*?))?
                (?:\|(?P<default>.*?))?
            )$''')
        def _traverse_infodict(fields):
            fields = [f for x in re.split(r'\.({.+?})\.?', fields)
                      for f in ([x] if x.startswith('{') else x.split('.'))]
            for i in (0, -1):
                if fields and not fields[i]:
                    fields.pop(i)

            for i, f in enumerate(fields):
                if not f.startswith('{'):
                    continue
                assert f.endswith('}'), f'No closing brace for {f} in {fields}'
                fields[i] = {k: k.split('.') for k in f[1:-1].split(',')}

            return traverse_obj(info_dict, fields, is_user_input=True, traverse_string=True)
        def get_value(mdict):
            # Object traversal
            value = _traverse_infodict(mdict['fields'])
            # Negative
            if mdict['negate']:
                value = float_or_none(value)
                if value is not None:
                    value *= -1
            # Do maths
            offset_key = mdict['maths']
            if offset_key:
                value = float_or_none(value)
                operator = None
                while offset_key:
                    item = re.match(
                        MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                        offset_key).group(0)
                    offset_key = offset_key[len(item):]
                    if operator is None:
                        operator = MATH_FUNCTIONS[item]
                        continue
                    item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                    offset = float_or_none(item)
                    if offset is None:
                        offset = float_or_none(_traverse_infodict(item))
                    try:
                        value = operator(value, multiplier * offset)
                    except (TypeError, ZeroDivisionError):
                        return None
                    operator = None
            # Datetime formatting
            if mdict['strf_format']:
                value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

            # XXX: Workaround for https://github.com/yt-dlp/yt-dlp/issues/4485
            if sanitize and value == '':
                value = None
            return value
        na = self.params.get('outtmpl_na_placeholder', 'NA')

        def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
            return sanitize_filename(str(value), restricted=restricted, is_id=(
                bool(re.search(r'(^|[_.])id(\.|$)', key))
                if 'filename-sanitization' in self.params['compat_opts']
                else NO_DEFAULT))

        sanitizer = sanitize if callable(sanitize) else filename_sanitizer
        sanitize = bool(sanitize)
        def _dumpjson_default(obj):
            if isinstance(obj, (set, LazyList)):
                return list(obj)
            return repr(obj)

        class _ReplacementFormatter(string.Formatter):
            def get_field(self, field_name, args, kwargs):
                if field_name.isdigit():
                    return args[0], -1
                raise ValueError('Unsupported field')

        replacement_formatter = _ReplacementFormatter()
        def create_key(outer_mobj):
            if not outer_mobj.group('has_key'):
                return outer_mobj.group(0)
            key = outer_mobj.group('key')
            mobj = re.match(INTERNAL_FORMAT_RE, key)
            value, replacement, default, last_field = None, None, na, ''
            while mobj:
                mobj = mobj.groupdict()
                default = mobj['default'] if mobj['default'] is not None else default
                value = get_value(mobj)
                last_field, replacement = mobj['fields'], mobj['replacement']
                if value is None and mobj['alternate']:
                    mobj = re.match(INTERNAL_FORMAT_RE, mobj['remaining'][1:])
                else:
                    break

            if None not in (value, replacement):
                try:
                    value = replacement_formatter.format(replacement, value)
                except ValueError:
                    value, default = None, na

            fmt = outer_mobj.group('format')
            if fmt == 's' and last_field in field_size_compat_map.keys() and isinstance(value, int):
                fmt = f'0{field_size_compat_map[last_field]:d}d'

            flags = outer_mobj.group('conversion') or ''
            str_fmt = f'{fmt[:-1]}s'
            if value is None:
                value, fmt = default, 's'
            elif fmt[-1] == 'l':  # list
                delim = '\n' if '#' in flags else ', '
                value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
            elif fmt[-1] == 'j':  # json
                value, fmt = json.dumps(
                    value, default=_dumpjson_default,
                    indent=4 if '#' in flags else None, ensure_ascii='+' not in flags), str_fmt
            elif fmt[-1] == 'h':  # html
                value, fmt = escapeHTML(str(value)), str_fmt
            elif fmt[-1] == 'q':  # quoted
                value = map(str, variadic(value) if '#' in flags else [value])
                value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
            elif fmt[-1] == 'B':  # bytes
                value = f'%{str_fmt}'.encode() % str(value).encode()
                value, fmt = value.decode('utf-8', 'ignore'), 's'
            elif fmt[-1] == 'U':  # unicode normalized
                value, fmt = unicodedata.normalize(
                    # "+" = compatibility equivalence, "#" = NFD
                    'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
                    value), str_fmt
            elif fmt[-1] == 'D':  # decimal suffix
                num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
                value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
                                              factor=1024 if '#' in flags else 1000)
            elif fmt[-1] == 'S':  # filename sanitization
                value, fmt = filename_sanitizer(last_field, value, restricted='#' in flags), str_fmt
            elif fmt[-1] == 'c':
                if value:
                    value = str(value)[0]
                else:
                    fmt = str_fmt
            elif fmt[-1] not in 'rsa':  # numeric
                value = float_or_none(value)
                if value is None:
                    value, fmt = default, 's'

            if sanitize:
                # If value is an object, sanitize might convert it to a string
                # So we convert it to repr first
                if fmt[-1] == 'r':
                    value, fmt = repr(value), str_fmt
                elif fmt[-1] == 'a':
                    value, fmt = ascii(value), str_fmt
                if fmt[-1] in 'csra':
                    value = sanitizer(last_field, value)

            key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
            TMPL_DICT[key] = value
            return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

        return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
    def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
        outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
        return self.escape_outtmpl(outtmpl) % info_dict
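    # E.g. (a sketch; assumes `ydl` is an instance and `info` a populated info_dict):
    #   ydl.evaluate_outtmpl('%(title)s [%(id)s].%(ext)s', info)
    # expands the template fields via prepare_outtmpl() and escape_outtmpl() above.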
    def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None):
        assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive'
        if outtmpl is None:
            outtmpl = self.params['outtmpl'].get(tmpl_type or 'default', self.params['outtmpl']['default'])
        try:
            outtmpl = self._outtmpl_expandpath(outtmpl)
            filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
            if not filename:
                return None

            if tmpl_type in ('', 'temp'):
                final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')
                if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):
                    filename = replace_extension(filename, ext, final_ext)
            elif tmpl_type:
                force_ext = OUTTMPL_TYPES[tmpl_type]
                if force_ext:
                    filename = replace_extension(filename, force_ext, info_dict.get('ext'))

            # https://github.com/blackjack4494/youtube-dlc/issues/85
            trim_file_name = self.params.get('trim_file_name', False)
            if trim_file_name:
                no_ext, *ext = filename.rsplit('.', 2)
                filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')

            return filename
        except ValueError as err:
            self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
            return None
    def prepare_filename(self, info_dict, dir_type='', *, outtmpl=None, warn=False):
        """Generate the output filename"""
        if outtmpl:
            assert not dir_type, 'outtmpl and dir_type are mutually exclusive'
            dir_type = None
        filename = self._prepare_filename(info_dict, tmpl_type=dir_type, outtmpl=outtmpl)
        if not filename and dir_type not in ('', 'temp'):
            return ''

        if warn:
            if not self.params.get('paths'):
                pass
            elif filename == '-':
                self.report_warning('--paths is ignored when outputting to stdout', only_once=True)
            elif os.path.isabs(filename):
                self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
        if filename == '-' or not filename:
            return filename

        return self.get_output_path(dir_type, filename)

    def _match_entry(self, info_dict, incomplete=False, silent=False):
        """Returns None if the file should be downloaded"""
        _type = 'video' if 'playlist-match-filter' in self.params['compat_opts'] else info_dict.get('_type', 'video')
        assert incomplete or _type == 'video', 'Only video result can be considered complete'

        video_title = info_dict.get('title', info_dict.get('id', 'entry'))

        def check_filter():
            if _type in ('playlist', 'multi_video'):
                return None
            elif _type in ('url', 'url_transparent') and not try_call(
                    lambda: self.get_info_extractor(info_dict['ie_key']).is_single_video(info_dict['url'])):
                return None

            if 'title' in info_dict:
                # This can happen when we're just evaluating the playlist
                title = info_dict['title']
                matchtitle = self.params.get('matchtitle', False)
                if matchtitle:
                    if not re.search(matchtitle, title, re.IGNORECASE):
                        return '"' + title + '" title did not match pattern "' + matchtitle + '"'
                rejecttitle = self.params.get('rejecttitle', False)
                if rejecttitle:
                    if re.search(rejecttitle, title, re.IGNORECASE):
                        return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'

            date = info_dict.get('upload_date')
            if date is not None:
                dateRange = self.params.get('daterange', DateRange())
                if date not in dateRange:
                    return f'{date_from_str(date).isoformat()} upload date is not in range {dateRange}'
            view_count = info_dict.get('view_count')
            if view_count is not None:
                min_views = self.params.get('min_views')
                if min_views is not None and view_count < min_views:
                    return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
                max_views = self.params.get('max_views')
                if max_views is not None and view_count > max_views:
                    return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
            if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
                return 'Skipping "%s" because it is age restricted' % video_title

            match_filter = self.params.get('match_filter')
            if match_filter is None:
                return None

            cancelled = None
            try:
                try:
                    ret = match_filter(info_dict, incomplete=incomplete)
                except TypeError:
                    # For backward compatibility
                    ret = None if incomplete else match_filter(info_dict)
            except DownloadCancelled as err:
                if err.msg is not NO_DEFAULT:
                    raise
                ret, cancelled = err.msg, err

            if ret is NO_DEFAULT:
                while True:
                    filename = self._format_screen(self.prepare_filename(info_dict), self.Styles.FILENAME)
                    reply = input(self._format_screen(
                        f'Download "{filename}"? (Y/n): ', self.Styles.EMPHASIS)).lower().strip()
                    if reply in {'y', ''}:
                        return None
                    elif reply == 'n':
                        if cancelled:
                            raise type(cancelled)(f'Skipping {video_title}')
                        return f'Skipping {video_title}'
            return ret

        if self.in_download_archive(info_dict):
            reason = ''.join((
                format_field(info_dict, 'id', f'{self._format_screen("%s", self.Styles.ID)}: '),
                format_field(info_dict, 'title', f'{self._format_screen("%s", self.Styles.EMPHASIS)} '),
                'has already been recorded in the archive'))
            break_opt, break_err = 'break_on_existing', ExistingVideoReached
        else:
            try:
                reason = check_filter()
            except DownloadCancelled as e:
                reason, break_opt, break_err = e.msg, 'match_filter', type(e)
            else:
                break_opt, break_err = 'break_on_reject', RejectedVideoReached
        if reason is not None:
            if not silent:
                self.to_screen('[download] ' + reason)
            if self.params.get(break_opt, False):
                raise break_err()
        return reason

    @staticmethod
    def add_extra_info(info_dict, extra_info):
        """Set the keys from extra_info in info dict if they are missing"""
        for key, value in extra_info.items():
            info_dict.setdefault(key, value)

    def extract_info(self, url, download=True, ie_key=None, extra_info=None,
                     process=True, force_generic_extractor=False):
        """
        Extract and return the information dictionary of the URL

        Arguments:
        @param url          URL to extract

        Keyword arguments:
        @param download     Whether to download videos
        @param process      Whether to resolve all unresolved references (URLs, playlist items).
                            Must be True for download to work
        @param ie_key       Use only the extractor with this key

        @param extra_info   Dictionary containing the extra values to add to the info (For internal use only)
        @param force_generic_extractor  Force using the generic extractor (Deprecated; use ie_key='Generic')
        """

        if extra_info is None:
            extra_info = {}

        if not ie_key and force_generic_extractor:
            ie_key = 'Generic'

        if ie_key:
            ies = {ie_key: self._ies[ie_key]} if ie_key in self._ies else {}
        else:
            ies = self._ies

        for key, ie in ies.items():
            if not ie.suitable(url):
                continue

            if not ie.working():
                self.report_warning('The program functionality for this site has been marked as broken, '
                                    'and will probably not work.')

            temp_id = ie.get_temp_id(url)
            if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': key}):
                self.to_screen(f'[download] {self._format_screen(temp_id, self.Styles.ID)}: '
                               'has already been recorded in the archive')
                if self.params.get('break_on_existing', False):
                    raise ExistingVideoReached()
                break
            return self.__extract_info(url, self.get_info_extractor(key), download, extra_info, process)
        else:
            extractors_restricted = self.params.get('allowed_extractors') not in (None, ['default'])
            self.report_error(f'No suitable extractor{format_field(ie_key, None, " (%s)")} found for URL {url}',
                              tb=False if extractors_restricted else None)
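
    # Editor's usage sketch (illustrative, not part of the original file):
    #
    #   import yt_dlp
    #   with yt_dlp.YoutubeDL({'quiet': True}) as ydl:
    #       info = ydl.extract_info('https://example.com/some/video', download=False)
    #
    # The URL is a placeholder; with `download=False` only the info dict is returned,
    # and `process=False` would additionally leave playlist entries unresolved.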

    def _handle_extraction_exceptions(func):
        @functools.wraps(func)
        def wrapper(self, *args, **kwargs):
            while True:
                try:
                    return func(self, *args, **kwargs)
                except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
                    raise
                except ReExtractInfo as e:
                    if e.expected:
                        self.to_screen(f'{e}; Re-extracting data')
                    else:
                        self.to_stderr('\r')
                        self.report_warning(f'{e}; Re-extracting data')
                    continue
                except GeoRestrictedError as e:
                    msg = e.msg
                    if e.countries:
                        msg += '\nThis video is available in %s.' % ', '.join(
                            map(ISO3166Utils.short2full, e.countries))
                    msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
                    self.report_error(msg)
                except ExtractorError as e:  # An error we somewhat expected
                    self.report_error(str(e), e.format_traceback())
                except Exception as e:
                    if self.params.get('ignoreerrors'):
                        self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
                    else:
                        raise
                break
        return wrapper

    def _wait_for_video(self, ie_result={}):
        if (not self.params.get('wait_for_video')
                or ie_result.get('_type', 'video') != 'video'
                or ie_result.get('formats') or ie_result.get('url')):
            return

        format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
        last_msg = ''

        def progress(msg):
            nonlocal last_msg
            full_msg = f'{msg}\n'
            if not self.params.get('noprogress'):
                full_msg = msg + ' ' * (len(last_msg) - len(msg)) + '\r'
            elif last_msg:
                return
            self.to_screen(full_msg, skip_eol=True)
            last_msg = msg

        min_wait, max_wait = self.params.get('wait_for_video')
        diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
        if diff is None and ie_result.get('live_status') == 'is_upcoming':
            diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0)
            self.report_warning('Release time of video is not known')
        elif ie_result and (diff or 0) <= 0:
            self.report_warning('Video should already be available according to extracted info')
        diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
        self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')

        wait_till = time.time() + diff
        try:
            while True:
                diff = wait_till - time.time()
                if diff <= 0:
                    progress('')
                    raise ReExtractInfo('[wait] Wait period ended', expected=True)
                progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
                time.sleep(1)
        except KeyboardInterrupt:
            progress('')
            raise ReExtractInfo('[wait] Interrupted by user', expected=True)
        except BaseException as e:
            if not isinstance(e, ReExtractInfo):
                self.to_screen('')
            raise
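
    # Editor's note (illustration only): `wait_for_video` is a (min_wait, max_wait)
    # tuple of seconds, e.g. {'wait_for_video': (60, 600)}. When the release time of
    # an upcoming video is unknown, a wait inside that range is picked as seen above,
    # and ReExtractInfo triggers a fresh extraction attempt once the wait elapses.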

    def _load_cookies(self, data, *, autoscope=True):
        """Loads cookies from a `Cookie` header

        This tries to work around the security vulnerability of passing cookies to every domain.
        See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj

        @param data         The Cookie header as string to load the cookies from
        @param autoscope    If `False`, scope cookies using Set-Cookie syntax and error for cookie without domains
                            If `True`, save cookies for later to be stored in the jar with a limited scope
                            If a URL, save cookies in the jar with the domain of the URL
        """
        for cookie in LenientSimpleCookie(data).values():
            if autoscope and any(cookie.values()):
                raise ValueError('Invalid syntax in Cookie Header')

            domain = cookie.get('domain') or ''
            expiry = cookie.get('expires')
            if expiry == '':  # 0 is valid
                expiry = None
            prepared_cookie = http.cookiejar.Cookie(
                cookie.get('version') or 0, cookie.key, cookie.value, None, False,
                domain, True, True, cookie.get('path') or '', bool(cookie.get('path')),
                cookie.get('secure') or False, expiry, False, None, None, {})

            if domain:
                self.cookiejar.set_cookie(prepared_cookie)
            elif autoscope is True:
                self.deprecated_feature(
                    'Passing cookies as a header is a potential security risk; '
                    'they will be scoped to the domain of the downloaded urls. '
                    'Please consider loading cookies from a file or browser instead.')
                self.__header_cookies.append(prepared_cookie)
            elif autoscope:
                self.report_warning(
                    'The extractor result contains an unscoped cookie as an HTTP header. '
                    f'If you are using yt-dlp with an input URL{bug_reports_message(before=",")}',
                    only_once=True)
                self._apply_header_cookies(autoscope, [prepared_cookie])
            else:
                self.report_error('Unscoped cookies are not allowed; please specify some sort of scoping',
                                  tb=False, is_error=False)
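
    # Editor's sketch (hypothetical values, not in the original source): with
    # autoscope=False the data must carry its own Set-Cookie-style scoping, e.g.
    #
    #   ydl._load_cookies('id=abc123; Domain=.example.com; Path=/; Secure', autoscope=False)
    #
    # whereas a bare 'id=abc123' under autoscope=True is deferred and later scoped
    # to each downloaded URL's domain by _apply_header_cookies below.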

    def _apply_header_cookies(self, url, cookies=None):
        """Applies stray header cookies to the provided url

        This loads header cookies and scopes them to the domain provided in `url`.
        While this is not ideal, it helps reduce the risk of them being sent
        to an unintended destination while mostly maintaining compatibility.
        """
        parsed = urllib.parse.urlparse(url)
        if not parsed.hostname:
            return

        for cookie in map(copy.copy, cookies or self.__header_cookies):
            cookie.domain = f'.{parsed.hostname}'
            self.cookiejar.set_cookie(cookie)

    @_handle_extraction_exceptions
    def __extract_info(self, url, ie, download, extra_info, process):
        self._apply_header_cookies(url)

        try:
            ie_result = ie.extract(url)
        except UserNotLive as e:
            if process:
                if self.params.get('wait_for_video'):
                    self.report_warning(e)
                self._wait_for_video()
            raise
        if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
            self.report_warning(f'Extractor {ie.IE_NAME} returned nothing{bug_reports_message()}')
            return
        if isinstance(ie_result, list):
            # Backwards compatibility: old IE result format
            ie_result = {
                '_type': 'compat_list',
                'entries': ie_result,
            }
        if extra_info.get('original_url'):
            ie_result.setdefault('original_url', extra_info['original_url'])
        self.add_default_extra_info(ie_result, ie, url)
        if process:
            self._wait_for_video(ie_result)
            return self.process_ie_result(ie_result, download, extra_info)
        else:
            return ie_result

    def add_default_extra_info(self, ie_result, ie, url):
        if url is not None:
            self.add_extra_info(ie_result, {
                'webpage_url': url,
                'original_url': url,
            })
        webpage_url = ie_result.get('webpage_url')
        if webpage_url:
            self.add_extra_info(ie_result, {
                'webpage_url_basename': url_basename(webpage_url),
                'webpage_url_domain': get_domain(webpage_url),
            })
        if ie is not None:
            self.add_extra_info(ie_result, {
                'extractor': ie.IE_NAME,
                'extractor_key': ie.ie_key(),
            })

    def process_ie_result(self, ie_result, download=True, extra_info=None):
        """
        Take the result of the ie (may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download'.
        Returns the resolved ie_result.
        """
        if extra_info is None:
            extra_info = {}
        result_type = ie_result.get('_type', 'video')

        if result_type in ('url', 'url_transparent'):
            ie_result['url'] = sanitize_url(
                ie_result['url'], scheme='http' if self.params.get('prefer_insecure') else 'https')
            if ie_result.get('original_url') and not extra_info.get('original_url'):
                extra_info = {'original_url': ie_result['original_url'], **extra_info}

            extract_flat = self.params.get('extract_flat', False)
            if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
                    or extract_flat is True):
                info_copy = ie_result.copy()
                ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
                if ie and not ie_result.get('id'):
                    info_copy['id'] = ie.get_temp_id(ie_result['url'])
                self.add_default_extra_info(info_copy, ie, ie_result['url'])
                self.add_extra_info(info_copy, extra_info)
                info_copy, _ = self.pre_process(info_copy)
                self._fill_common_fields(info_copy, False)
                self.__forced_printings(info_copy)
                self._raise_pending_errors(info_copy)
                if self.params.get('force_write_download_archive', False):
                    self.record_download_archive(info_copy)
                return ie_result

        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            ie_result = self.process_video_result(ie_result, download=download)
            self._raise_pending_errors(ie_result)
            additional_urls = (ie_result or {}).get('additional_urls')
            if additional_urls:
                # TODO: Improve MetadataParserPP to allow setting a list
                if isinstance(additional_urls, str):
                    additional_urls = [additional_urls]
                self.to_screen(
                    '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
                self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
                ie_result['additional_entries'] = [
                    self.extract_info(
                        url, download, extra_info=extra_info,
                        force_generic_extractor=self.params.get('force_generic_extractor'))
                    for url in additional_urls
                ]
            return ie_result
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            return self.extract_info(
                ie_result['url'], download,
                ie_key=ie_result.get('ie_key'),
                extra_info=extra_info)
        elif result_type == 'url_transparent':
            # Use the information from the embedding page
            info = self.extract_info(
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)

            # extract_info may return None when ignoreerrors is enabled and
            # extraction failed with an error, don't crash and return early
            # in this case
            if not info:
                return info

            exempted_fields = {'_type', 'url', 'ie_key'}
            if not ie_result.get('section_end') and ie_result.get('section_start') is None:
                # For video clips, the id etc of the clip extractor should be used
                exempted_fields |= {'id', 'extractor', 'extractor_key'}

            new_result = info.copy()
            new_result.update(filter_dict(ie_result, lambda k, v: v is not None and k not in exempted_fields))

            # Extracted info may not be a video result (i.e.
            # info.get('_type', 'video') != video) but rather an url or
            # url_transparent. In such cases outer metadata (from ie_result)
            # should be propagated to inner one (info). For this to happen
            # _type of info should be overridden with url_transparent. This
            # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
            if new_result.get('_type') == 'url':
                new_result['_type'] = 'url_transparent'

            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type in ('playlist', 'multi_video'):
            # Protect from infinite recursion due to recursively nested playlists
            # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
            webpage_url = ie_result.get('webpage_url')  # Playlists may not have webpage_url
            if webpage_url and webpage_url in self._playlist_urls:
                self.to_screen(
                    '[download] Skipping already downloaded playlist: %s'
                    % (ie_result.get('title') or ie_result.get('id')))
                return

            self._playlist_level += 1
            self._playlist_urls.add(webpage_url)
            self._fill_common_fields(ie_result, False)
            self._sanitize_thumbnails(ie_result)
            try:
                return self.__process_playlist(ie_result, download)
            finally:
                self._playlist_level -= 1
                if not self._playlist_level:
                    self._playlist_urls.clear()
        elif result_type == 'compat_list':
            self.report_warning(
                'Extractor %s returned a compat_list result. '
                'It needs to be updated.' % ie_result.get('extractor'))

            def _fixup(r):
                self.add_extra_info(r, {
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'webpage_url_domain': get_domain(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                })
                return r

            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
            ]
            return ie_result
        else:
            raise Exception('Invalid result type: %s' % result_type)
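
    # Editor's summary sketch (not part of the original module): the dispatch above
    # is driven entirely by ie_result['_type']. For example, a minimal
    # url_transparent result merges outer metadata over the re-extracted inner info:
    #
    #   {'_type': 'url_transparent', 'url': 'https://example.com/embed/1', 'title': 'Outer title'}
    #
    # 'video' results go to process_video_result(); 'playlist'/'multi_video' go to
    # __process_playlist(); the values shown here are placeholders.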

    def _ensure_dir_exists(self, path):
        return make_dir(path, self.report_error)

    @staticmethod
    def _playlist_infodict(ie_result, strict=False, **kwargs):
        info = {
            'playlist_count': ie_result.get('playlist_count'),
            'playlist': ie_result.get('title') or ie_result.get('id'),
            'playlist_id': ie_result.get('id'),
            'playlist_title': ie_result.get('title'),
            'playlist_uploader': ie_result.get('uploader'),
            'playlist_uploader_id': ie_result.get('uploader_id'),
            **kwargs,
        }
        if strict:
            return info
        if ie_result.get('webpage_url'):
            info.update({
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'webpage_url_domain': get_domain(ie_result['webpage_url']),
            })
        return {
            **info,
            'playlist_index': 0,
            '__last_playlist_index': max(ie_result.get('requested_entries') or (0, 0)),
            'extractor': ie_result['extractor'],
            'extractor_key': ie_result['extractor_key'],
        }

    def __process_playlist(self, ie_result, download):
        """Process each entry in the playlist"""
        assert ie_result['_type'] in ('playlist', 'multi_video')

        common_info = self._playlist_infodict(ie_result, strict=True)
        title = common_info.get('playlist') or '<Untitled>'
        if self._match_entry(common_info, incomplete=True) is not None:
            return
        self.to_screen(f'[download] Downloading {ie_result["_type"]}: {title}')

        all_entries = PlaylistEntries(self, ie_result)
        entries = orderedSet(all_entries.get_requested_items(), lazy=True)

        lazy = self.params.get('lazy_playlist')
        if lazy:
            resolved_entries, n_entries = [], 'N/A'
            ie_result['requested_entries'], ie_result['entries'] = None, None
        else:
            entries = resolved_entries = list(entries)
            n_entries = len(resolved_entries)
            ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*resolved_entries)) or ([], [])
        if not ie_result.get('playlist_count'):
            # Better to do this after potentially exhausting entries
            ie_result['playlist_count'] = all_entries.get_full_count()

        extra = self._playlist_infodict(ie_result, n_entries=int_or_none(n_entries))
        ie_copy = collections.ChainMap(ie_result, extra)

        _infojson_written = False
        write_playlist_files = self.params.get('allow_playlist_files', True)
        if write_playlist_files and self.params.get('list_thumbnails'):
            self.list_thumbnails(ie_result)
        if write_playlist_files and not self.params.get('simulate'):
            _infojson_written = self._write_info_json(
                'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
            if _infojson_written is None:
                return
            if self._write_description('playlist', ie_result,
                                       self.prepare_filename(ie_copy, 'pl_description')) is None:
                return
            # TODO: This should be passed to ThumbnailsConvertor if necessary
            self._write_thumbnails('playlist', ie_result, self.prepare_filename(ie_copy, 'pl_thumbnail'))

        if lazy:
            if self.params.get('playlistreverse') or self.params.get('playlistrandom'):
                self.report_warning('playlistreverse and playlistrandom are not supported with lazy_playlist', only_once=True)
        elif self.params.get('playlistreverse'):
            entries.reverse()
        elif self.params.get('playlistrandom'):
            random.shuffle(entries)

        self.to_screen(f'[{ie_result["extractor"]}] Playlist {title}: Downloading {n_entries} items'
                       f'{format_field(ie_result, "playlist_count", " of %s")}')

        keep_resolved_entries = self.params.get('extract_flat') != 'discard'
        if self.params.get('extract_flat') == 'discard_in_playlist':
            keep_resolved_entries = ie_result['_type'] != 'playlist'
        if keep_resolved_entries:
            self.write_debug('The information of all playlist entries will be held in memory')

        failures = 0
        max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
        for i, (playlist_index, entry) in enumerate(entries):
            if lazy:
                resolved_entries.append((playlist_index, entry))
            if not entry:
                continue

            entry['__x_forwarded_for_ip'] = ie_result.get('__x_forwarded_for_ip')
            if not lazy and 'playlist-index' in self.params['compat_opts']:
                playlist_index = ie_result['requested_entries'][i]

            entry_copy = collections.ChainMap(entry, {
                **common_info,
                'n_entries': int_or_none(n_entries),
                'playlist_index': playlist_index,
                'playlist_autonumber': i + 1,
            })

            if self._match_entry(entry_copy, incomplete=True) is not None:
                # For compatibility with youtube-dl. See https://github.com/yt-dlp/yt-dlp/issues/4369
                resolved_entries[i] = (playlist_index, NO_DEFAULT)
                continue

            self.to_screen('[download] Downloading item %s of %s' % (
                self._format_screen(i + 1, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))

            entry_result = self.__process_iterable_entry(entry, download, collections.ChainMap({
                'playlist_index': playlist_index,
                'playlist_autonumber': i + 1,
            }, extra))
            if not entry_result:
                failures += 1
            if failures >= max_failures:
                self.report_error(
                    f'Skipping the remaining entries in playlist "{title}" since {failures} items failed extraction')
                break
            if keep_resolved_entries:
                resolved_entries[i] = (playlist_index, entry_result)

        # Update with processed data
        ie_result['entries'] = [e for _, e in resolved_entries if e is not NO_DEFAULT]
        ie_result['requested_entries'] = [i for i, e in resolved_entries if e is not NO_DEFAULT]
        if ie_result['requested_entries'] == try_call(lambda: list(range(1, ie_result['playlist_count'] + 1))):
            # Do not set for full playlist
            ie_result.pop('requested_entries')

        # Write the updated info to json
        if _infojson_written is True and self._write_info_json(
                'updated playlist', ie_result,
                self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
            return

        ie_result = self.run_all_pps('playlist', ie_result)
        self.to_screen(f'[download] Finished downloading playlist: {title}')
        return ie_result
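
    # Editor's note (illustrative): with {'lazy_playlist': True} entries are consumed
    # as a generator, so n_entries is reported as 'N/A' and playlistreverse/
    # playlistrandom are unavailable; without it, all entries are resolved up front
    # as in the branch above.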

    @_handle_extraction_exceptions
    def __process_iterable_entry(self, entry, download, extra_info):
        return self.process_ie_result(
            entry, download=download, extra_info=extra_info)

    def _build_format_filter(self, filter_spec):
        " Returns a function to filter the formats according to the filter_spec "

        OPERATORS = {
            '<': operator.lt,
            '<=': operator.le,
            '>': operator.gt,
            '>=': operator.ge,
            '=': operator.eq,
            '!=': operator.ne,
        }
        operator_rex = re.compile(r'''(?x)\s*
            (?P<key>[\w.-]+)\s*
            (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
            (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
            ''' % '|'.join(map(re.escape, OPERATORS.keys())))
        m = operator_rex.fullmatch(filter_spec)
        if m:
            try:
                comparison_value = int(m.group('value'))
            except ValueError:
                comparison_value = parse_filesize(m.group('value'))
                if comparison_value is None:
                    comparison_value = parse_filesize(m.group('value') + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid value %r in format specification %r' % (
                            m.group('value'), filter_spec))
            op = OPERATORS[m.group('op')]

        if not m:
            STR_OPERATORS = {
                '=': operator.eq,
                '^=': lambda attr, value: attr.startswith(value),
                '$=': lambda attr, value: attr.endswith(value),
                '*=': lambda attr, value: value in attr,
                '~=': lambda attr, value: value.search(attr) is not None
            }
            str_operator_rex = re.compile(r'''(?x)\s*
                (?P<key>[a-zA-Z0-9._-]+)\s*
                (?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?
                (?P<quote>["'])?
                (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
                (?(quote)(?P=quote))\s*
                ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
            m = str_operator_rex.fullmatch(filter_spec)
            if m:
                if m.group('op') == '~=':
                    comparison_value = re.compile(m.group('value'))
                else:
                    comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value'))
                str_op = STR_OPERATORS[m.group('op')]
                if m.group('negation'):
                    op = lambda attr, value: not str_op(attr, value)
                else:
                    op = str_op

        if not m:
            raise SyntaxError('Invalid filter specification %r' % filter_spec)

        def _filter(f):
            actual_value = f.get(m.group('key'))
            if actual_value is None:
                return m.group('none_inclusive')
            return op(actual_value, comparison_value)
        return _filter
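
    # Editor's examples (assumed inputs, not in the original source): filter_spec
    # strings accepted above include numeric comparisons and string operators, e.g.
    #
    #   self._build_format_filter('height<=720')            # numeric, filesize suffixes allowed
    #   self._build_format_filter('vcodec^=avc1')           # string prefix match
    #   self._build_format_filter('format_note~=premium')   # regex match
    #
    # A trailing '?' (none_inclusive) keeps formats that are missing the key.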

    def _check_formats(self, formats):
        for f in formats:
            self.to_screen('[info] Testing format %s' % f['format_id'])
            path = self.get_output_path('temp')
            if not self._ensure_dir_exists(f'{path}/'):
                continue
            temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
            temp_file.close()
            try:
                success, _ = self.dl(temp_file.name, f, test=True)
            except (DownloadError, OSError, ValueError) + network_exceptions:
                success = False
            finally:
                if os.path.exists(temp_file.name):
                    try:
                        os.remove(temp_file.name)
                    except OSError:
                        self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
            if success:
                yield f
            else:
                self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])

    def _default_format_spec(self, info_dict, download=True):
        def can_merge():
            merger = FFmpegMergerPP(self)
            return merger.available and merger.can_merge()

        prefer_best = (
            not self.params.get('simulate')
            and download
            and (
                not can_merge()
                or info_dict.get('is_live') and not self.params.get('live_from_start')
                or self.params['outtmpl']['default'] == '-'))
        compat = (
            prefer_best
            or self.params.get('allow_multiple_audio_streams', False)
            or 'format-spec' in self.params['compat_opts'])

        return (
            'best/bestvideo+bestaudio' if prefer_best
            else 'bestvideo*+bestaudio/best' if not compat
            else 'bestvideo+bestaudio/best')
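
    # Editor's note (illustration): when ffmpeg can merge, the default resolves to
    # 'bestvideo*+bestaudio/best'; when merging is unavailable or output goes to
    # stdout ('-'), it falls back to 'best/bestvideo+bestaudio' as computed above.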

    def build_format_selector(self, format_spec):
        def syntax_error(note, start):
            message = (
                'Invalid format specification: '
                '{}\n\t{}\n\t{}^'.format(note, format_spec, ' ' * start[1]))
            return SyntaxError(message)

        PICKFIRST = 'PICKFIRST'
        MERGE = 'MERGE'
        SINGLE = 'SINGLE'
        GROUP = 'GROUP'
        FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

        allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
                                  'video': self.params.get('allow_multiple_video_streams', False)}

        def _parse_filter(tokens):
            filter_parts = []
            for type, string_, start, _, _ in tokens:
                if type == tokenize.OP and string_ == ']':
                    return ''.join(filter_parts)
                else:
                    filter_parts.append(string_)

        def _remove_unused_ops(tokens):
            # Remove operators that we don't use and join them with the surrounding strings.
            # E.g. 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
            ALLOWED_OPS = ('/', '+', ',', '(', ')')
            last_string, last_start, last_end, last_line = None, None, None, None
            for type, string_, start, end, line in tokens:
                if type == tokenize.OP and string_ == '[':
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string_, start, end, line
                    # everything inside brackets will be handled by _parse_filter
                    for type, string_, start, end, line in tokens:
                        yield type, string_, start, end, line
                        if type == tokenize.OP and string_ == ']':
                            break
                elif type == tokenize.OP and string_ in ALLOWED_OPS:
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string_, start, end, line
                elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                    if not last_string:
                        last_string = string_
                        last_start = start
                        last_end = end
                    else:
                        last_string += string_
            if last_string:
                yield tokenize.NAME, last_string, last_start, last_end, last_line

        def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
            selectors = []
            current_selector = None
            for type, string_, start, _, _ in tokens:
                # ENCODING is only defined in python 3.x
                if type == getattr(tokenize, 'ENCODING', None):
                    continue
                elif type in [tokenize.NAME, tokenize.NUMBER]:
                    current_selector = FormatSelector(SINGLE, string_, [])
                elif type == tokenize.OP:
                    if string_ == ')':
                        if not inside_group:
                            # ')' will be handled by the parentheses group
                            tokens.restore_last_token()
                        break
                    elif inside_merge and string_ in ['/', ',']:
                        tokens.restore_last_token()
                        break
                    elif inside_choice and string_ == ',':
                        tokens.restore_last_token()
                        break
                    elif string_ == ',':
                        if not current_selector:
                            raise syntax_error('"," must follow a format selector', start)
                        selectors.append(current_selector)
                        current_selector = None
                    elif string_ == '/':
                        if not current_selector:
                            raise syntax_error('"/" must follow a format selector', start)
                        first_choice = current_selector
                        second_choice = _parse_format_selection(tokens, inside_choice=True)
                        current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                    elif string_ == '[':
                        if not current_selector:
                            current_selector = FormatSelector(SINGLE, 'best', [])
                        format_filter = _parse_filter(tokens)
                        current_selector.filters.append(format_filter)
                    elif string_ == '(':
                        if current_selector:
                            raise syntax_error('Unexpected "("', start)
                        group = _parse_format_selection(tokens, inside_group=True)
                        current_selector = FormatSelector(GROUP, group, [])
                    elif string_ == '+':
                        if not current_selector:
                            raise syntax_error('Unexpected "+"', start)
                        selector_1 = current_selector
                        selector_2 = _parse_format_selection(tokens, inside_merge=True)
                        if not selector_2:
                            raise syntax_error('Expected a selector', start)
                        current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                    else:
                        raise syntax_error(f'Operator not recognized: "{string_}"', start)
                elif type == tokenize.ENDMARKER:
                    break
            if current_selector:
                selectors.append(current_selector)
            return selectors

        def _merge(formats_pair):
            format_1, format_2 = formats_pair

            formats_info = []
            formats_info.extend(format_1.get('requested_formats', (format_1,)))
            formats_info.extend(format_2.get('requested_formats', (format_2,)))

            if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
                get_no_more = {'video': False, 'audio': False}
                for (i, fmt_info) in enumerate(formats_info):
                    if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
                        formats_info.pop(i)
                        continue
                    for aud_vid in ['audio', 'video']:
                        if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
                            if get_no_more[aud_vid]:
                                formats_info.pop(i)
                                break
                            get_no_more[aud_vid] = True

            if len(formats_info) == 1:
                return formats_info[0]

            video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
            audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']

            the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
            the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None

            output_ext = get_compatible_ext(
                vcodecs=[f.get('vcodec') for f in video_fmts],
                acodecs=[f.get('acodec') for f in audio_fmts],
                vexts=[f['ext'] for f in video_fmts],
                aexts=[f['ext'] for f in audio_fmts],
                preferences=(try_call(lambda: self.params['merge_output_format'].split('/'))
                             or self.params.get('prefer_free_formats') and ('webm', 'mkv')))

            filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))

            new_dict = {
                'requested_formats': formats_info,
                'format': '+'.join(filtered('format')),
                'format_id': '+'.join(filtered('format_id')),
                'ext': output_ext,
                'protocol': '+'.join(map(determine_protocol, formats_info)),
                'language': '+'.join(orderedSet(filtered('language'))) or None,
                'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
                'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
                'tbr': sum(filtered('tbr', 'vbr', 'abr')),
            }

            if the_only_video:
                new_dict.update({
                    'width': the_only_video.get('width'),
                    'height': the_only_video.get('height'),
                    'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
                    'fps': the_only_video.get('fps'),
                    'dynamic_range': the_only_video.get('dynamic_range'),
                    'vcodec': the_only_video.get('vcodec'),
                    'vbr': the_only_video.get('vbr'),
                    'stretched_ratio': the_only_video.get('stretched_ratio'),
                    'aspect_ratio': the_only_video.get('aspect_ratio'),
                })

            if the_only_audio:
                new_dict.update({
                    'acodec': the_only_audio.get('acodec'),
                    'abr': the_only_audio.get('abr'),
                    'asr': the_only_audio.get('asr'),
                    'audio_channels': the_only_audio.get('audio_channels')
                })

            return new_dict

        def _check_formats(formats):
            if self.params.get('check_formats') == 'selected':
                yield from self._check_formats(formats)
                return
            elif (self.params.get('check_formats') is not None
                    or self.params.get('allow_unplayable_formats')):
                yield from formats
                return

            for f in formats:
                if f.get('has_drm') or f.get('__needs_testing'):
                    yield from self._check_formats([f])
                else:
                    yield f

        def _build_selector_function(selector):
            if isinstance(selector, list):  # ,
                fs = [_build_selector_function(s) for s in selector]

                def selector_function(ctx):
                    for f in fs:
                        yield from f(ctx)
                return selector_function

            elif selector.type == GROUP:  # ()
                selector_function = _build_selector_function(selector.selector)

            elif selector.type == PICKFIRST:  # /
                fs = [_build_selector_function(s) for s in selector.selector]

                def selector_function(ctx):
                    for f in fs:
                        picked_formats = list(f(ctx))
                        if picked_formats:
                            return picked_formats
                    return []

            elif selector.type == MERGE:  # +
                selector_1, selector_2 = map(_build_selector_function, selector.selector)

                def selector_function(ctx):
                    for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
                        yield _merge(pair)

            elif selector.type == SINGLE:  # atom
                format_spec = selector.selector or 'best'

                # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
                if format_spec == 'all':
                    def selector_function(ctx):
                        yield from _check_formats(ctx['formats'][::-1])
                elif format_spec == 'mergeall':
                    def selector_function(ctx):
                        formats = list(_check_formats(
                            f for f in ctx['formats'] if f.get('vcodec') != 'none' or f.get('acodec') != 'none'))
                        if not formats:
                            return
                        merged_format = formats[-1]
                        for f in formats[-2::-1]:
                            merged_format = _merge((merged_format, f))
                        yield merged_format

                else:
                    format_fallback, separate_fallback, format_reverse, format_idx = False, None, True, 1
                    mobj = re.match(
                        r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
                        format_spec)
                    if mobj is not None:
                        format_idx = int_or_none(mobj.group('n'), default=1)
                        format_reverse = mobj.group('bw')[0] == 'b'
                        format_type = (mobj.group('type') or [None])[0]
                        not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
                        format_modified = mobj.group('mod') is not None

                        format_fallback = not format_type and not format_modified  # for b, w
                        _filter_f = (
                            (lambda f: f.get('%scodec' % format_type) != 'none')
                            if format_type and format_modified  # bv*, ba*, wv*, wa*
                            else (lambda f: f.get('%scodec' % not_format_type) == 'none')
                            if format_type  # bv, ba, wv, wa
                            else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                            if not format_modified  # b, w
                            else lambda f: True)  # b*, w*
                        filter_f = lambda f: _filter_f(f) and (
                            f.get('vcodec') != 'none' or f.get('acodec') != 'none')
                    else:
                        if format_spec in self._format_selection_exts['audio']:
                            filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
                        elif format_spec in self._format_selection_exts['video']:
                            filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
                            separate_fallback = lambda f: f.get('ext') == format_spec and f.get('vcodec') != 'none'
                        elif format_spec in self._format_selection_exts['storyboards']:
                            filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
                        else:
                            filter_f = lambda f: f.get('format_id') == format_spec  # id

                    def selector_function(ctx):
                        formats = list(ctx['formats'])
                        matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                        if format_fallback and ctx['incomplete_formats']:
                            # for extractors with incomplete formats (audio only (soundcloud)
                            # or video only (imgur)) best/worst will fallback to
                            # best/worst {video,audio}-only format
                            matches = formats
                        elif separate_fallback and not ctx['has_merged_format']:
                            # for compatibility with youtube-dl when there is no pre-merged format
                            matches = list(filter(separate_fallback, formats))
                        matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
                        try:
                            yield matches[format_idx - 1]
                        except LazyList.IndexError:
                            return

            filters = [self._build_format_filter(f) for f in selector.filters]

            def final_selector(ctx):
                ctx_copy = dict(ctx)
                for _filter in filters:
                    ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
                return selector_function(ctx_copy)
            return final_selector

        stream = io.BytesIO(format_spec.encode())
        try:
            tokens = list(_remove_unused_ops(tokenize.tokenize(stream.readline)))
        except tokenize.TokenError:
            raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

        class TokenIterator:
            def __init__(self, tokens):
                self.tokens = tokens
                self.counter = 0

            def __iter__(self):
                return self

            def __next__(self):
                if self.counter >= len(self.tokens):
                    raise StopIteration()
                value = self.tokens[self.counter]
                self.counter += 1
                return value

            def restore_last_token(self):
                self.counter -= 1

        parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
        return _build_selector_function(parsed_selector)
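
    # Editor's usage sketch (not part of the original file): a compiled selector is
    # a function over a context dict, roughly
    #
    #   selector = self.build_format_selector('bv*+ba/b')
    #   chosen = list(selector({'formats': formats,
    #                           'has_merged_format': False,
    #                           'incomplete_formats': False}))
    #
    # where `formats` is a list of format dicts; the spec string is just an example.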

    def _calc_headers(self, info_dict, load_cookies=False):
        res = HTTPHeaderDict(self.params['http_headers'], info_dict.get('http_headers'))

        if load_cookies:  # For --load-info-json
            self._load_cookies(res.get('Cookie'), autoscope=info_dict['url'])  # compat
            self._load_cookies(info_dict.get('cookies'), autoscope=False)
        # The `Cookie` header is removed to prevent leaks and unscoped cookies.
        # See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj
        res.pop('Cookie', None)
        cookies = self.cookiejar.get_cookies_for_url(info_dict['url'])
        if cookies:
            encoder = LenientSimpleCookie()
            values = []
            for cookie in cookies:
                _, value = encoder.value_encode(cookie.value)
                values.append(f'{cookie.name}={value}')
                if cookie.domain:
                    values.append(f'Domain={cookie.domain}')
                if cookie.path:
                    values.append(f'Path={cookie.path}')
                if cookie.secure:
                    values.append('Secure')
                if cookie.expires:
                    values.append(f'Expires={cookie.expires}')
                if cookie.version:
                    values.append(f'Version={cookie.version}')
            info_dict['cookies'] = '; '.join(values)

        if 'X-Forwarded-For' not in res:
            x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
            if x_forwarded_for_ip:
                res['X-Forwarded-For'] = x_forwarded_for_ip

        return res

    def _calc_cookies(self, url):
        self.deprecation_warning('"YoutubeDL._calc_cookies" is deprecated and may be removed in a future version')
        return self.cookiejar.get_cookie_header(url)

    def _sort_thumbnails(self, thumbnails):
        thumbnails.sort(key=lambda t: (
            t.get('preference') if t.get('preference') is not None else -1,
            t.get('width') if t.get('width') is not None else -1,
            t.get('height') if t.get('height') is not None else -1,
            t.get('id') if t.get('id') is not None else '',
            t.get('url')))

    def _sanitize_thumbnails(self, info_dict):
        thumbnails = info_dict.get('thumbnails')
        if thumbnails is None:
            thumbnail = info_dict.get('thumbnail')
            if thumbnail:
                info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
        if not thumbnails:
            return

        def check_thumbnails(thumbnails):
            for t in thumbnails:
                self.to_screen(f'[info] Testing thumbnail {t["id"]}')
                try:
                    self.urlopen(HEADRequest(t['url']))
                except network_exceptions as err:
                    self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
                    continue
                yield t

        self._sort_thumbnails(thumbnails)
        for i, t in enumerate(thumbnails):
            if t.get('id') is None:
                t['id'] = '%d' % i
            if t.get('width') and t.get('height'):
                t['resolution'] = '%dx%d' % (t['width'], t['height'])
            t['url'] = sanitize_url(t['url'])

        if self.params.get('check_formats') is True:
            info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
        else:
            info_dict['thumbnails'] = thumbnails

    def _fill_common_fields(self, info_dict, final=True):
        # TODO: move sanitization here
        if final:
            title = info_dict['fulltitle'] = info_dict.get('title')
            if not title:
                if title == '':
                    self.write_debug('Extractor gave empty title. Creating a generic title')
                else:
                    self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
                info_dict['title'] = f'{info_dict["extractor"].replace(":", "-")} video #{info_dict["id"]}'

        if info_dict.get('duration') is not None:
            info_dict['duration_string'] = formatSeconds(info_dict['duration'])

        for ts_key, date_key in (
                ('timestamp', 'upload_date'),
                ('release_timestamp', 'release_date'),
                ('modified_timestamp', 'modified_date'),
        ):
            if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
                # Working around out-of-range timestamp values (e.g. negative ones on Windows,
                # see http://bugs.python.org/issue1646728)
                with contextlib.suppress(ValueError, OverflowError, OSError):
                    upload_date = datetime.datetime.fromtimestamp(info_dict[ts_key], datetime.timezone.utc)
                    info_dict[date_key] = upload_date.strftime('%Y%m%d')

        if not info_dict.get('release_year'):
            info_dict['release_year'] = traverse_obj(info_dict, ('release_date', {lambda x: int(x[:4])}))

        live_keys = ('is_live', 'was_live')
        live_status = info_dict.get('live_status')
        if live_status is None:
            for key in live_keys:
                if info_dict.get(key) is False:
                    continue
                if info_dict.get(key):
                    live_status = key
                break
            else:
                if all(info_dict.get(key) is False for key in live_keys):
                    live_status = 'not_live'

        info_dict['live_status'] = live_status
        for key in live_keys:
            if info_dict.get(key) is None:
                info_dict[key] = (live_status == key)
        if live_status == 'post_live':
            info_dict['was_live'] = True

        # Auto generate title fields corresponding to the *_number fields when missing
        # in order to always have clean titles. This is very common for TV series.
        for field in ('chapter', 'season', 'episode'):
            if final and info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
                info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
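
    # Editor's example (hypothetical values): a result carrying only
    # {'timestamp': 1700000000} gains 'upload_date': '20231114' via the loop above,
    # and {'episode_number': 3} without 'episode' gains 'episode': 'Episode 3'.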

    def _raise_pending_errors(self, info):
        err = info.pop('__pending_error', None)
        if err:
            self.report_error(err, tb=False)

    def sort_formats(self, info_dict):
        formats = self._get_formats(info_dict)
        formats.sort(key=FormatSorter(
            self, info_dict.get('_format_sort_fields') or []).calculate_preference)

    def process_video_result(self, info_dict, download=True):
        assert info_dict.get('_type', 'video') == 'video'
        self._num_videos += 1

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
        elif not info_dict.get('id'):
            raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])

        def report_force_conversion(field, field_not, conversion):
            self.report_warning(
                '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
                % (field, field_not, conversion))

        def sanitize_string_field(info, string_field):
            field = info.get(string_field)
            if field is None or isinstance(field, str):
                return
            report_force_conversion(string_field, 'a string', 'string')
            info[string_field] = str(field)

        def sanitize_numeric_fields(info):
            for numeric_field in self._NUMERIC_FIELDS:
                field = info.get(numeric_field)
                if field is None or isinstance(field, (int, float)):
                    continue
                report_force_conversion(numeric_field, 'numeric', 'int')
                info[numeric_field] = int_or_none(field)

        sanitize_string_field(info_dict, 'id')
        sanitize_numeric_fields(info_dict)
        if info_dict.get('section_end') and info_dict.get('section_start') is not None:
            info_dict['duration'] = round(info_dict['section_end'] - info_dict['section_start'], 3)
        if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):
            self.report_warning('"duration" field is negative, there is an error in extractor')

        chapters = info_dict.get('chapters') or []
        if chapters and chapters[0].get('start_time'):
            chapters.insert(0, {'start_time': 0})

        dummy_chapter = {'end_time': 0, 'start_time': info_dict.get('duration')}
        for idx, (prev, current, next_) in enumerate(zip(
                (dummy_chapter, *chapters), chapters, (*chapters[1:], dummy_chapter)), 1):
            if current.get('start_time') is None:
                current['start_time'] = prev.get('end_time')
            if not current.get('end_time'):
                current['end_time'] = next_.get('start_time')
            if not current.get('title'):
                current['title'] = f'<Untitled Chapter {idx}>'

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        self._sanitize_thumbnails(info_dict)

        thumbnail = info_dict.get('thumbnail')
        thumbnails = info_dict.get('thumbnails')
        if thumbnail:
            info_dict['thumbnail'] = sanitize_url(thumbnail)
        elif thumbnails:
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if info_dict.get('display_id') is None and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        self._fill_common_fields(info_dict)

        for cc_kind in ('subtitles', 'automatic_captions'):
            cc = info_dict.get(cc_kind)
            if cc:
                for _, subtitle in cc.items():
                    for subtitle_format in subtitle:
                        if subtitle_format.get('url'):
                            subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                        if subtitle_format.get('ext') is None:
                            subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

        automatic_captions = info_dict.get('automatic_captions')
        subtitles = info_dict.get('subtitles')

        info_dict['requested_subtitles'] = self.process_subtitles(
            info_dict['id'], subtitles, automatic_captions)

        formats = self._get_formats(info_dict)

        # Backward compatibility with InfoExtractor._sort_formats
        field_preference = (formats or [{}])[0].pop('__sort_fields', None)
        if field_preference:
            info_dict['_format_sort_fields'] = field_preference

        info_dict['_has_drm'] = any(  # or None ensures --clean-infojson removes it
            f.get('has_drm') and f['has_drm'] != 'maybe' for f in formats) or None
        if not self.params.get('allow_unplayable_formats'):
            formats = [f for f in formats if not f.get('has_drm') or f['has_drm'] == 'maybe']

        if formats and all(f.get('acodec') == f.get('vcodec') == 'none' for f in formats):
            self.report_warning(
                f'{"This video is DRM protected and " if info_dict["_has_drm"] else ""}'
                'only images are available for download. Use --list-formats to see them'.capitalize())

        get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
        if not get_from_start:
            info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
        if info_dict.get('is_live') and formats:
            formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
            if get_from_start and not formats:
                self.raise_no_formats(info_dict, msg=(
                    '--live-from-start is passed, but there are no formats that can be downloaded from the start. '
                    'If you want to download from the current time, use --no-live-from-start'))

        def is_wellformed(f):
            url = f.get('url')
            if not url:
                self.report_warning(
                    '"url" field is missing or empty - skipping format, '
                    'there is an error in extractor')
                return False
            if isinstance(url, bytes):
                sanitize_string_field(f, 'url')
            return True

        # Filter out malformed formats for better extraction robustness
        formats = list(filter(is_wellformed, formats or []))

        if not formats:
            self.raise_no_formats(info_dict)

        for format in formats:
            sanitize_string_field(format, 'format_id')
            sanitize_numeric_fields(format)
            format['url'] = sanitize_url(format['url'])
            if format.get('ext') is None:
                format['ext'] = determine_ext(format['url']).lower()
            if format.get('protocol') is None:
                format['protocol'] = determine_protocol(format)
            if format.get('resolution') is None:
                format['resolution'] = self.format_resolution(format, default=None)
            if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
                format['dynamic_range'] = 'SDR'
            if format.get('aspect_ratio') is None:
                format['aspect_ratio'] = try_call(lambda: round(format['width'] / format['height'], 2))
            # For fragmented formats, "tbr" is often max bitrate and not average
            if (('manifest-filesize-approx' in self.params['compat_opts'] or not format.get('manifest_url'))
                    and info_dict.get('duration') and format.get('tbr')
                    and not format.get('filesize') and not format.get('filesize_approx')):
                format['filesize_approx'] = int(info_dict['duration'] * format['tbr'] * (1024 / 8))
            format['http_headers'] = self._calc_headers(collections.ChainMap(format, info_dict), load_cookies=True)

        # Safeguard against old/insecure infojson when using --load-info-json
        if info_dict.get('http_headers'):
            info_dict['http_headers'] = HTTPHeaderDict(info_dict['http_headers'])
            info_dict['http_headers'].pop('Cookie', None)

        # This is copied to http_headers by the above _calc_headers and can now be removed
        if '__x_forwarded_for_ip' in info_dict:
            del info_dict['__x_forwarded_for_ip']

        self.sort_formats({
            'formats': formats,
            '_format_sort_fields': info_dict.get('_format_sort_fields')
        })

        # Sanitize and group by format_id
        formats_dict = {}
        for i, format in enumerate(formats):
            if not format.get('format_id'):
                format['format_id'] = str(i)
            else:
                # Sanitize format_id from characters used in format selector expression
                format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
            formats_dict.setdefault(format['format_id'], []).append(format)

        # Make sure all formats have unique format_id
        common_exts = set(itertools.chain(*self._format_selection_exts.values()))
        for format_id, ambiguous_formats in formats_dict.items():
            ambiguous_id = len(ambiguous_formats) > 1
            for i, format in enumerate(ambiguous_formats):
                if ambiguous_id:
                    format['format_id'] = '%s-%d' % (format_id, i)
                # Ensure there is no conflict between id and ext in format selection
                # See https://github.com/yt-dlp/yt-dlp/issues/1282
                if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
                    format['format_id'] = 'f%s' % format['format_id']

                if format.get('format') is None:
                    format['format'] = '{id} - {res}{note}'.format(
                        id=format['format_id'],
                        res=self.format_resolution(format),
                        note=format_field(format, 'format_note', ' (%s)'),
                    )

        if self.params.get('check_formats') is True:
            formats = LazyList(self._check_formats(formats[::-1]), reverse=True)

        if not formats or formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats

        info_dict, _ = self.pre_process(info_dict)

        if self._match_entry(info_dict, incomplete=self._format_fields) is not None:
            return info_dict

        self.post_extract(info_dict)
        info_dict, _ = self.pre_process(info_dict, 'after_filter')

        # The pre-processors may have modified the formats
        formats = self._get_formats(info_dict)

        list_only = self.params.get('simulate') == 'list_only'
        interactive_format_selection = not list_only and self.format_selector == '-'
        if self.params.get('list_thumbnails'):
            self.list_thumbnails(info_dict)
        if self.params.get('listsubtitles'):
            if 'automatic_captions' in info_dict:
                self.list_subtitles(
                    info_dict['id'], automatic_captions, 'automatic captions')
            self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
        if self.params.get('listformats') or interactive_format_selection:
            self.list_formats(info_dict)
        if list_only:
            # Without this printing, -F --print-json will not work
            self.__forced_printings(info_dict)
            return info_dict

        format_selector = self.format_selector
        while True:
            if interactive_format_selection:
                req_format = input(self._format_screen('\nEnter format selector ', self.Styles.EMPHASIS)
                                   + '(Press ENTER for default, or Ctrl+C to quit)'
                                   + self._format_screen(': ', self.Styles.EMPHASIS))
                try:
                    format_selector = self.build_format_selector(req_format) if req_format else None
                except SyntaxError as err:
                    self.report_error(err, tb=False, is_error=False)
                    continue

            if format_selector is None:
                req_format = self._default_format_spec(info_dict, download=download)
                self.write_debug(f'Default format spec: {req_format}')
                format_selector = self.build_format_selector(req_format)

            formats_to_download = list(format_selector({
                'formats': formats,
                'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
                'incomplete_formats': (all(f.get('vcodec') == 'none' for f in formats)  # No formats with video
                                       or all(f.get('acodec') == 'none' for f in formats)),  # OR, No formats with audio
            }))
            if interactive_format_selection and not formats_to_download:
                self.report_error('Requested format is not available', tb=False, is_error=False)
                continue
            break

        if not formats_to_download:
            if not self.params.get('ignore_no_formats_error'):
                raise ExtractorError(
                    'Requested format is not available. Use --list-formats for a list of available formats',
                    expected=True, video_id=info_dict['id'], ie=info_dict['extractor'])
            self.report_warning('Requested format is not available')
            # Process what we can, even without any available formats.
            formats_to_download = [{}]

        requested_ranges = tuple(self.params.get('download_ranges', lambda *_: [{}])(info_dict, self))
        best_format, downloaded_formats = formats_to_download[-1], []
        if download:
            if best_format and requested_ranges:
                def to_screen(*msg):
                    self.to_screen(f'[info] {info_dict["id"]}: {" ".join(", ".join(variadic(m)) for m in msg)}')

                to_screen(f'Downloading {len(formats_to_download)} format(s):',
                          (f['format_id'] for f in formats_to_download))
                if requested_ranges != ({}, ):
                    to_screen(f'Downloading {len(requested_ranges)} time ranges:',
                              (f'{c["start_time"]:.1f}-{c["end_time"]:.1f}' for c in requested_ranges))
            max_downloads_reached = False

            for fmt, chapter in itertools.product(formats_to_download, requested_ranges):
                new_info = self._copy_infodict(info_dict)
                new_info.update(fmt)
                offset, duration = info_dict.get('section_start') or 0, info_dict.get('duration') or float('inf')
                end_time = offset + min(chapter.get('end_time', duration), duration)
                # duration may not be accurate. So allow deviations <1sec
                if end_time == float('inf') or end_time > offset + duration + 1:
                    end_time = None
                if chapter or offset:
                    new_info.update({
                        'section_start': offset + chapter.get('start_time', 0),
                        'section_end': end_time,
                        'section_title': chapter.get('title'),
                        'section_number': chapter.get('index'),
                    })
                downloaded_formats.append(new_info)
                try:
                    self.process_info(new_info)
                except MaxDownloadsReached:
                    max_downloads_reached = True
                self._raise_pending_errors(new_info)
                # Remove copied info
                for key, val in tuple(new_info.items()):
                    if info_dict.get(key) == val:
                        new_info.pop(key)
                if max_downloads_reached:
                    break

            write_archive = {f.get('__write_download_archive', False) for f in downloaded_formats}
            assert write_archive.issubset({True, False, 'ignore'})
            if True in write_archive and False not in write_archive:
                self.record_download_archive(info_dict)

            info_dict['requested_downloads'] = downloaded_formats
            info_dict = self.run_all_pps('after_video', info_dict)
            if max_downloads_reached:
                raise MaxDownloadsReached()

        # We update the info dict with the selected best quality format (backwards compatibility)
        info_dict.update(best_format)
        return info_dict
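
    # Editor's note (illustration only): `download_ranges` is a callable returning
    # chapter-like dicts; yt_dlp.utils.download_range_func builds one, e.g.
    #
    #   {'download_ranges': download_range_func(chapters=[], ranges=[(30, 60)])}
    #
    # Each returned range becomes a section download in the product loop above.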
    def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
        """Select the requested subtitles and their format"""
        available_subs, normal_sub_langs = {}, []
        if normal_subtitles and self.params.get('writesubtitles'):
            available_subs.update(normal_subtitles)
            normal_sub_langs = tuple(normal_subtitles.keys())
        if automatic_captions and self.params.get('writeautomaticsub'):
            for lang, cap_info in automatic_captions.items():
                if lang not in available_subs:
                    available_subs[lang] = cap_info

        if not available_subs or (
                not self.params.get('writesubtitles')
                and not self.params.get('writeautomaticsub')):
            return None

        all_sub_langs = tuple(available_subs.keys())
        if self.params.get('allsubtitles', False):
            requested_langs = all_sub_langs
        elif self.params.get('subtitleslangs', False):
            try:
                requested_langs = orderedSet_from_options(
                    self.params.get('subtitleslangs'), {'all': all_sub_langs}, use_regex=True)
            except re.error as e:
                raise ValueError(f'Wrong regex for subtitlelangs: {e.pattern}')
        else:
            requested_langs = LazyList(itertools.chain(
                ['en'] if 'en' in normal_sub_langs else [],
                filter(lambda f: f.startswith('en'), normal_sub_langs),
                ['en'] if 'en' in all_sub_langs else [],
                filter(lambda f: f.startswith('en'), all_sub_langs),
                normal_sub_langs, all_sub_langs,
            ))[:1]
        if requested_langs:
            self.to_screen(f'[info] {video_id}: Downloading subtitles: {", ".join(requested_langs)}')

        formats_query = self.params.get('subtitlesformat', 'best')
        formats_preference = formats_query.split('/') if formats_query else []
        subs = {}
        for lang in requested_langs:
            formats = available_subs.get(lang)
            if formats is None:
                self.report_warning(f'{lang} subtitles not available for {video_id}')
                continue
            for ext in formats_preference:
                if ext == 'best':
                    f = formats[-1]
                    break
                matches = list(filter(lambda f: f['ext'] == ext, formats))
                if matches:
                    f = matches[-1]
                    break
            else:
                f = formats[-1]
                self.report_warning(
                    'No subtitle format found matching "%s" for language %s, '
                    'using %s' % (formats_query, lang, f['ext']))
            subs[lang] = f
        return subs

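    # A minimal illustration of the language selection above (not part of the
    # original file; values are hypothetical). With
    #     params = {'writesubtitles': True, 'subtitleslangs': ['en.*', 'ja']}
    # and available languages ('en', 'en-GB', 'ja', 'de'), the regex-aware
    # orderedSet_from_options() resolves the request to ('en', 'en-GB', 'ja');
    # without 'subtitleslangs', the LazyList fallback picks just one language,
    # preferring 'en' variants from manual subtitles over automatic captions.
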
    def _forceprint(self, key, info_dict):
        if info_dict is None:
            return
        info_copy = info_dict.copy()
        info_copy.setdefault('filename', self.prepare_filename(info_dict))
        if info_dict.get('requested_formats') is not None:
            # For RTMP URLs, also include the playpath
            info_copy['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
        elif info_dict.get('url'):
            info_copy['urls'] = info_dict['url'] + info_dict.get('play_path', '')
        info_copy['formats_table'] = self.render_formats_table(info_dict)
        info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)
        info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
        info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))

        def format_tmpl(tmpl):
            mobj = re.fullmatch(r'([\w.:,]|-\d|(?P<dict>{([\w.:,]|-\d)+}))+=?', tmpl)
            if not mobj:
                return tmpl

            fmt = '%({})s'
            if tmpl.startswith('{'):
                tmpl, fmt = f'.{tmpl}', '%({})j'
            if tmpl.endswith('='):
                tmpl, fmt = tmpl[:-1], '{0} = %({0})#j'
            return '\n'.join(map(fmt.format, [tmpl] if mobj.group('dict') else tmpl.split(',')))

        for tmpl in self.params['forceprint'].get(key, []):
            self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))

        for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):
            filename = self.prepare_filename(info_dict, outtmpl=file_tmpl)
            tmpl = format_tmpl(tmpl)
            self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
            if self._ensure_dir_exists(filename):
                with open(filename, 'a', encoding='utf-8', newline='') as f:
                    f.write(self.evaluate_outtmpl(tmpl, info_copy) + os.linesep)

        return info_copy

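    # Sketch of what format_tmpl() accepts, derived from the regex above
    # (illustrative, not exhaustive):
    #   'title'      -> '%(title)s'           plain field
    #   'id='        -> 'id = %(id)#j'        trailing '=' prints the name and JSON value
    #   'id,title'   -> '%(id)s\n%(title)s'   comma-separated fields, one per line
    #   '{id,title}' -> '%(.{id,title})j'     braces dump the fields as a JSON dict
    # Anything that does not fullmatch the pattern is passed through unchanged.
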
    def __forced_printings(self, info_dict, filename=None, incomplete=True):
        if (self.params.get('forcejson')
                or self.params['forceprint'].get('video')
                or self.params['print_to_file'].get('video')):
            self.post_extract(info_dict)
        if filename:
            info_dict['filename'] = filename
        info_copy = self._forceprint('video', info_dict)

        def print_field(field, actual_field=None, optional=False):
            if actual_field is None:
                actual_field = field
            if self.params.get(f'force{field}') and (
                    info_copy.get(field) is not None or (not optional and not incomplete)):
                self.to_stdout(info_copy[actual_field])

        print_field('title')
        print_field('id')
        print_field('url', 'urls')
        print_field('thumbnail', optional=True)
        print_field('description', optional=True)
        print_field('filename')
        if self.params.get('forceduration') and info_copy.get('duration') is not None:
            self.to_stdout(formatSeconds(info_copy['duration']))
        print_field('format')

        if self.params.get('forcejson'):
            self.to_stdout(json.dumps(self.sanitize_info(info_dict)))

    def dl(self, name, info, subtitle=False, test=False):
        if not info.get('url'):
            self.raise_no_formats(info, True)

        if test:
            verbose = self.params.get('verbose')
            params = {
                'test': True,
                'quiet': self.params.get('quiet') or not verbose,
                'verbose': verbose,
                'noprogress': not verbose,
                'nopart': True,
                'skip_unavailable_fragments': False,
                'keep_fragments': False,
                'overwrites': True,
                '_no_ytdl_file': True,
            }
        else:
            params = self.params
        fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
        if not test:
            for ph in self._progress_hooks:
                fd.add_progress_hook(ph)
            urls = '", "'.join(
                (f['url'].split(',')[0] + ',<data>' if f['url'].startswith('data:') else f['url'])
                for f in info.get('requested_formats', []) or [info])
            self.write_debug(f'Invoking {fd.FD_NAME} downloader on "{urls}"')

        # Note: Ideally info should be a deep-copied so that hooks cannot modify it.
        # But it may contain objects that are not deep-copyable
        new_info = self._copy_infodict(info)
        if new_info.get('http_headers') is None:
            new_info['http_headers'] = self._calc_headers(new_info)
        return fd.download(name, new_info, subtitle)

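    # Usage sketch (hypothetical values): progress hooks registered on this
    # object are attached to every downloader created here, e.g.
    #
    #     def hook(d):
    #         if d['status'] == 'finished':
    #             print('Done downloading', d.get('filename'))
    #
    #     ydl = YoutubeDL({'progress_hooks': [hook]})
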
    def existing_file(self, filepaths, *, default_overwrite=True):
        existing_files = list(filter(os.path.exists, orderedSet(filepaths)))
        if existing_files and not self.params.get('overwrites', default_overwrite):
            return existing_files[0]

        for file in existing_files:
            self.report_file_delete(file)
            os.remove(file)
        return None

    def process_info(self, info_dict):
        """Process a single resolved IE result. (Modifies it in-place)"""

        assert info_dict.get('_type', 'video') == 'video'
        original_infodict = info_dict

        if 'format' not in info_dict and 'ext' in info_dict:
            info_dict['format'] = info_dict['ext']

        if self._match_entry(info_dict) is not None:
            info_dict['__write_download_archive'] = 'ignore'
            return

        # Does nothing under normal operation - for backward compatibility of process_info
        self.post_extract(info_dict)

        def replace_info_dict(new_info):
            nonlocal info_dict
            if new_info == info_dict:
                return
            info_dict.clear()
            info_dict.update(new_info)

        new_info, _ = self.pre_process(info_dict, 'video')
        replace_info_dict(new_info)
        self._num_downloads += 1

        # info_dict['_filename'] needs to be set for backward compatibility
        info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
        temp_filename = self.prepare_filename(info_dict, 'temp')
        files_to_move = {}

        # Forced printings
        self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))

        def check_max_downloads():
            if self._num_downloads >= float(self.params.get('max_downloads') or 'inf'):
                raise MaxDownloadsReached()

        if self.params.get('simulate'):
            info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
            check_max_downloads()
            return

        if full_filename is None:
            return
        if not self._ensure_dir_exists(encodeFilename(full_filename)):
            return
        if not self._ensure_dir_exists(encodeFilename(temp_filename)):
            return

        if self._write_description('video', info_dict,
                                   self.prepare_filename(info_dict, 'description')) is None:
            return

        sub_files = self._write_subtitles(info_dict, temp_filename)
        if sub_files is None:
            return
        files_to_move.update(dict(sub_files))

        thumb_files = self._write_thumbnails(
            'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
        if thumb_files is None:
            return
        files_to_move.update(dict(thumb_files))

        infofn = self.prepare_filename(info_dict, 'infojson')
        _infojson_written = self._write_info_json('video', info_dict, infofn)
        if _infojson_written:
            info_dict['infojson_filename'] = infofn
            # For backward compatibility, even though it was a private field
            info_dict['__infojson_filename'] = infofn
        elif _infojson_written is None:
            return

        # Note: Annotations are deprecated
        annofn = None
        if self.params.get('writeannotations', False):
            annofn = self.prepare_filename(info_dict, 'annotation')
        if annofn:
            if not self._ensure_dir_exists(encodeFilename(annofn)):
                return
            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
                self.to_screen('[info] Video annotations are already present')
            elif not info_dict.get('annotations'):
                self.report_warning('There are no annotations to write.')
            else:
                try:
                    self.to_screen('[info] Writing video annotations to: ' + annofn)
                    with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                        annofile.write(info_dict['annotations'])
                except (KeyError, TypeError):
                    self.report_warning('There are no annotations to write.')
                except OSError:
                    self.report_error('Cannot write annotations file: ' + annofn)
                    return

        # Write internet shortcut files
        def _write_link_file(link_type):
            url = try_get(info_dict['webpage_url'], iri_to_uri)
            if not url:
                self.report_warning(
                    f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
                return True
            linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
            if not self._ensure_dir_exists(encodeFilename(linkfn)):
                return False
            if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
                self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
                return True
            try:
                self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
                with open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
                          newline='\r\n' if link_type == 'url' else '\n') as linkfile:
                    template_vars = {'url': url}
                    if link_type == 'desktop':
                        template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
                    linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
            except OSError:
                self.report_error(f'Cannot write internet shortcut {linkfn}')
                return False
            return True

        write_links = {
            'url': self.params.get('writeurllink'),
            'webloc': self.params.get('writewebloclink'),
            'desktop': self.params.get('writedesktoplink'),
        }
        if self.params.get('writelink'):
            link_type = ('webloc' if sys.platform == 'darwin'
                         else 'desktop' if sys.platform.startswith('linux')
                         else 'url')
            write_links[link_type] = True

        if any(should_write and not _write_link_file(link_type)
               for link_type, should_write in write_links.items()):
            return

        new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
        replace_info_dict(new_info)

        if self.params.get('skip_download'):
            info_dict['filepath'] = temp_filename
            info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
            info_dict['__files_to_move'] = files_to_move
            replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
            info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
        else:
            # Download
            info_dict.setdefault('__postprocessors', [])
            try:

                def existing_video_file(*filepaths):
                    ext = info_dict.get('ext')
                    converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
                    file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
                                              default_overwrite=False)
                    if file:
                        info_dict['ext'] = os.path.splitext(file)[1][1:]
                    return file

                fd, success = None, True
                if info_dict.get('protocol') or info_dict.get('url'):
                    fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
                    if fd != FFmpegFD and 'no-direct-merge' not in self.params['compat_opts'] and (
                            info_dict.get('section_start') or info_dict.get('section_end')):
                        msg = ('This format cannot be partially downloaded' if FFmpegFD.available()
                               else 'You have requested downloading the video partially, but ffmpeg is not installed')
                        self.report_error(f'{msg}. Aborting')
                        return

                if info_dict.get('requested_formats') is not None:
                    old_ext = info_dict['ext']
                    if self.params.get('merge_output_format') is None:
                        if (info_dict['ext'] == 'webm'
                                and info_dict.get('thumbnails')
                                # check with type instead of pp_key, __name__, or isinstance
                                # since we dont want any custom PPs to trigger this
                                and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):  # noqa: E721
                            info_dict['ext'] = 'mkv'
                            self.report_warning(
                                'webm doesn\'t support embedding a thumbnail, mkv will be used')
                    new_ext = info_dict['ext']

                    def correct_ext(filename, ext=new_ext):
                        if filename == '-':
                            return filename
                        filename_real_ext = os.path.splitext(filename)[1][1:]
                        filename_wo_ext = (
                            os.path.splitext(filename)[0]
                            if filename_real_ext in (old_ext, new_ext)
                            else filename)
                        return f'{filename_wo_ext}.{ext}'

                    # Ensure filename always has a correct extension for successful merge
                    full_filename = correct_ext(full_filename)
                    temp_filename = correct_ext(temp_filename)
                    dl_filename = existing_video_file(full_filename, temp_filename)

                    info_dict['__real_download'] = False
                    # NOTE: Copy so that original format dicts are not modified
                    info_dict['requested_formats'] = list(map(dict, info_dict['requested_formats']))

                    merger = FFmpegMergerPP(self)
                    downloaded = []
                    if dl_filename is not None:
                        self.report_file_already_downloaded(dl_filename)
                    elif fd:
                        for f in info_dict['requested_formats'] if fd != FFmpegFD else []:
                            f['filepath'] = fname = prepend_extension(
                                correct_ext(temp_filename, info_dict['ext']),
                                'f%s' % f['format_id'], info_dict['ext'])
                            downloaded.append(fname)
                        info_dict['url'] = '\n'.join(f['url'] for f in info_dict['requested_formats'])
                        success, real_download = self.dl(temp_filename, info_dict)
                        info_dict['__real_download'] = real_download
                    else:
                        if self.params.get('allow_unplayable_formats'):
                            self.report_warning(
                                'You have requested merging of multiple formats '
                                'while also allowing unplayable formats to be downloaded. '
                                'The formats won\'t be merged to prevent data corruption.')
                        elif not merger.available:
                            msg = 'You have requested merging of multiple formats but ffmpeg is not installed'
                            if not self.params.get('ignoreerrors'):
                                self.report_error(f'{msg}. Aborting due to --abort-on-error')
                                return
                            self.report_warning(f'{msg}. The formats won\'t be merged')

                        if temp_filename == '-':
                            reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
                                      else 'but the formats are incompatible for simultaneous download' if merger.available
                                      else 'but ffmpeg is not installed')
                            self.report_warning(
                                f'You have requested downloading multiple formats to stdout {reason}. '
                                'The formats will be streamed one after the other')
                            fname = temp_filename
                        for f in info_dict['requested_formats']:
                            new_info = dict(info_dict)
                            del new_info['requested_formats']
                            new_info.update(f)
                            if temp_filename != '-':
                                fname = prepend_extension(
                                    correct_ext(temp_filename, new_info['ext']),
                                    'f%s' % f['format_id'], new_info['ext'])
                                if not self._ensure_dir_exists(fname):
                                    return
                                f['filepath'] = fname
                                downloaded.append(fname)
                            partial_success, real_download = self.dl(fname, new_info)
                            info_dict['__real_download'] = info_dict['__real_download'] or real_download
                            success = success and partial_success

                    if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
                        info_dict['__postprocessors'].append(merger)
                        info_dict['__files_to_merge'] = downloaded
                        # Even if there were no downloads, it is being merged only now
                        info_dict['__real_download'] = True
                    else:
                        for file in downloaded:
                            files_to_move[file] = None
                else:
                    # Just a single file
                    dl_filename = existing_video_file(full_filename, temp_filename)
                    if dl_filename is None or dl_filename == temp_filename:
                        # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
                        # So we should try to resume the download
                        success, real_download = self.dl(temp_filename, info_dict)
                        info_dict['__real_download'] = real_download
                    else:
                        self.report_file_already_downloaded(dl_filename)

                dl_filename = dl_filename or temp_filename
                info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))

            except network_exceptions as err:
                self.report_error('unable to download video data: %s' % error_to_compat_str(err))
                return
            except OSError as err:
                raise UnavailableVideoError(err)
            except (ContentTooShortError, ) as err:
                self.report_error(f'content too short (expected {err.expected} bytes and served {err.downloaded})')
                return

            self._raise_pending_errors(info_dict)
            if success and full_filename != '-':

                def fixup():
                    do_fixup = True
                    fixup_policy = self.params.get('fixup')
                    vid = info_dict['id']

                    if fixup_policy in ('ignore', 'never'):
                        return
                    elif fixup_policy == 'warn':
                        do_fixup = 'warn'
                    elif fixup_policy != 'force':
                        assert fixup_policy in ('detect_or_warn', None)
                        if not info_dict.get('__real_download'):
                            do_fixup = False

                    def ffmpeg_fixup(cndn, msg, cls):
                        if not (do_fixup and cndn):
                            return
                        elif do_fixup == 'warn':
                            self.report_warning(f'{vid}: {msg}')
                            return
                        pp = cls(self)
                        if pp.available:
                            info_dict['__postprocessors'].append(pp)
                        else:
                            self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')

                    stretched_ratio = info_dict.get('stretched_ratio')
                    ffmpeg_fixup(stretched_ratio not in (1, None),
                                 f'Non-uniform pixel ratio {stretched_ratio}',
                                 FFmpegFixupStretchedPP)

                    downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
                    downloader = downloader.FD_NAME if downloader else None

                    ext = info_dict.get('ext')
                    postprocessed_by_ffmpeg = info_dict.get('requested_formats') or any((
                        isinstance(pp, FFmpegVideoConvertorPP)
                        and resolve_recode_mapping(ext, pp.mapping)[0] not in (ext, None)
                    ) for pp in self._pps['post_process'])

                    if not postprocessed_by_ffmpeg:
                        ffmpeg_fixup(fd != FFmpegFD and ext == 'm4a'
                                     and info_dict.get('container') == 'm4a_dash',
                                     'writing DASH m4a. Only some players support this container',
                                     FFmpegFixupM4aPP)
                        ffmpeg_fixup(downloader == 'hlsnative' and not self.params.get('hls_use_mpegts')
                                     or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None,
                                     'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
                                     FFmpegFixupM3u8PP)
                        ffmpeg_fixup(info_dict.get('is_live') and downloader == 'dashsegments',
                                     'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)

                    ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
                    ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed duration detected', FFmpegFixupDurationPP)

                fixup()
                try:
                    replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
                except PostProcessingError as err:
                    self.report_error('Postprocessing: %s' % str(err))
                    return
                try:
                    for ph in self._post_hooks:
                        ph(info_dict['filepath'])
                except Exception as err:
                    self.report_error('post hooks: %s' % str(err))
                    return
                info_dict['__write_download_archive'] = True

        assert info_dict is original_infodict  # Make sure the info_dict was modified in-place
        if self.params.get('force_write_download_archive'):
            info_dict['__write_download_archive'] = True
        check_max_downloads()

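    # End-to-end sketch of how process_info() is reached (assumes the public
    # API; the URL is illustrative):
    #
    #     with YoutubeDL({'skip_download': True, 'writethumbnail': True}) as ydl:
    #         ydl.extract_info('https://example.com/watch?v=xyz')
    #
    # extract_info() resolves the URL, process_video_result() selects formats,
    # and each selected format dict is then passed here to be written out or
    # downloaded.
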
    def __download_wrapper(self, func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            try:
                res = func(*args, **kwargs)
            except UnavailableVideoError as e:
                self.report_error(e)
            except DownloadCancelled as e:
                self.to_screen(f'[info] {e}')
                if not self.params.get('break_per_url'):
                    raise
                self._num_downloads = 0
            else:
                if self.params.get('dump_single_json', False):
                    self.post_extract(res)
                    self.to_stdout(json.dumps(self.sanitize_info(res)))
        return wrapper

    def download(self, url_list):
        """Download a given list of URLs."""
        url_list = variadic(url_list)  # Passing a single URL is a common mistake
        outtmpl = self.params['outtmpl']['default']
        if (len(url_list) > 1
                and outtmpl != '-'
                and '%' not in outtmpl
                and self.params.get('max_downloads') != 1):
            raise SameFileError(outtmpl)

        for url in url_list:
            self.__download_wrapper(self.extract_info)(
                url, force_generic_extractor=self.params.get('force_generic_extractor', False))

        return self._download_retcode

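    # The canonical embedding example from the README applies here:
    #
    #     import yt_dlp
    #     URLS = ['https://www.youtube.com/watch?v=BaW_jenozKc']
    #     with yt_dlp.YoutubeDL({}) as ydl:
    #         ydl.download(URLS)
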
    def download_with_info_file(self, info_filename):
        with contextlib.closing(fileinput.FileInput(
                [info_filename], mode='r',
                openhook=fileinput.hook_encoded('utf-8'))) as f:
            # FileInput doesn't have a read method, we can't call json.load
            infos = [self.sanitize_info(info, self.params.get('clean_infojson', True))
                     for info in variadic(json.loads('\n'.join(f)))]
        for info in infos:
            try:
                self.__download_wrapper(self.process_ie_result)(info, download=True)
            except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
                if not isinstance(e, EntryNotInPlaylist):
                    self.to_stderr('\r')
                webpage_url = info.get('webpage_url')
                if webpage_url is None:
                    raise
                self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
                self.download([webpage_url])
        return self._download_retcode

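    # Round-trip sketch: an info JSON written with --write-info-json can be
    # fed back here, skipping re-extraction (CLI equivalent: --load-info-json);
    # the filename is hypothetical:
    #
    #     with YoutubeDL({}) as ydl:
    #         ydl.download_with_info_file('video.info.json')
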
    @staticmethod
    def sanitize_info(info_dict, remove_private_keys=False):
        ''' Sanitize the infodict for converting to json '''
        if info_dict is None:
            return info_dict
        info_dict.setdefault('epoch', int(time.time()))
        info_dict.setdefault('_type', 'video')
        info_dict.setdefault('_version', {
            'version': __version__,
            'current_git_head': current_git_head(),
            'release_git_head': RELEASE_GIT_HEAD,
            'repository': ORIGIN,
        })

        if remove_private_keys:
            reject = lambda k, v: v is None or k.startswith('__') or k in {
                'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
                'entries', 'filepath', '_filename', 'filename', 'infojson_filename', 'original_url',
                'playlist_autonumber',
            }
        else:
            reject = lambda k, v: False

        def filter_fn(obj):
            if isinstance(obj, dict):
                return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
            elif isinstance(obj, (list, tuple, set, LazyList)):
                return list(map(filter_fn, obj))
            elif obj is None or isinstance(obj, (str, int, float, bool)):
                return obj
            else:
                return repr(obj)

        return filter_fn(info_dict)

    @staticmethod
    def filter_requested_info(info_dict, actually_filter=True):
        ''' Alias of sanitize_info for backward compatibility '''
        return YoutubeDL.sanitize_info(info_dict, actually_filter)

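    # Example adapted from the README: extract without downloading and dump
    # JSON-safe metadata (sanitize_info makes the info dict json-serializable;
    # URL is illustrative):
    #
    #     import json
    #     with YoutubeDL({}) as ydl:
    #         info = ydl.extract_info(URL, download=False)
    #         print(json.dumps(ydl.sanitize_info(info)))
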
    def _delete_downloaded_files(self, *files_to_delete, info={}, msg=None):
        for filename in set(filter(None, files_to_delete)):
            if msg:
                self.to_screen(msg % filename)
            try:
                os.remove(filename)
            except OSError:
                self.report_warning(f'Unable to delete file {filename}')
            if filename in info.get('__files_to_move', []):  # NB: Delete even if None
                del info['__files_to_move'][filename]

    @staticmethod
    def post_extract(info_dict):
        def actual_post_extract(info_dict):
            if info_dict.get('_type') in ('playlist', 'multi_video'):
                for video_dict in info_dict.get('entries', {}):
                    actual_post_extract(video_dict or {})
                return

            post_extractor = info_dict.pop('__post_extractor', None) or (lambda: {})
            info_dict.update(post_extractor())

        actual_post_extract(info_dict or {})

    def run_pp(self, pp, infodict):
        files_to_delete = []
        if '__files_to_move' not in infodict:
            infodict['__files_to_move'] = {}
        try:
            files_to_delete, infodict = pp.run(infodict)
        except PostProcessingError as e:
            # Must be True and not 'only_download'
            if self.params.get('ignoreerrors') is True:
                self.report_error(e)
                return infodict
            raise

        if not files_to_delete:
            return infodict
        if self.params.get('keepvideo', False):
            for f in files_to_delete:
                infodict['__files_to_move'].setdefault(f, '')
        else:
            self._delete_downloaded_files(
                *files_to_delete, info=infodict, msg='Deleting original file %s (pass -k to keep)')
        return infodict

    def run_all_pps(self, key, info, *, additional_pps=None):
        if key != 'video':
            self._forceprint(key, info)
        for pp in (additional_pps or []) + self._pps[key]:
            info = self.run_pp(pp, info)
        return info

    def pre_process(self, ie_info, key='pre_process', files_to_move=None):
        info = dict(ie_info)
        info['__files_to_move'] = files_to_move or {}
        try:
            info = self.run_all_pps(key, info)
        except PostProcessingError as err:
            msg = f'Preprocessing: {err}'
            info.setdefault('__pending_error', msg)
            self.report_error(msg, is_error=False)
        return info, info.pop('__files_to_move', None)

    def post_process(self, filename, info, files_to_move=None):
        """Run all the postprocessors on the given file."""
        info['filepath'] = filename
        info['__files_to_move'] = files_to_move or {}
        info = self.run_all_pps('post_process', info, additional_pps=info.get('__postprocessors'))
        info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
        del info['__files_to_move']
        return self.run_all_pps('after_move', info)

    def _make_archive_id(self, info_dict):
        video_id = info_dict.get('id')
        if not video_id:
            return
        # Future-proof against any change in case
        # and backwards compatibility with prior versions
        extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
        if extractor is None:
            url = str_or_none(info_dict.get('url'))
            if not url:
                return
            # Try to find matching extractor for the URL and take its ie_key
            for ie_key, ie in self._ies.items():
                if ie.suitable(url):
                    extractor = ie_key
                    break
            else:
                return
        return make_archive_id(extractor, video_id)

    def in_download_archive(self, info_dict):
        if not self.archive:
            return False

        vid_ids = [self._make_archive_id(info_dict)]
        vid_ids.extend(info_dict.get('_old_archive_ids') or [])
        return any(id_ in self.archive for id_ in vid_ids)

    def record_download_archive(self, info_dict):
        fn = self.params.get('download_archive')
        if fn is None:
            return
        vid_id = self._make_archive_id(info_dict)
        assert vid_id

        self.write_debug(f'Adding to archive: {vid_id}')
        if is_path_like(fn):
            with locked_file(fn, 'a', encoding='utf-8') as archive_file:
                archive_file.write(vid_id + '\n')
        self.archive.add(vid_id)

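    # The archive file written above is plain text, one entry per line, in the
    # form '<extractor_key_lowercase> <video_id>' (see make_archive_id), e.g.:
    #
    #     youtube BaW_jenozKc
    #     vimeo 123456789
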
    @staticmethod
    def format_resolution(format, default='unknown'):
        if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
            return 'audio only'
        if format.get('resolution') is not None:
            return format['resolution']
        if format.get('width') and format.get('height'):
            return '%dx%d' % (format['width'], format['height'])
        elif format.get('height'):
            return '%sp' % format['height']
        elif format.get('width'):
            return '%dx?' % format['width']
        return default

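    # Illustrative inputs/outputs for format_resolution():
    #   {'vcodec': 'none', 'acodec': 'mp4a'}  -> 'audio only'
    #   {'width': 1920, 'height': 1080}       -> '1920x1080'
    #   {'height': 720}                       -> '720p'
    #   {'width': 640}                        -> '640x?'
    #   {}                                    -> default ('unknown')
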
    def _list_format_headers(self, *headers):
        if self.params.get('listformats_table', True) is not False:
            return [self._format_out(header, self.Styles.HEADERS) for header in headers]
        return headers

    def _format_note(self, fdict):
        res = ''
        if fdict.get('ext') in ['f4f', 'f4m']:
            res += '(unsupported)'
        if fdict.get('language'):
            if res:
                res += ' '
            res += '[%s]' % fdict['language']
        if fdict.get('format_note') is not None:
            if res:
                res += ' '
            res += fdict['format_note']
        if fdict.get('tbr') is not None:
            if res:
                res += ', '
            res += '%4dk' % fdict['tbr']
        if fdict.get('container') is not None:
            if res:
                res += ', '
            res += '%s container' % fdict['container']
        if (fdict.get('vcodec') is not None
                and fdict.get('vcodec') != 'none'):
            if res:
                res += ', '
            res += fdict['vcodec']
            if fdict.get('vbr') is not None:
                res += '@'
        elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
            res += 'video@'
        if fdict.get('vbr') is not None:
            res += '%4dk' % fdict['vbr']
        if fdict.get('fps') is not None:
            if res:
                res += ', '
            res += '%sfps' % fdict['fps']
        if fdict.get('acodec') is not None:
            if res:
                res += ', '
            if fdict['acodec'] == 'none':
                res += 'video only'
            else:
                res += '%-5s' % fdict['acodec']
        elif fdict.get('abr') is not None:
            if res:
                res += ', '
            res += 'audio'
        if fdict.get('abr') is not None:
            res += '@%3dk' % fdict['abr']
        if fdict.get('asr') is not None:
            res += ' (%5dHz)' % fdict['asr']
        if fdict.get('filesize') is not None:
            if res:
                res += ', '
            res += format_bytes(fdict['filesize'])
        elif fdict.get('filesize_approx') is not None:
            if res:
                res += ', '
            res += '~' + format_bytes(fdict['filesize_approx'])
        return res

    def _get_formats(self, info_dict):
        if info_dict.get('formats') is None:
            if info_dict.get('url') and info_dict.get('_type', 'video') == 'video':
                return [info_dict]
            return []
        return info_dict['formats']

    def render_formats_table(self, info_dict):
        formats = self._get_formats(info_dict)
        if not formats:
            return
        if not self.params.get('listformats_table', True) is not False:
            table = [
                [
                    format_field(f, 'format_id'),
                    format_field(f, 'ext'),
                    self.format_resolution(f),
                    self._format_note(f),
                ] for f in formats if (f.get('preference') or 0) >= -1000]
            return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)

        def simplified_codec(f, field):
            assert field in ('acodec', 'vcodec')
            codec = f.get(field)
            if not codec:
                return 'unknown'
            elif codec != 'none':
                return '.'.join(codec.split('.')[:4])

            if field == 'vcodec' and f.get('acodec') == 'none':
                return 'images'
            elif field == 'acodec' and f.get('vcodec') == 'none':
                return ''
            return self._format_out('audio only' if field == 'vcodec' else 'video only',
                                    self.Styles.SUPPRESS)

        delim = self._format_out('\u2502', self.Styles.DELIM, '|', test_encoding=True)
        table = [
            [
                self._format_out(format_field(f, 'format_id'), self.Styles.ID),
                format_field(f, 'ext'),
                format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
                format_field(f, 'fps', '\t%d', func=round),
                format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
                format_field(f, 'audio_channels', '\t%s'),
                delim, (
                    format_field(f, 'filesize', ' \t%s', func=format_bytes)
                    or format_field(f, 'filesize_approx', '≈\t%s', func=format_bytes)
                    or format_field(try_call(lambda: format_bytes(int(info_dict['duration'] * f['tbr'] * (1024 / 8)))),
                                    None, self._format_out('~\t%s', self.Styles.SUPPRESS))),
                format_field(f, 'tbr', '\t%dk', func=round),
                shorten_protocol_name(f.get('protocol', '')),
                delim,
                simplified_codec(f, 'vcodec'),
                format_field(f, 'vbr', '\t%dk', func=round),
                simplified_codec(f, 'acodec'),
                format_field(f, 'abr', '\t%dk', func=round),
                format_field(f, 'asr', '\t%s', func=format_decimal_suffix),
                join_nonempty(format_field(f, 'language', '[%s]'), join_nonempty(
                    self._format_out('UNSUPPORTED', self.Styles.BAD_FORMAT) if f.get('ext') in ('f4f', 'f4m') else None,
                    (self._format_out('Maybe DRM', self.Styles.WARNING) if f.get('has_drm') == 'maybe'
                     else self._format_out('DRM', self.Styles.BAD_FORMAT) if f.get('has_drm') else None),
                    format_field(f, 'format_note'),
                    format_field(f, 'container', ignore=(None, f.get('ext'))),
                    delim=', '), delim=' '),
            ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
        header_line = self._list_format_headers(
            'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', 'CH', delim, '\tFILESIZE', '\tTBR', 'PROTO',
            delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')

        return render_table(
            header_line, table, hide_empty=True,
            delim=self._format_out('\u2500', self.Styles.DELIM, '-', test_encoding=True))

    def render_thumbnails_table(self, info_dict):
        thumbnails = list(info_dict.get('thumbnails') or [])
        if not thumbnails:
            return None
        return render_table(
            self._list_format_headers('ID', 'Width', 'Height', 'URL'),
            [[t.get('id'), t.get('width') or 'unknown', t.get('height') or 'unknown', t['url']] for t in thumbnails])

    def render_subtitles_table(self, video_id, subtitles):
        def _row(lang, formats):
            exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
            if len(set(names)) == 1:
                names = [] if names[0] == 'unknown' else names[:1]
            return [lang, ', '.join(names), ', '.join(exts)]

        if not subtitles:
            return None
        return render_table(
            self._list_format_headers('Language', 'Name', 'Formats'),
            [_row(lang, formats) for lang, formats in subtitles.items()],
            hide_empty=True)

    def __list_table(self, video_id, name, func, *args):
        table = func(*args)
        if not table:
            self.to_screen(f'{video_id} has no {name}')
            return
        self.to_screen(f'[info] Available {name} for {video_id}:')
        self.to_stdout(table)

    def list_formats(self, info_dict):
        self.__list_table(info_dict['id'], 'formats', self.render_formats_table, info_dict)

    def list_thumbnails(self, info_dict):
        self.__list_table(info_dict['id'], 'thumbnails', self.render_thumbnails_table, info_dict)

    def list_subtitles(self, video_id, subtitles, name='subtitles'):
        self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles)

    def print_debug_header(self):
        if not self.params.get('verbose'):
            return

        from . import _IN_CLI  # Must be delayed import

        # These imports can be slow. So import them only as needed
        from .extractor.extractors import _LAZY_LOADER
        from .extractor.extractors import (
            _PLUGIN_CLASSES as plugin_ies,
            _PLUGIN_OVERRIDES as plugin_ie_overrides,
        )

        def get_encoding(stream):
            ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
            additional_info = []
            if os.environ.get('TERM', '').lower() == 'dumb':
                additional_info.append('dumb')
            if not supports_terminal_sequences(stream):
                from .utils import WINDOWS_VT_MODE  # Must be imported locally
                additional_info.append('No VT' if WINDOWS_VT_MODE is False else 'No ANSI')
            if additional_info:
                ret = f'{ret} ({",".join(additional_info)})'
            return ret

        encoding_str = 'Encodings: locale %s, fs %s, pref %s, %s' % (
            locale.getpreferredencoding(),
            sys.getfilesystemencoding(),
            self.get_encoding(),
            ', '.join(
                f'{key} {get_encoding(stream)}' for key, stream in self._out_files.items_
                if stream is not None and key != 'console')
        )

        logger = self.params.get('logger')
        if logger:
            write_debug = lambda msg: logger.debug(f'[debug] {msg}')
            write_debug(encoding_str)
        else:
            write_string(f'[debug] {encoding_str}\n', encoding=None)
            write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')

        source = detect_variant()
        if VARIANT not in (None, 'pip'):
            source += '*'
        klass = type(self)
        write_debug(join_nonempty(
            f'{REPOSITORY.rpartition("/")[2]} version',
            _make_label(ORIGIN, CHANNEL.partition('@')[2] or __version__, __version__),
            f'[{RELEASE_GIT_HEAD[:9]}]' if RELEASE_GIT_HEAD else '',
            '' if source == 'unknown' else f'({source})',
            '' if _IN_CLI else 'API' if klass == YoutubeDL else f'API:{self.__module__}.{klass.__qualname__}',
            delim=' '))

        if not _IN_CLI:
            write_debug(f'params: {self.params}')

        if not _LAZY_LOADER:
            if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
                write_debug('Lazy loading extractors is forcibly disabled')
            else:
                write_debug('Lazy loading extractors is disabled')
        if self.params['compat_opts']:
            write_debug('Compatibility options: %s' % ', '.join(self.params['compat_opts']))

        if current_git_head():
            write_debug(f'Git HEAD: {current_git_head()}')
        write_debug(system_identifier())

        exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
        ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
        if ffmpeg_features:
            exe_versions['ffmpeg'] += ' (%s)' % ','.join(sorted(ffmpeg_features))

        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_versions['phantomjs'] = PhantomJSwrapper._version()
        exe_str = ', '.join(
            f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
        ) or 'none'
        write_debug('exe versions: %s' % exe_str)

        from .compat.compat_utils import get_package_info
        from .dependencies import available_dependencies

        write_debug('Optional libraries: %s' % (', '.join(sorted({
            join_nonempty(*get_package_info(m)) for m in available_dependencies.values()
        })) or 'none'))

        write_debug(f'Proxy map: {self.proxies}')
        write_debug(f'Request Handlers: {", ".join(rh.RH_NAME for rh in self._request_director.handlers.values())}')
        for plugin_type, plugins in {'Extractor': plugin_ies, 'Post-Processor': plugin_pps}.items():
            display_list = ['%s%s' % (
                klass.__name__, '' if klass.__name__ == name else f' as {name}')
                for name, klass in plugins.items()]
            if plugin_type == 'Extractor':
                display_list.extend(f'{plugins[-1].IE_NAME.partition("+")[2]} ({parent.__name__})'
                                    for parent, plugins in plugin_ie_overrides.items())
            if not display_list:
                continue
            write_debug(f'{plugin_type} Plugins: {", ".join(sorted(display_list))}')

        plugin_dirs = plugin_directories()
        if plugin_dirs:
            write_debug(f'Plugin directories: {plugin_dirs}')

        # Not implemented
        if False and self.params.get('call_home'):
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode()
            write_debug('Public IP address: %s' % ipaddr)
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode()
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
                    latest_version)

    @functools.cached_property
    def proxies(self):
        """Global proxy configuration"""
        opts_proxy = self.params.get('proxy')
        if opts_proxy is not None:
            if opts_proxy == '':
                opts_proxy = '__noproxy__'
            proxies = {'all': opts_proxy}
        else:
            proxies = urllib.request.getproxies()
            # compat. Set HTTPS_PROXY to __noproxy__ to revert
            if 'http' in proxies and 'https' not in proxies:
                proxies['https'] = proxies['http']

        return proxies

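    # Resolution order sketch: an explicit 'proxy' param wins ('' disables
    # proxying via the '__noproxy__' sentinel); otherwise the standard proxy
    # environment variables are consulted, e.g. (hypothetical address)
    #
    #     YoutubeDL({'proxy': 'socks5://127.0.0.1:1080'}).proxies
    #     # -> {'all': 'socks5://127.0.0.1:1080'}
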
    @functools.cached_property
    def cookiejar(self):
        """Global cookiejar instance"""
        return load_cookies(
            self.params.get('cookiefile'), self.params.get('cookiesfrombrowser'), self)

    @functools.cached_property
    def _opener(self):
        """
        Get a urllib OpenerDirector from the Urllib handler (deprecated).
        """
        self.deprecation_warning('YoutubeDL._opener is deprecated, use YoutubeDL.urlopen()')
        handler = self._request_director.handlers['Urllib']
        return handler._get_instance(cookiejar=self.cookiejar, proxies=self.proxies)

    def urlopen(self, req):
        """ Start an HTTP download """
        if isinstance(req, str):
            req = Request(req)
        elif isinstance(req, urllib.request.Request):
            self.deprecation_warning(
                'Passing a urllib.request.Request object to YoutubeDL.urlopen() is deprecated. '
                'Use yt_dlp.networking.common.Request instead.')
            req = urllib_req_to_req(req)
        assert isinstance(req, Request)

        # compat: Assume user:pass url params are basic auth
        url, basic_auth_header = extract_basic_auth(req.url)
        if basic_auth_header:
            req.headers['Authorization'] = basic_auth_header
        req.url = sanitize_url(url)

        clean_proxies(proxies=req.proxies, headers=req.headers)
        clean_headers(req.headers)

        try:
            return self._request_director.send(req)
        except NoSupportingHandlers as e:
            for ue in e.unsupported_errors:
                # FIXME: This depends on the order of errors.
                if not (ue.handler and ue.msg):
                    continue
                if ue.handler.RH_KEY == 'Urllib' and 'unsupported url scheme: "file"' in ue.msg.lower():
                    raise RequestError(
                        'file:// URLs are disabled by default in yt-dlp for security reasons. '
                        'Use --enable-file-urls to enable at your own risk.', cause=ue) from ue
                if 'unsupported proxy type: "https"' in ue.msg.lower():
                    raise RequestError(
                        'To use an HTTPS proxy for this request, one of the following dependencies needs to be installed: requests')
                elif (
                    re.match(r'unsupported url scheme: "wss?"', ue.msg.lower())
                    and 'websockets' not in self._request_director.handlers
                ):
                    raise RequestError(
                        'This request requires WebSocket support. '
                        'Ensure one of the following dependencies are installed: websockets',
                        cause=ue) from ue
            raise
        except SSLError as e:
            if 'UNSAFE_LEGACY_RENEGOTIATION_DISABLED' in str(e):
                raise RequestError('UNSAFE_LEGACY_RENEGOTIATION_DISABLED: Try using --legacy-server-connect', cause=e) from e
            elif 'SSLV3_ALERT_HANDSHAKE_FAILURE' in str(e):
                raise RequestError(
                    'SSLV3_ALERT_HANDSHAKE_FAILURE: The server may not support the current cipher list. '
                    'Try using --legacy-server-connect', cause=e) from e
            raise
        except HTTPError as e:  # TODO: Remove in a future release
            raise _CompatHTTPError(e) from e

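    # Usage sketch with the networking framework's Request class (imported at
    # the top of this module; URL and header are illustrative):
    #
    #     with YoutubeDL({}) as ydl:
    #         res = ydl.urlopen(Request('https://example.com', headers={'X-Test': '1'}))
    #         data = res.read()
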
    def build_request_director(self, handlers, preferences=None):
        logger = _YDLLogger(self)
        headers = self.params['http_headers'].copy()
        proxies = self.proxies.copy()
        clean_headers(headers)
        clean_proxies(proxies, headers)

        director = RequestDirector(logger=logger, verbose=self.params.get('debug_printtraffic'))
        for handler in handlers:
            director.add_handler(handler(
                logger=logger,
                headers=headers,
                cookiejar=self.cookiejar,
                proxies=proxies,
                prefer_system_certs='no-certifi' in self.params['compat_opts'],
                verify=not self.params.get('nocheckcertificate'),
                **traverse_obj(self.params, {
                    'verbose': 'debug_printtraffic',
                    'source_address': 'source_address',
                    'timeout': 'socket_timeout',
                    'legacy_ssl_support': 'legacyserverconnect',
                    'enable_file_urls': 'enable_file_urls',
                    'client_cert': {
                        'client_certificate': 'client_certificate',
                        'client_certificate_key': 'client_certificate_key',
                        'client_certificate_password': 'client_certificate_password',
                    },
                }),
            ))
        director.preferences.update(preferences or [])
        if 'prefer-legacy-http-handler' in self.params['compat_opts']:
            director.preferences.add(lambda rh, _: 500 if rh.RH_KEY == 'Urllib' else 0)
        return director

    def encode(self, s):
        if isinstance(s, bytes):
            return s  # Already encoded

        try:
            return s.encode(self.get_encoding())
        except UnicodeEncodeError as err:
            err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
            raise

):
4138 encoding
= self
.params
.get('encoding')
4139 if encoding
is None:
4140 encoding
= preferredencoding()
4143 def _write_info_json(self
, label
, ie_result
, infofn
, overwrite
=None):
4144 ''' Write infojson and returns True = written, 'exists' = Already exists, False = skip, None = error '''
4145 if overwrite
is None:
4146 overwrite
= self
.params
.get('overwrites', True)
4147 if not self
.params
.get('writeinfojson'):
4150 self
.write_debug(f
'Skipping writing {label} infojson')
4152 elif not self
._ensure
_dir
_exists
(infofn
):
4154 elif not overwrite
and os
.path
.exists(infofn
):
4155 self
.to_screen(f
'[info] {label.title()} metadata is already present')
4158 self
.to_screen(f
'[info] Writing {label} metadata as JSON to: {infofn}')
4160 write_json_file(self
.sanitize_info(ie_result
, self
.params
.get('clean_infojson', True)), infofn
)
4163 self
.report_error(f
'Cannot write {label} metadata to JSON file {infofn}')
4166 def _write_description(self
, label
, ie_result
, descfn
):
4167 ''' Write description and returns True = written, False = skip, None = error '''
4168 if not self
.params
.get('writedescription'):
4171 self
.write_debug(f
'Skipping writing {label} description')
4173 elif not self
._ensure
_dir
_exists
(descfn
):
4175 elif not self
.params
.get('overwrites', True) and os
.path
.exists(descfn
):
4176 self
.to_screen(f
'[info] {label.title()} description is already present')
4177 elif ie_result
.get('description') is None:
4178 self
.to_screen(f
'[info] There\'s no {label} description to write')
4182 self
.to_screen(f
'[info] Writing {label} description to: {descfn}')
4183 with open(encodeFilename(descfn
), 'w', encoding
='utf-8') as descfile
:
4184 descfile
.write(ie_result
['description'])
4186 self
.report_error(f
'Cannot write {label} description file {descfn}')
    def _write_subtitles(self, info_dict, filename):
        ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
        ret = []
        subtitles = info_dict.get('requested_subtitles')
        if not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
            # subtitles download errors are already managed as troubles in relevant IE
            # that way it will silently go on when used with unsupporting IE
            return ret
        elif not subtitles:
            self.to_screen('[info] There are no subtitles for the requested languages')
            return ret
        sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
        if not sub_filename_base:
            self.to_screen('[info] Skipping writing video subtitles')
            return ret

        for sub_lang, sub_info in subtitles.items():
            sub_format = sub_info['ext']
            sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
            sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
            existing_sub = self.existing_file((sub_filename_final, sub_filename))
            if existing_sub:
                self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
                sub_info['filepath'] = existing_sub
                ret.append((existing_sub, sub_filename_final))
                continue

            self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
            if sub_info.get('data') is not None:
                try:
                    # Use newline='' to prevent conversion of newline characters
                    # See https://github.com/ytdl-org/youtube-dl/issues/10268
                    with open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
                        subfile.write(sub_info['data'])
                    sub_info['filepath'] = sub_filename
                    ret.append((sub_filename, sub_filename_final))
                    continue
                except OSError:
                    self.report_error(f'Cannot write video subtitles file {sub_filename}')
                    return None

            try:
                sub_copy = sub_info.copy()
                sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
                self.dl(sub_filename, sub_copy, subtitle=True)
                sub_info['filepath'] = sub_filename
                ret.append((sub_filename, sub_filename_final))
            except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
                msg = f'Unable to download video subtitles for {sub_lang!r}: {err}'
                if self.params.get('ignoreerrors') is not True:  # False or 'only_download'
                    if not self.params.get('ignoreerrors'):
                        self.report_error(msg)
                    raise DownloadError(msg)
                self.report_warning(msg)
        return ret

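    # Shape of the data consumed above (illustrative values):
    # 'requested_subtitles' maps each language code to a single subtitle dict, e.g.
    #
    #     {'en': {'ext': 'vtt', 'url': 'https://...'},
    #      'ja': {'ext': 'srt', 'data': '...'}}
    #
    # Entries carrying inline 'data' are written directly; the rest are
    # downloaded via self.dl() with the video's http_headers as fallback.
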
    def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
        ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename); or None if error '''
        write_all = self.params.get('write_all_thumbnails', False)
        thumbnails, ret = [], []
        if write_all or self.params.get('writethumbnail', False):
            thumbnails = info_dict.get('thumbnails') or []
            if not thumbnails:
                self.to_screen(f'[info] There are no {label} thumbnails to download')
                return ret
        multiple = write_all and len(thumbnails) > 1

        if thumb_filename_base is None:
            thumb_filename_base = filename
        if thumbnails and not thumb_filename_base:
            self.write_debug(f'Skipping writing {label} thumbnail')
            return ret

        if thumbnails and not self._ensure_dir_exists(filename):
            return None

        for idx, t in list(enumerate(thumbnails))[::-1]:
            thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
            thumb_display_id = f'{label} thumbnail {t["id"]}'
            thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
            thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))

            existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
            if existing_thumb:
                self.to_screen('[info] %s is already present' % (
                    thumb_display_id if multiple else f'{label} thumbnail').capitalize())
                t['filepath'] = existing_thumb
                ret.append((existing_thumb, thumb_filename_final))
            else:
                self.to_screen(f'[info] Downloading {thumb_display_id} ...')
                try:
                    uf = self.urlopen(Request(t['url'], headers=t.get('http_headers', {})))
                    self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                    ret.append((thumb_filename, thumb_filename_final))
                    t['filepath'] = thumb_filename
                except network_exceptions as err:
                    if isinstance(err, HTTPError) and err.status == 404:
                        self.to_screen(f'[info] {thumb_display_id.title()} does not exist')
                    else:
                        self.report_warning(f'Unable to download {thumb_display_id}: {err}')
                    thumbnails.pop(idx)
            if ret and not write_all:
                break