from .cache import Cache
from .compat import functools, urllib  # isort: split
from .compat import compat_os_name, compat_shlex_quote, urllib_req_to_req
from .cookies import LenientSimpleCookie, load_cookies
from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
from .downloader.rtmp import rtmpdump_version
from .extractor import gen_extractor_classes, get_info_extractor
from .extractor.common import UnsupportedURLIE
from .extractor.openload import PhantomJSwrapper
from .minicurses import format_text
from .networking import HEADRequest, Request, RequestDirector
from .networking.common import _REQUEST_HANDLERS, _RH_PREFERENCES
from .networking.exceptions import (
    HTTPError,
    NoSupportingHandlers,
    RequestError,
    SSLError,
    network_exceptions,
)
from .plugins import directories as plugin_directories
from .postprocessor import _PLUGIN_CLASSES as plugin_pps
from .postprocessor import (
    FFmpegFixupDuplicateMoovPP,
    FFmpegFixupDurationPP,
    FFmpegFixupStretchedPP,
    FFmpegFixupTimestampPP,
    FFmpegVideoConvertorPP,
    MoveFilesAfterDownloadPP,
    get_postprocessor,
)
from .postprocessor.ffmpeg import resolve_mapping as resolve_recode_mapping
from .update import REPOSITORY, current_git_head, detect_variant
from .utils import (
    UnavailableVideoError,
    format_decimal_suffix,
    orderedSet_from_options,
    remove_terminal_sequences,
    supports_terminal_sequences,
    windows_enable_vt_mode,
)
from .utils._utils import _YDLLogger
from .utils.networking import (
    HTTPHeaderDict,
    clean_headers,
    clean_proxies,
    std_headers,
)
from .version import CHANNEL, RELEASE_GIT_HEAD, VARIANT, __version__

if compat_os_name == 'nt':
    import ctypes


class YoutubeDL:
    """YoutubeDL class.

    YoutubeDL objects are the ones responsible for downloading the
    actual video file and writing it to disk if the user has requested
    it, among some other tasks. In most cases there should be one per
    program. Since, given a video URL, the downloader doesn't know how to
    extract all the needed information (a task that InfoExtractors do),
    it has to pass the URL to one of them.

    For this, YoutubeDL objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    YoutubeDL processes the extracted information, possibly using a File
    Downloader to download the video.

    YoutubeDL objects accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The YoutubeDL also
    registers itself as the downloader in charge of the InfoExtractors
    that are added to it, so this is a "mutual registration".
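
    A minimal usage sketch (the URL here is only a placeholder):

        from yt_dlp import YoutubeDL

        with YoutubeDL({'format': 'bestvideo+bestaudio/best'}) as ydl:
            ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])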

    Available options:

    username:          Username for authentication purposes.
    password:          Password for authentication purposes.
    videopassword:     Password for accessing a video.
    ap_mso:            Adobe Pass multiple-system operator identifier.
    ap_username:       Multiple-system operator account username.
    ap_password:       Multiple-system operator account password.
    usenetrc:          Use netrc for authentication instead.
    netrc_location:    Location of the netrc file. Defaults to ~/.netrc.
    netrc_cmd:         Use a shell command to get credentials
    verbose:           Print additional info to stdout.
    quiet:             Do not print messages to stdout.
    no_warnings:       Do not print out anything for warnings.
    forceprint:        A dict with keys WHEN mapped to a list of templates to
                       print to stdout. The allowed keys are video or any of the
                       items in utils.POSTPROCESS_WHEN.
                       For compatibility, a single list is also accepted
    print_to_file:     A dict with keys WHEN (same as forceprint) mapped to
                       a list of tuples with (template, filename)
    forcejson:         Force printing info_dict as JSON.
    dump_single_json:  Force printing the info_dict of the whole playlist
                       (or video) as a single JSON line.
    force_write_download_archive: Force writing the download archive regardless
                       of 'skip_download' or 'simulate'.
    simulate:          Do not download the video files. If unset (or None),
                       simulate only if listsubtitles, listformats or list_thumbnails is used
    format:            Video format code. See "FORMAT SELECTION" for more details.
                       You can also pass a function. The function takes 'ctx' as
                       argument and returns the formats to download.
                       See "build_format_selector" for an implementation
    allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
    ignore_no_formats_error: Ignore "No video formats" error. Useful for
                       extracting metadata even if the video is not actually
                       available for download (experimental)
    format_sort:       A list of fields by which to sort the video formats.
                       See "Sorting Formats" for more details.
    format_sort_force: Force the given format_sort. See "Sorting Formats"
                       for more details.
    prefer_free_formats: Whether to prefer video formats with free containers
                       over non-free ones of the same quality.
    allow_multiple_video_streams: Allow multiple video streams to be merged
                       into a single file
    allow_multiple_audio_streams: Allow multiple audio streams to be merged
                       into a single file
    check_formats:     Whether to test if the formats are downloadable.
                       Can be True (check all), False (check none),
                       'selected' (check selected formats),
                       or None (check only if requested by extractor)
    paths:             Dictionary of output paths. The allowed keys are 'home',
                       'temp' and the keys of OUTTMPL_TYPES (in utils.py)
    outtmpl:           Dictionary of templates for output names. Allowed keys
                       are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
                       For compatibility with youtube-dl, a single string can also be used
    outtmpl_na_placeholder: Placeholder for unavailable meta fields.
    restrictfilenames: Do not allow "&" and spaces in file names
    trim_file_name:    Limit length of filename (extension excluded)
    windowsfilenames:  Force the filenames to be Windows compatible
    ignoreerrors:      Do not stop on download/postprocessing errors.
                       Can be 'only_download' to ignore only download errors.
                       Default is 'only_download' for CLI, but False for API
    skip_playlist_after_errors: Number of allowed failures until the rest of
                       the playlist is skipped
    allowed_extractors: List of regexes to match against extractor names that are allowed
    overwrites:        Overwrite all video and metadata files if True,
                       overwrite only non-video files if None
                       and don't overwrite any file if False
    playlist_items:    Specific indices of playlist to download.
    playlistrandom:    Download playlist items in random order.
    lazy_playlist:     Process playlist entries as they are received.
    matchtitle:        Download only matching titles.
    rejecttitle:       Reject downloads for matching titles.
    logger:            Log messages to a logging.Logger instance.
    logtostderr:       Print everything to stderr instead of stdout.
    consoletitle:      Display progress in the console window's titlebar.
    writedescription: Write the video description to a .description file
    writeinfojson:     Write the video metadata to a .info.json file
    clean_infojson:    Remove internal metadata from the infojson
    getcomments:       Extract video comments. These will not be written to disk
                       unless writeinfojson is also given
    writeannotations:  Write the video annotations to a .annotations.xml file
    writethumbnail:    Write the thumbnail image to a file
    allow_playlist_files: Whether to write playlists' description, infojson etc.
                       also to disk when using the 'write*' options
    write_all_thumbnails: Write all thumbnail formats to files
    writelink:         Write an internet shortcut file, depending on the
                       current platform (.url/.webloc/.desktop)
    writeurllink:      Write a Windows internet shortcut file (.url)
    writewebloclink:   Write a macOS internet shortcut file (.webloc)
    writedesktoplink:  Write a Linux internet shortcut file (.desktop)
    writesubtitles:    Write the video subtitles to a file
    writeautomaticsub: Write the automatically generated subtitles to a file
    listsubtitles:     Lists all available subtitles for the video
    subtitlesformat:   The format code for subtitles
    subtitleslangs:    List of languages of the subtitles to download (can be regex).
                       The list may contain "all" to refer to all the available
                       subtitles. The language can be prefixed with a "-" to
                       exclude it from the requested languages, e.g. ['all', '-live_chat']
    keepvideo:         Keep the video file after post-processing
    daterange:         A utils.DateRange object; download only if the upload_date is in the range.
    skip_download:     Skip the actual download of the video file
    cachedir:          Location of the cache files in the filesystem.
                       False to disable filesystem cache.
    noplaylist:        Download single video instead of a playlist if in doubt.
    age_limit:         An integer representing the user's age in years.
                       Videos unsuitable for the given age are skipped.
    min_views:         An integer representing the minimum view count the video
                       must have in order to not be skipped.
                       Videos without view count information are always
                       downloaded. None for no limit.
    max_views:         An integer representing the maximum view count.
                       Videos that are more popular than that are not
                       downloaded.
                       Videos without view count information are always
                       downloaded. None for no limit.
    download_archive:  A set, or the name of a file where all downloads are recorded.
                       Videos already present in the file are not downloaded again.
    break_on_existing: Stop the download process after attempting to download a
                       file that is in the archive.
    break_per_url:     Whether break_on_reject and break_on_existing
                       should act on each input URL as opposed to for the entire queue
    cookiefile:        File name or text stream from where cookies should be read and dumped to
    cookiesfrombrowser: A tuple containing the name of the browser, the profile
                       name/path from where cookies are loaded, the name of the keyring,
                       and the container name, e.g. ('chrome', ) or
                       ('vivaldi', 'default', 'BASICTEXT') or ('firefox', 'default', None, 'Meta')
    legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
                       support RFC 5746 secure renegotiation
    nocheckcertificate: Do not verify SSL certificates
    client_certificate: Path to client certificate file in PEM format. May include the private key
    client_certificate_key: Path to private key file for client certificate
    client_certificate_password: Password for client certificate private key, if encrypted.
                       If not provided and the key is encrypted, yt-dlp will ask interactively
    prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
                       (Only supported by some extractors)
    enable_file_urls:  Enable file:// URLs. This is disabled by default for security reasons.
    http_headers:      A dictionary of custom headers to be used for all requests
    proxy:             URL of the proxy server to use
    geo_verification_proxy: URL of the proxy to use for IP address verification
                       on geo-restricted sites.
    socket_timeout:    Time to wait for unresponsive hosts, in seconds
    bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
    debug_printtraffic: Print out sent and received HTTP traffic
    default_search:    Prepend this string if an input URL is not valid.
                       'auto' for elaborate guessing
    encoding:          Use this encoding instead of the system-specified one.
    extract_flat:      Whether to resolve and process url_results further
                       * False: Always process. Default for API
                       * True: Never process
                       * 'in_playlist': Do not process inside playlist/multi_video
                       * 'discard': Always process, but don't return the result
                         from inside playlist/multi_video
                       * 'discard_in_playlist': Same as "discard", but only for
                         playlists (not multi_video). Default for CLI
    wait_for_video:    If given, wait for scheduled streams to become available.
                       The value should be a tuple containing the range
                       (min_secs, max_secs) to wait between retries
    postprocessors:    A list of dictionaries, each with an entry
                       * key: The name of the postprocessor. See
                         yt_dlp/postprocessor/__init__.py for a list.
                       * when: When to run the postprocessor. Allowed values are
                         the entries of utils.POSTPROCESS_WHEN
                         Assumed to be 'post_process' if not given
                       (see the example after this options list)
    progress_hooks:    A list of functions that get called on download
                       progress, with a dictionary with the entries
                       * status: One of "downloading", "error", or "finished".
                         Check this first and ignore unknown values.
                       * info_dict: The extracted info_dict

                       If status is one of "downloading", or "finished", the
                       following properties may also be present:
                       * filename: The final filename (always present)
                       * tmpfilename: The filename we're currently writing to
                       * downloaded_bytes: Bytes on disk
                       * total_bytes: Size of the whole file, None if unknown
                       * total_bytes_estimate: Guess of the eventual file size,
                         None if unavailable.
                       * elapsed: The number of seconds since download started.
                       * eta: The estimated time in seconds, None if unknown
                       * speed: The download speed in bytes/second, None if
                         unknown
                       * fragment_index: The counter of the currently
                         downloaded video fragment.
                       * fragment_count: The number of fragments (= individual
                         files that will be merged)

                       Progress hooks are guaranteed to be called at least once
                       (with status "finished") if the download is successful
                       (see the example after this options list).
    postprocessor_hooks: A list of functions that get called on postprocessing
                       progress, with a dictionary with the entries
                       * status: One of "started", "processing", or "finished".
                         Check this first and ignore unknown values.
                       * postprocessor: Name of the postprocessor
                       * info_dict: The extracted info_dict

                       Progress hooks are guaranteed to be called at least twice
                       (with status "started" and "finished") if the processing is successful.
    merge_output_format: "/" separated list of extensions to use when merging formats.
    final_ext:         Expected final extension; used to detect when the file was
                       already downloaded and converted
    fixup:             Automatically correct known faults of the file.
                       One of:
                       - "never": do nothing
                       - "warn": only emit a warning
                       - "detect_or_warn": check whether we can do anything
                         about it, warn otherwise (default)
    source_address:    Client-side IP address to bind to.
    sleep_interval_requests: Number of seconds to sleep between requests
                       during extraction
    sleep_interval:    Number of seconds to sleep before each download when
                       used alone or a lower bound of a range for randomized
                       sleep before each download (minimum possible number
                       of seconds to sleep) when used along with
                       max_sleep_interval.
    max_sleep_interval: Upper bound of a range for randomized sleep before each
                       download (maximum possible number of seconds to sleep).
                       Must only be used along with sleep_interval.
                       Actual sleep time will be a random float from range
                       [sleep_interval; max_sleep_interval].
    sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
    listformats:       Print an overview of available video formats and exit.
    list_thumbnails:   Print a table of all thumbnails and exit.
    match_filter:      A function that gets called for every video with the signature
                       (info_dict, *, incomplete: bool) -> Optional[str]
                       For backward compatibility with youtube-dl, the signature
                       (info_dict) -> Optional[str] is also allowed.
                       - If it returns a message, the video is ignored.
                       - If it returns None, the video is downloaded.
                       - If it returns utils.NO_DEFAULT, the user is interactively
                         asked whether to download the video.
                       - Raise utils.DownloadCancelled(msg) to abort remaining
                         downloads when a video is rejected.
                       match_filter_func in utils.py is one example for this
                       (see also the example after this options list).
    color:             A dictionary with output stream names as keys
                       and their respective color policy as values.
                       Can also just be a single color policy,
                       in which case it applies to all outputs.
                       Valid stream names are 'stdout' and 'stderr'.
                       Valid color policies are one of 'always', 'auto', 'no_color' or 'never'.
    geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
                       HTTP header
    geo_bypass_country: Two-letter ISO 3166-2 country code that will be used for
                       explicit geographic restriction bypassing via faking
                       the X-Forwarded-For HTTP header
    geo_bypass_ip_block: IP range in CIDR notation that will be used similarly to
                       geo_bypass_country
    external_downloader: A dictionary of protocol keys and the executable of the
                       external downloader to use for it. The allowed protocols
                       are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
                       Set the value to 'native' to use the native downloader
    compat_opts:       Compatibility options. See "Differences in default behavior".
                       The following options do not work when used through the API:
                       filename, abort-on-error, multistreams, no-live-chat, format-sort,
                       no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
                       Refer to __init__.py for their implementation
    progress_template: Dictionary of templates for progress outputs.
                       Allowed keys are 'download', 'postprocess',
                       'download-title' (console title) and 'postprocess-title'.
                       The template is mapped on a dictionary with keys 'progress' and 'info'
    retry_sleep_functions: Dictionary of functions that take the number of attempts
                       as argument and return the time to sleep in seconds.
                       Allowed keys are 'http', 'fragment', 'file_access'
    download_ranges:   A callback function that gets called for every video with
                       the signature (info_dict, ydl) -> Iterable[Section].
                       Only the returned sections will be downloaded.
                       Each Section is a dict with the following keys:
                       * start_time: Start time of the section in seconds
                       * end_time: End time of the section in seconds
                       * title: Section title (Optional)
                       * index: Section number (Optional)
    force_keyframes_at_cuts: Re-encode the video when downloading ranges to get precise cuts
    noprogress:        Do not print the progress bar
    live_from_start:   Whether to download livestreams from the start
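
    Example options dict (an illustrative sketch; the hook and filter
    functions shown here are hypothetical user callbacks):

        def longer_than_a_minute(info, *, incomplete):
            if (info.get('duration') or 0) < 60:
                return 'The video is shorter than a minute'  # skipped
            return None  # downloaded

        def my_hook(d):
            if d['status'] == 'finished':
                print('Done downloading', d['filename'])

        ydl_opts = {
            'format': 'bestaudio/best',
            'match_filter': longer_than_a_minute,
            'progress_hooks': [my_hook],
            'postprocessors': [{'key': 'FFmpegExtractAudio', 'preferredcodec': 'mp3'}],
        }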

    The following parameters are not used by YoutubeDL itself; they are used by
    the downloader (see yt_dlp/downloader/common.py):
    nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
    max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
    continuedl, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
    external_downloader_args, concurrent_fragment_downloads.

    The following options are used by the post processors:
    ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
                       to the binary or its containing directory.
    postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
                       and a list of additional command-line arguments for the
                       postprocessor/executable. The dict can also have "PP+EXE" keys
                       which are used when the given exe is used by the given PP.
                       Use 'default' as the name for arguments to be passed to all PP.
                       For compatibility with youtube-dl, a single list of args
                       can also be used
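
                       Example (an illustrative sketch; the argument values
                       are placeholders):
                           'postprocessor_args': {
                               'default': [],                # passed to every PP/executable
                               'ffmpeg': ['-threads', '2'],  # extra args for each ffmpeg call
                           }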

    The following options are used by the extractors:
    extractor_retries: Number of times to retry for known errors (default: 3)
    dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
    hls_split_discontinuity: Split HLS playlists into different formats at
                       discontinuities such as ad breaks (default: False)
    extractor_args:    A dictionary of arguments to be passed to the extractors.
                       See "EXTRACTOR ARGUMENTS" for details.
                       E.g. {'youtube': {'skip': ['dash', 'hls']}}
    mark_watched:      Mark videos watched (even with --simulate). Only for YouTube

    The following options are deprecated and may be removed in the future:

    break_on_reject:   Stop the download process when encountering a video that
                       has been filtered out.
                       - `raise DownloadCancelled(msg)` in match_filter instead
    force_generic_extractor: Force downloader to use the generic extractor
                       - Use allowed_extractors = ['generic', 'default']
    playliststart:     - Use playlist_items
                       Playlist item to start at.
    playlistend:       - Use playlist_items
                       Playlist item to end at.
    playlistreverse:   - Use playlist_items
                       Download playlist items in reverse order.
    forceurl:          - Use forceprint
                       Force printing final URL.
    forcetitle:        - Use forceprint
                       Force printing title.
    forceid:           - Use forceprint
                       Force printing ID.
    forcethumbnail:    - Use forceprint
                       Force printing thumbnail URL.
    forcedescription:  - Use forceprint
                       Force printing description.
    forcefilename:     - Use forceprint
                       Force printing final filename.
    forceduration:     - Use forceprint
                       Force printing duration.
    allsubtitles:      - Use subtitleslangs = ['all']
                       Downloads all the subtitles of the video
                       (requires writesubtitles or writeautomaticsub)
    include_ads:       - Doesn't work
                       Download ads as well
    call_home:         - Not implemented
                       Boolean, true iff we are allowed to contact the
                       yt-dlp servers for debugging.
    post_hooks:        - Register a custom postprocessor instead
                       A list of functions that get called as the final step
                       for each video file, after all postprocessors have been
                       called. The filename will be passed as the only argument.
    hls_prefer_native: - Use external_downloader = {'m3u8': 'native'} or {'m3u8': 'ffmpeg'}.
                       Use the native HLS downloader instead of ffmpeg/avconv
                       if True, otherwise use ffmpeg/avconv if False, otherwise
                       use downloader suggested by extractor if None.
    prefer_ffmpeg:     - avconv support is deprecated
                       If False, use avconv instead of ffmpeg if both are available,
                       otherwise prefer ffmpeg.
    youtube_include_dash_manifest: - Use extractor_args
                       If True (default), DASH manifests and related
                       data will be downloaded and processed by the extractor.
                       You can reduce network I/O by disabling it if you don't
                       care about DASH. (only for youtube)
    youtube_include_hls_manifest: - Use extractor_args
                       If True (default), HLS manifests and related
                       data will be downloaded and processed by the extractor.
                       You can reduce network I/O by disabling it if you don't
                       care about HLS. (only for youtube)
    no_color:          Same as `color='no_color'`
    no_overwrites:     Same as `overwrites=False`
    """

    _NUMERIC_FIELDS = {
        'width', 'height', 'asr', 'audio_channels', 'fps',
        'tbr', 'abr', 'vbr', 'filesize', 'filesize_approx',
        'timestamp', 'release_timestamp',
        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
        'average_rating', 'comment_count', 'age_limit',
        'start_time', 'end_time',
        'chapter_number', 'season_number', 'episode_number',
        'track_number', 'disc_number', 'release_year',
    }

    _format_fields = {
        # NB: Keep in sync with the docstring of extractor/common.py
        'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note',
        'width', 'height', 'aspect_ratio', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr', 'audio_channels',
        'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx', 'rows', 'columns',
        'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start',
        'preference', 'language', 'language_preference', 'quality', 'source_preference', 'cookies',
        'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'hls_aes', 'downloader_options',
        'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time',
    }
    _format_selection_exts = {
        'audio': set(MEDIA_EXTENSIONS.common_audio),
        'video': set(MEDIA_EXTENSIONS.common_video + ('3gp', )),
        'storyboards': set(MEDIA_EXTENSIONS.storyboards),
    }

    def __init__(self, params=None, auto_init=True):
        """Create a YoutubeDL object with the given options.
        @param auto_init    Whether to load the default extractors and print header (if verbose).
                            Set to 'no_verbose_header' to not print the header
        """
        if params is None:
            params = {}
        self.params = params
        self._ies = {}
        self._ies_instances = {}
        self._pps = {k: [] for k in POSTPROCESS_WHEN}
        self._printed_messages = set()
        self._first_webpage_request = True
        self._post_hooks = []
        self._progress_hooks = []
        self._postprocessor_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        self._num_videos = 0
        self._playlist_level = 0
        self._playlist_urls = set()
        self.cache = Cache(self)
        self.__header_cookies = []

        stdout = sys.stderr if self.params.get('logtostderr') else sys.stdout
        self._out_files = Namespace(
            out=stdout,
            error=sys.stderr,
            screen=sys.stderr if self.params.get('quiet') else stdout,
            console=None if compat_os_name == 'nt' else next(
                filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None),
        )

        try:
            windows_enable_vt_mode()
        except Exception as e:
            self.write_debug(f'Failed to enable VT mode: {e}')

        if self.params.get('no_color'):
            if self.params.get('color') is not None:
                self.params.setdefault('_warnings', []).append(
                    'Overwriting params from "color" with "no_color"')
            self.params['color'] = 'no_color'

        term_allow_color = os.environ.get('TERM', '').lower() != 'dumb'

        def process_color_policy(stream):
            stream_name = {sys.stdout: 'stdout', sys.stderr: 'stderr'}[stream]
            policy = traverse_obj(self.params, ('color', (stream_name, None), {str}), get_all=False)
            if policy in ('auto', None):
                return term_allow_color and supports_terminal_sequences(stream)
            assert policy in ('always', 'never', 'no_color'), policy
            return {'always': True, 'never': False}.get(policy, policy)

        self._allow_colors = Namespace(**{
            name: process_color_policy(stream)
            for name, stream in self._out_files.items_ if name != 'console'
        })

        # The code is left like this to be reused for future deprecations
        MIN_SUPPORTED, MIN_RECOMMENDED = (3, 7), (3, 7)
        current_version = sys.version_info[:2]
        if current_version < MIN_RECOMMENDED:
            msg = ('Support for Python version %d.%d has been deprecated. '
                   'See https://github.com/yt-dlp/yt-dlp/issues/3764 for more details.'
                   '\n                    You will no longer receive updates on this version')
            if current_version < MIN_SUPPORTED:
                msg = 'Python version %d.%d is no longer supported'
            self.deprecated_feature(
                f'{msg}! Please update to Python %d.%d or above' % (*current_version, *MIN_RECOMMENDED))

        if self.params.get('allow_unplayable_formats'):
            self.report_warning(
                f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
                'This is a developer option intended for debugging. \n'
                '         If you experience any issues while using this option, '
                f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')

        if self.params.get('bidi_workaround', False):
            try:
                import pty
                master, slave = pty.openpty()
                width = shutil.get_terminal_size().columns
                width_args = [] if width is None else ['-w', str(width)]
                sp_kwargs = {'stdin': subprocess.PIPE, 'stdout': slave, 'stderr': self._out_files.error}
                try:
                    self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
                except OSError:
                    self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == errno.ENOENT:
                    self.report_warning(
                        'Could not find fribidi executable, ignoring --bidi-workaround. '
                        'Make sure that fribidi is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        self.params['compat_opts'] = set(self.params.get('compat_opts', ()))
        self.params['http_headers'] = HTTPHeaderDict(std_headers, self.params.get('http_headers'))
        self._load_cookies(self.params['http_headers'].get('Cookie'))  # compat
        self.params['http_headers'].pop('Cookie', None)
        self._request_director = self.build_request_director(_REQUEST_HANDLERS.values(), _RH_PREFERENCES)

        if auto_init and auto_init != 'no_verbose_header':
            self.print_debug_header()

        def check_deprecated(param, option, suggestion):
            if self.params.get(param) is not None:
                self.report_warning(f'{option} is deprecated. Use {suggestion} instead')
                return True
            return False

        if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
        check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
        check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

        for msg in self.params.get('_warnings', []):
            self.report_warning(msg)
        for msg in self.params.get('_deprecation_warnings', []):
            self.deprecated_feature(msg)

        if 'list-formats' in self.params['compat_opts']:
            self.params['listformats_table'] = False

        if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
            # nooverwrites was unnecessarily changed to overwrites
            # in 0c3d0f51778b153f65c21906031c2e091fcfb641
            # This ensures compatibility with both keys
            self.params['overwrites'] = not self.params['nooverwrites']
        elif self.params.get('overwrites') is None:
            self.params.pop('overwrites', None)
        else:
            self.params['nooverwrites'] = not self.params['overwrites']

        if self.params.get('simulate') is None and any((
            self.params.get('list_thumbnails'),
            self.params.get('listformats'),
            self.params.get('listsubtitles'),
        )):
            self.params['simulate'] = 'list_only'

        self.params.setdefault('forceprint', {})
        self.params.setdefault('print_to_file', {})

        # Compatibility with older syntax
        if not isinstance(params['forceprint'], dict):
            self.params['forceprint'] = {'video': params['forceprint']}

        if auto_init:
            self.add_default_info_extractors()

        if (sys.platform != 'win32'
                and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not self.params.get('restrictfilenames', False)):
            # Unicode filesystem API will throw errors (#1474, #13027)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        self._parse_outtmpl()

        # Creating format selector here allows us to catch syntax errors before the extraction
        self.format_selector = (
            self.params.get('format') if self.params.get('format') in (None, '-')
            else self.params['format'] if callable(self.params['format'])
            else self.build_format_selector(self.params['format']))

        hooks = {
            'post_hooks': self.add_post_hook,
            'progress_hooks': self.add_progress_hook,
            'postprocessor_hooks': self.add_postprocessor_hook,
        }
        for opt, fn in hooks.items():
            for ph in self.params.get(opt, []):
                fn(ph)

        for pp_def_raw in self.params.get('postprocessors', []):
            pp_def = dict(pp_def_raw)
            when = pp_def.pop('when', 'post_process')
            self.add_post_processor(
                get_postprocessor(pp_def.pop('key'))(self, **pp_def),
                when=when)

        def preload_download_archive(fn):
            """Preload the archive, if any is specified"""
            archive = set()
            if fn is None:
                return archive
            elif not is_path_like(fn):
                return fn

            self.write_debug(f'Loading archive file {fn!r}')
            try:
                with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                    for line in archive_file:
                        archive.add(line.strip())
            except OSError as ioe:
                if ioe.errno != errno.ENOENT:
                    raise
            return archive

        self.archive = preload_download_archive(self.params.get('download_archive'))

    def warn_if_short_id(self, argv):
        # short YouTube ID starting with dash?
        idxs = [
            i for i, a in enumerate(argv)
            if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
        if idxs:
            correct_argv = (
                ['yt-dlp']
                + [a for i, a in enumerate(argv) if i not in idxs]
                + ['--'] + [argv[i] for i in idxs]
            )
            self.report_warning(
                'Long argument string detected. '
                'Use -- to separate parameters and URLs, like this:\n%s' %
                args_to_str(correct_argv))

    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        ie_key = ie.ie_key()
        self._ies[ie_key] = ie
        if not isinstance(ie, type):
            self._ies_instances[ie_key] = ie
            ie.set_downloader(self)

    def get_info_extractor(self, ie_key):
        """
        Get an instance of an IE with name ie_key; it will try to get one from
        the _ies list, and if there is no instance it will create a new one and
        add it to the extractor list.
        """
        ie = self._ies_instances.get(ie_key)
        if ie is None:
            ie = get_info_extractor(ie_key)()
            self.add_info_extractor(ie)
        return ie

    def add_default_info_extractors(self):
        """
        Add the InfoExtractors returned by gen_extractors to the end of the list
        """
        all_ies = {ie.IE_NAME.lower(): ie for ie in gen_extractor_classes()}
        all_ies['end'] = UnsupportedURLIE()
        try:
            ie_names = orderedSet_from_options(
                self.params.get('allowed_extractors', ['default']), {
                    'all': list(all_ies),
                    'default': [name for name, ie in all_ies.items() if ie._ENABLED],
                }, use_regex=True)
        except re.error as e:
            raise ValueError(f'Wrong regex for allowed_extractors: {e.pattern}')
        for name in ie_names:
            self.add_info_extractor(all_ies[name])
        self.write_debug(f'Loaded {len(ie_names)} extractors')
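
    # Illustrative sketch: since allowed_extractors entries are matched as regexes,
    # params={'allowed_extractors': ['generic', 'youtube.*']} would load only the
    # generic extractor plus every extractor whose name matches 'youtube.*'.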

    def add_post_processor(self, pp, when='post_process'):
        """Add a PostProcessor object to the end of the chain."""
        assert when in POSTPROCESS_WHEN, f'Invalid when={when}'
        self._pps[when].append(pp)
        pp.set_downloader(self)
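
    # Illustrative sketch (MyCustomPP is a hypothetical PostProcessor subclass):
    #     ydl.add_post_processor(MyCustomPP(), when='post_process')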

    def add_post_hook(self, ph):
        """Add the post hook"""
        self._post_hooks.append(ph)

    def add_progress_hook(self, ph):
        """Add the download progress hook"""
        self._progress_hooks.append(ph)

    def add_postprocessor_hook(self, ph):
        """Add the postprocessing progress hook"""
        self._postprocessor_hooks.append(ph)
        for pps in self._pps.values():
            for pp in pps:
                pp.add_progress_hook(ph)

    def _bidi_workaround(self, message):
        if not hasattr(self, '_output_channel'):
            return message

        assert hasattr(self, '_output_process')
        assert isinstance(message, str)
        line_count = message.count('\n') + 1
        self._output_process.stdin.write((message + '\n').encode())
        self._output_process.stdin.flush()
        res = ''.join(self._output_channel.readline().decode()
                      for _ in range(line_count))
        return res[:-len('\n')]

    def _write_string(self, message, out=None, only_once=False):
        if only_once:
            if message in self._printed_messages:
                return
            self._printed_messages.add(message)
        write_string(message, out=out, encoding=self.params.get('encoding'))

    def to_stdout(self, message, skip_eol=False, quiet=None):
        """Print message to stdout"""
        if quiet is not None:
            self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument quiet. '
                                     'Use "YoutubeDL.to_screen" instead')
        if skip_eol is not False:
            self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument skip_eol. '
                                     'Use "YoutubeDL.to_screen" instead')
        self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.out)

    def to_screen(self, message, skip_eol=False, quiet=None, only_once=False):
        """Print message to screen if not in quiet mode"""
        if self.params.get('logger'):
            self.params['logger'].debug(message)
            return
        if (self.params.get('quiet') if quiet is None else quiet) and not self.params.get('verbose'):
            return
        self._write_string(
            '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
            self._out_files.screen, only_once=only_once)

    def to_stderr(self, message, only_once=False):
        """Print message to stderr"""
        assert isinstance(message, str)
        if self.params.get('logger'):
            self.params['logger'].error(message)
        else:
            self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.error, only_once=only_once)

    def _send_console_code(self, code):
        if compat_os_name == 'nt' or not self._out_files.console:
            return
        self._write_string(code, self._out_files.console)

    def to_console_title(self, message):
        if not self.params.get('consoletitle', False):
            return
        message = remove_terminal_sequences(message)
        if compat_os_name == 'nt':
            if ctypes.windll.kernel32.GetConsoleWindow():
                # c_wchar_p() might not be necessary if `message` is
                # already of type unicode()
                ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        else:
            self._send_console_code(f'\033]0;{message}\007')

    def save_console_title(self):
        if not self.params.get('consoletitle') or self.params.get('simulate'):
            return
        self._send_console_code('\033[22;0t')  # Save the title on stack

    def restore_console_title(self):
        if not self.params.get('consoletitle') or self.params.get('simulate'):
            return
        self._send_console_code('\033[23;0t')  # Restore the title from stack

    def __enter__(self):
        self.save_console_title()
        return self

    def save_cookies(self):
        if self.params.get('cookiefile') is not None:
            self.cookiejar.save()

    def __exit__(self, *args):
        self.restore_console_title()
        self.close()

    def close(self):
        self.save_cookies()
        self._request_director.close()

    def trouble(self, message=None, tb=None, is_error=True):
        """Determine action to take when a download problem appears.

        Depending on whether the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        @param tb          If given, is additional traceback information
        @param is_error    Whether to raise error according to ignoreerrors
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += encode_compat_str(traceback.format_exc())
                else:
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            if tb:
                self.to_stderr(tb)
        if not is_error:
            return
        if not self.params.get('ignoreerrors'):
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1

    Styles = Namespace(
        HEADERS='yellow',
        EMPHASIS='light blue',
        FILENAME='green',
        ID='green',
        DELIM='blue',
        ERROR='red',
        BAD_FORMAT='light red',
        WARNING='yellow',
        SUPPRESS='light black',
    )

    def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
        text = str(text)
        if test_encoding:
            original_text = text
            # handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711
            encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii'
            text = text.encode(encoding, 'ignore').decode(encoding)
            if fallback is not None and text != original_text:
                text = fallback
        return format_text(text, f) if allow_colors is True else text if fallback is None else fallback

    def _format_out(self, *args, **kwargs):
        return self._format_text(self._out_files.out, self._allow_colors.out, *args, **kwargs)

    def _format_screen(self, *args, **kwargs):
        return self._format_text(self._out_files.screen, self._allow_colors.screen, *args, **kwargs)

    def _format_err(self, *args, **kwargs):
        return self._format_text(self._out_files.error, self._allow_colors.error, *args, **kwargs)

    def report_warning(self, message, only_once=False):
        '''
        Print the message to stderr; it will be prefixed with 'WARNING:'.
        If stderr is a tty file, the 'WARNING:' will be colored.
        '''
        if self.params.get('logger') is not None:
            self.params['logger'].warning(message)
        else:
            if self.params.get('no_warnings'):
                return
            self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)

    def deprecation_warning(self, message, *, stacklevel=0):
        deprecation_warning(
            message, stacklevel=stacklevel + 1, printer=self.report_error, is_error=False)

    def deprecated_feature(self, message):
        if self.params.get('logger') is not None:
            self.params['logger'].warning(f'Deprecated Feature: {message}')
        self.to_stderr(f'{self._format_err("Deprecated Feature:", self.Styles.ERROR)} {message}', True)

    def report_error(self, message, *args, **kwargs):
        '''
        Do the same as trouble, but prefixes the message with 'ERROR:', colored
        in red if stderr is a tty file.
        '''
        self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)

    def write_debug(self, message, only_once=False):
        '''Log a debug message, or print it to stderr'''
        if not self.params.get('verbose', False):
            return
        message = f'[debug] {message}'
        if self.params.get('logger'):
            self.params['logger'].debug(message)
        else:
            self.to_stderr(message, only_once)

    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        try:
            self.to_screen('[download] %s has already been downloaded' % file_name)
        except UnicodeEncodeError:
            self.to_screen('[download] The file has already been downloaded')

    def report_file_delete(self, file_name):
        """Report that existing file will be deleted."""
        try:
            self.to_screen('Deleting existing file %s' % file_name)
        except UnicodeEncodeError:
            self.to_screen('Deleting existing file')

    def raise_no_formats(self, info, forced=False, *, msg=None):
        has_drm = info.get('_has_drm')
        ignored, expected = self.params.get('ignore_no_formats_error'), bool(msg)
        msg = msg or (has_drm and 'This video is DRM protected') or 'No video formats found!'
        if forced or not ignored:
            raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
                                 expected=has_drm or ignored or expected)
        else:
            self.report_warning(msg)

    def parse_outtmpl(self):
        self.deprecation_warning('"YoutubeDL.parse_outtmpl" is deprecated and may be removed in a future version')
        self._parse_outtmpl()
        return self.params['outtmpl']

    def _parse_outtmpl(self):
        sanitize = IDENTITY
        if self.params.get('restrictfilenames'):  # Remove spaces in the default template
            sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')

        outtmpl = self.params.setdefault('outtmpl', {})
        if not isinstance(outtmpl, dict):
            self.params['outtmpl'] = outtmpl = {'default': outtmpl}
        outtmpl.update({k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items() if outtmpl.get(k) is None})

    def get_output_path(self, dir_type='', filename=None):
        paths = self.params.get('paths', {})
        assert isinstance(paths, dict), '"paths" parameter must be a dictionary'
        path = os.path.join(
            expand_path(paths.get('home', '').strip()),
            expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
            filename or '')
        return sanitize_path(path, force=self.params.get('windowsfilenames'))
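
    # Illustrative sketch: with params={'paths': {'home': '/media', 'temp': 'parts'}},
    # get_output_path('temp', 'clip.mp4') yields '/media/parts/clip.mp4'
    # (a relative 'temp' path is joined under 'home'; an absolute one replaces it).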

    @staticmethod
    def _outtmpl_expandpath(outtmpl):
        # expand_path translates '%%' into '%' and '$$' into '$',
        # which is not what we want, since we need to keep '%%' intact for
        # the template dict substitution step. Work around this with a
        # boundary-like separator hack.
        sep = ''.join(random.choices(string.ascii_letters, k=32))
        outtmpl = outtmpl.replace('%%', f'%{sep}%').replace('$$', f'${sep}$')

        # outtmpl should be expand_path'ed before template dict substitution
        # because meta fields may contain env variables we don't want to
        # be expanded. E.g. for outtmpl "%(title)s.%(ext)s" and
        # title "Hello $PATH", we don't want `$PATH` to be expanded.
        return expand_path(outtmpl).replace(sep, '')

    @staticmethod
    def escape_outtmpl(outtmpl):
        ''' Escape any remaining strings like %s, %abc% etc. '''
        return re.sub(
            STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
            lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
            outtmpl)
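
    # Illustrative sketch: escape_outtmpl('100% of %(title)s') == '100%% of %(title)s'
    # (a stray '%' is doubled, while valid template fields are left intact)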

    @classmethod
    def validate_outtmpl(cls, outtmpl):
        ''' @return None or Exception object '''
        outtmpl = re.sub(
            STR_FORMAT_RE_TMPL.format('[^)]*', '[ljhqBUDS]'),
            lambda mobj: f'{mobj.group(0)[:-1]}s',
            cls._outtmpl_expandpath(outtmpl))
        try:
            cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
            return None
        except ValueError as err:
            return err
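
    # Illustrative sketch: validate_outtmpl('%(title)s.%(ext)s') returns None (valid),
    # while validate_outtmpl('%(title)') returns the ValueError raised by the bad template.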

    @staticmethod
    def _copy_infodict(info_dict):
        info_dict = dict(info_dict)
        info_dict.pop('__postprocessors', None)
        info_dict.pop('__pending_error', None)
        return info_dict

    def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
        """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
        @param sanitize    Whether to sanitize the output as a filename.
                           For backward compatibility, a function can also be passed
        """

        info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

        info_dict = self._copy_infodict(info_dict)
        info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
            formatSeconds(info_dict['duration'], '-' if sanitize else ':')
            if info_dict.get('duration', None) is not None
            else None)
        info_dict['autonumber'] = int(self.params.get('autonumber_start', 1) - 1 + self._num_downloads)
        info_dict['video_autonumber'] = self._num_videos
        if info_dict.get('resolution') is None:
            info_dict['resolution'] = self.format_resolution(info_dict, default=None)

        # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
        # of %(field)s to %(field)0Nd for backward compatibility
        field_size_compat_map = {
            'playlist_index': number_of_digits(info_dict.get('__last_playlist_index') or 0),
            'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
            'autonumber': self.params.get('autonumber_size') or 5,
        }

        TMPL_DICT = {}
        EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljhqBUDS]'))
        MATH_FUNCTIONS = {
            '+': float.__add__,
            '-': float.__sub__,
        }
        # Field is of the form key1.key2...
        # where keys (except first) can be string, int, slice or "{field, ...}"
        FIELD_INNER_RE = r'(?:\w+|%(num)s|%(num)s?(?::%(num)s?){1,2})' % {'num': r'(?:-?\d+)'}
        FIELD_RE = r'\w*(?:\.(?:%(inner)s|{%(field)s(?:,%(field)s)*}))*' % {
            'inner': FIELD_INNER_RE,
            'field': rf'\w*(?:\.{FIELD_INNER_RE})*'
        }
        MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})'
        MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
        INTERNAL_FORMAT_RE = re.compile(rf'''(?xs)
            (?P<negate>-)?
            (?P<fields>{FIELD_RE})
            (?P<maths>(?:{MATH_OPERATORS_RE}{MATH_FIELD_RE})*)
            (?:>(?P<strf_format>.+?))?
            (?P<remaining>
                (?P<alternate>(?<!\\),[^|&)]+)?
                (?:&(?P<replacement>.*?))?
                (?:\|(?P<default>.*?))?
            )$''')

        def _traverse_infodict(fields):
            fields = [f for x in re.split(r'\.({.+?})\.?', fields)
                      for f in ([x] if x.startswith('{') else x.split('.'))]
            for i in (0, -1):
                if fields and not fields[i]:
                    fields.pop(i)

            for i, f in enumerate(fields):
                if not f.startswith('{'):
                    continue
                assert f.endswith('}'), f'No closing brace for {f} in {fields}'
                fields[i] = {k: k.split('.') for k in f[1:-1].split(',')}

            return traverse_obj(info_dict, fields, is_user_input=True, traverse_string=True)

        def get_value(mdict):
            # Object traversal
            value = _traverse_infodict(mdict['fields'])
            # Negative
            if mdict['negate']:
                value = float_or_none(value)
                if value is not None:
                    value *= -1
            # Do maths
            offset_key = mdict['maths']
            if offset_key:
                value = float_or_none(value)
                operator = None
                while offset_key:
                    item = re.match(
                        MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                        offset_key).group(0)
                    offset_key = offset_key[len(item):]
                    if operator is None:
                        operator = MATH_FUNCTIONS[item]
                        continue
                    item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                    offset = float_or_none(item)
                    if offset is None:
                        offset = float_or_none(_traverse_infodict(item))
                    try:
                        value = operator(value, multiplier * offset)
                    except (TypeError, ZeroDivisionError):
                        return None
            # Datetime formatting
            if mdict['strf_format']:
                value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

            # XXX: Workaround for https://github.com/yt-dlp/yt-dlp/issues/4485
            if sanitize and value == '':
                value = None
            return value

        na = self.params.get('outtmpl_na_placeholder', 'NA')

        def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
            return sanitize_filename(str(value), restricted=restricted, is_id=(
                bool(re.search(r'(^|[_.])id(\.|$)', key))
                if 'filename-sanitization' in self.params['compat_opts']
                else NO_DEFAULT))

        sanitizer = sanitize if callable(sanitize) else filename_sanitizer
        sanitize = bool(sanitize)

        def _dumpjson_default(obj):
            if isinstance(obj, (set, LazyList)):
                return list(obj)
            return repr(obj)

        class _ReplacementFormatter(string.Formatter):
            def get_field(self, field_name, args, kwargs):
                if field_name.isdigit():
                    return args[0], -1
                raise ValueError('Unsupported field')

        replacement_formatter = _ReplacementFormatter()

        def create_key(outer_mobj):
            if not outer_mobj.group('has_key'):
                return outer_mobj.group(0)
            key = outer_mobj.group('key')
            mobj = re.match(INTERNAL_FORMAT_RE, key)
            value, replacement, default, last_field = None, None, na, ''
            while mobj:
                mobj = mobj.groupdict()
                default = mobj['default'] if mobj['default'] is not None else default
                value = get_value(mobj)
                last_field, replacement = mobj['fields'], mobj['replacement']
                if value is None and mobj['alternate']:
                    mobj = re.match(INTERNAL_FORMAT_RE, mobj['remaining'][1:])
                else:
                    break

            if None not in (value, replacement):
                try:
                    value = replacement_formatter.format(replacement, value)
                except ValueError:
                    value, default = None, na

            fmt = outer_mobj.group('format')
            if fmt == 's' and last_field in field_size_compat_map.keys() and isinstance(value, int):
                fmt = f'0{field_size_compat_map[last_field]:d}d'

            flags = outer_mobj.group('conversion') or ''
            str_fmt = f'{fmt[:-1]}s'
            if value is None:
                value, fmt = default, 's'
            elif fmt[-1] == 'l':  # list
                delim = '\n' if '#' in flags else ', '
                value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
            elif fmt[-1] == 'j':  # json
                value, fmt = json.dumps(
                    value, default=_dumpjson_default,
                    indent=4 if '#' in flags else None, ensure_ascii='+' not in flags), str_fmt
            elif fmt[-1] == 'h':  # html
                value, fmt = escapeHTML(str(value)), str_fmt
            elif fmt[-1] == 'q':  # quoted
                value = map(str, variadic(value) if '#' in flags else [value])
                value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
            elif fmt[-1] == 'B':  # bytes
                value = f'%{str_fmt}'.encode() % str(value).encode()
                value, fmt = value.decode('utf-8', 'ignore'), 's'
            elif fmt[-1] == 'U':  # unicode normalized
                value, fmt = unicodedata.normalize(
                    # "+" = compatibility equivalence, "#" = NFD
                    'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
                    value), str_fmt
            elif fmt[-1] == 'D':  # decimal suffix
                num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
                value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
                                              factor=1024 if '#' in flags else 1000)
            elif fmt[-1] == 'S':  # filename sanitization
                value, fmt = filename_sanitizer(last_field, value, restricted='#' in flags), str_fmt
            elif fmt[-1] == 'c':
                if value:
                    value = str(value)[0]
                else:
                    fmt = str_fmt
            elif fmt[-1] not in 'rsa':  # numeric
                value = float_or_none(value)
                if value is None:
                    value, fmt = default, 's'

            if sanitize:
                # If value is an object, sanitize might convert it to a string,
                # so we convert it to repr first
                if fmt[-1] == 'r':
                    value, fmt = repr(value), str_fmt
                elif fmt[-1] == 'a':
                    value, fmt = ascii(value), str_fmt
                if fmt[-1] in 'csra':
                    value = sanitizer(last_field, value)

            key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
            TMPL_DICT[key] = value
            return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

        return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT

    def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
        outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
        return self.escape_outtmpl(outtmpl) % info_dict
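
    # Illustrative sketch: with info_dict={'title': 'Demo', 'id': 'x1', 'ext': 'mp4'},
    # evaluate_outtmpl('%(title)s [%(id)s].%(ext)s', info_dict) yields 'Demo [x1].mp4'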

    def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None):
        assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive'
        if outtmpl is None:
            outtmpl = self.params['outtmpl'].get(tmpl_type or 'default', self.params['outtmpl']['default'])
        try:
            outtmpl = self._outtmpl_expandpath(outtmpl)
            filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
            if not filename:
                return None

            if tmpl_type in ('', 'temp'):
                final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')
                if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):
                    filename = replace_extension(filename, ext, final_ext)
            elif tmpl_type:
                force_ext = OUTTMPL_TYPES[tmpl_type]
                if force_ext:
                    filename = replace_extension(filename, force_ext, info_dict.get('ext'))

            # https://github.com/blackjack4494/youtube-dlc/issues/85
            trim_file_name = self.params.get('trim_file_name', False)
            if trim_file_name:
                no_ext, *ext = filename.rsplit('.', 2)
                filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')

            return filename
        except ValueError as err:
            self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
            return None

    def prepare_filename(self, info_dict, dir_type='', *, outtmpl=None, warn=False):
        """Generate the output filename"""
        if outtmpl:
            assert not dir_type, 'outtmpl and dir_type are mutually exclusive'
            dir_type = None
        filename = self._prepare_filename(info_dict, tmpl_type=dir_type, outtmpl=outtmpl)
        if not filename and dir_type not in ('', 'temp'):
            return ''

        if warn:
            if not self.params.get('paths'):
                pass
            elif filename == '-':
                self.report_warning('--paths is ignored when outputting to stdout', only_once=True)
            elif os.path.isabs(filename):
                self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
        if filename == '-' or not filename:
            return filename

        return self.get_output_path(dir_type, filename)
    def _match_entry(self, info_dict, incomplete=False, silent=False):
        """Returns None if the file should be downloaded"""
        _type = 'video' if 'playlist-match-filter' in self.params['compat_opts'] else info_dict.get('_type', 'video')
        assert incomplete or _type == 'video', 'Only video result can be considered complete'

        video_title = info_dict.get('title', info_dict.get('id', 'entry'))

        def check_filter():
            if _type in ('playlist', 'multi_video'):
                return
            elif _type in ('url', 'url_transparent') and not try_call(
                    lambda: self.get_info_extractor(info_dict['ie_key']).is_single_video(info_dict['url'])):
                return

            if 'title' in info_dict:
                # This can happen when we're just evaluating the playlist
                title = info_dict['title']
                matchtitle = self.params.get('matchtitle', False)
                if matchtitle:
                    if not re.search(matchtitle, title, re.IGNORECASE):
                        return '"' + title + '" title did not match pattern "' + matchtitle + '"'
                rejecttitle = self.params.get('rejecttitle', False)
                if rejecttitle:
                    if re.search(rejecttitle, title, re.IGNORECASE):
                        return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'

            date = info_dict.get('upload_date')
            if date is not None:
                dateRange = self.params.get('daterange', DateRange())
                if date not in dateRange:
                    return f'{date_from_str(date).isoformat()} upload date is not in range {dateRange}'
            view_count = info_dict.get('view_count')
            if view_count is not None:
                min_views = self.params.get('min_views')
                if min_views is not None and view_count < min_views:
                    return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
                max_views = self.params.get('max_views')
                if max_views is not None and view_count > max_views:
                    return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
            if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
                return 'Skipping "%s" because it is age restricted' % video_title

            match_filter = self.params.get('match_filter')
            if match_filter is None:
                return None

            cancelled = None
            try:
                try:
                    ret = match_filter(info_dict, incomplete=incomplete)
                except TypeError:
                    # For backward compatibility
                    ret = None if incomplete else match_filter(info_dict)
            except DownloadCancelled as err:
                if err.msg is not NO_DEFAULT:
                    raise
                ret, cancelled = err.msg, err

            if ret is NO_DEFAULT:
                while True:
                    filename = self._format_screen(self.prepare_filename(info_dict), self.Styles.FILENAME)
                    reply = input(self._format_screen(
                        f'Download "{filename}"? (Y/n): ', self.Styles.EMPHASIS)).lower().strip()
                    if reply in {'y', ''}:
                        return None
                    elif reply == 'n':
                        if cancelled:
                            raise type(cancelled)(f'Skipping {video_title}')
                        return f'Skipping {video_title}'
            return ret

        if self.in_download_archive(info_dict):
            reason = ''.join((
                format_field(info_dict, 'id', f'{self._format_screen("%s", self.Styles.ID)}: '),
                format_field(info_dict, 'title', f'{self._format_screen("%s", self.Styles.EMPHASIS)} '),
                'has already been recorded in the archive'))
            break_opt, break_err = 'break_on_existing', ExistingVideoReached
        else:
            try:
                reason = check_filter()
            except DownloadCancelled as e:
                reason, break_opt, break_err = e.msg, 'match_filter', type(e)
            else:
                break_opt, break_err = 'break_on_reject', RejectedVideoReached
        if reason is not None:
            if not silent:
                self.to_screen('[download] ' + reason)
            if self.params.get(break_opt, False):
                raise break_err()
        return reason
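    # Illustrative sketch (not in the original lines): `match_filter` may be any
    # callable taking (info_dict, *, incomplete) and returning None (download),
    # a string reason (skip), or NO_DEFAULT (prompt interactively). The public
    # helper `yt_dlp.utils.match_filter_func` builds one from --match-filters syntax:
    #
    #   from yt_dlp.utils import match_filter_func
    #   ydl = YoutubeDL({'match_filter': match_filter_func('duration < 300 & !is_live')})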
    @staticmethod
    def add_extra_info(info_dict, extra_info):
        '''Set the keys from extra_info in info dict if they are missing'''
        for key, value in extra_info.items():
            info_dict.setdefault(key, value)
    def extract_info(self, url, download=True, ie_key=None, extra_info=None,
                     process=True, force_generic_extractor=False):
        """
        Extract and return the information dictionary of the URL

        Arguments:
        @param url          URL to extract

        Keyword arguments:
        @param download     Whether to download videos
        @param process      Whether to resolve all unresolved references (URLs, playlist items).
                            Must be True for download to work
        @param ie_key       Use only the extractor with this key

        @param extra_info   Dictionary containing the extra values to add to the info (For internal use only)
        @param force_generic_extractor
                            Force using the generic extractor (Deprecated; use ie_key='Generic')
        """

        if extra_info is None:
            extra_info = {}

        if not ie_key and force_generic_extractor:
            ie_key = 'Generic'

        if ie_key:
            ies = {ie_key: self._ies[ie_key]} if ie_key in self._ies else {}
        else:
            ies = self._ies

        for key, ie in ies.items():
            if not ie.suitable(url):
                continue

            if not ie.working():
                self.report_warning('The program functionality for this site has been marked as broken, '
                                    'and will probably not work.')

            temp_id = ie.get_temp_id(url)
            if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': key}):
                self.to_screen(f'[download] {self._format_screen(temp_id, self.Styles.ID)}: '
                               'has already been recorded in the archive')
                if self.params.get('break_on_existing', False):
                    raise ExistingVideoReached()
                break
            return self.__extract_info(url, self.get_info_extractor(key), download, extra_info, process)
        else:
            extractors_restricted = self.params.get('allowed_extractors') not in (None, ['default'])
            self.report_error(f'No suitable extractor{format_field(ie_key, None, " (%s)")} found for URL {url}',
                              tb=False if extractors_restricted else None)
    def _handle_extraction_exceptions(func):
        @functools.wraps(func)
        def wrapper(self, *args, **kwargs):
            while True:
                try:
                    return func(self, *args, **kwargs)
                except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
                    raise
                except ReExtractInfo as e:
                    if e.expected:
                        self.to_screen(f'{e}; Re-extracting data')
                    else:
                        self.to_stderr('\r')
                        self.report_warning(f'{e}; Re-extracting data')
                    continue
                except GeoRestrictedError as e:
                    msg = e.msg
                    if e.countries:
                        msg += '\nThis video is available in %s.' % ', '.join(
                            map(ISO3166Utils.short2full, e.countries))
                    msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to work around.'
                    self.report_error(msg)
                except ExtractorError as e:  # An error we somewhat expected
                    self.report_error(str(e), e.format_traceback())
                except Exception as e:
                    if self.params.get('ignoreerrors'):
                        self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
                    else:
                        raise
                break
        return wrapper
    def _wait_for_video(self, ie_result={}):
        if (not self.params.get('wait_for_video')
                or ie_result.get('_type', 'video') != 'video'
                or ie_result.get('formats') or ie_result.get('url')):
            return

        format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
        last_msg = ''

        def progress(msg):
            nonlocal last_msg
            full_msg = f'{msg}\n'
            if not self.params.get('noprogress'):
                full_msg = msg + ' ' * (len(last_msg) - len(msg)) + '\r'
            elif last_msg:
                return
            self.to_screen(full_msg, skip_eol=True)
            last_msg = msg

        min_wait, max_wait = self.params.get('wait_for_video')
        diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
        if diff is None and ie_result.get('live_status') == 'is_upcoming':
            diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0)
            self.report_warning('Release time of video is not known')
        elif ie_result and (diff or 0) <= 0:
            self.report_warning('Video should already be available according to extracted info')
        diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
        self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')

        wait_till = time.time() + diff
        try:
            while True:
                diff = wait_till - time.time()
                if diff <= 0:
                    progress('')
                    raise ReExtractInfo('[wait] Wait period ended', expected=True)
                progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
                time.sleep(1)
        except KeyboardInterrupt:
            progress('')
            raise ReExtractInfo('[wait] Interrupted by user', expected=True)
        except BaseException as e:
            if not isinstance(e, ReExtractInfo):
                self.to_screen('')
            raise
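    # Illustrative sketch (not in the original lines): `wait_for_video` is a
    # (min_wait, max_wait) tuple of seconds, mirroring --wait-for-video:
    #
    #   ydl = YoutubeDL({'wait_for_video': (60, 600)})  # poll between 1 and 10 minutes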
    def _load_cookies(self, data, *, autoscope=True):
        """Loads cookies from a `Cookie` header

        This tries to work around the security vulnerability of passing cookies to every domain.
        See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj

        @param data         The Cookie header as string to load the cookies from
        @param autoscope    If `False`, scope cookies using Set-Cookie syntax and error for cookie without domains
                            If `True`, save cookies for later to be stored in the jar with a limited scope
                            If a URL, save cookies in the jar with the domain of the URL
        """
        for cookie in LenientSimpleCookie(data).values():
            if autoscope and any(cookie.values()):
                raise ValueError('Invalid syntax in Cookie Header')

            domain = cookie.get('domain') or ''
            expiry = cookie.get('expires')
            if expiry == '':  # 0 is valid
                expiry = None
            prepared_cookie = http.cookiejar.Cookie(
                cookie.get('version') or 0, cookie.key, cookie.value, None, False,
                domain, True, True, cookie.get('path') or '', bool(cookie.get('path')),
                cookie.get('secure') or False, expiry, False, None, None, {})

            if domain:
                self.cookiejar.set_cookie(prepared_cookie)
            elif autoscope is True:
                self.deprecated_feature(
                    'Passing cookies as a header is a potential security risk; '
                    'they will be scoped to the domain of the downloaded urls. '
                    'Please consider loading cookies from a file or browser instead.')
                self.__header_cookies.append(prepared_cookie)
            elif autoscope:
                self.report_warning(
                    'The extractor result contains an unscoped cookie as an HTTP header. '
                    f'If you are using yt-dlp with an input URL{bug_reports_message(before=",")}',
                    only_once=True)
                self._apply_header_cookies(autoscope, [prepared_cookie])
            else:
                self.report_error('Unscoped cookies are not allowed; please specify some sort of scoping',
                                  tb=False, is_error=False)
    def _apply_header_cookies(self, url, cookies=None):
        """Applies stray header cookies to the provided url

        This loads header cookies and scopes them to the domain provided in `url`.
        While this is not ideal, it helps reduce the risk of them being sent
        to an unintended destination while mostly maintaining compatibility.
        """
        parsed = urllib.parse.urlparse(url)
        if not parsed.hostname:
            return

        for cookie in map(copy.copy, cookies or self.__header_cookies):
            cookie.domain = f'.{parsed.hostname}'
            self.cookiejar.set_cookie(cookie)
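    # Illustrative sketch (not in the original lines; values are hypothetical):
    # a cookie passed as a raw header is held back and re-scoped per downloaded URL.
    #
    #   ydl = YoutubeDL({'http_headers': {'Cookie': 'sessionid=abc123'}})
    #   # When a URL on media.example.com is processed, `sessionid` is stored in the
    #   # jar with domain '.media.example.com' instead of being sent to every host.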
    @_handle_extraction_exceptions
    def __extract_info(self, url, ie, download, extra_info, process):
        self._apply_header_cookies(url)

        try:
            ie_result = ie.extract(url)
        except UserNotLive as e:
            if process:
                if self.params.get('wait_for_video'):
                    self.report_warning(e)
                self._wait_for_video()
            raise
        if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
            self.report_warning(f'Extractor {ie.IE_NAME} returned nothing{bug_reports_message()}')
            return
        if isinstance(ie_result, list):
            # Backwards compatibility: old IE result format
            ie_result = {
                '_type': 'compat_list',
                'entries': ie_result,
            }
        if extra_info.get('original_url'):
            ie_result.setdefault('original_url', extra_info['original_url'])
        self.add_default_extra_info(ie_result, ie, url)
        if process:
            self._wait_for_video(ie_result)
            return self.process_ie_result(ie_result, download, extra_info)
        else:
            return ie_result
    def add_default_extra_info(self, ie_result, ie, url):
        if url is not None:
            self.add_extra_info(ie_result, {
                'webpage_url': url,
                'original_url': url,
            })
        webpage_url = ie_result.get('webpage_url')
        if webpage_url:
            self.add_extra_info(ie_result, {
                'webpage_url_basename': url_basename(webpage_url),
                'webpage_url_domain': get_domain(webpage_url),
            })
        if ie is not None:
            self.add_extra_info(ie_result, {
                'extractor': ie.IE_NAME,
                'extractor_key': ie.ie_key(),
            })
    def process_ie_result(self, ie_result, download=True, extra_info=None):
        """
        Take the result of the ie (may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download'.
        Returns the resolved ie_result.
        """
        if extra_info is None:
            extra_info = {}
        result_type = ie_result.get('_type', 'video')

        if result_type in ('url', 'url_transparent'):
            ie_result['url'] = sanitize_url(
                ie_result['url'], scheme='http' if self.params.get('prefer_insecure') else 'https')
            if ie_result.get('original_url') and not extra_info.get('original_url'):
                extra_info = {'original_url': ie_result['original_url'], **extra_info}

            extract_flat = self.params.get('extract_flat', False)
            if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
                    or extract_flat is True):
                info_copy = ie_result.copy()
                ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
                if ie and not ie_result.get('id'):
                    info_copy['id'] = ie.get_temp_id(ie_result['url'])
                self.add_default_extra_info(info_copy, ie, ie_result['url'])
                self.add_extra_info(info_copy, extra_info)
                info_copy, _ = self.pre_process(info_copy)
                self._fill_common_fields(info_copy, False)
                self.__forced_printings(info_copy)
                self._raise_pending_errors(info_copy)
                if self.params.get('force_write_download_archive', False):
                    self.record_download_archive(info_copy)
                return ie_result

        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            ie_result = self.process_video_result(ie_result, download=download)
            self._raise_pending_errors(ie_result)
            additional_urls = (ie_result or {}).get('additional_urls')
            if additional_urls:
                # TODO: Improve MetadataParserPP to allow setting a list
                if isinstance(additional_urls, str):
                    additional_urls = [additional_urls]
                self.to_screen(
                    '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
                self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
                ie_result['additional_entries'] = [
                    self.extract_info(
                        url, download, extra_info=extra_info,
                        force_generic_extractor=self.params.get('force_generic_extractor'))
                    for url in additional_urls
                ]
            return ie_result
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            return self.extract_info(
                ie_result['url'], download,
                ie_key=ie_result.get('ie_key'),
                extra_info=extra_info)
        elif result_type == 'url_transparent':
            # Use the information from the embedding page
            info = self.extract_info(
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)

            # extract_info may return None when ignoreerrors is enabled and
            # extraction failed with an error, don't crash and return early
            # in this case
            if not info:
                return info

            exempted_fields = {'_type', 'url', 'ie_key'}
            if not ie_result.get('section_end') and ie_result.get('section_start') is None:
                # For video clips, the id etc of the clip extractor should be used
                exempted_fields |= {'id', 'extractor', 'extractor_key'}

            new_result = info.copy()
            new_result.update(filter_dict(ie_result, lambda k, v: v is not None and k not in exempted_fields))

            # Extracted info may not be a video result (i.e.
            # info.get('_type', 'video') != video) but rather an url or
            # url_transparent. In such cases outer metadata (from ie_result)
            # should be propagated to inner one (info). For this to happen
            # _type of info should be overridden with url_transparent. This
            # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
            if new_result.get('_type') == 'url':
                new_result['_type'] = 'url_transparent'

            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type in ('playlist', 'multi_video'):
            # Protect from infinite recursion due to recursively nested playlists
            # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
            webpage_url = ie_result.get('webpage_url')  # Playlists may not have webpage_url
            if webpage_url and webpage_url in self._playlist_urls:
                self.to_screen(
                    '[download] Skipping already downloaded playlist: %s'
                    % (ie_result.get('title') or ie_result.get('id')))
                return

            self._playlist_level += 1
            self._playlist_urls.add(webpage_url)
            self._fill_common_fields(ie_result, False)
            self._sanitize_thumbnails(ie_result)
            try:
                return self.__process_playlist(ie_result, download)
            finally:
                self._playlist_level -= 1
                if not self._playlist_level:
                    self._playlist_urls.clear()
        elif result_type == 'compat_list':
            self.report_warning(
                'Extractor %s returned a compat_list result. '
                'It needs to be updated.' % ie_result.get('extractor'))

            def _fixup(r):
                self.add_extra_info(r, {
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'webpage_url_domain': get_domain(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                })
                return r
            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
            ]
            return ie_result
        else:
            raise Exception('Invalid result type: %s' % result_type)
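    # Illustrative sketch (not in the original lines; values are hypothetical):
    # the `_type` values handled above. A minimal `url_transparent` result defers
    # extraction while overriding metadata from the embedding page:
    #
    #   {'_type': 'url_transparent', 'url': 'https://example.com/v/1',
    #    'ie_key': 'Generic', 'title': 'Title from the embedding page'}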
    def _ensure_dir_exists(self, path):
        return make_dir(path, self.report_error)
    @staticmethod
    def _playlist_infodict(ie_result, strict=False, **kwargs):
        info = {
            'playlist_count': ie_result.get('playlist_count'),
            'playlist': ie_result.get('title') or ie_result.get('id'),
            'playlist_id': ie_result.get('id'),
            'playlist_title': ie_result.get('title'),
            'playlist_uploader': ie_result.get('uploader'),
            'playlist_uploader_id': ie_result.get('uploader_id'),
            **kwargs,
        }
        if strict:
            return info
        if ie_result.get('webpage_url'):
            info.update({
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'webpage_url_domain': get_domain(ie_result['webpage_url']),
            })
        return {
            **info,
            'playlist_index': 0,
            '__last_playlist_index': max(ie_result.get('requested_entries') or (0, 0)),
            'extractor': ie_result['extractor'],
            'extractor_key': ie_result['extractor_key'],
        }
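    # Illustrative sketch (not in the original lines): the keys built here become
    # template fields for playlist items, e.g. in an output template:
    #
    #   '%(playlist_title)s/%(playlist_index)03d - %(title)s.%(ext)s'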
    def __process_playlist(self, ie_result, download):
        """Process each entry in the playlist"""
        assert ie_result['_type'] in ('playlist', 'multi_video')

        common_info = self._playlist_infodict(ie_result, strict=True)
        title = common_info.get('playlist') or '<Untitled>'
        if self._match_entry(common_info, incomplete=True) is not None:
            return
        self.to_screen(f'[download] Downloading {ie_result["_type"]}: {title}')

        all_entries = PlaylistEntries(self, ie_result)
        entries = orderedSet(all_entries.get_requested_items(), lazy=True)

        lazy = self.params.get('lazy_playlist')
        if lazy:
            resolved_entries, n_entries = [], 'N/A'
            ie_result['requested_entries'], ie_result['entries'] = None, None
        else:
            entries = resolved_entries = list(entries)
            n_entries = len(resolved_entries)
            ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*resolved_entries)) or ([], [])
        if not ie_result.get('playlist_count'):
            # Better to do this after potentially exhausting entries
            ie_result['playlist_count'] = all_entries.get_full_count()

        extra = self._playlist_infodict(ie_result, n_entries=int_or_none(n_entries))
        ie_copy = collections.ChainMap(ie_result, extra)

        _infojson_written = False
        write_playlist_files = self.params.get('allow_playlist_files', True)
        if write_playlist_files and self.params.get('list_thumbnails'):
            self.list_thumbnails(ie_result)
        if write_playlist_files and not self.params.get('simulate'):
            _infojson_written = self._write_info_json(
                'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
            if _infojson_written is None:
                return
            if self._write_description('playlist', ie_result,
                                       self.prepare_filename(ie_copy, 'pl_description')) is None:
                return
            # TODO: This should be passed to ThumbnailsConvertor if necessary
            self._write_thumbnails('playlist', ie_result, self.prepare_filename(ie_copy, 'pl_thumbnail'))

        if lazy:
            if self.params.get('playlistreverse') or self.params.get('playlistrandom'):
                self.report_warning('playlistreverse and playlistrandom are not supported with lazy_playlist', only_once=True)
        elif self.params.get('playlistreverse'):
            entries.reverse()
        elif self.params.get('playlistrandom'):
            random.shuffle(entries)

        self.to_screen(f'[{ie_result["extractor"]}] Playlist {title}: Downloading {n_entries} items'
                       f'{format_field(ie_result, "playlist_count", " of %s")}')

        keep_resolved_entries = self.params.get('extract_flat') != 'discard'
        if self.params.get('extract_flat') == 'discard_in_playlist':
            keep_resolved_entries = ie_result['_type'] != 'playlist'
        if keep_resolved_entries:
            self.write_debug('The information of all playlist entries will be held in memory')

        failures = 0
        max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
        for i, (playlist_index, entry) in enumerate(entries):
            if lazy:
                resolved_entries.append((playlist_index, entry))
            if not entry:
                continue

            entry['__x_forwarded_for_ip'] = ie_result.get('__x_forwarded_for_ip')
            if not lazy and 'playlist-index' in self.params['compat_opts']:
                playlist_index = ie_result['requested_entries'][i]

            entry_copy = collections.ChainMap(entry, {
                **common_info,
                'n_entries': int_or_none(n_entries),
                'playlist_index': playlist_index,
                'playlist_autonumber': i + 1,
            })

            if self._match_entry(entry_copy, incomplete=True) is not None:
                # For compatibility with youtube-dl. See https://github.com/yt-dlp/yt-dlp/issues/4369
                resolved_entries[i] = (playlist_index, NO_DEFAULT)
                continue

            self.to_screen('[download] Downloading item %s of %s' % (
                self._format_screen(i + 1, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))

            entry_result = self.__process_iterable_entry(entry, download, collections.ChainMap({
                'playlist_index': playlist_index,
                'playlist_autonumber': i + 1,
            }, extra))
            if not entry_result:
                failures += 1
            if failures >= max_failures:
                self.report_error(
                    f'Skipping the remaining entries in playlist "{title}" since {failures} items failed extraction')
                break
            if keep_resolved_entries:
                resolved_entries[i] = (playlist_index, entry_result)

        # Update with processed data
        ie_result['entries'] = [e for _, e in resolved_entries if e is not NO_DEFAULT]
        ie_result['requested_entries'] = [i for i, e in resolved_entries if e is not NO_DEFAULT]
        if ie_result['requested_entries'] == try_call(lambda: list(range(1, ie_result['playlist_count'] + 1))):
            # Do not set for full playlist
            ie_result.pop('requested_entries')

        # Write the updated info to json
        if _infojson_written is True and self._write_info_json(
                'updated playlist', ie_result,
                self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
            return

        ie_result = self.run_all_pps('playlist', ie_result)
        self.to_screen(f'[download] Finished downloading playlist: {title}')
        return ie_result
    @_handle_extraction_exceptions
    def __process_iterable_entry(self, entry, download, extra_info):
        return self.process_ie_result(
            entry, download=download, extra_info=extra_info)
    def _build_format_filter(self, filter_spec):
        " Returns a function to filter the formats according to the filter_spec "

        OPERATORS = {
            '<': operator.lt,
            '<=': operator.le,
            '>': operator.gt,
            '>=': operator.ge,
            '=': operator.eq,
            '!=': operator.ne,
        }
        operator_rex = re.compile(r'''(?x)\s*
            (?P<key>[\w.-]+)\s*
            (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
            (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
            ''' % '|'.join(map(re.escape, OPERATORS.keys())))
        m = operator_rex.fullmatch(filter_spec)
        if m:
            try:
                comparison_value = int(m.group('value'))
            except ValueError:
                comparison_value = parse_filesize(m.group('value'))
                if comparison_value is None:
                    comparison_value = parse_filesize(m.group('value') + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid value %r in format specification %r' % (
                            m.group('value'), filter_spec))
            op = OPERATORS[m.group('op')]

        if not m:
            STR_OPERATORS = {
                '=': operator.eq,
                '^=': lambda attr, value: attr.startswith(value),
                '$=': lambda attr, value: attr.endswith(value),
                '*=': lambda attr, value: value in attr,
                '~=': lambda attr, value: value.search(attr) is not None,
            }
            str_operator_rex = re.compile(r'''(?x)\s*
                (?P<key>[a-zA-Z0-9._-]+)\s*
                (?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?
                (?P<quote>["'])?
                (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
                (?(quote)(?P=quote))\s*
                ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
            m = str_operator_rex.fullmatch(filter_spec)
            if m:
                if m.group('op') == '~=':
                    comparison_value = re.compile(m.group('value'))
                else:
                    comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value'))
                str_op = STR_OPERATORS[m.group('op')]
                if m.group('negation'):
                    op = lambda attr, value: not str_op(attr, value)
                else:
                    op = str_op

        if not m:
            raise SyntaxError('Invalid filter specification %r' % filter_spec)

        def _filter(f):
            actual_value = f.get(m.group('key'))
            if actual_value is None:
                return m.group('none_inclusive')
            return op(actual_value, comparison_value)
        return _filter
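    # Illustrative sketch (not in the original lines): filter specs accepted above,
    # as used inside the [...] part of a format selector:
    #
    #   'filesize<100M'         # numeric comparison; K/M/G... suffixes are parsed
    #   'height>=720?'          # trailing '?' also keeps formats missing the field
    #   'ext=mp4'               # string equality; ^= $= *= ~= match prefix/suffix/substring/regex
    #   'format_note!*=drc'     # leading '!' on a string op negates it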
    def _check_formats(self, formats):
        for f in formats:
            self.to_screen('[info] Testing format %s' % f['format_id'])
            path = self.get_output_path('temp')
            if not self._ensure_dir_exists(f'{path}/'):
                continue
            temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
            temp_file.close()
            try:
                success, _ = self.dl(temp_file.name, f, test=True)
            except (DownloadError, OSError, ValueError) + network_exceptions:
                success = False
            finally:
                if os.path.exists(temp_file.name):
                    try:
                        os.remove(temp_file.name)
                    except OSError:
                        self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
            if success:
                yield f
            else:
                self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
    def _default_format_spec(self, info_dict, download=True):

        def can_merge():
            merger = FFmpegMergerPP(self)
            return merger.available and merger.can_merge()

        prefer_best = (
            not self.params.get('simulate')
            and download
            and (
                not can_merge()
                or info_dict.get('is_live') and not self.params.get('live_from_start')
                or self.params['outtmpl']['default'] == '-'))
        compat = (
            prefer_best
            or self.params.get('allow_multiple_audio_streams', False)
            or 'format-spec' in self.params['compat_opts'])

        return (
            'best/bestvideo+bestaudio' if prefer_best
            else 'bestvideo*+bestaudio/best' if not compat
            else 'bestvideo+bestaudio/best')
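    # Illustrative note (not in the original lines): with working ffmpeg and a
    # non-stdout target this returns 'bestvideo*+bestaudio/best'. When merging is
    # unavailable, the video is live without live_from_start, or output goes to
    # '-', pre-merged 'best' is preferred; the compat branch restores the
    # youtube-dl style 'bestvideo+bestaudio/best'.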
    def build_format_selector(self, format_spec):
        def syntax_error(note, start):
            message = (
                'Invalid format specification: '
                '{}\n\t{}\n\t{}^'.format(note, format_spec, ' ' * start[1]))
            return SyntaxError(message)

        PICKFIRST = 'PICKFIRST'
        MERGE = 'MERGE'
        SINGLE = 'SINGLE'
        GROUP = 'GROUP'
        FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

        allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
                                  'video': self.params.get('allow_multiple_video_streams', False)}

        def _parse_filter(tokens):
            filter_parts = []
            for type, string_, start, _, _ in tokens:
                if type == tokenize.OP and string_ == ']':
                    return ''.join(filter_parts)
                else:
                    filter_parts.append(string_)

        def _remove_unused_ops(tokens):
            # Remove operators that we don't use and join them with the surrounding strings.
            # E.g. 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
            ALLOWED_OPS = ('/', '+', ',', '(', ')')
            last_string, last_start, last_end, last_line = None, None, None, None
            for type, string_, start, end, line in tokens:
                if type == tokenize.OP and string_ == '[':
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string_, start, end, line
                    # everything inside brackets will be handled by _parse_filter
                    for type, string_, start, end, line in tokens:
                        yield type, string_, start, end, line
                        if type == tokenize.OP and string_ == ']':
                            break
                elif type == tokenize.OP and string_ in ALLOWED_OPS:
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string_, start, end, line
                elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                    if not last_string:
                        last_string = string_
                        last_start = start
                        last_end = end
                    else:
                        last_string += string_
            if last_string:
                yield tokenize.NAME, last_string, last_start, last_end, last_line

        def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
            selectors = []
            current_selector = None
            for type, string_, start, _, _ in tokens:
                # ENCODING is only defined in python 3.x
                if type == getattr(tokenize, 'ENCODING', None):
                    continue
                elif type in [tokenize.NAME, tokenize.NUMBER]:
                    current_selector = FormatSelector(SINGLE, string_, [])
                elif type == tokenize.OP:
                    if string_ == ')':
                        if not inside_group:
                            # ')' will be handled by the parentheses group
                            tokens.restore_last_token()
                        break
                    elif inside_merge and string_ in ['/', ',']:
                        tokens.restore_last_token()
                        break
                    elif inside_choice and string_ == ',':
                        tokens.restore_last_token()
                        break
                    elif string_ == ',':
                        if not current_selector:
                            raise syntax_error('"," must follow a format selector', start)
                        selectors.append(current_selector)
                        current_selector = None
                    elif string_ == '/':
                        if not current_selector:
                            raise syntax_error('"/" must follow a format selector', start)
                        first_choice = current_selector
                        second_choice = _parse_format_selection(tokens, inside_choice=True)
                        current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                    elif string_ == '[':
                        if not current_selector:
                            current_selector = FormatSelector(SINGLE, 'best', [])
                        format_filter = _parse_filter(tokens)
                        current_selector.filters.append(format_filter)
                    elif string_ == '(':
                        if current_selector:
                            raise syntax_error('Unexpected "("', start)
                        group = _parse_format_selection(tokens, inside_group=True)
                        current_selector = FormatSelector(GROUP, group, [])
                    elif string_ == '+':
                        if not current_selector:
                            raise syntax_error('Unexpected "+"', start)
                        selector_1 = current_selector
                        selector_2 = _parse_format_selection(tokens, inside_merge=True)
                        if not selector_2:
                            raise syntax_error('Expected a selector', start)
                        current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                    else:
                        raise syntax_error(f'Operator not recognized: "{string_}"', start)
                elif type == tokenize.ENDMARKER:
                    break
            if current_selector:
                selectors.append(current_selector)
            return selectors

        def _merge(formats_pair):
            format_1, format_2 = formats_pair

            formats_info = []
            formats_info.extend(format_1.get('requested_formats', (format_1,)))
            formats_info.extend(format_2.get('requested_formats', (format_2,)))

            if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
                get_no_more = {'video': False, 'audio': False}
                for (i, fmt_info) in enumerate(formats_info):
                    if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
                        formats_info.pop(i)
                        continue
                    for aud_vid in ['audio', 'video']:
                        if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
                            if get_no_more[aud_vid]:
                                formats_info.pop(i)
                                break
                            get_no_more[aud_vid] = True

            if len(formats_info) == 1:
                return formats_info[0]

            video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
            audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']

            the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
            the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None

            output_ext = get_compatible_ext(
                vcodecs=[f.get('vcodec') for f in video_fmts],
                acodecs=[f.get('acodec') for f in audio_fmts],
                vexts=[f['ext'] for f in video_fmts],
                aexts=[f['ext'] for f in audio_fmts],
                preferences=(try_call(lambda: self.params['merge_output_format'].split('/'))
                             or self.params.get('prefer_free_formats') and ('webm', 'mkv')))

            filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))

            new_dict = {
                'requested_formats': formats_info,
                'format': '+'.join(filtered('format')),
                'format_id': '+'.join(filtered('format_id')),
                'ext': output_ext,
                'protocol': '+'.join(map(determine_protocol, formats_info)),
                'language': '+'.join(orderedSet(filtered('language'))) or None,
                'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
                'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
                'tbr': sum(filtered('tbr', 'vbr', 'abr')),
            }

            if the_only_video:
                new_dict.update({
                    'width': the_only_video.get('width'),
                    'height': the_only_video.get('height'),
                    'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
                    'fps': the_only_video.get('fps'),
                    'dynamic_range': the_only_video.get('dynamic_range'),
                    'vcodec': the_only_video.get('vcodec'),
                    'vbr': the_only_video.get('vbr'),
                    'stretched_ratio': the_only_video.get('stretched_ratio'),
                    'aspect_ratio': the_only_video.get('aspect_ratio'),
                })

            if the_only_audio:
                new_dict.update({
                    'acodec': the_only_audio.get('acodec'),
                    'abr': the_only_audio.get('abr'),
                    'asr': the_only_audio.get('asr'),
                    'audio_channels': the_only_audio.get('audio_channels')
                })

            return new_dict

        def _check_formats(formats):
            if self.params.get('check_formats') == 'selected':
                yield from self._check_formats(formats)
                return
            elif (self.params.get('check_formats') is not None
                    or self.params.get('allow_unplayable_formats')):
                yield from formats
                return

            for f in formats:
                if f.get('has_drm'):
                    yield from self._check_formats([f])
                else:
                    yield f

        def _build_selector_function(selector):
            if isinstance(selector, list):  # ,
                fs = [_build_selector_function(s) for s in selector]

                def selector_function(ctx):
                    for f in fs:
                        yield from f(ctx)
                return selector_function

            elif selector.type == GROUP:  # ()
                selector_function = _build_selector_function(selector.selector)

            elif selector.type == PICKFIRST:  # /
                fs = [_build_selector_function(s) for s in selector.selector]

                def selector_function(ctx):
                    for f in fs:
                        picked_formats = list(f(ctx))
                        if picked_formats:
                            return picked_formats
                    return []

            elif selector.type == MERGE:  # +
                selector_1, selector_2 = map(_build_selector_function, selector.selector)

                def selector_function(ctx):
                    for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
                        yield _merge(pair)

            elif selector.type == SINGLE:  # atom
                format_spec = selector.selector or 'best'

                # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
                if format_spec == 'all':
                    def selector_function(ctx):
                        yield from _check_formats(ctx['formats'][::-1])
                elif format_spec == 'mergeall':
                    def selector_function(ctx):
                        formats = list(_check_formats(
                            f for f in ctx['formats'] if f.get('vcodec') != 'none' or f.get('acodec') != 'none'))
                        if not formats:
                            return
                        merged_format = formats[-1]
                        for f in formats[-2::-1]:
                            merged_format = _merge((merged_format, f))
                        yield merged_format

                else:
                    format_fallback, seperate_fallback, format_reverse, format_idx = False, None, True, 1
                    mobj = re.match(
                        r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
                        format_spec)
                    if mobj is not None:
                        format_idx = int_or_none(mobj.group('n'), default=1)
                        format_reverse = mobj.group('bw')[0] == 'b'
                        format_type = (mobj.group('type') or [None])[0]
                        not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
                        format_modified = mobj.group('mod') is not None

                        format_fallback = not format_type and not format_modified  # for b, w
                        _filter_f = (
                            (lambda f: f.get('%scodec' % format_type) != 'none')
                            if format_type and format_modified  # bv*, ba*, wv*, wa*
                            else (lambda f: f.get('%scodec' % not_format_type) == 'none')
                            if format_type  # bv, ba, wv, wa
                            else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                            if not format_modified  # b, w
                            else lambda f: True)  # b*, w*
                        filter_f = lambda f: _filter_f(f) and (
                            f.get('vcodec') != 'none' or f.get('acodec') != 'none')
                    else:
                        if format_spec in self._format_selection_exts['audio']:
                            filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
                        elif format_spec in self._format_selection_exts['video']:
                            filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
                            seperate_fallback = lambda f: f.get('ext') == format_spec and f.get('vcodec') != 'none'
                        elif format_spec in self._format_selection_exts['storyboards']:
                            filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
                        else:
                            filter_f = lambda f: f.get('format_id') == format_spec  # id

                    def selector_function(ctx):
                        formats = list(ctx['formats'])
                        matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                        if not matches:
                            if format_fallback and ctx['incomplete_formats']:
                                # for extractors with incomplete formats (audio only (soundcloud)
                                # or video only (imgur)) best/worst will fallback to
                                # best/worst {video,audio}-only format
                                matches = list(filter(lambda f: f.get('vcodec') != 'none' or f.get('acodec') != 'none', formats))
                            elif seperate_fallback and not ctx['has_merged_format']:
                                # for compatibility with youtube-dl when there is no pre-merged format
                                matches = list(filter(seperate_fallback, formats))
                        matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
                        try:
                            yield matches[format_idx - 1]
                        except LazyList.IndexError:
                            return

            filters = [self._build_format_filter(f) for f in selector.filters]

            def final_selector(ctx):
                ctx_copy = dict(ctx)
                for _filter in filters:
                    ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
                return selector_function(ctx_copy)
            return final_selector

        stream = io.BytesIO(format_spec.encode())
        try:
            tokens = list(_remove_unused_ops(tokenize.tokenize(stream.readline)))
        except tokenize.TokenError:
            raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

        class TokenIterator:
            def __init__(self, tokens):
                self.tokens = tokens
                self.counter = 0

            def __iter__(self):
                return self

            def __next__(self):
                if self.counter >= len(self.tokens):
                    raise StopIteration()
                value = self.tokens[self.counter]
                self.counter += 1
                return value

            next = __next__

            def restore_last_token(self):
                self.counter -= 1

        parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
        return _build_selector_function(parsed_selector)
    def _calc_headers(self, info_dict, load_cookies=False):
        res = HTTPHeaderDict(self.params['http_headers'], info_dict.get('http_headers'))
        clean_headers(res)

        if load_cookies:  # For --load-info-json
            self._load_cookies(res.get('Cookie'), autoscope=info_dict['url'])  # compat
            self._load_cookies(info_dict.get('cookies'), autoscope=False)
        # The `Cookie` header is removed to prevent leaks and unscoped cookies.
        # See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj
        res.pop('Cookie', None)
        cookies = self.cookiejar.get_cookies_for_url(info_dict['url'])
        if cookies:
            encoder = LenientSimpleCookie()
            values = []
            for cookie in cookies:
                _, value = encoder.value_encode(cookie.value)
                values.append(f'{cookie.name}={value}')
                if cookie.domain:
                    values.append(f'Domain={cookie.domain}')
                if cookie.path:
                    values.append(f'Path={cookie.path}')
                if cookie.secure:
                    values.append('Secure')
                if cookie.expires:
                    values.append(f'Expires={cookie.expires}')
                if cookie.version:
                    values.append(f'Version={cookie.version}')
            info_dict['cookies'] = '; '.join(values)

        if 'X-Forwarded-For' not in res:
            x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
            if x_forwarded_for_ip:
                res['X-Forwarded-For'] = x_forwarded_for_ip

        return res
    def _calc_cookies(self, url):
        self.deprecation_warning('"YoutubeDL._calc_cookies" is deprecated and may be removed in a future version')
        return self.cookiejar.get_cookie_header(url)
    def _sort_thumbnails(self, thumbnails):
        thumbnails.sort(key=lambda t: (
            t.get('preference') if t.get('preference') is not None else -1,
            t.get('width') if t.get('width') is not None else -1,
            t.get('height') if t.get('height') is not None else -1,
            t.get('id') if t.get('id') is not None else '',
            t.get('url')))
    def _sanitize_thumbnails(self, info_dict):
        thumbnails = info_dict.get('thumbnails')
        if thumbnails is None:
            thumbnail = info_dict.get('thumbnail')
            if thumbnail:
                info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
        if not thumbnails:
            return

        def check_thumbnails(thumbnails):
            for t in thumbnails:
                self.to_screen(f'[info] Testing thumbnail {t["id"]}')
                try:
                    self.urlopen(HEADRequest(t['url']))
                except network_exceptions as err:
                    self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
                    continue
                yield t

        self._sort_thumbnails(thumbnails)
        for i, t in enumerate(thumbnails):
            if t.get('id') is None:
                t['id'] = '%d' % i
            if t.get('width') and t.get('height'):
                t['resolution'] = '%dx%d' % (t['width'], t['height'])
            t['url'] = sanitize_url(t['url'])

        if self.params.get('check_formats') is True:
            info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
        else:
            info_dict['thumbnails'] = thumbnails
    def _fill_common_fields(self, info_dict, final=True):
        # TODO: move sanitization here
        if final:
            title = info_dict['fulltitle'] = info_dict.get('title')
            if not title:
                if title == '':
                    self.write_debug('Extractor gave empty title. Creating a generic title')
                else:
                    self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
                info_dict['title'] = f'{info_dict["extractor"].replace(":", "-")} video #{info_dict["id"]}'

        if info_dict.get('duration') is not None:
            info_dict['duration_string'] = formatSeconds(info_dict['duration'])

        for ts_key, date_key in (
                ('timestamp', 'upload_date'),
                ('release_timestamp', 'release_date'),
                ('modified_timestamp', 'modified_date'),
        ):
            if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
                # Working around out-of-range timestamp values (e.g. negative ones on Windows,
                # see http://bugs.python.org/issue1646728)
                with contextlib.suppress(ValueError, OverflowError, OSError):
                    upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
                    info_dict[date_key] = upload_date.strftime('%Y%m%d')

        live_keys = ('is_live', 'was_live')
        live_status = info_dict.get('live_status')
        if live_status is None:
            for key in live_keys:
                if info_dict.get(key) is False:
                    continue
                if info_dict.get(key):
                    live_status = key
                break
            if all(info_dict.get(key) is False for key in live_keys):
                live_status = 'not_live'
        if live_status:
            info_dict['live_status'] = live_status
            for key in live_keys:
                if info_dict.get(key) is None:
                    info_dict[key] = (live_status == key)
        if live_status == 'post_live':
            info_dict['was_live'] = True

        # Auto generate title fields corresponding to the *_number fields when missing
        # in order to always have clean titles. This is very common for TV series.
        for field in ('chapter', 'season', 'episode'):
            if final and info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
                info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
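    # Illustrative sketch (not in the original lines): the timestamp/date pairs
    # above derive e.g. upload_date from timestamp when only the latter is present:
    #
    #   datetime.datetime.utcfromtimestamp(1577836800).strftime('%Y%m%d')  # -> '20200101'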
    def _raise_pending_errors(self, info):
        err = info.pop('__pending_error', None)
        if err:
            self.report_error(err, tb=False)
    def sort_formats(self, info_dict):
        formats = self._get_formats(info_dict)
        formats.sort(key=FormatSorter(
            self, info_dict.get('_format_sort_fields') or []).calculate_preference)
    def process_video_result(self, info_dict, download=True):
        assert info_dict.get('_type', 'video') == 'video'
        self._num_videos += 1

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
        elif not info_dict.get('id'):
            raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])

        def report_force_conversion(field, field_not, conversion):
            self.report_warning(
                '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
                % (field, field_not, conversion))

        def sanitize_string_field(info, string_field):
            field = info.get(string_field)
            if field is None or isinstance(field, str):
                return
            report_force_conversion(string_field, 'a string', 'string')
            info[string_field] = str(field)

        def sanitize_numeric_fields(info):
            for numeric_field in self._NUMERIC_FIELDS:
                field = info.get(numeric_field)
                if field is None or isinstance(field, (int, float)):
                    continue
                report_force_conversion(numeric_field, 'numeric', 'int')
                info[numeric_field] = int_or_none(field)

        sanitize_string_field(info_dict, 'id')
        sanitize_numeric_fields(info_dict)
        if info_dict.get('section_end') and info_dict.get('section_start') is not None:
            info_dict['duration'] = round(info_dict['section_end'] - info_dict['section_start'], 3)
        if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):
            self.report_warning('"duration" field is negative, there is an error in extractor')

        chapters = info_dict.get('chapters') or []
        if chapters and chapters[0].get('start_time'):
            chapters.insert(0, {'start_time': 0})

        dummy_chapter = {'end_time': 0, 'start_time': info_dict.get('duration')}
        for idx, (prev, current, next_) in enumerate(zip(
                (dummy_chapter, *chapters), chapters, (*chapters[1:], dummy_chapter)), 1):
            if current.get('start_time') is None:
                current['start_time'] = prev.get('end_time')
            if not current.get('end_time'):
                current['end_time'] = next_.get('start_time')
            if not current.get('title'):
                current['title'] = f'<Untitled Chapter {idx}>'

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        self._sanitize_thumbnails(info_dict)

        thumbnail = info_dict.get('thumbnail')
        thumbnails = info_dict.get('thumbnails')
        if thumbnail:
            info_dict['thumbnail'] = sanitize_url(thumbnail)
        elif thumbnails:
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if info_dict.get('display_id') is None and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        self._fill_common_fields(info_dict)

        for cc_kind in ('subtitles', 'automatic_captions'):
            cc = info_dict.get(cc_kind)
            if cc:
                for _, subtitle in cc.items():
                    for subtitle_format in subtitle:
                        if subtitle_format.get('url'):
                            subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                        if subtitle_format.get('ext') is None:
                            subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

        automatic_captions = info_dict.get('automatic_captions')
        subtitles = info_dict.get('subtitles')

        info_dict['requested_subtitles'] = self.process_subtitles(
            info_dict['id'], subtitles, automatic_captions)

        formats = self._get_formats(info_dict)

        # Backward compatibility with InfoExtractor._sort_formats
        field_preference = (formats or [{}])[0].pop('__sort_fields', None)
        if field_preference:
            info_dict['_format_sort_fields'] = field_preference

        info_dict['_has_drm'] = any(  # or None ensures --clean-infojson removes it
            f.get('has_drm') and f['has_drm'] != 'maybe' for f in formats) or None
        if not self.params.get('allow_unplayable_formats'):
            formats = [f for f in formats if not f.get('has_drm') or f['has_drm'] == 'maybe']

        if formats and all(f.get('acodec') == f.get('vcodec') == 'none' for f in formats):
            self.report_warning(
                f'{"This video is DRM protected and " if info_dict["_has_drm"] else ""}'
                'only images are available for download. Use --list-formats to see them'.capitalize())

        get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
        if not get_from_start:
            info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
        if info_dict.get('is_live') and formats:
            formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
            if get_from_start and not formats:
                self.raise_no_formats(info_dict, msg=(
                    '--live-from-start is passed, but there are no formats that can be downloaded from the start. '
                    'If you want to download from the current time, use --no-live-from-start'))

        def is_wellformed(f):
            url = f.get('url')
            if not url:
                self.report_warning(
                    '"url" field is missing or empty - skipping format, '
                    'there is an error in extractor')
                return False
            if isinstance(url, bytes):
                sanitize_string_field(f, 'url')
            return True

        # Filter out malformed formats for better extraction robustness
        formats = list(filter(is_wellformed, formats or []))

        if not formats:
            self.raise_no_formats(info_dict)

        for format in formats:
            sanitize_string_field(format, 'format_id')
            sanitize_numeric_fields(format)
            format['url'] = sanitize_url(format['url'])
            if format.get('ext') is None:
                format['ext'] = determine_ext(format['url']).lower()
            if format.get('protocol') is None:
                format['protocol'] = determine_protocol(format)
            if format.get('resolution') is None:
                format['resolution'] = self.format_resolution(format, default=None)
            if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
                format['dynamic_range'] = 'SDR'
            if format.get('aspect_ratio') is None:
                format['aspect_ratio'] = try_call(lambda: round(format['width'] / format['height'], 2))
            if (not format.get('manifest_url')  # For fragmented formats, "tbr" is often max bitrate and not average
                    and info_dict.get('duration') and format.get('tbr')
                    and not format.get('filesize') and not format.get('filesize_approx')):
                format['filesize_approx'] = int(info_dict['duration'] * format['tbr'] * (1024 / 8))
            format['http_headers'] = self._calc_headers(collections.ChainMap(format, info_dict), load_cookies=True)

        # Safeguard against old/insecure infojson when using --load-info-json
        if info_dict.get('http_headers'):
            info_dict['http_headers'] = HTTPHeaderDict(info_dict['http_headers'])
            info_dict['http_headers'].pop('Cookie', None)

        # This is copied to http_headers by the above _calc_headers and can now be removed
        if '__x_forwarded_for_ip' in info_dict:
            del info_dict['__x_forwarded_for_ip']

        self.sort_formats({
            'formats': formats,
            '_format_sort_fields': info_dict.get('_format_sort_fields')
        })

        # Sanitize and group by format_id
        formats_dict = {}
        for i, format in enumerate(formats):
            if not format.get('format_id'):
                format['format_id'] = str(i)
            else:
                # Sanitize format_id from characters used in format selector expression
                format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
            formats_dict.setdefault(format['format_id'], []).append(format)

        # Make sure all formats have unique format_id
        common_exts = set(itertools.chain(*self._format_selection_exts.values()))
        for format_id, ambiguous_formats in formats_dict.items():
            ambigious_id = len(ambiguous_formats) > 1
            for i, format in enumerate(ambiguous_formats):
                if ambigious_id:
                    format['format_id'] = '%s-%d' % (format_id, i)
                # Ensure there is no conflict between id and ext in format selection
                # See https://github.com/yt-dlp/yt-dlp/issues/1282
                if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
                    format['format_id'] = 'f%s' % format['format_id']

                if format.get('format') is None:
                    format['format'] = '{id} - {res}{note}'.format(
                        id=format['format_id'],
                        res=self.format_resolution(format),
                        note=format_field(format, 'format_note', ' (%s)'),
                    )

        if self.params.get('check_formats') is True:
            formats = LazyList(self._check_formats(formats[::-1]), reverse=True)

        if not formats or formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats

        info_dict, _ = self.pre_process(info_dict)

        if self._match_entry(info_dict, incomplete=self._format_fields) is not None:
            return info_dict

        self.post_extract(info_dict)
        info_dict, _ = self.pre_process(info_dict, 'after_filter')

        # The pre-processors may have modified the formats
        formats = self._get_formats(info_dict)

        list_only = self.params.get('simulate') == 'list_only'
        interactive_format_selection = not list_only and self.format_selector == '-'
        if self.params.get('list_thumbnails'):
            self.list_thumbnails(info_dict)
        if self.params.get('listsubtitles'):
            if 'automatic_captions' in info_dict:
                self.list_subtitles(
                    info_dict['id'], automatic_captions, 'automatic captions')
            self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
        if self.params.get('listformats') or interactive_format_selection:
            self.list_formats(info_dict)
        if list_only:
            # Without this printing, -F --print-json will not work
            self.__forced_printings(info_dict)
            return info_dict

        format_selector = self.format_selector
        while True:
            if interactive_format_selection:
                req_format = input(self._format_screen('\nEnter format selector ', self.Styles.EMPHASIS)
                                   + '(Press ENTER for default, or Ctrl+C to quit)'
                                   + self._format_screen(': ', self.Styles.EMPHASIS))
                try:
                    format_selector = self.build_format_selector(req_format) if req_format else None
                except SyntaxError as err:
                    self.report_error(err, tb=False, is_error=False)
                    continue

            if format_selector is None:
                req_format = self._default_format_spec(info_dict, download=download)
                self.write_debug(f'Default format spec: {req_format}')
                format_selector = self.build_format_selector(req_format)

            formats_to_download = list(format_selector({
                'formats': formats,
                'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
                'incomplete_formats': (all(f.get('vcodec') == 'none' for f in formats)  # No formats with video
                                       or all(f.get('acodec') == 'none' for f in formats)),  # OR, No formats with audio
            }))
            if interactive_format_selection and not formats_to_download:
                self.report_error('Requested format is not available', tb=False, is_error=False)
                continue
            break

        if not formats_to_download:
            if not self.params.get('ignore_no_formats_error'):
                raise ExtractorError(
                    'Requested format is not available. Use --list-formats for a list of available formats',
                    expected=True, video_id=info_dict['id'], ie=info_dict['extractor'])
            self.report_warning('Requested format is not available')
            # Process what we can, even without any available formats.
            formats_to_download = [{}]

        requested_ranges = tuple(self.params.get('download_ranges', lambda *_: [{}])(info_dict, self))
        best_format, downloaded_formats = formats_to_download[-1], []
        if download:
            if best_format and requested_ranges:
                def to_screen(*msg):
                    self.to_screen(f'[info] {info_dict["id"]}: {" ".join(", ".join(variadic(m)) for m in msg)}')

                to_screen(f'Downloading {len(formats_to_download)} format(s):',
                          (f['format_id'] for f in formats_to_download))
                if requested_ranges != ({}, ):
                    to_screen(f'Downloading {len(requested_ranges)} time ranges:',
                              (f'{c["start_time"]:.1f}-{c["end_time"]:.1f}' for c in requested_ranges))
            max_downloads_reached = False

            for fmt, chapter in itertools.product(formats_to_download, requested_ranges):
                new_info = self._copy_infodict(info_dict)
                new_info.update(fmt)
                offset, duration = info_dict.get('section_start') or 0, info_dict.get('duration') or float('inf')
                end_time = offset + min(chapter.get('end_time', duration), duration)
                # duration may not be accurate. So allow deviations <1sec
                if end_time == float('inf') or end_time > offset + duration + 1:
                    end_time = None
                if chapter or offset:
                    new_info.update({
                        'section_start': offset + chapter.get('start_time', 0),
                        'section_end': end_time,
                        'section_title': chapter.get('title'),
                        'section_number': chapter.get('index'),
                    })
                downloaded_formats.append(new_info)
                try:
                    self.process_info(new_info)
                except MaxDownloadsReached:
                    max_downloads_reached = True
                self._raise_pending_errors(new_info)
                # Remove copied info
                for key, val in tuple(new_info.items()):
                    if info_dict.get(key) == val:
                        new_info.pop(key)
                if max_downloads_reached:
                    break

            write_archive = {f.get('__write_download_archive', False) for f in downloaded_formats}
            assert write_archive.issubset({True, False, 'ignore'})
            if True in write_archive and False not in write_archive:
                self.record_download_archive(info_dict)

            info_dict['requested_downloads'] = downloaded_formats
            info_dict = self.run_all_pps('after_video', info_dict)
            if max_downloads_reached:
                raise MaxDownloadsReached()

        # We update the info dict with the selected best quality format (backwards compatibility)
        info_dict.update(best_format)
        return info_dict
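    # Illustrative sketch (not in the original lines): `download_ranges` is a
    # callable (info_dict, ydl) -> iterable of section dicts; the public helper
    # `yt_dlp.utils.download_range_func` mirrors --download-sections:
    #
    #   from yt_dlp.utils import download_range_func
    #   ydl = YoutubeDL({'download_ranges': download_range_func(None, [(10, 20)])})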

    def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
        """Select the requested subtitles and their format"""
        available_subs, normal_sub_langs = {}, []
        if normal_subtitles and self.params.get('writesubtitles'):
            available_subs.update(normal_subtitles)
            normal_sub_langs = tuple(normal_subtitles.keys())
        if automatic_captions and self.params.get('writeautomaticsub'):
            for lang, cap_info in automatic_captions.items():
                if lang not in available_subs:
                    available_subs[lang] = cap_info

        if not available_subs or (
                not self.params.get('writesubtitles')
                and not self.params.get('writeautomaticsub')):
            return None

        all_sub_langs = tuple(available_subs.keys())
        if self.params.get('allsubtitles', False):
            requested_langs = all_sub_langs
        elif self.params.get('subtitleslangs', False):
            try:
                requested_langs = orderedSet_from_options(
                    self.params.get('subtitleslangs'), {'all': all_sub_langs}, use_regex=True)
            except re.error as e:
                raise ValueError(f'Wrong regex for subtitlelangs: {e.pattern}')
        else:
            requested_langs = LazyList(itertools.chain(
                ['en'] if 'en' in normal_sub_langs else [],
                filter(lambda f: f.startswith('en'), normal_sub_langs),
                ['en'] if 'en' in all_sub_langs else [],
                filter(lambda f: f.startswith('en'), all_sub_langs),
                normal_sub_langs, all_sub_langs,
            ))[:1]
        if requested_langs:
            self.to_screen(f'[info] {video_id}: Downloading subtitles: {", ".join(requested_langs)}')

        formats_query = self.params.get('subtitlesformat', 'best')
        formats_preference = formats_query.split('/') if formats_query else []
        subs = {}
        for lang in requested_langs:
            formats = available_subs.get(lang)
            if formats is None:
                self.report_warning(f'{lang} subtitles not available for {video_id}')
                continue
            for ext in formats_preference:
                if ext == 'best':
                    f = formats[-1]
                    break
                matches = list(filter(lambda f: f['ext'] == ext, formats))
                if matches:
                    f = matches[-1]
                    break
            else:
                f = formats[-1]
                self.report_warning(
                    'No subtitle format found matching "%s" for language %s, '
                    'using %s' % (formats_query, lang, f['ext']))
            subs[lang] = f
        return subs
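
    # Illustrative sketch (not part of the API surface) of how the language
    # selection above behaves. 'subtitleslangs' entries are matched as regexes,
    # 'all' expands to every available language, and a '-' prefix discards:
    #
    #   ydl = YoutubeDL({'writesubtitles': True, 'subtitleslangs': ['en.*', '-en-GB']})
    #   subs = ydl.process_subtitles(
    #       'dummy-id',
    #       {'en': [{'ext': 'vtt', 'url': '...'}], 'en-GB': [{'ext': 'vtt', 'url': '...'}]},
    #       {})
    #   # -> picks 'en' but drops 'en-GB'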

    def _forceprint(self, key, info_dict):
        if info_dict is None:
            return
        info_copy = info_dict.copy()
        info_copy.setdefault('filename', self.prepare_filename(info_dict))
        if info_dict.get('requested_formats') is not None:
            # For RTMP URLs, also include the playpath
            info_copy['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
        elif info_dict.get('url'):
            info_copy['urls'] = info_dict['url'] + info_dict.get('play_path', '')
        info_copy['formats_table'] = self.render_formats_table(info_dict)
        info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)
        info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
        info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))

        def format_tmpl(tmpl):
            mobj = re.fullmatch(r'([\w.:,]|-\d|(?P<dict>{([\w.:,]|-\d)+}))+=?', tmpl)
            if not mobj:
                return tmpl

            fmt = '%({})s'
            if tmpl.startswith('{'):
                tmpl, fmt = f'.{tmpl}', '%({})j'
            if tmpl.endswith('='):
                tmpl, fmt = tmpl[:-1], '{0} = %({0})#j'
            return '\n'.join(map(fmt.format, [tmpl] if mobj.group('dict') else tmpl.split(',')))

        for tmpl in self.params['forceprint'].get(key, []):
            self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))

        for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):
            filename = self.prepare_filename(info_dict, outtmpl=file_tmpl)
            tmpl = format_tmpl(tmpl)
            self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
            if self._ensure_dir_exists(filename):
                with open(filename, 'a', encoding='utf-8', newline='') as f:
                    f.write(self.evaluate_outtmpl(tmpl, info_copy) + os.linesep)

        return info_copy
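
    # A minimal sketch of what format_tmpl() above accepts: bare field lists,
    # dict-style templates, and a trailing '=' are each expanded into output
    # templates before evaluation:
    #
    #   format_tmpl('id,title')   -> '%(id)s\n%(title)s'
    #   format_tmpl('{id,title}') -> '%(.{id,title})j'
    #   format_tmpl('id=')        -> 'id = %(id)#j'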

    def __forced_printings(self, info_dict, filename=None, incomplete=True):
        if (self.params.get('forcejson')
                or self.params['forceprint'].get('video')
                or self.params['print_to_file'].get('video')):
            self.post_extract(info_dict)
        if filename:
            info_dict['filename'] = filename
        info_copy = self._forceprint('video', info_dict)

        def print_field(field, actual_field=None, optional=False):
            if actual_field is None:
                actual_field = field
            if self.params.get(f'force{field}') and (
                    info_copy.get(field) is not None or (not optional and not incomplete)):
                self.to_stdout(info_copy[actual_field])

        print_field('title')
        print_field('id')
        print_field('url', 'urls')
        print_field('thumbnail', optional=True)
        print_field('description', optional=True)
        print_field('filename')
        if self.params.get('forceduration') and info_copy.get('duration') is not None:
            self.to_stdout(formatSeconds(info_copy['duration']))
        print_field('format')

        if self.params.get('forcejson'):
            self.to_stdout(json.dumps(self.sanitize_info(info_dict)))

    def dl(self, name, info, subtitle=False, test=False):
        if not info.get('url'):
            self.raise_no_formats(info, True)
            return

        if test:
            verbose = self.params.get('verbose')
            params = {
                'test': True,
                'quiet': self.params.get('quiet') or not verbose,
                'verbose': verbose,
                'noprogress': not verbose,
                'nopart': True,
                'skip_unavailable_fragments': False,
                'keep_fragments': False,
                'overwrites': True,
                '_no_ytdl_file': True,
            }
        else:
            params = self.params

        fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
        for ph in self._progress_hooks:
            fd.add_progress_hook(ph)
        urls = '", "'.join(
            (f['url'].split(',')[0] + ',<data>' if f['url'].startswith('data:') else f['url'])
            for f in info.get('requested_formats', []) or [info])
        self.write_debug(f'Invoking {fd.FD_NAME} downloader on "{urls}"')

        # Note: Ideally info should be deep-copied so that hooks cannot modify it.
        # But it may contain objects that are not deep-copyable
        new_info = self._copy_infodict(info)
        if new_info.get('http_headers') is None:
            new_info['http_headers'] = self._calc_headers(new_info)
        return fd.download(name, new_info, subtitle)
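
    # Usage sketch: dl() is the low-level entry point used by process_info()
    # below. Its return value is whatever the downloader's download() produces,
    # which callers here unpack as a (success, real_download) pair, e.g.:
    #
    #   success, real_download = self.dl(temp_filename, info_dict)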

    def existing_file(self, filepaths, *, default_overwrite=True):
        existing_files = list(filter(os.path.exists, orderedSet(filepaths)))
        if existing_files and not self.params.get('overwrites', default_overwrite):
            return existing_files[0]

        for file in existing_files:
            self.report_file_delete(file)
            os.remove(file)
        return None
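
    # Behaviour sketch (hypothetical paths): when overwriting is disabled, the
    # first candidate that exists is returned and kept; otherwise every existing
    # candidate is deleted so the download can start fresh:
    #
    #   ydl.existing_file(['a.mkv', 'a.part.mkv'])  # -> 'a.mkv' under --no-overwrites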

    def process_info(self, info_dict):
        """Process a single resolved IE result. (Modifies it in-place)"""

        assert info_dict.get('_type', 'video') == 'video'
        original_infodict = info_dict

        if 'format' not in info_dict and 'ext' in info_dict:
            info_dict['format'] = info_dict['ext']

        if self._match_entry(info_dict) is not None:
            info_dict['__write_download_archive'] = 'ignore'
            return

        # Does nothing under normal operation - for backward compatibility of process_info
        self.post_extract(info_dict)

        def replace_info_dict(new_info):
            nonlocal info_dict
            if new_info == info_dict:
                return
            info_dict.clear()
            info_dict.update(new_info)

        new_info, _ = self.pre_process(info_dict, 'video')
        replace_info_dict(new_info)
        self._num_downloads += 1

        # info_dict['_filename'] needs to be set for backward compatibility
        info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
        temp_filename = self.prepare_filename(info_dict, 'temp')
        files_to_move = {}

        # Forced printings
        self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))

        def check_max_downloads():
            if self._num_downloads >= float(self.params.get('max_downloads') or 'inf'):
                raise MaxDownloadsReached()

        if self.params.get('simulate'):
            info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
            check_max_downloads()
            return

        if full_filename is None:
            return
        if not self._ensure_dir_exists(encodeFilename(full_filename)):
            return
        if not self._ensure_dir_exists(encodeFilename(temp_filename)):
            return

        if self._write_description('video', info_dict,
                                   self.prepare_filename(info_dict, 'description')) is None:
            return

        sub_files = self._write_subtitles(info_dict, temp_filename)
        if sub_files is None:
            return
        files_to_move.update(dict(sub_files))

        thumb_files = self._write_thumbnails(
            'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
        if thumb_files is None:
            return
        files_to_move.update(dict(thumb_files))

        infofn = self.prepare_filename(info_dict, 'infojson')
        _infojson_written = self._write_info_json('video', info_dict, infofn)
        if _infojson_written:
            info_dict['infojson_filename'] = infofn
            # For backward compatibility, even though it was a private field
            info_dict['__infojson_filename'] = infofn
        elif _infojson_written is None:
            return

        # Note: Annotations are deprecated
        annofn = None
        if self.params.get('writeannotations', False):
            annofn = self.prepare_filename(info_dict, 'annotation')
        if annofn:
            if not self._ensure_dir_exists(encodeFilename(annofn)):
                return
            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
                self.to_screen('[info] Video annotations are already present')
            elif not info_dict.get('annotations'):
                self.report_warning('There are no annotations to write.')
            else:
                try:
                    self.to_screen('[info] Writing video annotations to: ' + annofn)
                    with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                        annofile.write(info_dict['annotations'])
                except (KeyError, TypeError):
                    self.report_warning('There are no annotations to write.')
                except OSError:
                    self.report_error('Cannot write annotations file: ' + annofn)
                    return

        # Write internet shortcut files
        def _write_link_file(link_type):
            url = try_get(info_dict['webpage_url'], iri_to_uri)
            if not url:
                self.report_warning(
                    f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
                return True
            linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
            if not self._ensure_dir_exists(encodeFilename(linkfn)):
                return False
            if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
                self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
                return True
            try:
                self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
                with open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
                          newline='\r\n' if link_type == 'url' else '\n') as linkfile:
                    template_vars = {'url': url}
                    if link_type == 'desktop':
                        template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
                    linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
            except OSError:
                self.report_error(f'Cannot write internet shortcut {linkfn}')
                return False
            return True

        write_links = {
            'url': self.params.get('writeurllink'),
            'webloc': self.params.get('writewebloclink'),
            'desktop': self.params.get('writedesktoplink'),
        }
        if self.params.get('writelink'):
            link_type = ('webloc' if sys.platform == 'darwin'
                         else 'desktop' if sys.platform.startswith('linux')
                         else 'url')
            write_links[link_type] = True

        if any(should_write and not _write_link_file(link_type)
               for link_type, should_write in write_links.items()):
            return

        new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
        replace_info_dict(new_info)

        if self.params.get('skip_download'):
            info_dict['filepath'] = temp_filename
            info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
            info_dict['__files_to_move'] = files_to_move
            replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
            info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
        else:
            # Download
            info_dict.setdefault('__postprocessors', [])
            try:

                def existing_video_file(*filepaths):
                    ext = info_dict.get('ext')
                    converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
                    file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
                                              default_overwrite=False)
                    if file:
                        info_dict['ext'] = os.path.splitext(file)[1][1:]
                    return file

                fd, success = None, True
                if info_dict.get('protocol') or info_dict.get('url'):
                    fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
                    if fd != FFmpegFD and 'no-direct-merge' not in self.params['compat_opts'] and (
                            info_dict.get('section_start') or info_dict.get('section_end')):
                        msg = ('This format cannot be partially downloaded' if FFmpegFD.available()
                               else 'You have requested downloading the video partially, but ffmpeg is not installed')
                        self.report_error(f'{msg}. Aborting')
                        return

                if info_dict.get('requested_formats') is not None:
                    old_ext = info_dict['ext']
                    if self.params.get('merge_output_format') is None:
                        if (info_dict['ext'] == 'webm'
                                and info_dict.get('thumbnails')
                                # check with type instead of pp_key, __name__, or isinstance
                                # since we dont want any custom PPs to trigger this
                                and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):  # noqa: E721
                            info_dict['ext'] = 'mkv'
                            self.report_warning(
                                'webm doesn\'t support embedding a thumbnail, mkv will be used')
                    new_ext = info_dict['ext']

                    def correct_ext(filename, ext=new_ext):
                        if filename == '-':
                            return filename
                        filename_real_ext = os.path.splitext(filename)[1][1:]
                        filename_wo_ext = (
                            os.path.splitext(filename)[0]
                            if filename_real_ext in (old_ext, new_ext)
                            else filename)
                        return f'{filename_wo_ext}.{ext}'

                    # Ensure filename always has a correct extension for successful merge
                    full_filename = correct_ext(full_filename)
                    temp_filename = correct_ext(temp_filename)
                    dl_filename = existing_video_file(full_filename, temp_filename)

                    info_dict['__real_download'] = False
                    # NOTE: Copy so that original format dicts are not modified
                    info_dict['requested_formats'] = list(map(dict, info_dict['requested_formats']))

                    merger = FFmpegMergerPP(self)
                    downloaded = []
                    if dl_filename is not None:
                        self.report_file_already_downloaded(dl_filename)
                    elif fd:
                        for f in info_dict['requested_formats'] if fd != FFmpegFD else []:
                            f['filepath'] = fname = prepend_extension(
                                correct_ext(temp_filename, info_dict['ext']),
                                'f%s' % f['format_id'], info_dict['ext'])
                            downloaded.append(fname)
                        info_dict['url'] = '\n'.join(f['url'] for f in info_dict['requested_formats'])
                        success, real_download = self.dl(temp_filename, info_dict)
                        info_dict['__real_download'] = real_download
                    else:
                        if self.params.get('allow_unplayable_formats'):
                            self.report_warning(
                                'You have requested merging of multiple formats '
                                'while also allowing unplayable formats to be downloaded. '
                                'The formats won\'t be merged to prevent data corruption.')
                        elif not merger.available:
                            msg = 'You have requested merging of multiple formats but ffmpeg is not installed'
                            if not self.params.get('ignoreerrors'):
                                self.report_error(f'{msg}. Aborting due to --abort-on-error')
                                return
                            self.report_warning(f'{msg}. The formats won\'t be merged')

                        if temp_filename == '-':
                            reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
                                      else 'but the formats are incompatible for simultaneous download' if merger.available
                                      else 'but ffmpeg is not installed')
                            self.report_warning(
                                f'You have requested downloading multiple formats to stdout {reason}. '
                                'The formats will be streamed one after the other')
                            fname = temp_filename
                        for f in info_dict['requested_formats']:
                            new_info = dict(info_dict)
                            del new_info['requested_formats']
                            new_info.update(f)
                            if temp_filename != '-':
                                fname = prepend_extension(
                                    correct_ext(temp_filename, new_info['ext']),
                                    'f%s' % f['format_id'], new_info['ext'])
                                if not self._ensure_dir_exists(fname):
                                    return
                                f['filepath'] = fname
                                downloaded.append(fname)
                            partial_success, real_download = self.dl(fname, new_info)
                            info_dict['__real_download'] = info_dict['__real_download'] or real_download
                            success = success and partial_success

                    if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
                        info_dict['__postprocessors'].append(merger)
                        info_dict['__files_to_merge'] = downloaded
                        # Even if there were no downloads, it is being merged only now
                        info_dict['__real_download'] = True
                    else:
                        for file in downloaded:
                            files_to_move[file] = None
                else:
                    # Just a single file
                    dl_filename = existing_video_file(full_filename, temp_filename)
                    if dl_filename is None or dl_filename == temp_filename:
                        # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
                        # So we should try to resume the download
                        success, real_download = self.dl(temp_filename, info_dict)
                        info_dict['__real_download'] = real_download
                    else:
                        self.report_file_already_downloaded(dl_filename)

                dl_filename = dl_filename or temp_filename
                info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))

            except network_exceptions as err:
                self.report_error('unable to download video data: %s' % error_to_compat_str(err))
                return
            except OSError as err:
                raise UnavailableVideoError(err)
            except (ContentTooShortError, ) as err:
                self.report_error(f'content too short (expected {err.expected} bytes and served {err.downloaded})')
                return

            self._raise_pending_errors(info_dict)
            if success and full_filename != '-':

                def fixup():
                    do_fixup = True
                    fixup_policy = self.params.get('fixup')
                    vid = info_dict['id']

                    if fixup_policy in ('ignore', 'never'):
                        return
                    elif fixup_policy == 'warn':
                        do_fixup = 'warn'
                    elif fixup_policy != 'force':
                        assert fixup_policy in ('detect_or_warn', None)
                        if not info_dict.get('__real_download'):
                            do_fixup = False

                    def ffmpeg_fixup(cndn, msg, cls):
                        if not (do_fixup and cndn):
                            return
                        elif do_fixup == 'warn':
                            self.report_warning(f'{vid}: {msg}')
                            return
                        pp = cls(self)
                        if pp.available:
                            info_dict['__postprocessors'].append(pp)
                        else:
                            self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')

                    stretched_ratio = info_dict.get('stretched_ratio')
                    ffmpeg_fixup(stretched_ratio not in (1, None),
                                 f'Non-uniform pixel ratio {stretched_ratio}',
                                 FFmpegFixupStretchedPP)

                    downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
                    downloader = downloader.FD_NAME if downloader else None

                    ext = info_dict.get('ext')
                    postprocessed_by_ffmpeg = info_dict.get('requested_formats') or any((
                        isinstance(pp, FFmpegVideoConvertorPP)
                        and resolve_recode_mapping(ext, pp.mapping)[0] not in (ext, None)
                    ) for pp in self._pps['post_process'])

                    if not postprocessed_by_ffmpeg:
                        ffmpeg_fixup(fd != FFmpegFD and ext == 'm4a'
                                     and info_dict.get('container') == 'm4a_dash',
                                     'writing DASH m4a. Only some players support this container',
                                     FFmpegFixupM4aPP)
                        ffmpeg_fixup(downloader == 'hlsnative' and not self.params.get('hls_use_mpegts')
                                     or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None,
                                     'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
                                     FFmpegFixupM3u8PP)
                        ffmpeg_fixup(info_dict.get('is_live') and downloader == 'dashsegments',
                                     'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)

                    ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
                    ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed duration detected', FFmpegFixupDurationPP)

                fixup()
                try:
                    replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
                except PostProcessingError as err:
                    self.report_error('Postprocessing: %s' % str(err))
                    return
                try:
                    for ph in self._post_hooks:
                        ph(info_dict['filepath'])
                except Exception as err:
                    self.report_error('post hooks: %s' % str(err))
                    return
                info_dict['__write_download_archive'] = True

        assert info_dict is original_infodict  # Make sure the info_dict was modified in-place
        if self.params.get('force_write_download_archive'):
            info_dict['__write_download_archive'] = True
        check_max_downloads()

    def __download_wrapper(self, func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            try:
                res = func(*args, **kwargs)
            except UnavailableVideoError as e:
                self.report_error(e)
            except DownloadCancelled as e:
                self.to_screen(f'[info] {e}')
                if not self.params.get('break_per_url'):
                    raise
                self._num_downloads = 0
            else:
                if self.params.get('dump_single_json', False):
                    self.post_extract(res)
                    self.to_stdout(json.dumps(self.sanitize_info(res)))
        return wrapper

    def download(self, url_list):
        """Download a given list of URLs."""
        url_list = variadic(url_list)  # Passing a single URL is a common mistake
        outtmpl = self.params['outtmpl']['default']
        if (len(url_list) > 1
                and outtmpl != '-'
                and '%' not in outtmpl
                and self.params.get('max_downloads') != 1):
            raise SameFileError(outtmpl)

        for url in url_list:
            self.__download_wrapper(self.extract_info)(
                url, force_generic_extractor=self.params.get('force_generic_extractor', False))

        return self._download_retcode
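
    # Typical embedding usage (sketch; YoutubeDL is also a context manager):
    #
    #   from yt_dlp import YoutubeDL
    #   with YoutubeDL({'outtmpl': '%(title)s.%(ext)s'}) as ydl:
    #       retcode = ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])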

    def download_with_info_file(self, info_filename):
        with contextlib.closing(fileinput.FileInput(
                [info_filename], mode='r',
                openhook=fileinput.hook_encoded('utf-8'))) as f:
            # FileInput doesn't have a read method, we can't call json.load
            infos = [self.sanitize_info(info, self.params.get('clean_infojson', True))
                     for info in variadic(json.loads('\n'.join(f)))]
        for info in infos:
            try:
                self.__download_wrapper(self.process_ie_result)(info, download=True)
            except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
                if not isinstance(e, EntryNotInPlaylist):
                    self.to_stderr('\r')
                webpage_url = info.get('webpage_url')
                if webpage_url is None:
                    raise
                self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
                self.download([webpage_url])
        return self._download_retcode
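
    # Sketch: this is the API counterpart of the --load-info-json CLI option.
    # The file may hold one JSON object or a list of them; each entry is
    # re-processed, falling back to a fresh extraction of its 'webpage_url'
    # when the stored info can no longer be downloaded:
    #
    #   ydl.download_with_info_file('video.info.json')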

    @staticmethod
    def sanitize_info(info_dict, remove_private_keys=False):
        ''' Sanitize the infodict for converting to json '''
        if info_dict is None:
            return info_dict
        info_dict.setdefault('epoch', int(time.time()))
        info_dict.setdefault('_type', 'video')
        info_dict.setdefault('_version', {
            'version': __version__,
            'current_git_head': current_git_head(),
            'release_git_head': RELEASE_GIT_HEAD,
            'repository': REPOSITORY,
        })

        if remove_private_keys:
            reject = lambda k, v: v is None or k.startswith('__') or k in {
                'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
                'entries', 'filepath', '_filename', 'filename', 'infojson_filename', 'original_url',
                'playlist_autonumber', '_format_sort_fields',
            }
        else:
            reject = lambda k, v: False

        def filter_fn(obj):
            if isinstance(obj, dict):
                return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
            elif isinstance(obj, (list, tuple, set, LazyList)):
                return list(map(filter_fn, obj))
            elif obj is None or isinstance(obj, (str, int, float, bool)):
                return obj
            else:
                return repr(obj)

        return filter_fn(info_dict)
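
    # Sketch of the filtering behaviour: with remove_private_keys=True,
    # None values, dunder-prefixed keys and internal bookkeeping fields are
    # dropped, and non-JSON-serializable objects become their repr():
    #
    #   YoutubeDL.sanitize_info({'id': 'x', '__secret': 1, 'filepath': 'a.mp4'},
    #                           remove_private_keys=True)
    #   # -> {'id': 'x', 'epoch': ..., '_type': 'video', '_version': {...}}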

    @staticmethod
    def filter_requested_info(info_dict, actually_filter=True):
        ''' Alias of sanitize_info for backward compatibility '''
        return YoutubeDL.sanitize_info(info_dict, actually_filter)

    def _delete_downloaded_files(self, *files_to_delete, info={}, msg=None):
        for filename in set(filter(None, files_to_delete)):
            if msg:
                self.to_screen(msg % filename)
            try:
                os.remove(filename)
            except OSError:
                self.report_warning(f'Unable to delete file {filename}')
            if filename in info.get('__files_to_move', []):  # NB: Delete even if None
                del info['__files_to_move'][filename]

    @staticmethod
    def post_extract(info_dict):
        def actual_post_extract(info_dict):
            if info_dict.get('_type') in ('playlist', 'multi_video'):
                for video_dict in info_dict.get('entries', {}):
                    actual_post_extract(video_dict or {})
                return

            post_extractor = info_dict.pop('__post_extractor', None) or (lambda: {})
            info_dict.update(post_extractor())

        actual_post_extract(info_dict or {})

    def run_pp(self, pp, infodict):
        files_to_delete = []
        if '__files_to_move' not in infodict:
            infodict['__files_to_move'] = {}
        try:
            files_to_delete, infodict = pp.run(infodict)
        except PostProcessingError as e:
            # Must be True and not 'only_download'
            if self.params.get('ignoreerrors') is True:
                self.report_error(e)
                return infodict
            raise

        if not files_to_delete:
            return infodict
        if self.params.get('keepvideo', False):
            for f in files_to_delete:
                infodict['__files_to_move'].setdefault(f, '')
        else:
            self._delete_downloaded_files(
                *files_to_delete, info=infodict, msg='Deleting original file %s (pass -k to keep)')
        return infodict

    def run_all_pps(self, key, info, *, additional_pps=None):
        if key != 'video':
            self._forceprint(key, info)
        for pp in (additional_pps or []) + self._pps[key]:
            info = self.run_pp(pp, info)
        return info

    def pre_process(self, ie_info, key='pre_process', files_to_move=None):
        info = dict(ie_info)
        info['__files_to_move'] = files_to_move or {}
        try:
            info = self.run_all_pps(key, info)
        except PostProcessingError as err:
            msg = f'Preprocessing: {err}'
            info.setdefault('__pending_error', msg)
            self.report_error(msg, is_error=False)
        return info, info.pop('__files_to_move', None)

    def post_process(self, filename, info, files_to_move=None):
        """Run all the postprocessors on the given file."""
        info['filepath'] = filename
        info['__files_to_move'] = files_to_move or {}
        info = self.run_all_pps('post_process', info, additional_pps=info.get('__postprocessors'))
        info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
        del info['__files_to_move']
        return self.run_all_pps('after_move', info)

    def _make_archive_id(self, info_dict):
        video_id = info_dict.get('id')
        if not video_id:
            return
        # Future-proof against any change in case
        # and backwards compatibility with prior versions
        extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
        if extractor is None:
            url = str_or_none(info_dict.get('url'))
            if not url:
                return
            # Try to find matching extractor for the URL and take its ie_key
            for ie_key, ie in self._ies.items():
                if ie.suitable(url):
                    extractor = ie_key
                    break
            else:
                return
        return make_archive_id(extractor, video_id)
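
    # Archive-id sketch: make_archive_id() combines the lowercased extractor
    # key with the video id, so download-archive entries look like, e.g.:
    #
    #   youtube BaW_jenozKc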

    def in_download_archive(self, info_dict):
        if not self.archive:
            return False

        vid_ids = [self._make_archive_id(info_dict)]
        vid_ids.extend(info_dict.get('_old_archive_ids') or [])
        return any(id_ in self.archive for id_ in vid_ids)

    def record_download_archive(self, info_dict):
        fn = self.params.get('download_archive')
        if fn is None:
            return
        vid_id = self._make_archive_id(info_dict)
        assert vid_id

        self.write_debug(f'Adding to archive: {vid_id}')
        if is_path_like(fn):
            with locked_file(fn, 'a', encoding='utf-8') as archive_file:
                archive_file.write(vid_id + '\n')
        self.archive.add(vid_id)

    @staticmethod
    def format_resolution(format, default='unknown'):
        if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
            return 'audio only'
        if format.get('resolution') is not None:
            return format['resolution']
        if format.get('width') and format.get('height'):
            return '%dx%d' % (format['width'], format['height'])
        elif format.get('height'):
            return '%sp' % format['height']
        elif format.get('width'):
            return '%dx?' % format['width']
        return default
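
    # Examples of the fallbacks above (sketch):
    #
    #   format_resolution({'width': 1920, 'height': 1080})       # -> '1920x1080'
    #   format_resolution({'height': 720})                       # -> '720p'
    #   format_resolution({'width': 1280})                       # -> '1280x?'
    #   format_resolution({'vcodec': 'none', 'acodec': 'mp4a'})  # -> 'audio only'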

    def _list_format_headers(self, *headers):
        if self.params.get('listformats_table', True) is not False:
            return [self._format_out(header, self.Styles.HEADERS) for header in headers]
        return headers

    def _format_note(self, fdict):
        res = ''
        if fdict.get('ext') in ['f4f', 'f4m']:
            res += '(unsupported)'
        if fdict.get('language'):
            if res:
                res += ' '
            res += '[%s]' % fdict['language']
        if fdict.get('format_note') is not None:
            if res:
                res += ' '
            res += fdict['format_note']
        if fdict.get('tbr') is not None:
            if res:
                res += ', '
            res += '%4dk' % fdict['tbr']
        if fdict.get('container') is not None:
            if res:
                res += ', '
            res += '%s container' % fdict['container']
        if (fdict.get('vcodec') is not None
                and fdict.get('vcodec') != 'none'):
            if res:
                res += ', '
            res += fdict['vcodec']
            if fdict.get('vbr') is not None:
                res += '@'
        elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
            res += 'video@'
        if fdict.get('vbr') is not None:
            res += '%4dk' % fdict['vbr']
        if fdict.get('fps') is not None:
            if res:
                res += ', '
            res += '%sfps' % fdict['fps']
        if fdict.get('acodec') is not None:
            if res:
                res += ', '
            if fdict['acodec'] == 'none':
                res += 'video only'
            else:
                res += '%-5s' % fdict['acodec']
        elif fdict.get('abr') is not None:
            if res:
                res += ', '
            res += 'audio'
        if fdict.get('abr') is not None:
            res += '@%3dk' % fdict['abr']
        if fdict.get('asr') is not None:
            res += ' (%5dHz)' % fdict['asr']
        if fdict.get('filesize') is not None:
            if res:
                res += ', '
            res += format_bytes(fdict['filesize'])
        elif fdict.get('filesize_approx') is not None:
            if res:
                res += ', '
            res += '~' + format_bytes(fdict['filesize_approx'])
        return res

    def _get_formats(self, info_dict):
        if info_dict.get('formats') is None:
            if info_dict.get('url') and info_dict.get('_type', 'video') == 'video':
                return [info_dict]
            return []
        return info_dict['formats']

    def render_formats_table(self, info_dict):
        formats = self._get_formats(info_dict)
        if not formats:
            return
        if self.params.get('listformats_table', True) is False:
            table = [
                [
                    format_field(f, 'format_id'),
                    format_field(f, 'ext'),
                    self.format_resolution(f),
                    self._format_note(f),
                ] for f in formats if (f.get('preference') or 0) >= -1000]
            return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)

        def simplified_codec(f, field):
            assert field in ('acodec', 'vcodec')
            codec = f.get(field)
            if not codec:
                return 'unknown'
            elif codec != 'none':
                return '.'.join(codec.split('.')[:4])

            if field == 'vcodec' and f.get('acodec') == 'none':
                return self._format_out('images', self.Styles.SUPPRESS)
            elif field == 'acodec' and f.get('vcodec') == 'none':
                return ''
            return self._format_out('audio only' if field == 'vcodec' else 'video only',
                                    self.Styles.SUPPRESS)

        delim = self._format_out('\u2502', self.Styles.DELIM, '|', test_encoding=True)
        table = [
            [
                self._format_out(format_field(f, 'format_id'), self.Styles.ID),
                format_field(f, 'ext'),
                format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
                format_field(f, 'fps', '\t%d', func=round),
                format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
                format_field(f, 'audio_channels', '\t%s'),
                delim, (
                    format_field(f, 'filesize', ' \t%s', func=format_bytes)
                    or format_field(f, 'filesize_approx', '≈\t%s', func=format_bytes)
                    or format_field(try_call(lambda: format_bytes(int(info_dict['duration'] * f['tbr'] * (1024 / 8)))),
                                    None, self._format_out('~\t%s', self.Styles.SUPPRESS))),
                format_field(f, 'tbr', '\t%dk', func=round),
                shorten_protocol_name(f.get('protocol', '')),
                delim,
                simplified_codec(f, 'vcodec'),
                format_field(f, 'vbr', '\t%dk', func=round),
                simplified_codec(f, 'acodec'),
                format_field(f, 'abr', '\t%dk', func=round),
                format_field(f, 'asr', '\t%s', func=format_decimal_suffix),
                join_nonempty(format_field(f, 'language', '[%s]'), join_nonempty(
                    self._format_out('UNSUPPORTED', self.Styles.BAD_FORMAT) if f.get('ext') in ('f4f', 'f4m') else None,
                    (self._format_out('Maybe DRM', self.Styles.WARNING) if f.get('has_drm') == 'maybe'
                     else self._format_out('DRM', self.Styles.BAD_FORMAT) if f.get('has_drm') else None),
                    format_field(f, 'format_note'),
                    format_field(f, 'container', ignore=(None, f.get('ext'))),
                    delim=', '), delim=' '),
            ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
        header_line = self._list_format_headers(
            'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', 'CH', delim, '\tFILESIZE', '\tTBR', 'PROTO',
            delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')

        return render_table(
            header_line, table, hide_empty=True,
            delim=self._format_out('\u2500', self.Styles.DELIM, '-', test_encoding=True))

    def render_thumbnails_table(self, info_dict):
        thumbnails = list(info_dict.get('thumbnails') or [])
        if not thumbnails:
            return None
        return render_table(
            self._list_format_headers('ID', 'Width', 'Height', 'URL'),
            [[t.get('id'), t.get('width') or 'unknown', t.get('height') or 'unknown', t['url']] for t in thumbnails])

    def render_subtitles_table(self, video_id, subtitles):
        def _row(lang, formats):
            exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
            if len(set(names)) == 1:
                names = [] if names[0] == 'unknown' else names[:1]
            return [lang, ', '.join(names), ', '.join(exts)]

        if not subtitles:
            return None
        return render_table(
            self._list_format_headers('Language', 'Name', 'Formats'),
            [_row(lang, formats) for lang, formats in subtitles.items()],
            hide_empty=True)

    def __list_table(self, video_id, name, func, *args):
        table = func(*args)
        if not table:
            self.to_screen(f'{video_id} has no {name}')
            return
        self.to_screen(f'[info] Available {name} for {video_id}:')
        self.to_stdout(table)

    def list_formats(self, info_dict):
        self.__list_table(info_dict['id'], 'formats', self.render_formats_table, info_dict)

    def list_thumbnails(self, info_dict):
        self.__list_table(info_dict['id'], 'thumbnails', self.render_thumbnails_table, info_dict)

    def list_subtitles(self, video_id, subtitles, name='subtitles'):
        self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles)

    def print_debug_header(self):
        if not self.params.get('verbose'):
            return

        from . import _IN_CLI  # Must be delayed import

        # These imports can be slow. So import them only as needed
        from .extractor.extractors import _LAZY_LOADER
        from .extractor.extractors import (
            _PLUGIN_CLASSES as plugin_ies,
            _PLUGIN_OVERRIDES as plugin_ie_overrides,
        )

        def get_encoding(stream):
            ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
            additional_info = []
            if os.environ.get('TERM', '').lower() == 'dumb':
                additional_info.append('dumb')
            if not supports_terminal_sequences(stream):
                from .utils import WINDOWS_VT_MODE  # Must be imported locally
                additional_info.append('No VT' if WINDOWS_VT_MODE is False else 'No ANSI')
            if additional_info:
                ret = f'{ret} ({",".join(additional_info)})'
            return ret

        encoding_str = 'Encodings: locale %s, fs %s, pref %s, %s' % (
            locale.getpreferredencoding(),
            sys.getfilesystemencoding(),
            self.get_encoding(),
            ', '.join(
                f'{key} {get_encoding(stream)}' for key, stream in self._out_files.items_
                if stream is not None and key != 'console'))

        logger = self.params.get('logger')
        if logger:
            write_debug = lambda msg: logger.debug(f'[debug] {msg}')
            write_debug(encoding_str)
        else:
            write_string(f'[debug] {encoding_str}\n', encoding=None)
            write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')

        source = detect_variant()
        if VARIANT not in (None, 'pip'):
            source += '*'
        klass = type(self)
        write_debug(join_nonempty(
            f'{"yt-dlp" if REPOSITORY == "yt-dlp/yt-dlp" else REPOSITORY} version',
            f'{CHANNEL}@{__version__}',
            f'[{RELEASE_GIT_HEAD[:9]}]' if RELEASE_GIT_HEAD else '',
            '' if source == 'unknown' else f'({source})',
            '' if _IN_CLI else 'API' if klass == YoutubeDL else f'API:{self.__module__}.{klass.__qualname__}',
            delim=' '))

        if not _IN_CLI:
            write_debug(f'params: {self.params}')

        if not _LAZY_LOADER:
            if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
                write_debug('Lazy loading extractors is forcibly disabled')
            else:
                write_debug('Lazy loading extractors is disabled')
        if self.params['compat_opts']:
            write_debug('Compatibility options: %s' % ', '.join(self.params['compat_opts']))

        if current_git_head():
            write_debug(f'Git HEAD: {current_git_head()}')
        write_debug(system_identifier())

        exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
        ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
        if ffmpeg_features:
            exe_versions['ffmpeg'] += ' (%s)' % ','.join(sorted(ffmpeg_features))

        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_versions['phantomjs'] = PhantomJSwrapper._version()
        exe_str = ', '.join(
            f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
        ) or 'none'
        write_debug('exe versions: %s' % exe_str)

        from .compat.compat_utils import get_package_info
        from .dependencies import available_dependencies

        write_debug('Optional libraries: %s' % (', '.join(sorted({
            join_nonempty(*get_package_info(m)) for m in available_dependencies.values()
        })) or 'none'))

        write_debug(f'Proxy map: {self.proxies}')
        # write_debug(f'Request Handlers: {", ".join(rh.RH_NAME for rh in self._request_director.handlers.values())}')
        for plugin_type, plugins in {'Extractor': plugin_ies, 'Post-Processor': plugin_pps}.items():
            display_list = ['%s%s' % (
                klass.__name__, '' if klass.__name__ == name else f' as {name}')
                for name, klass in plugins.items()]
            if plugin_type == 'Extractor':
                display_list.extend(f'{plugins[-1].IE_NAME.partition("+")[2]} ({parent.__name__})'
                                    for parent, plugins in plugin_ie_overrides.items())
            if not display_list:
                continue
            write_debug(f'{plugin_type} Plugins: {", ".join(sorted(display_list))}')

        plugin_dirs = plugin_directories()
        if plugin_dirs:
            write_debug(f'Plugin directories: {plugin_dirs}')

        # Not implemented
        if False and self.params.get('call_home'):
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode()
            write_debug('Public IP address: %s' % ipaddr)
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode()
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
                    latest_version)

    @functools.cached_property
    def proxies(self):
        """Global proxy configuration"""
        opts_proxy = self.params.get('proxy')
        if opts_proxy is not None:
            if opts_proxy == '':
                opts_proxy = '__noproxy__'
            proxies = {'all': opts_proxy}
        else:
            proxies = urllib.request.getproxies()
            # compat. Set HTTPS_PROXY to __noproxy__ to revert
            if 'http' in proxies and 'https' not in proxies:
                proxies['https'] = proxies['http']

        return proxies
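
    # Resolution sketch: an explicit 'proxy' option wins over environment
    # variables, and an empty string disables proxying entirely:
    #
    #   YoutubeDL({'proxy': 'socks5://127.0.0.1:1080'}).proxies
    #   # -> {'all': 'socks5://127.0.0.1:1080'}
    #   YoutubeDL({'proxy': ''}).proxies  # -> {'all': '__noproxy__'}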

    @functools.cached_property
    def cookiejar(self):
        """Global cookiejar instance"""
        return load_cookies(
            self.params.get('cookiefile'), self.params.get('cookiesfrombrowser'), self)

    @property
    def _opener(self):
        """
        Get a urllib OpenerDirector from the Urllib handler (deprecated).
        """
        self.deprecation_warning('YoutubeDL._opener is deprecated, use YoutubeDL.urlopen()')
        handler = self._request_director.handlers['Urllib']
        return handler._get_instance(cookiejar=self.cookiejar, proxies=self.proxies)

    def urlopen(self, req):
        """ Start an HTTP download """
        if isinstance(req, str):
            req = Request(req)
        elif isinstance(req, urllib.request.Request):
            self.deprecation_warning(
                'Passing a urllib.request.Request object to YoutubeDL.urlopen() is deprecated. '
                'Use yt_dlp.networking.common.Request instead.')
            req = urllib_req_to_req(req)
        assert isinstance(req, Request)

        # compat: Assume user:pass url params are basic auth
        url, basic_auth_header = extract_basic_auth(req.url)
        if basic_auth_header:
            req.headers['Authorization'] = basic_auth_header
        req.url = sanitize_url(url)

        clean_proxies(proxies=req.proxies, headers=req.headers)
        clean_headers(req.headers)

        try:
            return self._request_director.send(req)
        except NoSupportingHandlers as e:
            for ue in e.unsupported_errors:
                if not (ue.handler and ue.msg):
                    continue
                if ue.handler.RH_KEY == 'Urllib' and 'unsupported url scheme: "file"' in ue.msg.lower():
                    raise RequestError(
                        'file:// URLs are disabled by default in yt-dlp for security reasons. '
                        'Use --enable-file-urls to enable at your own risk.', cause=ue) from ue
            raise
        except SSLError as e:
            if 'UNSAFE_LEGACY_RENEGOTIATION_DISABLED' in str(e):
                raise RequestError('UNSAFE_LEGACY_RENEGOTIATION_DISABLED: Try using --legacy-server-connect', cause=e) from e
            elif 'SSLV3_ALERT_HANDSHAKE_FAILURE' in str(e):
                raise RequestError(
                    'SSLV3_ALERT_HANDSHAKE_FAILURE: The server may not support the current cipher list. '
                    'Try using --legacy-server-connect', cause=e) from e
            raise
        except HTTPError as e:  # TODO: Remove in a future release
            raise _CompatHTTPError(e) from e
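
    # Usage sketch for the networking entry point above (assuming `ydl` is a
    # YoutubeDL instance; the response supports read() and the context-manager
    # protocol):
    #
    #   from yt_dlp.networking import Request
    #   with ydl.urlopen(Request('https://example.com', headers={'Accept': '*/*'})) as resp:
    #       data = resp.read()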

    def build_request_director(self, handlers, preferences=None):
        logger = _YDLLogger(self)
        headers = self.params['http_headers'].copy()
        proxies = self.proxies.copy()
        clean_headers(headers)
        clean_proxies(proxies, headers)

        director = RequestDirector(logger=logger, verbose=self.params.get('debug_printtraffic'))
        for handler in handlers:
            director.add_handler(handler(
                logger=logger,
                headers=headers,
                cookiejar=self.cookiejar,
                proxies=proxies,
                prefer_system_certs='no-certifi' in self.params['compat_opts'],
                verify=not self.params.get('nocheckcertificate'),
                **traverse_obj(self.params, {
                    'verbose': 'debug_printtraffic',
                    'source_address': 'source_address',
                    'timeout': 'socket_timeout',
                    'legacy_ssl_support': 'legacyserverconnect',
                    'enable_file_urls': 'enable_file_urls',
                    'client_cert': {
                        'client_certificate': 'client_certificate',
                        'client_certificate_key': 'client_certificate_key',
                        'client_certificate_password': 'client_certificate_password',
                    },
                }),
            ))
        director.preferences.update(preferences or [])
        return director

    def encode(self, s):
        if isinstance(s, bytes):
            return s  # Already encoded

        try:
            return s.encode(self.get_encoding())
        except UnicodeEncodeError as err:
            err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
            raise

    def get_encoding(self):
        encoding = self.params.get('encoding')
        if encoding is None:
            encoding = preferredencoding()
        return encoding

    def _write_info_json(self, label, ie_result, infofn, overwrite=None):
        ''' Write infojson and returns True = written, 'exists' = Already exists, False = skip, None = error '''
        if overwrite is None:
            overwrite = self.params.get('overwrites', True)
        if not self.params.get('writeinfojson'):
            return False
        elif not infofn:
            self.write_debug(f'Skipping writing {label} infojson')
            return False
        elif not self._ensure_dir_exists(infofn):
            return None
        elif not overwrite and os.path.exists(infofn):
            self.to_screen(f'[info] {label.title()} metadata is already present')
            return 'exists'

        self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
        try:
            write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
            return True
        except OSError:
            self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
            return None

    def _write_description(self, label, ie_result, descfn):
        ''' Write description and returns True = written, False = skip, None = error '''
        if not self.params.get('writedescription'):
            return False
        elif not descfn:
            self.write_debug(f'Skipping writing {label} description')
            return False
        elif not self._ensure_dir_exists(descfn):
            return None
        elif not self.params.get('overwrites', True) and os.path.exists(descfn):
            self.to_screen(f'[info] {label.title()} description is already present')
        elif ie_result.get('description') is None:
            self.to_screen(f'[info] There\'s no {label} description to write')
            return False
        else:
            try:
                self.to_screen(f'[info] Writing {label} description to: {descfn}')
                with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                    descfile.write(ie_result['description'])
            except OSError:
                self.report_error(f'Cannot write {label} description file {descfn}')
                return None
        return True

    def _write_subtitles(self, info_dict, filename):
        ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
        ret = []
        subtitles = info_dict.get('requested_subtitles')
        if not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
            # subtitles download errors are already managed as troubles in relevant IE
            # that way it will silently go on when used with unsupporting IE
            return ret
        elif not subtitles:
            self.to_screen('[info] There are no subtitles for the requested languages')
            return ret
        sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
        if not sub_filename_base:
            self.to_screen('[info] Skipping writing video subtitles')
            return ret

        for sub_lang, sub_info in subtitles.items():
            sub_format = sub_info['ext']
            sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
            sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
            existing_sub = self.existing_file((sub_filename_final, sub_filename))
            if existing_sub:
                self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
                sub_info['filepath'] = existing_sub
                ret.append((existing_sub, sub_filename_final))
                continue

            self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
            if sub_info.get('data') is not None:
                try:
                    # Use newline='' to prevent conversion of newline characters
                    # See https://github.com/ytdl-org/youtube-dl/issues/10268
                    with open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
                        subfile.write(sub_info['data'])
                    sub_info['filepath'] = sub_filename
                    ret.append((sub_filename, sub_filename_final))
                    continue
                except OSError:
                    self.report_error(f'Cannot write video subtitles file {sub_filename}')
                    return None

            try:
                sub_copy = sub_info.copy()
                sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
                self.dl(sub_filename, sub_copy, subtitle=True)
                sub_info['filepath'] = sub_filename
                ret.append((sub_filename, sub_filename_final))
            except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
                msg = f'Unable to download video subtitles for {sub_lang!r}: {err}'
                if self.params.get('ignoreerrors') is not True:  # False or 'only_download'
                    if not self.params.get('ignoreerrors'):
                        self.report_error(msg)
                    raise DownloadError(msg)
                self.report_warning(msg)
        return ret

    def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
        ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
        write_all = self.params.get('write_all_thumbnails', False)
        thumbnails, ret = [], []
        if write_all or self.params.get('writethumbnail', False):
            thumbnails = info_dict.get('thumbnails') or []
            if not thumbnails:
                self.to_screen(f'[info] There are no {label} thumbnails to download')
                return ret
        multiple = write_all and len(thumbnails) > 1

        if thumb_filename_base is None:
            thumb_filename_base = filename
        if thumbnails and not thumb_filename_base:
            self.write_debug(f'Skipping writing {label} thumbnail')
            return ret

        for idx, t in list(enumerate(thumbnails))[::-1]:
            thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
            thumb_display_id = f'{label} thumbnail {t["id"]}'
            thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
            thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))

            existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
            if existing_thumb:
                self.to_screen('[info] %s is already present' % (
                    thumb_display_id if multiple else f'{label} thumbnail').capitalize())
                t['filepath'] = existing_thumb
                ret.append((existing_thumb, thumb_filename_final))
            else:
                self.to_screen(f'[info] Downloading {thumb_display_id} ...')
                try:
                    uf = self.urlopen(Request(t['url'], headers=t.get('http_headers', {})))
                    self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                    ret.append((thumb_filename, thumb_filename_final))
                    t['filepath'] = thumb_filename
                except network_exceptions as err:
                    if isinstance(err, HTTPError) and err.status == 404:
                        self.to_screen(f'[info] {thumb_display_id.title()} does not exist')
                    else:
                        self.report_warning(f'Unable to download {thumb_display_id}: {err}')
                    thumbnails.pop(idx)
            if ret and not write_all:
                break
        return ret