from .cache import Cache
from .compat import functools, urllib  # isort: split
from .compat import compat_os_name, compat_shlex_quote, urllib_req_to_req
from .cookies import LenientSimpleCookie, load_cookies
from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
from .downloader.rtmp import rtmpdump_version
from .extractor import gen_extractor_classes, get_info_extractor
from .extractor.common import UnsupportedURLIE
from .extractor.openload import PhantomJSwrapper
from .minicurses import format_text
from .networking import HEADRequest, Request, RequestDirector
from .networking.common import _REQUEST_HANDLERS
from .networking.exceptions import (
from .plugins import directories as plugin_directories
from .postprocessor import _PLUGIN_CLASSES as plugin_pps
from .postprocessor import (
    FFmpegFixupDuplicateMoovPP,
    FFmpegFixupDurationPP,
    FFmpegFixupStretchedPP,
    FFmpegFixupTimestampPP,
    FFmpegVideoConvertorPP,
    MoveFilesAfterDownloadPP,
)
from .postprocessor.ffmpeg import resolve_mapping as resolve_recode_mapping
from .update import REPOSITORY, current_git_head, detect_variant
    UnavailableVideoError,
    format_decimal_suffix,
    orderedSet_from_options,
    remove_terminal_sequences,
    supports_terminal_sequences,
    windows_enable_vt_mode,
from .utils._utils import _YDLLogger
from .utils.networking import (
from .version import CHANNEL, RELEASE_GIT_HEAD, VARIANT, __version__

if compat_os_name == 'nt':
    YoutubeDL objects are responsible for downloading the
    actual video file and writing it to disk if the user has requested
    it, among some other tasks. In most cases there should be one per
    program. Given a video URL, the downloader does not know how to
    extract all the needed information by itself; that is the job of the
    InfoExtractors, so it has to pass the URL to one of them.

    For this, YoutubeDL objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    YoutubeDL processes the extracted information, possibly using a File
    Downloader to download the video.

    YoutubeDL objects accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The YoutubeDL also
    registers itself as the downloader in charge for the InfoExtractors
    that are added to it, so this is a "mutual registration".
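
    As a rough sketch of how the pieces fit together (illustrative only, not
    part of this class), a typical embedding use builds the params dict,
    constructs a YoutubeDL and lets it dispatch the URL to the registered
    InfoExtractors:

        from yt_dlp import YoutubeDL

        params = {'format': 'bestvideo+bestaudio/best', 'outtmpl': '%(title)s.%(ext)s'}
        with YoutubeDL(params) as ydl:
            # The default extractors are registered automatically on
            # construction (auto_init=True), so download() can hand the
            # URL to a suitable one.
            ydl.download(['https://example.com/some-video'])
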
    username:          Username for authentication purposes.
    password:          Password for authentication purposes.
    videopassword:     Password for accessing a video.
    ap_mso:            Adobe Pass multiple-system operator identifier.
    ap_username:       Multiple-system operator account username.
    ap_password:       Multiple-system operator account password.
    usenetrc:          Use netrc for authentication instead.
    netrc_location:    Location of the netrc file. Defaults to ~/.netrc.
    netrc_cmd:         Use a shell command to get credentials
    verbose:           Print additional info to stdout.
    quiet:             Do not print messages to stdout.
    no_warnings:       Do not print out anything for warnings.
    forceprint:        A dict with keys WHEN mapped to a list of templates to
                       print to stdout. The allowed keys are video or any of the
                       items in utils.POSTPROCESS_WHEN.
                       For compatibility, a single list is also accepted
    print_to_file:     A dict with keys WHEN (same as forceprint) mapped to
                       a list of tuples with (template, filename)
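                       For illustration (a sketch, not an exhaustive form),
                       these two options could be given as:
                           'forceprint': {'video': ['%(title)s', '%(duration)s']},
                           'print_to_file': {'video': [('%(title)s', 'titles.txt')]}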
    forcejson:         Force printing info_dict as JSON.
    dump_single_json:  Force printing the info_dict of the whole playlist
                       (or video) as a single JSON line.
    force_write_download_archive: Force writing download archive regardless
                       of 'skip_download' or 'simulate'.
    simulate:          Do not download the video files. If unset (or None),
                       simulate only if listsubtitles, listformats or list_thumbnails is used
    format:            Video format code. See "FORMAT SELECTION" for more details.
                       You can also pass a function. The function takes 'ctx' as
                       an argument and returns the formats to download.
                       See "build_format_selector" for an implementation
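                       A minimal sketch of such a function (assuming ctx exposes
                       the candidate formats under the 'formats' key, as in the
                       project README's embedding example):
                           def pick_last_format(ctx):
                               # Formats are listed worst to best, so the last
                               # one is the best candidate.
                               yield ctx['formats'][-1]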
    allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
    ignore_no_formats_error: Ignore "No video formats" error. Useful for
                       extracting metadata even if the video is not actually
                       available for download (experimental)
    format_sort:       A list of fields by which to sort the video formats.
                       See "Sorting Formats" for more details.
    format_sort_force: Force the given format_sort. See "Sorting Formats"
    prefer_free_formats: Whether to prefer video formats with free containers
                       over non-free ones of the same quality.
    allow_multiple_video_streams: Allow multiple video streams to be merged
    allow_multiple_audio_streams: Allow multiple audio streams to be merged
    check_formats:     Whether to test if the formats are downloadable.
                       Can be True (check all), False (check none),
                       'selected' (check selected formats),
                       or None (check only if requested by extractor)
    paths:             Dictionary of output paths. The allowed keys are 'home',
                       'temp' and the keys of OUTTMPL_TYPES (in utils.py)
    outtmpl:           Dictionary of templates for output names. Allowed keys
                       are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
                       For compatibility with youtube-dl, a single string can also be used
    outtmpl_na_placeholder: Placeholder for unavailable meta fields.
    restrictfilenames: Do not allow "&" and spaces in file names
    trim_file_name:    Limit length of filename (extension excluded)
    windowsfilenames:  Force the filenames to be Windows-compatible
    ignoreerrors:      Do not stop on download/postprocessing errors.
                       Can be 'only_download' to ignore only download errors.
                       Default is 'only_download' for CLI, but False for API
    skip_playlist_after_errors: Number of allowed failures until the rest of
                       the playlist is skipped
    allowed_extractors: List of regexes to match against extractor names that are allowed
    overwrites:        Overwrite all video and metadata files if True,
                       overwrite only non-video files if None
                       and don't overwrite any file if False.
                       For compatibility with youtube-dl,
                       "nooverwrites" may also be used instead
    playlist_items:    Specific indices of playlist to download.
    playlistrandom:    Download playlist items in random order.
    lazy_playlist:     Process playlist entries as they are received.
    matchtitle:        Download only matching titles.
    rejecttitle:       Reject downloads for matching titles.
    logger:            Log messages to a logging.Logger instance.
    logtostderr:       Print everything to stderr instead of stdout.
    consoletitle:      Display progress in the console window's titlebar.
    writedescription: Write the video description to a .description file
    writeinfojson:     Write the video metadata to a .info.json file
    clean_infojson:    Remove internal metadata from the infojson
    getcomments:       Extract video comments. These will not be written to disk
                       unless writeinfojson is also given
    writeannotations:  Write the video annotations to a .annotations.xml file
    writethumbnail:    Write the thumbnail image to a file
    allow_playlist_files: Whether to write playlists' description, infojson etc.
                       also to disk when using the 'write*' options
    write_all_thumbnails: Write all thumbnail formats to files
    writelink:         Write an internet shortcut file, depending on the
                       current platform (.url/.webloc/.desktop)
    writeurllink:      Write a Windows internet shortcut file (.url)
    writewebloclink:   Write a macOS internet shortcut file (.webloc)
    writedesktoplink:  Write a Linux internet shortcut file (.desktop)
    writesubtitles:    Write the video subtitles to a file
    writeautomaticsub: Write the automatically generated subtitles to a file
    listsubtitles:     Lists all available subtitles for the video
    subtitlesformat:   The format code for subtitles
    subtitleslangs:    List of languages of the subtitles to download (can be regex).
                       The list may contain "all" to refer to all the available
                       subtitles. The language can be prefixed with a "-" to
                       exclude it from the requested languages, e.g. ['all', '-live_chat']
    keepvideo:         Keep the video file after post-processing
    daterange:         A utils.DateRange object; download only if the upload_date is in the range.
    skip_download:     Skip the actual download of the video file
    cachedir:          Location of the cache files in the filesystem.
                       False to disable filesystem cache.
    noplaylist:        Download single video instead of a playlist if in doubt.
    age_limit:         An integer representing the user's age in years.
                       Videos unsuitable for the given age are skipped.
    min_views:         An integer representing the minimum view count the video
                       must have in order to not be skipped.
                       Videos without view count information are always
                       downloaded. None for no limit.
    max_views:         An integer representing the maximum view count.
                       Videos that are more popular than that are not
                       downloaded.
                       Videos without view count information are always
                       downloaded. None for no limit.
    download_archive:  A set, or the name of a file where all downloads are recorded.
                       Videos already present in the file are not downloaded again.
    break_on_existing: Stop the download process after attempting to download a
                       file that is in the archive.
    break_per_url:     Whether break_on_reject and break_on_existing
                       should act on each input URL as opposed to for the entire queue
    cookiefile:        File name or text stream from where cookies should be read and dumped to
    cookiesfrombrowser: A tuple containing the name of the browser, the profile
                       name/path from where cookies are loaded, the name of the keyring,
                       and the container name, e.g. ('chrome', ) or
                       ('vivaldi', 'default', 'BASICTEXT') or ('firefox', 'default', None, 'Meta')
    legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
                       support RFC 5746 secure renegotiation
    nocheckcertificate: Do not verify SSL certificates
    client_certificate: Path to client certificate file in PEM format. May include the private key
    client_certificate_key: Path to private key file for client certificate
    client_certificate_password: Password for client certificate private key, if encrypted.
                       If not provided and the key is encrypted, yt-dlp will ask interactively
    prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
                       (Only supported by some extractors)
    enable_file_urls:  Enable file:// URLs. This is disabled by default for security reasons.
    http_headers:      A dictionary of custom headers to be used for all requests
    proxy:             URL of the proxy server to use
    geo_verification_proxy: URL of the proxy to use for IP address verification
                       on geo-restricted sites.
    socket_timeout:    Time to wait for unresponsive hosts, in seconds
    bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
    debug_printtraffic: Print out sent and received HTTP traffic
    default_search:    Prepend this string if an input URL is not valid.
                       'auto' for elaborate guessing
    encoding:          Use this encoding instead of the system-specified.
    extract_flat:      Whether to resolve and process url_results further
                       * False: Always process. Default for API
                       * True: Never process
                       * 'in_playlist': Do not process inside playlist/multi_video
                       * 'discard': Always process, but don't return the result
                                    from inside playlist/multi_video
                       * 'discard_in_playlist': Same as "discard", but only for
                                    playlists (not multi_video). Default for CLI
    wait_for_video:    If given, wait for scheduled streams to become available.
                       The value should be a tuple containing the range
                       (min_secs, max_secs) to wait between retries
    postprocessors:    A list of dictionaries, each with an entry
                       * key: The name of the postprocessor. See
                              yt_dlp/postprocessor/__init__.py for a list.
                       * when: When to run the postprocessor. Allowed values are
                               the entries of utils.POSTPROCESS_WHEN
                               Assumed to be 'post_process' if not given
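                       For illustration, an entry that extracts audio with the
                       FFmpegExtractAudio postprocessor after download could be
                       written as (a sketch; extra keys are passed to the
                       postprocessor's constructor):
                           'postprocessors': [{
                               'key': 'FFmpegExtractAudio',
                               'preferredcodec': 'mp3',
                               'when': 'post_process',
                           }]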
    progress_hooks:    A list of functions that get called on download
                       progress, with a dictionary with the entries
                       * status: One of "downloading", "error", or "finished".
                                 Check this first and ignore unknown values.
                       * info_dict: The extracted info_dict

                       If status is one of "downloading" or "finished", the
                       following properties may also be present:
                       * filename: The final filename (always present)
                       * tmpfilename: The filename we're currently writing to
                       * downloaded_bytes: Bytes on disk
                       * total_bytes: Size of the whole file, None if unknown
                       * total_bytes_estimate: Guess of the eventual file size,
                                               None if unavailable.
                       * elapsed: The number of seconds since download started.
                       * eta: The estimated time in seconds, None if unknown
                       * speed: The download speed in bytes/second, None if
                                unknown
                       * fragment_index: The counter of the currently
                                         downloaded video fragment.
                       * fragment_count: The number of fragments (= individual
                                         files that will be merged)

                       Progress hooks are guaranteed to be called at least once
                       (with status "finished") if the download is successful.
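                       A minimal sketch of such a hook (the name is just
                       illustrative) could be:
                           def on_progress(d):
                               # d is the progress dictionary described above
                               if d['status'] == 'finished':
                                   print('Finished downloading', d['filename'])
                       It would then be passed as 'progress_hooks': [on_progress].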
    postprocessor_hooks: A list of functions that get called on postprocessing
                       progress, with a dictionary with the entries
                       * status: One of "started", "processing", or "finished".
                                 Check this first and ignore unknown values.
                       * postprocessor: Name of the postprocessor
                       * info_dict: The extracted info_dict

                       Progress hooks are guaranteed to be called at least twice
                       (with status "started" and "finished") if the processing is successful.
    merge_output_format: "/" separated list of extensions to use when merging formats.
    final_ext:         Expected final extension; used to detect when the file was
                       already downloaded and converted
    fixup:             Automatically correct known faults of the file.
                       - "never": do nothing
                       - "warn": only emit a warning
                       - "detect_or_warn": check whether we can do anything
                         about it, warn otherwise (default)
    source_address:    Client-side IP address to bind to.
    sleep_interval_requests: Number of seconds to sleep between requests
    sleep_interval:    Number of seconds to sleep before each download when
                       used alone, or a lower bound of a range for randomized
                       sleep before each download (minimum possible number
                       of seconds to sleep) when used along with
                       max_sleep_interval.
    max_sleep_interval: Upper bound of a range for randomized sleep before each
                       download (maximum possible number of seconds to sleep).
                       Must only be used along with sleep_interval.
                       Actual sleep time will be a random float from range
                       [sleep_interval; max_sleep_interval].
    sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
    listformats:       Print an overview of available video formats and exit.
    list_thumbnails:   Print a table of all thumbnails and exit.
    match_filter:      A function that gets called for every video with the signature
                       (info_dict, *, incomplete: bool) -> Optional[str]
                       For backward compatibility with youtube-dl, the signature
                       (info_dict) -> Optional[str] is also allowed.
                       - If it returns a message, the video is ignored.
                       - If it returns None, the video is downloaded.
                       - If it returns utils.NO_DEFAULT, the user is interactively
                         asked whether to download the video.
                       - Raise utils.DownloadCancelled(msg) to abort remaining
                         downloads when a video is rejected.
                       match_filter_func in utils.py is one example for this.
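                       A minimal sketch of a custom filter (illustrative only):
                           def skip_short_videos(info_dict, *, incomplete):
                               duration = info_dict.get('duration')
                               if duration and duration < 60:
                                   return 'The video is shorter than a minute'
                               return None  # None means "download it"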
    color:             A Dictionary with output stream names as keys
                       and their respective color policy as values.
                       Can also just be a single color policy,
                       in which case it applies to all outputs.
                       Valid stream names are 'stdout' and 'stderr'.
                       Valid color policies are one of 'always', 'auto', 'no_color' or 'never'.
    geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
                       HTTP header
    geo_bypass_country: Two-letter ISO 3166-2 country code that will be used for
                       explicit geographic restriction bypassing via faking
                       X-Forwarded-For HTTP header
    geo_bypass_ip_block: IP range in CIDR notation that will be used similarly to
                       geo_bypass_country
    external_downloader: A dictionary of protocol keys and the executable of the
                       external downloader to use for it. The allowed protocols
                       are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
                       Set the value to 'native' to use the native downloader
    compat_opts:       Compatibility options. See "Differences in default behavior".
                       The following options do not work when used through the API:
                       filename, abort-on-error, multistreams, no-live-chat, format-sort,
                       no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
                       Refer to __init__.py for their implementation
    progress_template: Dictionary of templates for progress outputs.
                       Allowed keys are 'download', 'postprocess',
                       'download-title' (console title) and 'postprocess-title'.
                       The template is mapped on a dictionary with keys 'progress' and 'info'
    retry_sleep_functions: Dictionary of functions that take the number of attempts
                       as argument and return the time to sleep in seconds.
                       Allowed keys are 'http', 'fragment', 'file_access'
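                       For illustration, exponential back-off for HTTP retries
                       could be expressed as (a sketch):
                           'retry_sleep_functions': {'http': lambda n: 2 ** n}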
    download_ranges:   A callback function that gets called for every video with
                       the signature (info_dict, ydl) -> Iterable[Section].
                       Only the returned sections will be downloaded.
                       Each Section is a dict with the following keys:
                       * start_time: Start time of the section in seconds
                       * end_time: End time of the section in seconds
                       * title: Section title (Optional)
                       * index: Section number (Optional)
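                       As a sketch, a callback that downloads only the first
                       30 seconds of every video (illustrative):
                           def first_30_seconds(info_dict, ydl):
                               return [{'start_time': 0, 'end_time': 30}]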
    force_keyframes_at_cuts: Re-encode the video when downloading ranges to get precise cuts
    noprogress:        Do not print the progress bar
    live_from_start:   Whether to download livestream videos from the start

    The following parameters are not used by YoutubeDL itself; they are used by
    the downloader (see yt_dlp/downloader/common.py):
    nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
    max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
    continuedl, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
    external_downloader_args, concurrent_fragment_downloads.

    The following options are used by the post processors:
    ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
                       to the binary or its containing directory.
    postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
                       and a list of additional command-line arguments for the
                       postprocessor/executable. The dict can also have "PP+EXE" keys
                       which are used when the given exe is used by the given PP.
                       Use 'default' as the name for arguments to be passed to all PP.
                       For compatibility with youtube-dl, a single list of args
                       can also be used

    The following options are used by the extractors:
    extractor_retries: Number of times to retry for known errors (default: 3)
    dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
    hls_split_discontinuity: Split HLS playlists into different formats at
                       discontinuities such as ad breaks (default: False)
    extractor_args:    A dictionary of arguments to be passed to the extractors.
                       See "EXTRACTOR ARGUMENTS" for details.
                       E.g. {'youtube': {'skip': ['dash', 'hls']}}
    mark_watched:      Mark videos watched (even with --simulate). Only for YouTube

    The following options are deprecated and may be removed in the future:

    break_on_reject:   Stop the download process when encountering a video that
                       has been filtered out.
                       - `raise DownloadCancelled(msg)` in match_filter instead
    force_generic_extractor: Force downloader to use the generic extractor
                       - Use allowed_extractors = ['generic', 'default']
    playliststart:     - Use playlist_items
                       Playlist item to start at.
    playlistend:       - Use playlist_items
                       Playlist item to end at.
    playlistreverse:   - Use playlist_items
                       Download playlist items in reverse order.
    forceurl:          - Use forceprint
                       Force printing the final URL.
    forcetitle:        - Use forceprint
                       Force printing the title.
    forceid:           - Use forceprint
                       Force printing the ID.
    forcethumbnail:    - Use forceprint
                       Force printing the thumbnail URL.
    forcedescription:  - Use forceprint
                       Force printing the description.
    forcefilename:     - Use forceprint
                       Force printing the final filename.
    forceduration:     - Use forceprint
                       Force printing the duration.
    allsubtitles:      - Use subtitleslangs = ['all']
                       Downloads all the subtitles of the video
                       (requires writesubtitles or writeautomaticsub)
    include_ads:       - Doesn't work
    call_home:         - Not implemented
                       Boolean, true iff we are allowed to contact the
                       yt-dlp servers for debugging.
    post_hooks:        - Register a custom postprocessor
                       A list of functions that get called as the final step
                       for each video file, after all postprocessors have been
                       called. The filename will be passed as the only argument.
    hls_prefer_native: - Use external_downloader = {'m3u8': 'native'} or {'m3u8': 'ffmpeg'}.
                       Use the native HLS downloader instead of ffmpeg/avconv
                       if True, otherwise use ffmpeg/avconv if False, otherwise
                       use downloader suggested by extractor if None.
    prefer_ffmpeg:     - avconv support is deprecated
                       If False, use avconv instead of ffmpeg if both are available,
                       otherwise prefer ffmpeg.
    youtube_include_dash_manifest: - Use extractor_args
                       If True (default), DASH manifests and related
                       data will be downloaded and processed by the extractor.
                       You can reduce network I/O by disabling it if you don't
                       care about DASH. (only for youtube)
    youtube_include_hls_manifest: - Use extractor_args
                       If True (default), HLS manifests and related
                       data will be downloaded and processed by the extractor.
                       You can reduce network I/O by disabling it if you don't
                       care about HLS. (only for youtube)
    no_color:          Same as `color='no_color'`
        'width', 'height', 'asr', 'audio_channels', 'fps',
        'tbr', 'abr', 'vbr', 'filesize', 'filesize_approx',
        'timestamp', 'release_timestamp',
        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
        'average_rating', 'comment_count', 'age_limit',
        'start_time', 'end_time',
        'chapter_number', 'season_number', 'episode_number',
        'track_number', 'disc_number', 'release_year',

        # NB: Keep in sync with the docstring of extractor/common.py
        'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note',
        'width', 'height', 'aspect_ratio', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr', 'audio_channels',
        'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx', 'rows', 'columns',
        'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start',
        'preference', 'language', 'language_preference', 'quality', 'source_preference',
        'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'hls_aes', 'downloader_options',
        'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'

    _format_selection_exts = {
        'audio': set(MEDIA_EXTENSIONS.common_audio),
        'video': set(MEDIA_EXTENSIONS.common_video + ('3gp', )),
        'storyboards': set(MEDIA_EXTENSIONS.storyboards),
    }
    def __init__(self, params=None, auto_init=True):
        """Create a YoutubeDL object with the given options.
        @param auto_init    Whether to load the default extractors and print header (if verbose).
                            Set to 'no_verbose_header' to not print the header
        """
        self._ies_instances = {}
        self._pps = {k: [] for k in POSTPROCESS_WHEN}
        self._printed_messages = set()
        self._first_webpage_request = True
        self._post_hooks = []
        self._progress_hooks = []
        self._postprocessor_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        self._playlist_level = 0
        self._playlist_urls = set()
        self.cache = Cache(self)

        stdout = sys.stderr if self.params.get('logtostderr') else sys.stdout
        self._out_files = Namespace(
            screen=sys.stderr if self.params.get('quiet') else stdout,
            console=None if compat_os_name == 'nt' else next(
                filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None)

            windows_enable_vt_mode()
        except Exception as e:
            self.write_debug(f'Failed to enable VT mode: {e}')

        if self.params.get('no_color'):
            if self.params.get('color') is not None:
                self.report_warning('Overwriting params from "color" with "no_color"')
            self.params['color'] = 'no_color'

        term_allow_color = os.environ.get('TERM', '').lower() != 'dumb'

        def process_color_policy(stream):
            stream_name = {sys.stdout: 'stdout', sys.stderr: 'stderr'}[stream]
            policy = traverse_obj(self.params, ('color', (stream_name, None), {str}), get_all=False)
            if policy in ('auto', None):
                return term_allow_color and supports_terminal_sequences(stream)
            assert policy in ('always', 'never', 'no_color')
            return {'always': True, 'never': False}.get(policy, policy)

        self._allow_colors = Namespace(**{
            name: process_color_policy(stream)
            for name, stream in self._out_files.items_ if name != 'console'

        # The code is left like this to be reused for future deprecations
        MIN_SUPPORTED, MIN_RECOMMENDED = (3, 7), (3, 7)
        current_version = sys.version_info[:2]
        if current_version < MIN_RECOMMENDED:
            msg = ('Support for Python version %d.%d has been deprecated. '
                   'See https://github.com/yt-dlp/yt-dlp/issues/3764 for more details.'
                   '\n You will no longer receive updates on this version')
            if current_version < MIN_SUPPORTED:
                msg = 'Python version %d.%d is no longer supported'
            self.deprecated_feature(
                f'{msg}! Please update to Python %d.%d or above' % (*current_version, *MIN_RECOMMENDED))

        if self.params.get('allow_unplayable_formats'):
                f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
                'This is a developer option intended for debugging. \n'
                ' If you experience any issues while using this option, '
                f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')

        if self.params.get('bidi_workaround', False):
                master, slave = pty.openpty()
                width = shutil.get_terminal_size().columns
                width_args = [] if width is None else ['-w', str(width)]
                sp_kwargs = {'stdin': subprocess.PIPE, 'stdout': slave, 'stderr': self._out_files.error}
                    self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
                    self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == errno.ENOENT:
                        'Could not find fribidi executable, ignoring --bidi-workaround. '
                        'Make sure that fribidi is an executable file in one of the directories in your $PATH.')
        self.params['compat_opts'] = set(self.params.get('compat_opts', ()))
        self.params['http_headers'] = HTTPHeaderDict(std_headers, self.params.get('http_headers'))
        self._request_director = self.build_request_director(
            sorted(_REQUEST_HANDLERS.values(), key=lambda rh: rh.RH_NAME.lower()))
        if auto_init and auto_init != 'no_verbose_header':
            self.print_debug_header()

        self.__header_cookies = []
        self._load_cookies(traverse_obj(self.params.get('http_headers'), 'cookie', casesense=False))  # compat

        def check_deprecated(param, option, suggestion):
            if self.params.get(param) is not None:
                self.report_warning(f'{option} is deprecated. Use {suggestion} instead')

        if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
        check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
        check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

        for msg in self.params.get('_warnings', []):
            self.report_warning(msg)
        for msg in self.params.get('_deprecation_warnings', []):
            self.deprecated_feature(msg)

        if 'list-formats' in self.params['compat_opts']:
            self.params['listformats_table'] = False

        if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
            # nooverwrites was unnecessarily changed to overwrites
            # in 0c3d0f51778b153f65c21906031c2e091fcfb641
            # This ensures compatibility with both keys
            self.params['overwrites'] = not self.params['nooverwrites']
        elif self.params.get('overwrites') is None:
            self.params.pop('overwrites', None)
            self.params['nooverwrites'] = not self.params['overwrites']

        if self.params.get('simulate') is None and any((
            self.params.get('list_thumbnails'),
            self.params.get('listformats'),
            self.params.get('listsubtitles'),
            self.params['simulate'] = 'list_only'

        self.params.setdefault('forceprint', {})
        self.params.setdefault('print_to_file', {})

        # Compatibility with older syntax
        if not isinstance(params['forceprint'], dict):
            self.params['forceprint'] = {'video': params['forceprint']}

        self.add_default_info_extractors()

        if (sys.platform != 'win32'
                and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not self.params.get('restrictfilenames', False)):
            # Unicode filesystem API will throw errors (#1474, #13027)
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        self._parse_outtmpl()

        # Creating format selector here allows us to catch syntax errors before the extraction
        self.format_selector = (
            self.params.get('format') if self.params.get('format') in (None, '-')
            else self.params['format'] if callable(self.params['format'])
            else self.build_format_selector(self.params['format']))

            'post_hooks': self.add_post_hook,
            'progress_hooks': self.add_progress_hook,
            'postprocessor_hooks': self.add_postprocessor_hook,
        for opt, fn in hooks.items():
            for ph in self.params.get(opt, []):

        for pp_def_raw in self.params.get('postprocessors', []):
            pp_def = dict(pp_def_raw)
            when = pp_def.pop('when', 'post_process')
            self.add_post_processor(
                get_postprocessor(pp_def.pop('key'))(self, **pp_def),

        def preload_download_archive(fn):
            """Preload the archive, if any is specified"""
            elif not is_path_like(fn):

            self.write_debug(f'Loading archive file {fn!r}')
                with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                    for line in archive_file:
                        archive.add(line.strip())
            except OSError as ioe:
                if ioe.errno != errno.ENOENT:

        self.archive = preload_download_archive(self.params.get('download_archive'))

    def warn_if_short_id(self, argv):
        # short YouTube ID starting with dash?
            i for i, a in enumerate(argv)
            if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
            + [a for i, a in enumerate(argv) if i not in idxs]
            + ['--'] + [argv[i] for i in idxs]
            'Long argument string detected. '
            'Use -- to separate parameters and URLs, like this:\n%s' %
            args_to_str(correct_argv))
    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        self._ies[ie_key] = ie
        if not isinstance(ie, type):
            self._ies_instances[ie_key] = ie
            ie.set_downloader(self)

    def get_info_extractor(self, ie_key):
        """
        Get an instance of an IE with name ie_key; it will try to get one from
        the _ies list, and if there is no instance it will create a new one and
        add it to the extractor list.
        """
        ie = self._ies_instances.get(ie_key)
            ie = get_info_extractor(ie_key)()
            self.add_info_extractor(ie)

    def add_default_info_extractors(self):
        """
        Add the InfoExtractors returned by gen_extractors to the end of the list
        """
        all_ies = {ie.IE_NAME.lower(): ie for ie in gen_extractor_classes()}
        all_ies['end'] = UnsupportedURLIE()
            ie_names = orderedSet_from_options(
                self.params.get('allowed_extractors', ['default']), {
                    'all': list(all_ies),
                    'default': [name for name, ie in all_ies.items() if ie._ENABLED],
        except re.error as e:
            raise ValueError(f'Wrong regex for allowed_extractors: {e.pattern}')
        for name in ie_names:
            self.add_info_extractor(all_ies[name])
        self.write_debug(f'Loaded {len(ie_names)} extractors')
    def add_post_processor(self, pp, when='post_process'):
        """Add a PostProcessor object to the end of the chain."""
        assert when in POSTPROCESS_WHEN, f'Invalid when={when}'
        self._pps[when].append(pp)
        pp.set_downloader(self)

    def add_post_hook(self, ph):
        """Add the post hook"""
        self._post_hooks.append(ph)

    def add_progress_hook(self, ph):
        """Add the download progress hook"""
        self._progress_hooks.append(ph)

    def add_postprocessor_hook(self, ph):
        """Add the postprocessing progress hook"""
        self._postprocessor_hooks.append(ph)
        for pps in self._pps.values():
                pp.add_progress_hook(ph)

    def _bidi_workaround(self, message):
        if not hasattr(self, '_output_channel'):

        assert hasattr(self, '_output_process')
        assert isinstance(message, str)
        line_count = message.count('\n') + 1
        self._output_process.stdin.write((message + '\n').encode())
        self._output_process.stdin.flush()
        res = ''.join(self._output_channel.readline().decode()
                      for _ in range(line_count))
        return res[:-len('\n')]

    def _write_string(self, message, out=None, only_once=False):
            if message in self._printed_messages:
            self._printed_messages.add(message)
        write_string(message, out=out, encoding=self.params.get('encoding'))

    def to_stdout(self, message, skip_eol=False, quiet=None):
        """Print message to stdout"""
        if quiet is not None:
            self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument quiet. '
                                     'Use "YoutubeDL.to_screen" instead')
        if skip_eol is not False:
            self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument skip_eol. '
                                     'Use "YoutubeDL.to_screen" instead')
        self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.out)

    def to_screen(self, message, skip_eol=False, quiet=None, only_once=False):
        """Print message to screen if not in quiet mode"""
        if self.params.get('logger'):
            self.params['logger'].debug(message)
        if (self.params.get('quiet') if quiet is None else quiet) and not self.params.get('verbose'):
            '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
            self._out_files.screen, only_once=only_once)

    def to_stderr(self, message, only_once=False):
        """Print message to stderr"""
        assert isinstance(message, str)
        if self.params.get('logger'):
            self.params['logger'].error(message)
            self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.error, only_once=only_once)

    def _send_console_code(self, code):
        if compat_os_name == 'nt' or not self._out_files.console:
        self._write_string(code, self._out_files.console)

    def to_console_title(self, message):
        if not self.params.get('consoletitle', False):
        message = remove_terminal_sequences(message)
        if compat_os_name == 'nt':
            if ctypes.windll.kernel32.GetConsoleWindow():
                # c_wchar_p() might not be necessary if `message` is
                # already of type unicode()
                ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
            self._send_console_code(f'\033]0;{message}\007')

    def save_console_title(self):
        if not self.params.get('consoletitle') or self.params.get('simulate'):
        self._send_console_code('\033[22;0t')  # Save the title on stack

    def restore_console_title(self):
        if not self.params.get('consoletitle') or self.params.get('simulate'):
        self._send_console_code('\033[23;0t')  # Restore the title from stack

        self.save_console_title()

    def save_cookies(self):
        if self.params.get('cookiefile') is not None:
            self.cookiejar.save(ignore_discard=True, ignore_expires=True)

    def __exit__(self, *args):
        self.restore_console_title()
        self._request_director.close()
    def trouble(self, message=None, tb=None, is_error=True):
        """Determine action to take when a download problem appears.

        Depending on whether the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        @param tb          If given, is additional traceback information
        @param is_error    Whether to raise error according to ignoreerrors
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if sys.exc_info()[0]:  # if .trouble has been called from an except block
                if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                    tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                tb += encode_compat_str(traceback.format_exc())
                tb_data = traceback.format_list(traceback.extract_stack())
                tb = ''.join(tb_data)

        if not self.params.get('ignoreerrors'):
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1
        EMPHASIS='light blue',
        BAD_FORMAT='light red',
        SUPPRESS='light black',

    def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
            original_text = text
            # handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711
            encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii'
            text = text.encode(encoding, 'ignore').decode(encoding)
            if fallback is not None and text != original_text:
        return format_text(text, f) if allow_colors is True else text if fallback is None else fallback

    def _format_out(self, *args, **kwargs):
        return self._format_text(self._out_files.out, self._allow_colors.out, *args, **kwargs)

    def _format_screen(self, *args, **kwargs):
        return self._format_text(self._out_files.screen, self._allow_colors.screen, *args, **kwargs)

    def _format_err(self, *args, **kwargs):
        return self._format_text(self._out_files.error, self._allow_colors.error, *args, **kwargs)
    def report_warning(self, message, only_once=False):
        '''
        Print the message to stderr; it will be prefixed with 'WARNING:'.
        If stderr is a tty file, the 'WARNING:' will be colored.
        '''
        if self.params.get('logger') is not None:
            self.params['logger'].warning(message)
        if self.params.get('no_warnings'):
            self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)

    def deprecation_warning(self, message, *, stacklevel=0):
        deprecation_warning(
            message, stacklevel=stacklevel + 1, printer=self.report_error, is_error=False)

    def deprecated_feature(self, message):
        if self.params.get('logger') is not None:
            self.params['logger'].warning(f'Deprecated Feature: {message}')
        self.to_stderr(f'{self._format_err("Deprecated Feature:", self.Styles.ERROR)} {message}', True)

    def report_error(self, message, *args, **kwargs):
        '''
        Do the same as trouble, but prefixes the message with 'ERROR:', colored
        in red if stderr is a tty file.
        '''
        self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)

    def write_debug(self, message, only_once=False):
        '''Log debug message or print message to stderr'''
        if not self.params.get('verbose', False):
        message = f'[debug] {message}'
        if self.params.get('logger'):
            self.params['logger'].debug(message)
            self.to_stderr(message, only_once)
    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
            self.to_screen('[download] %s has already been downloaded' % file_name)
        except UnicodeEncodeError:
            self.to_screen('[download] The file has already been downloaded')

    def report_file_delete(self, file_name):
        """Report that existing file will be deleted."""
            self.to_screen('Deleting existing file %s' % file_name)
        except UnicodeEncodeError:
            self.to_screen('Deleting existing file')

    def raise_no_formats(self, info, forced=False, *, msg=None):
        has_drm = info.get('_has_drm')
        ignored, expected = self.params.get('ignore_no_formats_error'), bool(msg)
        msg = msg or has_drm and 'This video is DRM protected' or 'No video formats found!'
        if forced or not ignored:
            raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
                                 expected=has_drm or ignored or expected)
            self.report_warning(msg)

    def parse_outtmpl(self):
        self.deprecation_warning('"YoutubeDL.parse_outtmpl" is deprecated and may be removed in a future version')
        self._parse_outtmpl()
        return self.params['outtmpl']

    def _parse_outtmpl(self):
        if self.params.get('restrictfilenames'):  # Remove spaces in the default template
            sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')

        outtmpl = self.params.setdefault('outtmpl', {})
        if not isinstance(outtmpl, dict):
            self.params['outtmpl'] = outtmpl = {'default': outtmpl}
        outtmpl.update({k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items() if outtmpl.get(k) is None})

    def get_output_path(self, dir_type='', filename=None):
        paths = self.params.get('paths', {})
        assert isinstance(paths, dict), '"paths" parameter must be a dictionary'
        path = os.path.join(
            expand_path(paths.get('home', '').strip()),
            expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
        return sanitize_path(path, force=self.params.get('windowsfilenames'))
    def _outtmpl_expandpath(outtmpl):
        # expand_path translates '%%' into '%' and '$$' into '$'
        # correspondingly. That is not what we want, since we need to keep
        # '%%' intact for the template dict substitution step. Work around
        # it with a boundary-like separator hack.
        sep = ''.join(random.choices(string.ascii_letters, k=32))
        outtmpl = outtmpl.replace('%%', f'%{sep}%').replace('$$', f'${sep}$')

        # outtmpl should be expand_path'ed before template dict substitution
        # because meta fields may contain env variables we don't want to
        # be expanded. E.g. for outtmpl "%(title)s.%(ext)s" and
        # title "Hello $PATH", we don't want `$PATH` to be expanded.
        return expand_path(outtmpl).replace(sep, '')

    def escape_outtmpl(outtmpl):
        ''' Escape any remaining strings like %s, %abc% etc. '''
            STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
            lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
, outtmpl
):
1138 ''' @return None or Exception object '''
1140 STR_FORMAT_RE_TMPL
.format('[^)]*', '[ljhqBUDS]'),
1141 lambda mobj
: f
'{mobj.group(0)[:-1]}s',
1142 cls
._outtmpl
_expandpath
(outtmpl
))
1144 cls
.escape_outtmpl(outtmpl
) % collections
.defaultdict(int)
1146 except ValueError as err
:
1150 def _copy_infodict(info_dict
):
1151 info_dict
= dict(info_dict
)
1152 info_dict
.pop('__postprocessors', None)
1153 info_dict
.pop('__pending_error', None)
1156 def prepare_outtmpl(self
, outtmpl
, info_dict
, sanitize
=False):
1157 """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
1158 @param sanitize Whether to sanitize the output as a filename.
1159 For backward compatibility, a function can also be passed
1162 info_dict
.setdefault('epoch', int(time
.time())) # keep epoch consistent once set
1164 info_dict
= self
._copy
_infodict
(info_dict
)
1165 info_dict
['duration_string'] = ( # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
1166 formatSeconds(info_dict
['duration'], '-' if sanitize
else ':')
1167 if info_dict
.get('duration', None) is not None
1169 info_dict
['autonumber'] = int(self
.params
.get('autonumber_start', 1) - 1 + self
._num
_downloads
)
1170 info_dict
['video_autonumber'] = self
._num
_videos
1171 if info_dict
.get('resolution') is None:
1172 info_dict
['resolution'] = self
.format_resolution(info_dict
, default
=None)
1174 # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
1175 # of %(field)s to %(field)0Nd for backward compatibility
1176 field_size_compat_map
= {
1177 'playlist_index': number_of_digits(info_dict
.get('__last_playlist_index') or 0),
1178 'playlist_autonumber': number_of_digits(info_dict
.get('n_entries') or 0),
1179 'autonumber': self
.params
.get('autonumber_size') or 5,
1183 EXTERNAL_FORMAT_RE
= re
.compile(STR_FORMAT_RE_TMPL
.format('[^)]*', f
'[{STR_FORMAT_TYPES}ljhqBUDS]'))
1188 # Field is of the form key1.key2...
1189 # where keys (except first) can be string, int, slice or "{field, ...}"
1190 FIELD_INNER_RE
= r
'(?:\w+|%(num)s|%(num)s?(?::%(num)s?){1,2})' % {'num': r'(?:-?\d+)'}
1191 FIELD_RE
= r
'\w*(?:\.(?:%(inner)s|{%(field)s(?:,%(field)s)*}))*' % {
1192 'inner': FIELD_INNER_RE
,
1193 'field': rf
'\w*(?:\.{FIELD_INNER_RE})*'
1195 MATH_FIELD_RE
= rf
'(?:{FIELD_RE}|-?{NUMBER_RE})'
1196 MATH_OPERATORS_RE
= r
'(?:%s)' % '|'.join(map(re
.escape
, MATH_FUNCTIONS
.keys()))
1197 INTERNAL_FORMAT_RE
= re
.compile(rf
'''(?xs)
1199 (?P<fields>{FIELD_RE})
1200 (?P<maths>(?:{MATH_OPERATORS_RE}{MATH_FIELD_RE})*)
1201 (?:>(?P<strf_format>.+?))?
1203 (?P<alternate>(?<!\\),[^|&)]+)?
1204 (?:&(?P<replacement>.*?))?
1205 (?:\|(?P<default>.*?))?
1208 def _traverse_infodict(fields
):
1209 fields
= [f
for x
in re
.split(r
'\.({.+?})\.?', fields
)
1210 for f
in ([x
] if x
.startswith('{') else x
.split('.'))]
1212 if fields
and not fields
[i
]:
1215 for i
, f
in enumerate(fields
):
1216 if not f
.startswith('{'):
1218 assert f
.endswith('}'), f
'No closing brace for {f} in {fields}'
1219 fields
[i
] = {k: k.split('.') for k in f[1:-1].split(',')}
1221 return traverse_obj(info_dict
, fields
, is_user_input
=True, traverse_string
=True)
1223 def get_value(mdict
):
1225 value
= _traverse_infodict(mdict
['fields'])
1228 value
= float_or_none(value
)
1229 if value
is not None:
1232 offset_key
= mdict
['maths']
1234 value
= float_or_none(value
)
1238 MATH_FIELD_RE
if operator
else MATH_OPERATORS_RE
,
1239 offset_key
).group(0)
1240 offset_key
= offset_key
[len(item
):]
1241 if operator
is None:
1242 operator
= MATH_FUNCTIONS
[item
]
1244 item
, multiplier
= (item
[1:], -1) if item
[0] == '-' else (item
, 1)
1245 offset
= float_or_none(item
)
1247 offset
= float_or_none(_traverse_infodict(item
))
1249 value
= operator(value
, multiplier
* offset
)
1250 except (TypeError, ZeroDivisionError):
1253 # Datetime formatting
1254 if mdict
['strf_format']:
1255 value
= strftime_or_none(value
, mdict
['strf_format'].replace('\\,', ','))
1257 # XXX: Workaround for https://github.com/yt-dlp/yt-dlp/issues/4485
1258 if sanitize
and value
== '':
1262 na
= self
.params
.get('outtmpl_na_placeholder', 'NA')
1264 def filename_sanitizer(key
, value
, restricted
=self
.params
.get('restrictfilenames')):
1265 return sanitize_filename(str(value
), restricted
=restricted
, is_id
=(
1266 bool(re
.search(r
'(^|[_.])id(\.|$)', key
))
1267 if 'filename-sanitization' in self
.params
['compat_opts']
1270 sanitizer
= sanitize
if callable(sanitize
) else filename_sanitizer
1271 sanitize
= bool(sanitize
)
1273 def _dumpjson_default(obj
):
1274 if isinstance(obj
, (set, LazyList
)):
1278 class _ReplacementFormatter(string
.Formatter
):
1279 def get_field(self
, field_name
, args
, kwargs
):
1280 if field_name
.isdigit():
1282 raise ValueError('Unsupported field')
1284 replacement_formatter
= _ReplacementFormatter()
1286 def create_key(outer_mobj
):
1287 if not outer_mobj
.group('has_key'):
1288 return outer_mobj
.group(0)
1289 key
= outer_mobj
.group('key')
1290 mobj
= re
.match(INTERNAL_FORMAT_RE
, key
)
1291 value
, replacement
, default
, last_field
= None, None, na
, ''
1293 mobj
= mobj
.groupdict()
1294 default
= mobj
['default'] if mobj
['default'] is not None else default
1295 value
= get_value(mobj
)
1296 last_field
, replacement
= mobj
['fields'], mobj
['replacement']
1297 if value
is None and mobj
['alternate']:
1298 mobj
= re
.match(INTERNAL_FORMAT_RE
, mobj
['remaining'][1:])
1302 fmt
= outer_mobj
.group('format')
1303 if fmt
== 's' and value
is not None and last_field
in field_size_compat_map
.keys():
1304 fmt
= f
'0{field_size_compat_map[last_field]:d}d'
1306 if None not in (value
, replacement
):
1308 value
= replacement_formatter
.format(replacement
, value
)
1310 value
, default
= None, na
1312 flags
= outer_mobj
.group('conversion') or ''
1313 str_fmt
= f
'{fmt[:-1]}s'
1315 value
, fmt
= default
, 's'
1316 elif fmt
[-1] == 'l': # list
1317 delim
= '\n' if '#' in flags
else ', '
1318 value
, fmt
= delim
.join(map(str, variadic(value
, allowed_types
=(str, bytes)))), str_fmt
1319 elif fmt
[-1] == 'j': # json
1320 value
, fmt
= json
.dumps(
1321 value
, default
=_dumpjson_default
,
1322 indent
=4 if '#' in flags
else None, ensure_ascii
='+' not in flags
), str_fmt
1323 elif fmt
[-1] == 'h': # html
1324 value
, fmt
= escapeHTML(str(value
)), str_fmt
1325 elif fmt
[-1] == 'q': # quoted
1326 value
= map(str, variadic(value
) if '#' in flags
else [value
])
1327 value
, fmt
= ' '.join(map(compat_shlex_quote
, value
)), str_fmt
1328 elif fmt
[-1] == 'B': # bytes
1329 value
= f
'%{str_fmt}'.encode() % str(value
).encode()
1330 value
, fmt
= value
.decode('utf-8', 'ignore'), 's'
1331 elif fmt
[-1] == 'U': # unicode normalized
1332 value
, fmt
= unicodedata
.normalize(
1333 # "+" = compatibility equivalence, "#" = NFD
1334 'NF%s%s' % ('K' if '+' in flags
else '', 'D' if '#' in flags
else 'C'),
1336 elif fmt
[-1] == 'D': # decimal suffix
1337 num_fmt
, fmt
= fmt
[:-1].replace('#', ''), 's'
1338 value
= format_decimal_suffix(value
, f
'%{num_fmt}f%s' if num_fmt
else '%d%s',
1339 factor
=1024 if '#' in flags
else 1000)
1340 elif fmt
[-1] == 'S': # filename sanitization
1341 value
, fmt
= filename_sanitizer(last_field
, value
, restricted
='#' in flags
), str_fmt
1342 elif fmt
[-1] == 'c':
1344 value
= str(value
)[0]
1347 elif fmt
[-1] not in 'rsa': # numeric
1348 value
= float_or_none(value
)
1350 value
, fmt
= default
, 's'
1353 # If value is an object, sanitize might convert it to a string
1354 # So we convert it to repr first
1356 value
, fmt
= repr(value
), str_fmt
1357 elif fmt
[-1] == 'a':
1358 value
, fmt
= ascii(value
), str_fmt
1359 if fmt
[-1] in 'csra':
1360 value
= sanitizer(last_field
, value
)
1362 key
= '%s\0%s' % (key
.replace('%', '%\0'), outer_mobj
.group('format'))
1363 TMPL_DICT
[key
] = value
1364 return '{prefix}%({key}){fmt}'.format(key
=key
, fmt
=fmt
, prefix
=outer_mobj
.group('prefix'))
1366 return EXTERNAL_FORMAT_RE
.sub(create_key
, outtmpl
), TMPL_DICT
1368 def evaluate_outtmpl(self
, outtmpl
, info_dict
, *args
, **kwargs
):
1369 outtmpl
, info_dict
= self
.prepare_outtmpl(outtmpl
, info_dict
, *args
, **kwargs
)
1370 return self
.escape_outtmpl(outtmpl
) % info_dict
1372 def _prepare_filename(self
, info_dict
, *, outtmpl
=None, tmpl_type
=None):
1373 assert None in (outtmpl
, tmpl_type
), 'outtmpl and tmpl_type are mutually exclusive'
1375 outtmpl
= self
.params
['outtmpl'].get(tmpl_type
or 'default', self
.params
['outtmpl']['default'])
1377 outtmpl
= self
._outtmpl
_expandpath
(outtmpl
)
1378 filename
= self
.evaluate_outtmpl(outtmpl
, info_dict
, True)
1382 if tmpl_type
in ('', 'temp'):
1383 final_ext
, ext
= self
.params
.get('final_ext'), info_dict
.get('ext')
1384 if final_ext
and ext
and final_ext
!= ext
and filename
.endswith(f
'.{final_ext}'):
1385 filename
= replace_extension(filename
, ext
, final_ext
)
1387 force_ext
= OUTTMPL_TYPES
[tmpl_type
]
1389 filename
= replace_extension(filename
, force_ext
, info_dict
.get('ext'))
1391 # https://github.com/blackjack4494/youtube-dlc/issues/85
1392 trim_file_name
= self
.params
.get('trim_file_name', False)
1394 no_ext
, *ext
= filename
.rsplit('.', 2)
1395 filename
= join_nonempty(no_ext
[:trim_file_name
], *ext
, delim
='.')
1398 except ValueError as err
:
1399 self
.report_error('Error in output template: ' + str(err
) + ' (encoding: ' + repr(preferredencoding()) + ')')
    def prepare_filename(self, info_dict, dir_type='', *, outtmpl=None, warn=False):
        """Generate the output filename"""
            assert not dir_type, 'outtmpl and dir_type are mutually exclusive'
        filename = self._prepare_filename(info_dict, tmpl_type=dir_type, outtmpl=outtmpl)
        if not filename and dir_type not in ('', 'temp'):

            if not self.params.get('paths'):
            elif filename == '-':
                self.report_warning('--paths is ignored when outputting to stdout', only_once=True)
            elif os.path.isabs(filename):
                self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
        if filename == '-' or not filename:

        return self.get_output_path(dir_type, filename)
    def _match_entry(self, info_dict, incomplete=False, silent=False):
        """Returns None if the file should be downloaded"""
        _type = 'video' if 'playlist-match-filter' in self.params['compat_opts'] else info_dict.get('_type', 'video')
        assert incomplete or _type == 'video', 'Only video result can be considered complete'

        video_title = info_dict.get('title', info_dict.get('id', 'entry'))

        def check_filter():
            if _type in ('playlist', 'multi_video'):
                return
            elif _type in ('url', 'url_transparent') and not try_call(
                    lambda: self.get_info_extractor(info_dict['ie_key']).is_single_video(info_dict['url'])):
                return

            if 'title' in info_dict:
                # This can happen when we're just evaluating the playlist
                title = info_dict['title']
                matchtitle = self.params.get('matchtitle', False)
                if matchtitle:
                    if not re.search(matchtitle, title, re.IGNORECASE):
                        return '"' + title + '" title did not match pattern "' + matchtitle + '"'
                rejecttitle = self.params.get('rejecttitle', False)
                if rejecttitle:
                    if re.search(rejecttitle, title, re.IGNORECASE):
                        return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'

            date = info_dict.get('upload_date')
            if date is not None:
                dateRange = self.params.get('daterange', DateRange())
                if date not in dateRange:
                    return f'{date_from_str(date).isoformat()} upload date is not in range {dateRange}'
            view_count = info_dict.get('view_count')
            if view_count is not None:
                min_views = self.params.get('min_views')
                if min_views is not None and view_count < min_views:
                    return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
                max_views = self.params.get('max_views')
                if max_views is not None and view_count > max_views:
                    return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
            if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
                return 'Skipping "%s" because it is age restricted' % video_title

            match_filter = self.params.get('match_filter')
            if match_filter is None:
                return None

            cancelled = None
            try:
                try:
                    ret = match_filter(info_dict, incomplete=incomplete)
                except TypeError:
                    # For backward compatibility
                    ret = None if incomplete else match_filter(info_dict)
            except DownloadCancelled as err:
                if err.msg is not NO_DEFAULT:
                    raise
                ret, cancelled = err.msg, err

            if ret is NO_DEFAULT:
                while True:
                    filename = self._format_screen(self.prepare_filename(info_dict), self.Styles.FILENAME)
                    reply = input(self._format_screen(
                        f'Download "{filename}"? (Y/n): ', self.Styles.EMPHASIS)).lower().strip()
                    if reply in {'y', ''}:
                        return None
                    elif reply == 'n':
                        if cancelled:
                            raise type(cancelled)(f'Skipping {video_title}')
                        return f'Skipping {video_title}'
            return ret

        if self.in_download_archive(info_dict):
            reason = ''.join((
                format_field(info_dict, 'id', f'{self._format_screen("%s", self.Styles.ID)}: '),
                format_field(info_dict, 'title', f'{self._format_screen("%s", self.Styles.EMPHASIS)} '),
                'has already been recorded in the archive'))
            break_opt, break_err = 'break_on_existing', ExistingVideoReached
        else:
            try:
                reason = check_filter()
            except DownloadCancelled as e:
                reason, break_opt, break_err = e.msg, 'match_filter', type(e)
            else:
                break_opt, break_err = 'break_on_reject', RejectedVideoReached
        if reason is not None:
            if not silent:
                self.to_screen('[download] ' + reason)
            if self.params.get(break_opt, False):
                raise break_err()
        return reason
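    # Sketch of a 'match_filter' callable as consumed by _match_entry() above
    # (illustrative; the function name and threshold are made up). Returning None
    # accepts the entry, returning a string skips it with that reason, and
    # returning NO_DEFAULT triggers the interactive prompt:
    #
    #   def longer_than_a_minute(info, *, incomplete=False):
    #       duration = info.get('duration')
    #       if incomplete or duration is None:
    #           return None  # not enough metadata yet - do not reject
    #       return None if duration > 60 else 'Skipping short video'
    #
    #   YoutubeDL({'match_filter': longer_than_a_minute})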
    @staticmethod
    def add_extra_info(info_dict, extra_info):
        '''Set the keys from extra_info in info dict if they are missing'''
        for key, value in extra_info.items():
            info_dict.setdefault(key, value)
    def extract_info(self, url, download=True, ie_key=None, extra_info=None,
                     process=True, force_generic_extractor=False):
        """
        Extract and return the information dictionary of the URL

        @param url          URL to extract

        Keyword arguments:
        @param download     Whether to download videos
        @param process      Whether to resolve all unresolved references (URLs, playlist items).
                            Must be True for download to work
        @param ie_key       Use only the extractor with this key

        @param extra_info   Dictionary containing the extra values to add to the info (For internal use only)
        @param force_generic_extractor  Force using the generic extractor (Deprecated; use ie_key='Generic')
        """
        if extra_info is None:
            extra_info = {}

        if not ie_key and force_generic_extractor:
            ie_key = 'Generic'

        if ie_key:
            ies = {ie_key: self._ies[ie_key]} if ie_key in self._ies else {}
        else:
            ies = self._ies

        for key, ie in ies.items():
            if not ie.suitable(url):
                continue

            if not ie.working():
                self.report_warning('The program functionality for this site has been marked as broken, '
                                    'and will probably not work.')

            temp_id = ie.get_temp_id(url)
            if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': key}):
                self.to_screen(f'[download] {self._format_screen(temp_id, self.Styles.ID)}: '
                               'has already been recorded in the archive')
                if self.params.get('break_on_existing', False):
                    raise ExistingVideoReached()
                break
            return self.__extract_info(url, self.get_info_extractor(key), download, extra_info, process)
        else:
            extractors_restricted = self.params.get('allowed_extractors') not in (None, ['default'])
            self.report_error(f'No suitable extractor{format_field(ie_key, None, " (%s)")} found for URL {url}',
                              tb=False if extractors_restricted else None)
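    # Typical call into extract_info() (sketch; the URL is a placeholder):
    #
    #   with YoutubeDL({'quiet': True}) as ydl:
    #       info = ydl.extract_info('https://example.com/watch?v=xyz', download=False)
    #       print(info.get('title'))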
    def _handle_extraction_exceptions(func):
        @functools.wraps(func)
        def wrapper(self, *args, **kwargs):
            while True:
                try:
                    return func(self, *args, **kwargs)
                except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
                    raise
                except ReExtractInfo as e:
                    if e.expected:
                        self.to_screen(f'{e}; Re-extracting data')
                    else:
                        self.to_stderr('\r')
                        self.report_warning(f'{e}; Re-extracting data')
                    continue
                except GeoRestrictedError as e:
                    msg = e.msg
                    if e.countries:
                        msg += '\nThis video is available in %s.' % ', '.join(
                            map(ISO3166Utils.short2full, e.countries))
                    msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
                    self.report_error(msg)
                except ExtractorError as e:  # An error we somewhat expected
                    self.report_error(str(e), e.format_traceback())
                except Exception as e:
                    if self.params.get('ignoreerrors'):
                        self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
                    else:
                        raise
                break
        return wrapper
    def _wait_for_video(self, ie_result={}):
        if (not self.params.get('wait_for_video')
                or ie_result.get('_type', 'video') != 'video'
                or ie_result.get('formats') or ie_result.get('url')):
            return

        format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
        last_msg = ''

        def progress(msg):
            nonlocal last_msg
            full_msg = f'{msg}\n'
            if not self.params.get('noprogress'):
                full_msg = msg + ' ' * (len(last_msg) - len(msg)) + '\r'
            elif last_msg:
                return
            self.to_screen(full_msg, skip_eol=True)
            last_msg = msg

        min_wait, max_wait = self.params.get('wait_for_video')
        diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
        if diff is None and ie_result.get('live_status') == 'is_upcoming':
            diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0)
            self.report_warning('Release time of video is not known')
        elif ie_result and (diff or 0) <= 0:
            self.report_warning('Video should already be available according to extracted info')
        diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
        self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')

        wait_till = time.time() + diff
        try:
            while True:
                diff = wait_till - time.time()
                if diff <= 0:
                    progress('')
                    raise ReExtractInfo('[wait] Wait period ended', expected=True)
                progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
                time.sleep(1)
        except KeyboardInterrupt:
            progress('')
            raise ReExtractInfo('[wait] Interrupted by user', expected=True)
        except BaseException as e:
            if not isinstance(e, ReExtractInfo):
                self.to_screen('')
            raise
    def _load_cookies(self, data, *, from_headers=True):
        """Loads cookies from a `Cookie` header

        This tries to work around the security vulnerability of passing cookies to every domain.
        See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj
        The unscoped cookies are saved for later to be stored in the jar with a limited scope.

        @param data         The Cookie header as string to load the cookies from
        @param from_headers If `False`, allows Set-Cookie syntax in the cookie string (at least a domain will be required)
        """
        for cookie in LenientSimpleCookie(data).values():
            if from_headers and any(cookie.values()):
                raise ValueError('Invalid syntax in Cookie Header')

            domain = cookie.get('domain') or ''
            expiry = cookie.get('expires')
            if expiry == '':  # 0 is valid
                expiry = None
            prepared_cookie = http.cookiejar.Cookie(
                cookie.get('version') or 0, cookie.key, cookie.value, None, False,
                domain, True, True, cookie.get('path') or '', bool(cookie.get('path')),
                cookie.get('secure') or False, expiry, False, None, None, {})

            if domain:
                self.cookiejar.set_cookie(prepared_cookie)
            elif from_headers:
                self.deprecated_feature(
                    'Passing cookies as a header is a potential security risk; '
                    'they will be scoped to the domain of the downloaded urls. '
                    'Please consider loading cookies from a file or browser instead.')
                self.__header_cookies.append(prepared_cookie)
            else:
                self.report_error('Unscoped cookies are not allowed; please specify some sort of scoping',
                                  tb=False, is_error=False)
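    # Sketch of how header cookies reach this path (the domain is a placeholder):
    # a 'Cookie' entry in the 'http_headers' option is parsed by _load_cookies()
    # and kept unscoped until _apply_header_cookies() pins it to the host of the
    # URL actually being downloaded.
    #
    #   YoutubeDL({'http_headers': {'Cookie': 'session=abc123'}})
    #   # 'session' is later limited to '.example.com' once an example.com URL is processed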
    def _apply_header_cookies(self, url):
        """Applies stray header cookies to the provided url

        This loads header cookies and scopes them to the domain provided in `url`.
        While this is not ideal, it helps reduce the risk of them being sent
        to an unintended destination while mostly maintaining compatibility.
        """
        parsed = urllib.parse.urlparse(url)
        if not parsed.hostname:
            return

        for cookie in map(copy.copy, self.__header_cookies):
            cookie.domain = f'.{parsed.hostname}'
            self.cookiejar.set_cookie(cookie)
    @_handle_extraction_exceptions
    def __extract_info(self, url, ie, download, extra_info, process):
        self._apply_header_cookies(url)

        try:
            ie_result = ie.extract(url)
        except UserNotLive as e:
            if process:
                if self.params.get('wait_for_video'):
                    self.report_warning(e)
                    self._wait_for_video()
            raise
        if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
            self.report_warning(f'Extractor {ie.IE_NAME} returned nothing{bug_reports_message()}')
            return
        if isinstance(ie_result, list):
            # Backwards compatibility: old IE result format
            ie_result = {
                '_type': 'compat_list',
                'entries': ie_result,
            }
        if extra_info.get('original_url'):
            ie_result.setdefault('original_url', extra_info['original_url'])
        self.add_default_extra_info(ie_result, ie, url)
        if process:
            self._wait_for_video(ie_result)
            return self.process_ie_result(ie_result, download, extra_info)
        else:
            return ie_result
    def add_default_extra_info(self, ie_result, ie, url):
        if url is not None:
            self.add_extra_info(ie_result, {
                'webpage_url': url,
                'original_url': url,
            })
        webpage_url = ie_result.get('webpage_url')
        if webpage_url:
            self.add_extra_info(ie_result, {
                'webpage_url_basename': url_basename(webpage_url),
                'webpage_url_domain': get_domain(webpage_url),
            })
        if ie is not None:
            self.add_extra_info(ie_result, {
                'extractor': ie.IE_NAME,
                'extractor_key': ie.ie_key(),
            })
    def process_ie_result(self, ie_result, download=True, extra_info=None):
        """
        Take the result of the ie (may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download'.
        Returns the resolved ie_result.
        """
        if extra_info is None:
            extra_info = {}
        result_type = ie_result.get('_type', 'video')

        if result_type in ('url', 'url_transparent'):
            ie_result['url'] = sanitize_url(
                ie_result['url'], scheme='http' if self.params.get('prefer_insecure') else 'https')
            if ie_result.get('original_url') and not extra_info.get('original_url'):
                extra_info = {'original_url': ie_result['original_url'], **extra_info}

            extract_flat = self.params.get('extract_flat', False)
            if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
                    or extract_flat is True):
                info_copy = ie_result.copy()
                ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
                if ie and not ie_result.get('id'):
                    info_copy['id'] = ie.get_temp_id(ie_result['url'])
                self.add_default_extra_info(info_copy, ie, ie_result['url'])
                self.add_extra_info(info_copy, extra_info)
                info_copy, _ = self.pre_process(info_copy)
                self._fill_common_fields(info_copy, False)
                self.__forced_printings(info_copy)
                self._raise_pending_errors(info_copy)
                if self.params.get('force_write_download_archive', False):
                    self.record_download_archive(info_copy)
                return ie_result

        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            ie_result = self.process_video_result(ie_result, download=download)
            self._raise_pending_errors(ie_result)
            additional_urls = (ie_result or {}).get('additional_urls')
            if additional_urls:
                # TODO: Improve MetadataParserPP to allow setting a list
                if isinstance(additional_urls, str):
                    additional_urls = [additional_urls]
                self.to_screen(
                    '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
                self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
                ie_result['additional_entries'] = [
                    self.extract_info(
                        url, download, extra_info=extra_info,
                        force_generic_extractor=self.params.get('force_generic_extractor'))
                    for url in additional_urls
                ]
            return ie_result
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            return self.extract_info(
                ie_result['url'], download,
                ie_key=ie_result.get('ie_key'),
                extra_info=extra_info)
        elif result_type == 'url_transparent':
            # Use the information from the embedding page
            info = self.extract_info(
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)

            # extract_info may return None when ignoreerrors is enabled and
            # extraction failed with an error, don't crash and return early
            # in this case
            if not info:
                return info

            exempted_fields = {'_type', 'url', 'ie_key'}
            if not ie_result.get('section_end') and ie_result.get('section_start') is None:
                # For video clips, the id etc of the clip extractor should be used
                exempted_fields |= {'id', 'extractor', 'extractor_key'}

            new_result = info.copy()
            new_result.update(filter_dict(ie_result, lambda k, v: v is not None and k not in exempted_fields))

            # Extracted info may not be a video result (i.e.
            # info.get('_type', 'video') != video) but rather an url or
            # url_transparent. In such cases outer metadata (from ie_result)
            # should be propagated to inner one (info). For this to happen
            # _type of info should be overridden with url_transparent. This
            # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
            if new_result.get('_type') == 'url':
                new_result['_type'] = 'url_transparent'

            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type in ('playlist', 'multi_video'):
            # Protect from infinite recursion due to recursively nested playlists
            # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
            webpage_url = ie_result.get('webpage_url')  # Playlists may not have webpage_url
            if webpage_url and webpage_url in self._playlist_urls:
                self.to_screen(
                    '[download] Skipping already downloaded playlist: %s'
                    % (ie_result.get('title') or ie_result.get('id')))
                return

            self._playlist_level += 1
            self._playlist_urls.add(webpage_url)
            self._fill_common_fields(ie_result, False)
            self._sanitize_thumbnails(ie_result)
            try:
                return self.__process_playlist(ie_result, download)
            finally:
                self._playlist_level -= 1
                if not self._playlist_level:
                    self._playlist_urls.clear()
        elif result_type == 'compat_list':
            self.report_warning(
                'Extractor %s returned a compat_list result. '
                'It needs to be updated.' % ie_result.get('extractor'))

            def _fixup(r):
                self.add_extra_info(r, {
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'webpage_url_domain': get_domain(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                })
                return r
            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
            ]
            return ie_result
        else:
            raise Exception('Invalid result type: %s' % result_type)
    def _ensure_dir_exists(self, path):
        return make_dir(path, self.report_error)
    @staticmethod
    def _playlist_infodict(ie_result, strict=False, **kwargs):
        info = {
            'playlist_count': ie_result.get('playlist_count'),
            'playlist': ie_result.get('title') or ie_result.get('id'),
            'playlist_id': ie_result.get('id'),
            'playlist_title': ie_result.get('title'),
            'playlist_uploader': ie_result.get('uploader'),
            'playlist_uploader_id': ie_result.get('uploader_id'),
            **kwargs,
        }
        if strict:
            return info
        if ie_result.get('webpage_url'):
            info.update({
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'webpage_url_domain': get_domain(ie_result['webpage_url']),
            })
        return {
            **info,
            'playlist_index': 0,
            '__last_playlist_index': max(ie_result.get('requested_entries') or (0, 0)),
            'extractor': ie_result['extractor'],
            'extractor_key': ie_result['extractor_key'],
        }
    def __process_playlist(self, ie_result, download):
        """Process each entry in the playlist"""
        assert ie_result['_type'] in ('playlist', 'multi_video')

        common_info = self._playlist_infodict(ie_result, strict=True)
        title = common_info.get('playlist') or '<Untitled>'
        if self._match_entry(common_info, incomplete=True) is not None:
            return
        self.to_screen(f'[download] Downloading {ie_result["_type"]}: {title}')

        all_entries = PlaylistEntries(self, ie_result)
        entries = orderedSet(all_entries.get_requested_items(), lazy=True)

        lazy = self.params.get('lazy_playlist')
        if lazy:
            resolved_entries, n_entries = [], 'N/A'
            ie_result['requested_entries'], ie_result['entries'] = None, None
        else:
            entries = resolved_entries = list(entries)
            n_entries = len(resolved_entries)
            ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*resolved_entries)) or ([], [])
            if not ie_result.get('playlist_count'):
                # Better to do this after potentially exhausting entries
                ie_result['playlist_count'] = all_entries.get_full_count()

        extra = self._playlist_infodict(ie_result, n_entries=int_or_none(n_entries))
        ie_copy = collections.ChainMap(ie_result, extra)

        _infojson_written = False
        write_playlist_files = self.params.get('allow_playlist_files', True)
        if write_playlist_files and self.params.get('list_thumbnails'):
            self.list_thumbnails(ie_result)
        if write_playlist_files and not self.params.get('simulate'):
            _infojson_written = self._write_info_json(
                'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
            if _infojson_written is None:
                return
            if self._write_description('playlist', ie_result,
                                       self.prepare_filename(ie_copy, 'pl_description')) is None:
                return
            # TODO: This should be passed to ThumbnailsConvertor if necessary
            self._write_thumbnails('playlist', ie_result, self.prepare_filename(ie_copy, 'pl_thumbnail'))

        if lazy:
            if self.params.get('playlistreverse') or self.params.get('playlistrandom'):
                self.report_warning('playlistreverse and playlistrandom are not supported with lazy_playlist', only_once=True)
        elif self.params.get('playlistreverse'):
            entries.reverse()
        elif self.params.get('playlistrandom'):
            random.shuffle(entries)

        self.to_screen(f'[{ie_result["extractor"]}] Playlist {title}: Downloading {n_entries} items'
                       f'{format_field(ie_result, "playlist_count", " of %s")}')

        keep_resolved_entries = self.params.get('extract_flat') != 'discard'
        if self.params.get('extract_flat') == 'discard_in_playlist':
            keep_resolved_entries = ie_result['_type'] != 'playlist'
        if keep_resolved_entries:
            self.write_debug('The information of all playlist entries will be held in memory')

        failures = 0
        max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
        for i, (playlist_index, entry) in enumerate(entries):
            if lazy:
                resolved_entries.append((playlist_index, entry))
            if not entry:
                continue

            entry['__x_forwarded_for_ip'] = ie_result.get('__x_forwarded_for_ip')
            if not lazy and 'playlist-index' in self.params['compat_opts']:
                playlist_index = ie_result['requested_entries'][i]

            entry_copy = collections.ChainMap(entry, {
                **common_info,
                'n_entries': int_or_none(n_entries),
                'playlist_index': playlist_index,
                'playlist_autonumber': i + 1,
            })

            if self._match_entry(entry_copy, incomplete=True) is not None:
                # For compatibility with youtube-dl. See https://github.com/yt-dlp/yt-dlp/issues/4369
                resolved_entries[i] = (playlist_index, NO_DEFAULT)
                continue

            self.to_screen('[download] Downloading item %s of %s' % (
                self._format_screen(i + 1, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))

            entry_result = self.__process_iterable_entry(entry, download, collections.ChainMap({
                'playlist_index': playlist_index,
                'playlist_autonumber': i + 1,
            }, extra))
            if not entry_result:
                failures += 1
            if failures >= max_failures:
                self.report_error(
                    f'Skipping the remaining entries in playlist "{title}" since {failures} items failed extraction')
                break
            if keep_resolved_entries:
                resolved_entries[i] = (playlist_index, entry_result)

        # Update with processed data
        ie_result['entries'] = [e for _, e in resolved_entries if e is not NO_DEFAULT]
        ie_result['requested_entries'] = [i for i, e in resolved_entries if e is not NO_DEFAULT]
        if ie_result['requested_entries'] == try_call(lambda: list(range(1, ie_result['playlist_count'] + 1))):
            # Do not set for full playlist
            ie_result.pop('requested_entries')

        # Write the updated info to json
        if _infojson_written is True and self._write_info_json(
                'updated playlist', ie_result,
                self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
            return

        ie_result = self.run_all_pps('playlist', ie_result)
        self.to_screen(f'[download] Finished downloading playlist: {title}')
        return ie_result
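    # Playlist-handling options that feed into __process_playlist() (sketch with
    # arbitrary example values):
    #
    #   YoutubeDL({
    #       'playlist_items': '1:5',            # which entries to resolve
    #       'lazy_playlist': True,              # process entries as they arrive
    #       'skip_playlist_after_errors': 3,    # max_failures used above
    #   })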
    @_handle_extraction_exceptions
    def __process_iterable_entry(self, entry, download, extra_info):
        return self.process_ie_result(
            entry, download=download, extra_info=extra_info)
    def _build_format_filter(self, filter_spec):
        " Returns a function to filter the formats according to the filter_spec "

        OPERATORS = {
            '<': operator.lt,
            '<=': operator.le,
            '>': operator.gt,
            '>=': operator.ge,
            '=': operator.eq,
            '!=': operator.ne,
        }
        operator_rex = re.compile(r'''(?x)\s*
            (?P<key>[\w.-]+)\s*
            (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
            (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
            ''' % '|'.join(map(re.escape, OPERATORS.keys())))
        m = operator_rex.fullmatch(filter_spec)
        if m:
            try:
                comparison_value = int(m.group('value'))
            except ValueError:
                comparison_value = parse_filesize(m.group('value'))
                if comparison_value is None:
                    comparison_value = parse_filesize(m.group('value') + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid value %r in format specification %r' % (
                            m.group('value'), filter_spec))
            op = OPERATORS[m.group('op')]

        if not m:
            STR_OPERATORS = {
                '=': operator.eq,
                '^=': lambda attr, value: attr.startswith(value),
                '$=': lambda attr, value: attr.endswith(value),
                '*=': lambda attr, value: value in attr,
                '~=': lambda attr, value: value.search(attr) is not None
            }
            str_operator_rex = re.compile(r'''(?x)\s*
                (?P<key>[a-zA-Z0-9._-]+)\s*
                (?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?
                (?P<quote>["'])?
                (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
                (?(quote)(?P=quote))\s*
                ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
            m = str_operator_rex.fullmatch(filter_spec)
            if m:
                if m.group('op') == '~=':
                    comparison_value = re.compile(m.group('value'))
                else:
                    comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value'))
                str_op = STR_OPERATORS[m.group('op')]
                if m.group('negation'):
                    op = lambda attr, value: not str_op(attr, value)
                else:
                    op = str_op

        if not m:
            raise SyntaxError('Invalid filter specification %r' % filter_spec)

        def _filter(f):
            actual_value = f.get(m.group('key'))
            if actual_value is None:
                return m.group('none_inclusive')
            return op(actual_value, comparison_value)
        return _filter
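    # Filter-spec sketch for _build_format_filter() (the values are examples):
    # numeric comparisons (<, <=, >, >=, =, !=) and string operators
    # (=, ^=, $=, *=, ~=, optionally negated with '!') are supported, and a '?'
    # after the operator keeps formats that lack the field entirely.
    #
    #   accept = self._build_format_filter('height<=?720')
    #   accept({'height': 480})   # -> True
    #   accept({})                # -> '?' (truthy: field missing but allowed)
    #   accept({'height': 1080})  # -> False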
    def _check_formats(self, formats):
        for f in formats:
            self.to_screen('[info] Testing format %s' % f['format_id'])
            path = self.get_output_path('temp')
            if not self._ensure_dir_exists(f'{path}/'):
                continue
            temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
            temp_file.close()
            try:
                success, _ = self.dl(temp_file.name, f, test=True)
            except (DownloadError, OSError, ValueError) + network_exceptions:
                success = False
            finally:
                if os.path.exists(temp_file.name):
                    try:
                        os.remove(temp_file.name)
                    except OSError:
                        self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
            if success:
                yield f
            else:
                self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
    def _default_format_spec(self, info_dict, download=True):

        def can_merge():
            merger = FFmpegMergerPP(self)
            return merger.available and merger.can_merge()

        prefer_best = (
            not self.params.get('simulate')
            and download
            and (
                not can_merge()
                or info_dict.get('is_live') and not self.params.get('live_from_start')
                or self.params['outtmpl']['default'] == '-'))
        compat = (
            prefer_best
            or self.params.get('allow_multiple_audio_streams', False)
            or 'format-spec' in self.params['compat_opts'])

        return (
            'best/bestvideo+bestaudio' if prefer_best
            else 'bestvideo*+bestaudio/best' if not compat
            else 'bestvideo+bestaudio/best')
    def build_format_selector(self, format_spec):
        def syntax_error(note, start):
            message = (
                'Invalid format specification: '
                '{}\n\t{}\n\t{}^'.format(note, format_spec, ' ' * start[1]))
            return SyntaxError(message)

        PICKFIRST = 'PICKFIRST'
        MERGE = 'MERGE'
        SINGLE = 'SINGLE'
        GROUP = 'GROUP'
        FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

        allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
                                  'video': self.params.get('allow_multiple_video_streams', False)}

        def _parse_filter(tokens):
            filter_parts = []
            for type, string_, start, _, _ in tokens:
                if type == tokenize.OP and string_ == ']':
                    return ''.join(filter_parts)
                else:
                    filter_parts.append(string_)

        def _remove_unused_ops(tokens):
            # Remove operators that we don't use and join them with the surrounding strings.
            # E.g. 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
            ALLOWED_OPS = ('/', '+', ',', '(', ')')
            last_string, last_start, last_end, last_line = None, None, None, None
            for type, string_, start, end, line in tokens:
                if type == tokenize.OP and string_ == '[':
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string_, start, end, line
                    # everything inside brackets will be handled by _parse_filter
                    for type, string_, start, end, line in tokens:
                        yield type, string_, start, end, line
                        if type == tokenize.OP and string_ == ']':
                            break
                elif type == tokenize.OP and string_ in ALLOWED_OPS:
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string_, start, end, line
                elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                    if not last_string:
                        last_string = string_
                        last_start = start
                        last_end = end
                    else:
                        last_string += string_
            if last_string:
                yield tokenize.NAME, last_string, last_start, last_end, last_line

        def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
            selectors = []
            current_selector = None
            for type, string_, start, _, _ in tokens:
                # ENCODING is only defined in python 3.x
                if type == getattr(tokenize, 'ENCODING', None):
                    continue
                elif type in [tokenize.NAME, tokenize.NUMBER]:
                    current_selector = FormatSelector(SINGLE, string_, [])
                elif type == tokenize.OP:
                    if string_ == ')':
                        if not inside_group:
                            # ')' will be handled by the parentheses group
                            tokens.restore_last_token()
                        break
                    elif inside_merge and string_ in ['/', ',']:
                        tokens.restore_last_token()
                        break
                    elif inside_choice and string_ == ',':
                        tokens.restore_last_token()
                        break
                    elif string_ == ',':
                        if not current_selector:
                            raise syntax_error('"," must follow a format selector', start)
                        selectors.append(current_selector)
                        current_selector = None
                    elif string_ == '/':
                        if not current_selector:
                            raise syntax_error('"/" must follow a format selector', start)
                        first_choice = current_selector
                        second_choice = _parse_format_selection(tokens, inside_choice=True)
                        current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                    elif string_ == '[':
                        if not current_selector:
                            current_selector = FormatSelector(SINGLE, 'best', [])
                        format_filter = _parse_filter(tokens)
                        current_selector.filters.append(format_filter)
                    elif string_ == '(':
                        if current_selector:
                            raise syntax_error('Unexpected "("', start)
                        group = _parse_format_selection(tokens, inside_group=True)
                        current_selector = FormatSelector(GROUP, group, [])
                    elif string_ == '+':
                        if not current_selector:
                            raise syntax_error('Unexpected "+"', start)
                        selector_1 = current_selector
                        selector_2 = _parse_format_selection(tokens, inside_merge=True)
                        if not selector_2:
                            raise syntax_error('Expected a selector', start)
                        current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                    else:
                        raise syntax_error(f'Operator not recognized: "{string_}"', start)
                elif type == tokenize.ENDMARKER:
                    break
            if current_selector:
                selectors.append(current_selector)
            return selectors

        def _merge(formats_pair):
            format_1, format_2 = formats_pair

            formats_info = []
            formats_info.extend(format_1.get('requested_formats', (format_1,)))
            formats_info.extend(format_2.get('requested_formats', (format_2,)))

            if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
                get_no_more = {'video': False, 'audio': False}
                for (i, fmt_info) in enumerate(formats_info):
                    if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
                        formats_info.pop(i)
                        continue
                    for aud_vid in ['audio', 'video']:
                        if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
                            if get_no_more[aud_vid]:
                                formats_info.pop(i)
                                break
                            get_no_more[aud_vid] = True

            if len(formats_info) == 1:
                return formats_info[0]

            video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
            audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']

            the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
            the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None

            output_ext = get_compatible_ext(
                vcodecs=[f.get('vcodec') for f in video_fmts],
                acodecs=[f.get('acodec') for f in audio_fmts],
                vexts=[f['ext'] for f in video_fmts],
                aexts=[f['ext'] for f in audio_fmts],
                preferences=(try_call(lambda: self.params['merge_output_format'].split('/'))
                             or self.params.get('prefer_free_formats') and ('webm', 'mkv')))

            filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))

            new_dict = {
                'requested_formats': formats_info,
                'format': '+'.join(filtered('format')),
                'format_id': '+'.join(filtered('format_id')),
                'ext': output_ext,
                'protocol': '+'.join(map(determine_protocol, formats_info)),
                'language': '+'.join(orderedSet(filtered('language'))) or None,
                'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
                'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
                'tbr': sum(filtered('tbr', 'vbr', 'abr')),
            }

            if the_only_video:
                new_dict.update({
                    'width': the_only_video.get('width'),
                    'height': the_only_video.get('height'),
                    'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
                    'fps': the_only_video.get('fps'),
                    'dynamic_range': the_only_video.get('dynamic_range'),
                    'vcodec': the_only_video.get('vcodec'),
                    'vbr': the_only_video.get('vbr'),
                    'stretched_ratio': the_only_video.get('stretched_ratio'),
                    'aspect_ratio': the_only_video.get('aspect_ratio'),
                })

            if the_only_audio:
                new_dict.update({
                    'acodec': the_only_audio.get('acodec'),
                    'abr': the_only_audio.get('abr'),
                    'asr': the_only_audio.get('asr'),
                    'audio_channels': the_only_audio.get('audio_channels')
                })

            return new_dict

        def _check_formats(formats):
            if (self.params.get('check_formats') is not None
                    or self.params.get('allow_unplayable_formats')):
                yield from formats
                return
            elif self.params.get('check_formats') == 'selected':
                yield from self._check_formats(formats)
                return

            for f in formats:
                if f.get('has_drm'):
                    yield from self._check_formats([f])
                else:
                    yield f

        def _build_selector_function(selector):
            if isinstance(selector, list):  # ,
                fs = [_build_selector_function(s) for s in selector]

                def selector_function(ctx):
                    for f in fs:
                        yield from f(ctx)
                return selector_function

            elif selector.type == GROUP:  # ()
                selector_function = _build_selector_function(selector.selector)

            elif selector.type == PICKFIRST:  # /
                fs = [_build_selector_function(s) for s in selector.selector]

                def selector_function(ctx):
                    for f in fs:
                        picked_formats = list(f(ctx))
                        if picked_formats:
                            return picked_formats
                    return []

            elif selector.type == MERGE:  # +
                selector_1, selector_2 = map(_build_selector_function, selector.selector)

                def selector_function(ctx):
                    for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
                        yield _merge(pair)

            elif selector.type == SINGLE:  # atom
                format_spec = selector.selector or 'best'

                # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
                if format_spec == 'all':
                    def selector_function(ctx):
                        yield from _check_formats(ctx['formats'][::-1])
                elif format_spec == 'mergeall':
                    def selector_function(ctx):
                        formats = list(_check_formats(
                            f for f in ctx['formats'] if f.get('vcodec') != 'none' or f.get('acodec') != 'none'))
                        if not formats:
                            return
                        merged_format = formats[-1]
                        for f in formats[-2::-1]:
                            merged_format = _merge((merged_format, f))
                        yield merged_format

                else:
                    format_fallback, seperate_fallback, format_reverse, format_idx = False, None, True, 1
                    mobj = re.match(
                        r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
                        format_spec)
                    if mobj is not None:
                        format_idx = int_or_none(mobj.group('n'), default=1)
                        format_reverse = mobj.group('bw')[0] == 'b'
                        format_type = (mobj.group('type') or [None])[0]
                        not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
                        format_modified = mobj.group('mod') is not None

                        format_fallback = not format_type and not format_modified  # for b, w
                        _filter_f = (
                            (lambda f: f.get('%scodec' % format_type) != 'none')
                            if format_type and format_modified  # bv*, ba*, wv*, wa*
                            else (lambda f: f.get('%scodec' % not_format_type) == 'none')
                            if format_type  # bv, ba, wv, wa
                            else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                            if not format_modified  # b, w
                            else lambda f: True)  # b*, w*
                        filter_f = lambda f: _filter_f(f) and (
                            f.get('vcodec') != 'none' or f.get('acodec') != 'none')
                    else:
                        if format_spec in self._format_selection_exts['audio']:
                            filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
                        elif format_spec in self._format_selection_exts['video']:
                            filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
                            seperate_fallback = lambda f: f.get('ext') == format_spec and f.get('vcodec') != 'none'
                        elif format_spec in self._format_selection_exts['storyboards']:
                            filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
                        else:
                            filter_f = lambda f: f.get('format_id') == format_spec  # id

                    def selector_function(ctx):
                        formats = list(ctx['formats'])
                        matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                        if format_fallback and ctx['incomplete_formats']:
                            # for extractors with incomplete formats (audio only (soundcloud)
                            # or video only (imgur)) best/worst will fallback to
                            # best/worst {video,audio}-only format
                            matches = formats
                        elif seperate_fallback and not ctx['has_merged_format']:
                            # for compatibility with youtube-dl when there is no pre-merged format
                            matches = list(filter(seperate_fallback, formats))
                        matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
                        try:
                            yield matches[format_idx - 1]
                        except LazyList.IndexError:
                            return

            filters = [self._build_format_filter(f) for f in selector.filters]

            def final_selector(ctx):
                ctx_copy = dict(ctx)
                for _filter in filters:
                    ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
                return selector_function(ctx_copy)
            return final_selector

        stream = io.BytesIO(format_spec.encode())
        try:
            tokens = list(_remove_unused_ops(tokenize.tokenize(stream.readline)))
        except tokenize.TokenError:
            raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

        class TokenIterator:
            def __init__(self, tokens):
                self.tokens = tokens
                self.counter = 0

            def __iter__(self):
                return self

            def __next__(self):
                if self.counter >= len(self.tokens):
                    raise StopIteration()
                value = self.tokens[self.counter]
                self.counter += 1
                return value

            next = __next__

            def restore_last_token(self):
                self.counter -= 1

        parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
        return _build_selector_function(parsed_selector)
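    # Sketch of driving the parser/selector built above (the format spec and ctx
    # values are illustrative; ctx uses the same keys process_video_result() passes):
    #
    #   selector = ydl.build_format_selector('bv*[height<=1080]+ba/b')
    #   chosen = list(selector({
    #       'formats': formats,
    #       'has_merged_format': False,
    #       'incomplete_formats': False,
    #   }))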
    def _calc_headers(self, info_dict):
        res = HTTPHeaderDict(self.params['http_headers'], info_dict.get('http_headers'))

        cookies = self.cookiejar.get_cookies_for_url(info_dict['url'])
        if cookies:
            encoder = LenientSimpleCookie()
            values = []
            for cookie in cookies:
                _, value = encoder.value_encode(cookie.value)
                values.append(f'{cookie.name}={value}')
                if cookie.domain:
                    values.append(f'Domain={cookie.domain}')
                if cookie.path:
                    values.append(f'Path={cookie.path}')
                if cookie.secure:
                    values.append('Secure')
                if cookie.expires:
                    values.append(f'Expires={cookie.expires}')
                if cookie.version:
                    values.append(f'Version={cookie.version}')
            info_dict['cookies'] = '; '.join(values)

        if 'X-Forwarded-For' not in res:
            x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
            if x_forwarded_for_ip:
                res['X-Forwarded-For'] = x_forwarded_for_ip

        return res

    def _calc_cookies(self, url):
        self.deprecation_warning('"YoutubeDL._calc_cookies" is deprecated and may be removed in a future version')
        return self.cookiejar.get_cookie_header(url)
    def _sort_thumbnails(self, thumbnails):
        thumbnails.sort(key=lambda t: (
            t.get('preference') if t.get('preference') is not None else -1,
            t.get('width') if t.get('width') is not None else -1,
            t.get('height') if t.get('height') is not None else -1,
            t.get('id') if t.get('id') is not None else '',
            t.get('url')))
    def _sanitize_thumbnails(self, info_dict):
        thumbnails = info_dict.get('thumbnails')
        if thumbnails is None:
            thumbnail = info_dict.get('thumbnail')
            if thumbnail:
                info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
        if not thumbnails:
            return

        def check_thumbnails(thumbnails):
            for t in thumbnails:
                self.to_screen(f'[info] Testing thumbnail {t["id"]}')
                try:
                    self.urlopen(HEADRequest(t['url']))
                except network_exceptions as err:
                    self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
                    continue
                yield t

        self._sort_thumbnails(thumbnails)
        for i, t in enumerate(thumbnails):
            if t.get('id') is None:
                t['id'] = '%d' % i
            if t.get('width') and t.get('height'):
                t['resolution'] = '%dx%d' % (t['width'], t['height'])
            t['url'] = sanitize_url(t['url'])

        if self.params.get('check_formats') is True:
            info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
        else:
            info_dict['thumbnails'] = thumbnails
    def _fill_common_fields(self, info_dict, final=True):
        # TODO: move sanitization here
        if final:
            title = info_dict['fulltitle'] = info_dict.get('title')
            if not title:
                if title == '':
                    self.write_debug('Extractor gave empty title. Creating a generic title')
                else:
                    self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
                info_dict['title'] = f'{info_dict["extractor"].replace(":", "-")} video #{info_dict["id"]}'

        if info_dict.get('duration') is not None:
            info_dict['duration_string'] = formatSeconds(info_dict['duration'])

        for ts_key, date_key in (
                ('timestamp', 'upload_date'),
                ('release_timestamp', 'release_date'),
                ('modified_timestamp', 'modified_date'),
        ):
            if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
                # Working around out-of-range timestamp values (e.g. negative ones on Windows,
                # see http://bugs.python.org/issue1646728)
                with contextlib.suppress(ValueError, OverflowError, OSError):
                    upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
                    info_dict[date_key] = upload_date.strftime('%Y%m%d')

        live_keys = ('is_live', 'was_live')
        live_status = info_dict.get('live_status')
        if live_status is None:
            for key in live_keys:
                if info_dict.get(key) is False:
                    continue
                if info_dict.get(key):
                    live_status = key
                break
            if all(info_dict.get(key) is False for key in live_keys):
                live_status = 'not_live'
        if live_status:
            info_dict['live_status'] = live_status
            for key in live_keys:
                if info_dict.get(key) is None:
                    info_dict[key] = (live_status == key)
        if live_status == 'post_live':
            info_dict['was_live'] = True

        # Auto generate title fields corresponding to the *_number fields when missing
        # in order to always have clean titles. This is very common for TV series.
        for field in ('chapter', 'season', 'episode'):
            if final and info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
                info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
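    # Example of the timestamp-to-date derivation performed above (the value is
    # chosen for illustration): a 'timestamp' of 1700000000 yields
    # datetime.datetime.utcfromtimestamp(1700000000).strftime('%Y%m%d') == '20231114',
    # which is stored as 'upload_date'.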
    def _raise_pending_errors(self, info):
        err = info.pop('__pending_error', None)
        if err:
            self.report_error(err, tb=False)
    def sort_formats(self, info_dict):
        formats = self._get_formats(info_dict)
        formats.sort(key=FormatSorter(
            self, info_dict.get('_format_sort_fields') or []).calculate_preference)
    def process_video_result(self, info_dict, download=True):
        assert info_dict.get('_type', 'video') == 'video'
        self._num_videos += 1

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
        elif not info_dict.get('id'):
            raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])

        def report_force_conversion(field, field_not, conversion):
            self.report_warning(
                '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
                % (field, field_not, conversion))

        def sanitize_string_field(info, string_field):
            field = info.get(string_field)
            if field is None or isinstance(field, str):
                return
            report_force_conversion(string_field, 'a string', 'string')
            info[string_field] = str(field)

        def sanitize_numeric_fields(info):
            for numeric_field in self._NUMERIC_FIELDS:
                field = info.get(numeric_field)
                if field is None or isinstance(field, (int, float)):
                    continue
                report_force_conversion(numeric_field, 'numeric', 'int')
                info[numeric_field] = int_or_none(field)

        sanitize_string_field(info_dict, 'id')
        sanitize_numeric_fields(info_dict)
        if info_dict.get('section_end') and info_dict.get('section_start') is not None:
            info_dict['duration'] = round(info_dict['section_end'] - info_dict['section_start'], 3)
        if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):
            self.report_warning('"duration" field is negative, there is an error in extractor')

        chapters = info_dict.get('chapters') or []
        if chapters and chapters[0].get('start_time'):
            chapters.insert(0, {'start_time': 0})

        dummy_chapter = {'end_time': 0, 'start_time': info_dict.get('duration')}
        for idx, (prev, current, next_) in enumerate(zip(
                (dummy_chapter, *chapters), chapters, (*chapters[1:], dummy_chapter)), 1):
            if current.get('start_time') is None:
                current['start_time'] = prev.get('end_time')
            if not current.get('end_time'):
                current['end_time'] = next_.get('start_time')
            if not current.get('title'):
                current['title'] = f'<Untitled Chapter {idx}>'

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        self._sanitize_thumbnails(info_dict)

        thumbnail = info_dict.get('thumbnail')
        thumbnails = info_dict.get('thumbnails')
        if thumbnail:
            info_dict['thumbnail'] = sanitize_url(thumbnail)
        elif thumbnails:
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if info_dict.get('display_id') is None and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        self._fill_common_fields(info_dict)

        for cc_kind in ('subtitles', 'automatic_captions'):
            cc = info_dict.get(cc_kind)
            if cc:
                for _, subtitle in cc.items():
                    for subtitle_format in subtitle:
                        if subtitle_format.get('url'):
                            subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                        if subtitle_format.get('ext') is None:
                            subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

        automatic_captions = info_dict.get('automatic_captions')
        subtitles = info_dict.get('subtitles')

        info_dict['requested_subtitles'] = self.process_subtitles(
            info_dict['id'], subtitles, automatic_captions)

        formats = self._get_formats(info_dict)

        # Backward compatibility with InfoExtractor._sort_formats
        field_preference = (formats or [{}])[0].pop('__sort_fields', None)
        if field_preference:
            info_dict['_format_sort_fields'] = field_preference

        info_dict['_has_drm'] = any(  # or None ensures --clean-infojson removes it
            f.get('has_drm') and f['has_drm'] != 'maybe' for f in formats) or None
        if not self.params.get('allow_unplayable_formats'):
            formats = [f for f in formats if not f.get('has_drm') or f['has_drm'] == 'maybe']

        if formats and all(f.get('acodec') == f.get('vcodec') == 'none' for f in formats):
            self.report_warning(
                f'{"This video is DRM protected and " if info_dict["_has_drm"] else ""}'
                'only images are available for download. Use --list-formats to see them'.capitalize())

        get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
        if not get_from_start:
            info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
        if info_dict.get('is_live') and formats:
            formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
            if get_from_start and not formats:
                self.raise_no_formats(info_dict, msg=(
                    '--live-from-start is passed, but there are no formats that can be downloaded from the start. '
                    'If you want to download from the current time, use --no-live-from-start'))

        def is_wellformed(f):
            url = f.get('url')
            if not url:
                self.report_warning(
                    '"url" field is missing or empty - skipping format, '
                    'there is an error in extractor')
                return False
            if isinstance(url, bytes):
                sanitize_string_field(f, 'url')
            return True

        # Filter out malformed formats for better extraction robustness
        formats = list(filter(is_wellformed, formats or []))

        if not formats:
            self.raise_no_formats(info_dict)

        for format in formats:
            sanitize_string_field(format, 'format_id')
            sanitize_numeric_fields(format)
            format['url'] = sanitize_url(format['url'])
            if format.get('ext') is None:
                format['ext'] = determine_ext(format['url']).lower()
            if format.get('protocol') is None:
                format['protocol'] = determine_protocol(format)
            if format.get('resolution') is None:
                format['resolution'] = self.format_resolution(format, default=None)
            if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
                format['dynamic_range'] = 'SDR'
            if format.get('aspect_ratio') is None:
                format['aspect_ratio'] = try_call(lambda: round(format['width'] / format['height'], 2))
            if (not format.get('manifest_url')  # For fragmented formats, "tbr" is often max bitrate and not average
                    and info_dict.get('duration') and format.get('tbr')
                    and not format.get('filesize') and not format.get('filesize_approx')):
                format['filesize_approx'] = int(info_dict['duration'] * format['tbr'] * (1024 / 8))
            format['http_headers'] = self._calc_headers(collections.ChainMap(format, info_dict))

        # This is copied to http_headers by the above _calc_headers and can now be removed
        if '__x_forwarded_for_ip' in info_dict:
            del info_dict['__x_forwarded_for_ip']

        self.sort_formats({
            'formats': formats,
            '_format_sort_fields': info_dict.get('_format_sort_fields')
        })

        # Sanitize and group by format_id
        formats_dict = {}
        for i, format in enumerate(formats):
            if not format.get('format_id'):
                format['format_id'] = str(i)
            else:
                # Sanitize format_id from characters used in format selector expression
                format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
            formats_dict.setdefault(format['format_id'], []).append(format)

        # Make sure all formats have unique format_id
        common_exts = set(itertools.chain(*self._format_selection_exts.values()))
        for format_id, ambiguous_formats in formats_dict.items():
            ambigious_id = len(ambiguous_formats) > 1
            for i, format in enumerate(ambiguous_formats):
                if ambigious_id:
                    format['format_id'] = '%s-%d' % (format_id, i)
                # Ensure there is no conflict between id and ext in format selection
                # See https://github.com/yt-dlp/yt-dlp/issues/1282
                if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
                    format['format_id'] = 'f%s' % format['format_id']

                if format.get('format') is None:
                    format['format'] = '{id} - {res}{note}'.format(
                        id=format['format_id'],
                        res=self.format_resolution(format),
                        note=format_field(format, 'format_note', ' (%s)'),
                    )

        if self.params.get('check_formats') is True:
            formats = LazyList(self._check_formats(formats[::-1]), reverse=True)

        if not formats or formats[0] is not info_dict:
            # only set the 'formats' field if the original info_dict lists them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats

        info_dict, _ = self.pre_process(info_dict)

        if self._match_entry(info_dict, incomplete=self._format_fields) is not None:
            return info_dict

        self.post_extract(info_dict)
        info_dict, _ = self.pre_process(info_dict, 'after_filter')

        # The pre-processors may have modified the formats
        formats = self._get_formats(info_dict)

        list_only = self.params.get('simulate') == 'list_only'
        interactive_format_selection = not list_only and self.format_selector == '-'
        if self.params.get('list_thumbnails'):
            self.list_thumbnails(info_dict)
        if self.params.get('listsubtitles'):
            if 'automatic_captions' in info_dict:
                self.list_subtitles(
                    info_dict['id'], automatic_captions, 'automatic captions')
            self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
        if self.params.get('listformats') or interactive_format_selection:
            self.list_formats(info_dict)
        if list_only:
            # Without this printing, -F --print-json will not work
            self.__forced_printings(info_dict)
            return info_dict

        format_selector = self.format_selector
        while True:
            if interactive_format_selection:
                req_format = input(self._format_screen('\nEnter format selector ', self.Styles.EMPHASIS)
                                   + '(Press ENTER for default, or Ctrl+C to quit)'
                                   + self._format_screen(': ', self.Styles.EMPHASIS))
                try:
                    format_selector = self.build_format_selector(req_format) if req_format else None
                except SyntaxError as err:
                    self.report_error(err, tb=False, is_error=False)
                    continue

            if format_selector is None:
                req_format = self._default_format_spec(info_dict, download=download)
                self.write_debug(f'Default format spec: {req_format}')
                format_selector = self.build_format_selector(req_format)

            formats_to_download = list(format_selector({
                'formats': formats,
                'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
                'incomplete_formats': (all(f.get('vcodec') == 'none' for f in formats)  # No formats with video
                                       or all(f.get('acodec') == 'none' for f in formats)),  # OR, No formats with audio
            }))
            if interactive_format_selection and not formats_to_download:
                self.report_error('Requested format is not available', tb=False, is_error=False)
                continue
            break

        if not formats_to_download:
            if not self.params.get('ignore_no_formats_error'):
                raise ExtractorError(
                    'Requested format is not available. Use --list-formats for a list of available formats',
                    expected=True, video_id=info_dict['id'], ie=info_dict['extractor'])
            self.report_warning('Requested format is not available')
            # Process what we can, even without any available formats.
            formats_to_download = [{}]

        requested_ranges = tuple(self.params.get('download_ranges', lambda *_: [{}])(info_dict, self))
        best_format, downloaded_formats = formats_to_download[-1], []

        if download:
            if best_format and requested_ranges:
                def to_screen(*msg):
                    self.to_screen(f'[info] {info_dict["id"]}: {" ".join(", ".join(variadic(m)) for m in msg)}')

                to_screen(f'Downloading {len(formats_to_download)} format(s):',
                          (f['format_id'] for f in formats_to_download))
                if requested_ranges != ({}, ):
                    to_screen(f'Downloading {len(requested_ranges)} time ranges:',
                              (f'{c["start_time"]:.1f}-{c["end_time"]:.1f}' for c in requested_ranges))
            max_downloads_reached = False

            for fmt, chapter in itertools.product(formats_to_download, requested_ranges):
                new_info = self._copy_infodict(info_dict)
                new_info.update(fmt)
                offset, duration = info_dict.get('section_start') or 0, info_dict.get('duration') or float('inf')
                end_time = offset + min(chapter.get('end_time', duration), duration)
                # duration may not be accurate. So allow deviations <1sec
                if end_time == float('inf') or end_time > offset + duration + 1:
                    end_time = None
                if chapter or offset:
                    new_info.update({
                        'section_start': offset + chapter.get('start_time', 0),
                        'section_end': end_time,
                        'section_title': chapter.get('title'),
                        'section_number': chapter.get('index'),
                    })
                downloaded_formats.append(new_info)
                try:
                    self.process_info(new_info)
                except MaxDownloadsReached:
                    max_downloads_reached = True
                self._raise_pending_errors(new_info)
                # Remove copied info
                for key, val in tuple(new_info.items()):
                    if info_dict.get(key) == val:
                        new_info.pop(key)
                if max_downloads_reached:
                    break

            write_archive = {f.get('__write_download_archive', False) for f in downloaded_formats}
            assert write_archive.issubset({True, False, 'ignore'})
            if True in write_archive and False not in write_archive:
                self.record_download_archive(info_dict)

            info_dict['requested_downloads'] = downloaded_formats
            info_dict = self.run_all_pps('after_video', info_dict)
            if max_downloads_reached:
                raise MaxDownloadsReached()

        # We update the info dict with the selected best quality format (backwards compatibility)
        info_dict.update(best_format)
        return info_dict
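    # Sketch of the 'download_ranges' hook consumed above (assumes the
    # download_range_func helper from yt_dlp.utils; the 0-30s range is arbitrary):
    #
    #   from yt_dlp.utils import download_range_func
    #   YoutubeDL({'download_ranges': download_range_func([], [(0, 30)])})
    #   # each requested range becomes a section_start/section_end pair per format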
    def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
        """Select the requested subtitles and their format"""
        available_subs, normal_sub_langs = {}, []
        if normal_subtitles and self.params.get('writesubtitles'):
            available_subs.update(normal_subtitles)
            normal_sub_langs = tuple(normal_subtitles.keys())
        if automatic_captions and self.params.get('writeautomaticsub'):
            for lang, cap_info in automatic_captions.items():
                if lang not in available_subs:
                    available_subs[lang] = cap_info

        if not available_subs or (
                not self.params.get('writesubtitles')
                and not self.params.get('writeautomaticsub')):
            return None

        all_sub_langs = tuple(available_subs.keys())
        if self.params.get('allsubtitles', False):
            requested_langs = all_sub_langs
        elif self.params.get('subtitleslangs', False):
            try:
                requested_langs = orderedSet_from_options(
                    self.params.get('subtitleslangs'), {'all': all_sub_langs}, use_regex=True)
            except re.error as e:
                raise ValueError(f'Wrong regex for subtitleslangs: {e.pattern}')
        else:
            requested_langs = LazyList(itertools.chain(
                ['en'] if 'en' in normal_sub_langs else [],
                filter(lambda f: f.startswith('en'), normal_sub_langs),
                ['en'] if 'en' in all_sub_langs else [],
                filter(lambda f: f.startswith('en'), all_sub_langs),
                normal_sub_langs, all_sub_langs,
            ))[:1]
        if requested_langs:
            self.to_screen(f'[info] {video_id}: Downloading subtitles: {", ".join(requested_langs)}')

        formats_query = self.params.get('subtitlesformat', 'best')
        formats_preference = formats_query.split('/') if formats_query else []
        subs = {}
        for lang in requested_langs:
            formats = available_subs.get(lang)
            if formats is None:
                self.report_warning(f'{lang} subtitles not available for {video_id}')
                continue
            for ext in formats_preference:
                if ext == 'best':
                    f = formats[-1]
                    break
                matches = list(filter(lambda f: f['ext'] == ext, formats))
                if matches:
                    f = matches[-1]
                    break
            else:
                f = formats[-1]
                self.report_warning(
                    'No subtitle format found matching "%s" for language %s, '
                    'using %s' % (formats_query, lang, f['ext']))
            subs[lang] = f
        return subs
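    # Subtitle-selection options that feed process_subtitles() (sketch with
    # example values):
    #
    #   YoutubeDL({
    #       'writesubtitles': True,
    #       'writeautomaticsub': True,
    #       'subtitleslangs': ['en.*', 'ja'],   # regexes are allowed
    #       'subtitlesformat': 'vtt/srt',       # preference order
    #   })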
    def _forceprint(self, key, info_dict):
        if info_dict is None:
            return
        info_copy = info_dict.copy()
        info_copy.setdefault('filename', self.prepare_filename(info_dict))
        if info_dict.get('requested_formats') is not None:
            # For RTMP URLs, also include the playpath
            info_copy['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
        elif info_dict.get('url'):
            info_copy['urls'] = info_dict['url'] + info_dict.get('play_path', '')
        info_copy['formats_table'] = self.render_formats_table(info_dict)
        info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)
        info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
        info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))

        def format_tmpl(tmpl):
            mobj = re.fullmatch(r'([\w.:,]|-\d|(?P<dict>{([\w.:,]|-\d)+}))+=?', tmpl)
            if not mobj:
                return tmpl

            fmt = '%({})s'
            if tmpl.startswith('{'):
                tmpl, fmt = f'.{tmpl}', '%({})j'
            if tmpl.endswith('='):
                tmpl, fmt = tmpl[:-1], '{0} = %({0})#j'
            return '\n'.join(map(fmt.format, [tmpl] if mobj.group('dict') else tmpl.split(',')))

        for tmpl in self.params['forceprint'].get(key, []):
            self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))

        for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):
            filename = self.prepare_filename(info_dict, outtmpl=file_tmpl)
            tmpl = format_tmpl(tmpl)
            self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
            if self._ensure_dir_exists(filename):
                with open(filename, 'a', encoding='utf-8', newline='') as f:
                    f.write(self.evaluate_outtmpl(tmpl, info_copy) + os.linesep)

        return info_copy
    def __forced_printings(self, info_dict, filename=None, incomplete=True):
        if (self.params.get('forcejson')
                or self.params['forceprint'].get('video')
                or self.params['print_to_file'].get('video')):
            self.post_extract(info_dict)
        info_dict['filename'] = filename
        info_copy = self._forceprint('video', info_dict)

        def print_field(field, actual_field=None, optional=False):
            if actual_field is None:
                actual_field = field
            if self.params.get(f'force{field}') and (
                    info_copy.get(field) is not None or (not optional and not incomplete)):
                self.to_stdout(info_copy[actual_field])

        print_field('title')
        print_field('url', 'urls')
        print_field('thumbnail', optional=True)
        print_field('description', optional=True)
        print_field('filename')
        if self.params.get('forceduration') and info_copy.get('duration') is not None:
            self.to_stdout(formatSeconds(info_copy['duration']))
        print_field('format')

        if self.params.get('forcejson'):
            self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
    def dl(self, name, info, subtitle=False, test=False):
        if not info.get('url'):
            self.raise_no_formats(info, True)

            verbose = self.params.get('verbose')
                'quiet': self.params.get('quiet') or not verbose,
                'noprogress': not verbose,
                'skip_unavailable_fragments': False,
                'keep_fragments': False,
                '_no_ytdl_file': True,
            params = self.params
        fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)

        for ph in self._progress_hooks:
            fd.add_progress_hook(ph)
            (f['url'].split(',')[0] + ',<data>' if f['url'].startswith('data:') else f['url'])
            for f in info.get('requested_formats', []) or [info])
        self.write_debug(f'Invoking {fd.FD_NAME} downloader on "{urls}"')

        # Note: Ideally info should be deep-copied so that hooks cannot modify it.
        # But it may contain objects that are not deep-copyable
        new_info = self._copy_infodict(info)
        if new_info.get('http_headers') is None:
            new_info['http_headers'] = self._calc_headers(new_info)
        return fd.download(name, new_info, subtitle)
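    # Added note (not from the original source): dl() is normally driven by process_info(),
    # but conceptually it resolves a downloader via get_suitable_downloader(), attaches the
    # registered progress hooks, and delegates to fd.download(). A hedged usage sketch,
    # mirroring how the callers in this file unpack the result:
    #
    #   success, real_download = self.dl(temp_filename, info_dict)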
    def existing_file(self, filepaths, *, default_overwrite=True):
        existing_files = list(filter(os.path.exists, orderedSet(filepaths)))
        if existing_files and not self.params.get('overwrites', default_overwrite):
            return existing_files[0]

        for file in existing_files:
            self.report_file_delete(file)
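    # Added for clarity (comment only): existing_file() returns the first file that already
    # exists when overwrites are disabled; otherwise the existing candidates are reported for
    # deletion so a fresh download can proceed.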
    def process_info(self, info_dict):
        """Process a single resolved IE result. (Modifies it in-place)"""

        assert info_dict.get('_type', 'video') == 'video'
        original_infodict = info_dict

        if 'format' not in info_dict and 'ext' in info_dict:
            info_dict['format'] = info_dict['ext']

        if self._match_entry(info_dict) is not None:
            info_dict['__write_download_archive'] = 'ignore'

        # Does nothing under normal operation - for backward compatibility of process_info
        self.post_extract(info_dict)

        def replace_info_dict(new_info):
            if new_info == info_dict:
            info_dict.update(new_info)

        new_info, _ = self.pre_process(info_dict, 'video')
        replace_info_dict(new_info)
        self._num_downloads += 1

        # info_dict['_filename'] needs to be set for backward compatibility
        info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
        temp_filename = self.prepare_filename(info_dict, 'temp')

        self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))

        def check_max_downloads():
            if self._num_downloads >= float(self.params.get('max_downloads') or 'inf'):
                raise MaxDownloadsReached()

        if self.params.get('simulate'):
            info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
            check_max_downloads()

        if full_filename is None:
        if not self._ensure_dir_exists(encodeFilename(full_filename)):
        if not self._ensure_dir_exists(encodeFilename(temp_filename)):

        if self._write_description('video', info_dict,
                                   self.prepare_filename(info_dict, 'description')) is None:

        sub_files = self._write_subtitles(info_dict, temp_filename)
        if sub_files is None:
        files_to_move.update(dict(sub_files))

        thumb_files = self._write_thumbnails(
            'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
        if thumb_files is None:
        files_to_move.update(dict(thumb_files))

        infofn = self.prepare_filename(info_dict, 'infojson')
        _infojson_written = self._write_info_json('video', info_dict, infofn)
        if _infojson_written:
            info_dict['infojson_filename'] = infofn
            # For backward compatibility, even though it was a private field
            info_dict['__infojson_filename'] = infofn
        elif _infojson_written is None:

        # Note: Annotations are deprecated
        if self.params.get('writeannotations', False):
            annofn = self.prepare_filename(info_dict, 'annotation')
            if not self._ensure_dir_exists(encodeFilename(annofn)):
            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
                self.to_screen('[info] Video annotations are already present')
            elif not info_dict.get('annotations'):
                self.report_warning('There are no annotations to write.')
                    self.to_screen('[info] Writing video annotations to: ' + annofn)
                    with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                        annofile.write(info_dict['annotations'])
                except (KeyError, TypeError):
                    self.report_warning('There are no annotations to write.')
                    self.report_error('Cannot write annotations file: ' + annofn)

        # Write internet shortcut files
        def _write_link_file(link_type):
            url = try_get(info_dict['webpage_url'], iri_to_uri)
                self.report_warning(
                    f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
            linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
            if not self._ensure_dir_exists(encodeFilename(linkfn)):
            if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
                self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
                self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
                with open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
                          newline='\r\n' if link_type == 'url' else '\n') as linkfile:
                    template_vars = {'url': url}
                    if link_type == 'desktop':
                        template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
                    linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
                self.report_error(f'Cannot write internet shortcut {linkfn}')

            'url': self.params.get('writeurllink'),
            'webloc': self.params.get('writewebloclink'),
            'desktop': self.params.get('writedesktoplink'),
        if self.params.get('writelink'):
            link_type = ('webloc' if sys.platform == 'darwin'
                         else 'desktop' if sys.platform.startswith('linux')
            write_links[link_type] = True

        if any(should_write and not _write_link_file(link_type)
               for link_type, should_write in write_links.items()):

        new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
        replace_info_dict(new_info)

        if self.params.get('skip_download'):
            info_dict['filepath'] = temp_filename
            info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
            info_dict['__files_to_move'] = files_to_move
            replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
            info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')

            info_dict.setdefault('__postprocessors', [])
            def existing_video_file(*filepaths):
                ext = info_dict.get('ext')
                converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
                file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
                                          default_overwrite=False)
                    info_dict['ext'] = os.path.splitext(file)[1][1:]

            fd, success = None, True
            if info_dict.get('protocol') or info_dict.get('url'):
                fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
                if fd is not FFmpegFD and 'no-direct-merge' not in self.params['compat_opts'] and (
                        info_dict.get('section_start') or info_dict.get('section_end')):
                    msg = ('This format cannot be partially downloaded' if FFmpegFD.available()
                           else 'You have requested downloading the video partially, but ffmpeg is not installed')
                    self.report_error(f'{msg}. Aborting')

            if info_dict.get('requested_formats') is not None:
                old_ext = info_dict['ext']
                if self.params.get('merge_output_format') is None:
                    if (info_dict['ext'] == 'webm'
                            and info_dict.get('thumbnails')
                            # check with type instead of pp_key, __name__, or isinstance
                            # since we dont want any custom PPs to trigger this
                            and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):  # noqa: E721
                        info_dict['ext'] = 'mkv'
                        self.report_warning(
                            'webm doesn\'t support embedding a thumbnail, mkv will be used')
                new_ext = info_dict['ext']

                def correct_ext(filename, ext=new_ext):
                    filename_real_ext = os.path.splitext(filename)[1][1:]
                        os.path.splitext(filename)[0]
                        if filename_real_ext in (old_ext, new_ext)
                    return f'{filename_wo_ext}.{ext}'

                # Ensure filename always has a correct extension for successful merge
                full_filename = correct_ext(full_filename)
                temp_filename = correct_ext(temp_filename)
                dl_filename = existing_video_file(full_filename, temp_filename)

                info_dict['__real_download'] = False
                # NOTE: Copy so that original format dicts are not modified
                info_dict['requested_formats'] = list(map(dict, info_dict['requested_formats']))

                merger = FFmpegMergerPP(self)
                if dl_filename is not None:
                    self.report_file_already_downloaded(dl_filename)
                    for f in info_dict['requested_formats'] if fd != FFmpegFD else []:
                        f['filepath'] = fname = prepend_extension(
                            correct_ext(temp_filename, info_dict['ext']),
                            'f%s' % f['format_id'], info_dict['ext'])
                        downloaded.append(fname)
                    info_dict['url'] = '\n'.join(f['url'] for f in info_dict['requested_formats'])
                    success, real_download = self.dl(temp_filename, info_dict)
                    info_dict['__real_download'] = real_download

                if self.params.get('allow_unplayable_formats'):
                    self.report_warning(
                        'You have requested merging of multiple formats '
                        'while also allowing unplayable formats to be downloaded. '
                        'The formats won\'t be merged to prevent data corruption.')
                elif not merger.available:
                    msg = 'You have requested merging of multiple formats but ffmpeg is not installed'
                    if not self.params.get('ignoreerrors'):
                        self.report_error(f'{msg}. Aborting due to --abort-on-error')
                    self.report_warning(f'{msg}. The formats won\'t be merged')

                if temp_filename == '-':
                    reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
                              else 'but the formats are incompatible for simultaneous download' if merger.available
                              else 'but ffmpeg is not installed')
                    self.report_warning(
                        f'You have requested downloading multiple formats to stdout {reason}. '
                        'The formats will be streamed one after the other')
                    fname = temp_filename
                for f in info_dict['requested_formats']:
                    new_info = dict(info_dict)
                    del new_info['requested_formats']
                    if temp_filename != '-':
                        fname = prepend_extension(
                            correct_ext(temp_filename, new_info['ext']),
                            'f%s' % f['format_id'], new_info['ext'])
                        if not self._ensure_dir_exists(fname):
                        f['filepath'] = fname
                        downloaded.append(fname)
                    partial_success, real_download = self.dl(fname, new_info)
                    info_dict['__real_download'] = info_dict['__real_download'] or real_download
                    success = success and partial_success

                if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
                    info_dict['__postprocessors'].append(merger)
                    info_dict['__files_to_merge'] = downloaded
                    # Even if there were no downloads, it is being merged only now
                    info_dict['__real_download'] = True
                for file in downloaded:
                    files_to_move[file] = None

            # Just a single file
            dl_filename = existing_video_file(full_filename, temp_filename)
            if dl_filename is None or dl_filename == temp_filename:
                # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
                # So we should try to resume the download
                success, real_download = self.dl(temp_filename, info_dict)
                info_dict['__real_download'] = real_download
                self.report_file_already_downloaded(dl_filename)

            dl_filename = dl_filename or temp_filename
            info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
        except network_exceptions as err:
            self.report_error('unable to download video data: %s' % error_to_compat_str(err))
        except OSError as err:
            raise UnavailableVideoError(err)
        except (ContentTooShortError, ) as err:
            self.report_error(f'content too short (expected {err.expected} bytes and served {err.downloaded})')

        self._raise_pending_errors(info_dict)
        if success and full_filename != '-':
            fixup_policy = self.params.get('fixup')
            vid = info_dict['id']

            if fixup_policy in ('ignore', 'never'):
            elif fixup_policy == 'warn':
            elif fixup_policy != 'force':
                assert fixup_policy in ('detect_or_warn', None)
                if not info_dict.get('__real_download'):

            def ffmpeg_fixup(cndn, msg, cls):
                if not (do_fixup and cndn):
                elif do_fixup == 'warn':
                    self.report_warning(f'{vid}: {msg}')
                    info_dict['__postprocessors'].append(pp)
                    self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')

            stretched_ratio = info_dict.get('stretched_ratio')
            ffmpeg_fixup(stretched_ratio not in (1, None),
                         f'Non-uniform pixel ratio {stretched_ratio}',
                         FFmpegFixupStretchedPP)

            downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
            downloader = downloader.FD_NAME if downloader else None

            ext = info_dict.get('ext')
            postprocessed_by_ffmpeg = info_dict.get('requested_formats') or any((
                isinstance(pp, FFmpegVideoConvertorPP)
                and resolve_recode_mapping(ext, pp.mapping)[0] not in (ext, None)
            ) for pp in self._pps['post_process'])
            if not postprocessed_by_ffmpeg:
                ffmpeg_fixup(ext == 'm4a' and info_dict.get('container') == 'm4a_dash',
                             'writing DASH m4a. Only some players support this container',
                ffmpeg_fixup(downloader == 'hlsnative' and not self.params.get('hls_use_mpegts')
                             or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None,
                             'Possible MPEG-TS in MP4 container or malformed AAC timestamps',

            ffmpeg_fixup(info_dict.get('is_live') and downloader == 'dashsegments',
                         'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
            ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
            ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed duration detected', FFmpegFixupDurationPP)

                replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
            except PostProcessingError as err:
                self.report_error('Postprocessing: %s' % str(err))
                for ph in self._post_hooks:
                    ph(info_dict['filepath'])
            except Exception as err:
                self.report_error('post hooks: %s' % str(err))
            info_dict['__write_download_archive'] = True

        assert info_dict is original_infodict  # Make sure the info_dict was modified in-place
        if self.params.get('force_write_download_archive'):
            info_dict['__write_download_archive'] = True
        check_max_downloads()
    def __download_wrapper(self, func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
                res = func(*args, **kwargs)
            except UnavailableVideoError as e:
                self.report_error(e)
            except DownloadCancelled as e:
                self.to_screen(f'[info] {e}')
                if not self.params.get('break_per_url'):
                self._num_downloads = 0
                if self.params.get('dump_single_json', False):
                    self.post_extract(res)
                    self.to_stdout(json.dumps(self.sanitize_info(res)))
    def download(self, url_list):
        """Download a given list of URLs."""
        url_list = variadic(url_list)  # Passing a single URL is a common mistake
        outtmpl = self.params['outtmpl']['default']
        if (len(url_list) > 1
                and '%' not in outtmpl
                and self.params.get('max_downloads') != 1):
            raise SameFileError(outtmpl)

        for url in url_list:
            self.__download_wrapper(self.extract_info)(
                url, force_generic_extractor=self.params.get('force_generic_extractor', False))

        return self._download_retcode
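    # Hedged usage sketch (added, not part of the original source): a typical embedding of
    # this class drives everything through download(), e.g.
    #
    #   with YoutubeDL({'outtmpl': '%(title)s.%(ext)s'}) as ydl:
    #       retcode = ydl.download(['https://example.com/watch?v=xyz'])
    #
    # The URL and output template above are illustrative placeholders only.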
    def download_with_info_file(self, info_filename):
        with contextlib.closing(fileinput.FileInput(
                [info_filename], mode='r',
                openhook=fileinput.hook_encoded('utf-8'))) as f:
            # FileInput doesn't have a read method, we can't call json.load
            infos = [self.sanitize_info(info, self.params.get('clean_infojson', True))
                     for info in variadic(json.loads('\n'.join(f)))]
            self._load_cookies(info.get('cookies'), from_headers=False)
            self._load_cookies(traverse_obj(info.get('http_headers'), 'Cookie', casesense=False))  # compat
                self.__download_wrapper(self.process_ie_result)(info, download=True)
            except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
                if not isinstance(e, EntryNotInPlaylist):
                    self.to_stderr('\r')
                webpage_url = info.get('webpage_url')
                if webpage_url is None:
                self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
                self.download([webpage_url])
        return self._download_retcode
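    # Added note: this method appears to back the --load-info-json option; each JSON object in
    # the file is sanitized and re-processed as if it had just been extracted, with any cookies
    # stored in the info dict (or its http_headers) re-loaded first.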
    def sanitize_info(info_dict, remove_private_keys=False):
        ''' Sanitize the infodict for converting to json '''
        if info_dict is None:
        info_dict.setdefault('epoch', int(time.time()))
        info_dict.setdefault('_type', 'video')
        info_dict.setdefault('_version', {
            'version': __version__,
            'current_git_head': current_git_head(),
            'release_git_head': RELEASE_GIT_HEAD,
            'repository': REPOSITORY,

        if remove_private_keys:
            reject = lambda k, v: v is None or k.startswith('__') or k in {
                'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
                'entries', 'filepath', '_filename', 'filename', 'infojson_filename', 'original_url',
                'playlist_autonumber', '_format_sort_fields',
            reject = lambda k, v: False

            if isinstance(obj, dict):
                return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
            elif isinstance(obj, (list, tuple, set, LazyList)):
                return list(map(filter_fn, obj))
            elif obj is None or isinstance(obj, (str, int, float, bool)):

        return filter_fn(info_dict)

    def filter_requested_info(info_dict, actually_filter=True):
        ''' Alias of sanitize_info for backward compatibility '''
        return YoutubeDL.sanitize_info(info_dict, actually_filter)
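    # Illustrative sketch (assumed usage, not from the original file): sanitize_info() is what
    # makes an info dict JSON-serializable, e.g.
    #
    #   info = ydl.extract_info(url, download=False)
    #   json.dumps(YoutubeDL.sanitize_info(info))
    #
    # With remove_private_keys=True, internal fields such as 'requested_formats' and any
    # '__'-prefixed keys are dropped before serialization.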
    def _delete_downloaded_files(self, *files_to_delete, info={}, msg=None):
        for filename in set(filter(None, files_to_delete)):
                self.to_screen(msg % filename)
                self.report_warning(f'Unable to delete file {filename}')
            if filename in info.get('__files_to_move', []):  # NB: Delete even if None
                del info['__files_to_move'][filename]
    def post_extract(info_dict):
        def actual_post_extract(info_dict):
            if info_dict.get('_type') in ('playlist', 'multi_video'):
                for video_dict in info_dict.get('entries', {}):
                    actual_post_extract(video_dict or {})

            post_extractor = info_dict.pop('__post_extractor', None) or (lambda: {})
            info_dict.update(post_extractor())

        actual_post_extract(info_dict or {})
    def run_pp(self, pp, infodict):
        files_to_delete = []
        if '__files_to_move' not in infodict:
            infodict['__files_to_move'] = {}
            files_to_delete, infodict = pp.run(infodict)
        except PostProcessingError as e:
            # Must be True and not 'only_download'
            if self.params.get('ignoreerrors') is True:
                self.report_error(e)

        if not files_to_delete:
        if self.params.get('keepvideo', False):
            for f in files_to_delete:
                infodict['__files_to_move'].setdefault(f, '')
            self._delete_downloaded_files(
                *files_to_delete, info=infodict, msg='Deleting original file %s (pass -k to keep)')
    def run_all_pps(self, key, info, *, additional_pps=None):
            self._forceprint(key, info)
        for pp in (additional_pps or []) + self._pps[key]:
            info = self.run_pp(pp, info)
    def pre_process(self, ie_info, key='pre_process', files_to_move=None):
        info = dict(ie_info)
        info['__files_to_move'] = files_to_move or {}
            info = self.run_all_pps(key, info)
        except PostProcessingError as err:
            msg = f'Preprocessing: {err}'
            info.setdefault('__pending_error', msg)
            self.report_error(msg, is_error=False)
        return info, info.pop('__files_to_move', None)
    def post_process(self, filename, info, files_to_move=None):
        """Run all the postprocessors on the given file."""
        info['filepath'] = filename
        info['__files_to_move'] = files_to_move or {}
        info = self.run_all_pps('post_process', info, additional_pps=info.get('__postprocessors'))
        info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
        del info['__files_to_move']
        return self.run_all_pps('after_move', info)
    def _make_archive_id(self, info_dict):
        video_id = info_dict.get('id')

        # Future-proof against any change in case
        # and backwards compatibility with prior versions
        extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
        if extractor is None:
            url = str_or_none(info_dict.get('url'))

            # Try to find matching extractor for the URL and take its ie_key
            for ie_key, ie in self._ies.items():
                if ie.suitable(url):

        return make_archive_id(extractor, video_id)
    def in_download_archive(self, info_dict):
        if not self.archive:

        vid_ids = [self._make_archive_id(info_dict)]
        vid_ids.extend(info_dict.get('_old_archive_ids') or [])
        return any(id_ in self.archive for id_ in vid_ids)

    def record_download_archive(self, info_dict):
        fn = self.params.get('download_archive')
        vid_id = self._make_archive_id(info_dict)

        self.write_debug(f'Adding to archive: {vid_id}')
        if is_path_like(fn):
            with locked_file(fn, 'a', encoding='utf-8') as archive_file:
                archive_file.write(vid_id + '\n')
        self.archive.add(vid_id)
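    # Added note (hedged): archive entries appear to be written one per line as the string
    # produced by make_archive_id() above, i.e. roughly '<extractor> <video id>';
    # in_download_archive() also checks any '_old_archive_ids' for backward compatibility.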
    def format_resolution(format, default='unknown'):
        if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
        if format.get('resolution') is not None:
            return format['resolution']
        if format.get('width') and format.get('height'):
            return '%dx%d' % (format['width'], format['height'])
        elif format.get('height'):
            return '%sp' % format['height']
        elif format.get('width'):
            return '%dx?' % format['width']
    def _list_format_headers(self, *headers):
        if self.params.get('listformats_table', True) is not False:
            return [self._format_out(header, self.Styles.HEADERS) for header in headers]
    def _format_note(self, fdict):
        if fdict.get('ext') in ['f4f', 'f4m']:
            res += '(unsupported)'
        if fdict.get('language'):
            res += '[%s]' % fdict['language']
        if fdict.get('format_note') is not None:
            res += fdict['format_note']
        if fdict.get('tbr') is not None:
            res += '%4dk' % fdict['tbr']
        if fdict.get('container') is not None:
            res += '%s container' % fdict['container']
        if (fdict.get('vcodec') is not None
                and fdict.get('vcodec') != 'none'):
            res += fdict['vcodec']
            if fdict.get('vbr') is not None:
        elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
        if fdict.get('vbr') is not None:
            res += '%4dk' % fdict['vbr']
        if fdict.get('fps') is not None:
            res += '%sfps' % fdict['fps']
        if fdict.get('acodec') is not None:
            if fdict['acodec'] == 'none':
                res += '%-5s' % fdict['acodec']
        elif fdict.get('abr') is not None:
        if fdict.get('abr') is not None:
            res += '@%3dk' % fdict['abr']
        if fdict.get('asr') is not None:
            res += ' (%5dHz)' % fdict['asr']
        if fdict.get('filesize') is not None:
            res += format_bytes(fdict['filesize'])
        elif fdict.get('filesize_approx') is not None:
            res += '~' + format_bytes(fdict['filesize_approx'])
    def _get_formats(self, info_dict):
        if info_dict.get('formats') is None:
            if info_dict.get('url') and info_dict.get('_type', 'video') == 'video':
        return info_dict['formats']
    def render_formats_table(self, info_dict):
        formats = self._get_formats(info_dict)

        if not self.params.get('listformats_table', True) is not False:
                    format_field(f, 'format_id'),
                    format_field(f, 'ext'),
                    self.format_resolution(f),
                    self._format_note(f)
                ] for f in formats if (f.get('preference') or 0) >= -1000]
            return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)

        def simplified_codec(f, field):
            assert field in ('acodec', 'vcodec')
            codec = f.get(field)
            elif codec != 'none':
                return '.'.join(codec.split('.')[:4])

            if field == 'vcodec' and f.get('acodec') == 'none':
            elif field == 'acodec' and f.get('vcodec') == 'none':
            return self._format_out('audio only' if field == 'vcodec' else 'video only',
                                    self.Styles.SUPPRESS)

        delim = self._format_out('\u2502', self.Styles.DELIM, '|', test_encoding=True)
            self._format_out(format_field(f, 'format_id'), self.Styles.ID),
            format_field(f, 'ext'),
            format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
            format_field(f, 'fps', '\t%d', func=round),
            format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
            format_field(f, 'audio_channels', '\t%s'),
            format_field(f, 'filesize', ' \t%s', func=format_bytes)
            or format_field(f, 'filesize_approx', '≈\t%s', func=format_bytes)
            or format_field(try_call(lambda: format_bytes(int(info_dict['duration'] * f['tbr'] * (1024 / 8)))),
                            None, self._format_out('~\t%s', self.Styles.SUPPRESS))),
            format_field(f, 'tbr', '\t%dk', func=round),
            shorten_protocol_name(f.get('protocol', '')),
            simplified_codec(f, 'vcodec'),
            format_field(f, 'vbr', '\t%dk', func=round),
            simplified_codec(f, 'acodec'),
            format_field(f, 'abr', '\t%dk', func=round),
            format_field(f, 'asr', '\t%s', func=format_decimal_suffix),
            join_nonempty(format_field(f, 'language', '[%s]'), join_nonempty(
                self._format_out('UNSUPPORTED', self.Styles.BAD_FORMAT) if f.get('ext') in ('f4f', 'f4m') else None,
                (self._format_out('Maybe DRM', self.Styles.WARNING) if f.get('has_drm') == 'maybe'
                 else self._format_out('DRM', self.Styles.BAD_FORMAT) if f.get('has_drm') else None),
                format_field(f, 'format_note'),
                format_field(f, 'container', ignore=(None, f.get('ext'))),
                delim=', '), delim=' '),
        ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
        header_line = self._list_format_headers(
            'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', 'CH', delim, '\tFILESIZE', '\tTBR', 'PROTO',
            delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')

        return render_table(
            header_line, table, hide_empty=True,
            delim=self._format_out('\u2500', self.Styles.DELIM, '-', test_encoding=True))
    def render_thumbnails_table(self, info_dict):
        thumbnails = list(info_dict.get('thumbnails') or [])
        return render_table(
            self._list_format_headers('ID', 'Width', 'Height', 'URL'),
            [[t.get('id'), t.get('width') or 'unknown', t.get('height') or 'unknown', t['url']] for t in thumbnails])
    def render_subtitles_table(self, video_id, subtitles):
        def _row(lang, formats):
            exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
            if len(set(names)) == 1:
                names = [] if names[0] == 'unknown' else names[:1]
            return [lang, ', '.join(names), ', '.join(exts)]

        return render_table(
            self._list_format_headers('Language', 'Name', 'Formats'),
            [_row(lang, formats) for lang, formats in subtitles.items()],
    def __list_table(self, video_id, name, func, *args):
            self.to_screen(f'{video_id} has no {name}')
        self.to_screen(f'[info] Available {name} for {video_id}:')
        self.to_stdout(table)
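    # Added for orientation (comment only): the three list_* helpers below all funnel through
    # __list_table(), which renders the table produced by the given render_* method and prints
    # it to stdout, or reports that the video has none.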
    def list_formats(self, info_dict):
        self.__list_table(info_dict['id'], 'formats', self.render_formats_table, info_dict)

    def list_thumbnails(self, info_dict):
        self.__list_table(info_dict['id'], 'thumbnails', self.render_thumbnails_table, info_dict)

    def list_subtitles(self, video_id, subtitles, name='subtitles'):
        self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles)
    def print_debug_header(self):
        if not self.params.get('verbose'):

        from . import _IN_CLI  # Must be delayed import

        # These imports can be slow. So import them only as needed
        from .extractor.extractors import _LAZY_LOADER
        from .extractor.extractors import (
            _PLUGIN_CLASSES as plugin_ies,
            _PLUGIN_OVERRIDES as plugin_ie_overrides

        def get_encoding(stream):
            ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
            additional_info = []
            if os.environ.get('TERM', '').lower() == 'dumb':
                additional_info.append('dumb')
            if not supports_terminal_sequences(stream):
                from .utils import WINDOWS_VT_MODE  # Must be imported locally
                additional_info.append('No VT' if WINDOWS_VT_MODE is False else 'No ANSI')
                ret = f'{ret} ({",".join(additional_info)})'

        encoding_str = 'Encodings: locale %s, fs %s, pref %s, %s' % (
            locale.getpreferredencoding(),
            sys.getfilesystemencoding(),
            self.get_encoding(),
                f'{key} {get_encoding(stream)}' for key, stream in self._out_files.items_
                if stream is not None and key != 'console')

        logger = self.params.get('logger')
            write_debug = lambda msg: logger.debug(f'[debug] {msg}')
            write_debug(encoding_str)
            write_string(f'[debug] {encoding_str}\n', encoding=None)
            write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')

        source = detect_variant()
        if VARIANT not in (None, 'pip'):
        write_debug(join_nonempty(
            f'{"yt-dlp" if REPOSITORY == "yt-dlp/yt-dlp" else REPOSITORY} version',
            f'{CHANNEL}@{__version__}',
            f'[{RELEASE_GIT_HEAD[:9]}]' if RELEASE_GIT_HEAD else '',
            '' if source == 'unknown' else f'({source})',
            '' if _IN_CLI else 'API' if klass == YoutubeDL else f'API:{self.__module__}.{klass.__qualname__}',

        write_debug(f'params: {self.params}')

        if not _LAZY_LOADER:
            if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
                write_debug('Lazy loading extractors is forcibly disabled')
                write_debug('Lazy loading extractors is disabled')
        if self.params['compat_opts']:
            write_debug('Compatibility options: %s' % ', '.join(self.params['compat_opts']))

        if current_git_head():
            write_debug(f'Git HEAD: {current_git_head()}')
        write_debug(system_identifier())

        exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
        ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
            exe_versions['ffmpeg'] += ' (%s)' % ','.join(sorted(ffmpeg_features))
        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_versions['phantomjs'] = PhantomJSwrapper._version()
        exe_str = ', '.join(
            f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
        write_debug('exe versions: %s' % exe_str)

        from .compat.compat_utils import get_package_info
        from .dependencies import available_dependencies

        write_debug('Optional libraries: %s' % (', '.join(sorted({
            join_nonempty(*get_package_info(m)) for m in available_dependencies.values()

        write_debug(f'Proxy map: {self.proxies}')
        # write_debug(f'Request Handlers: {", ".join(rh.RH_NAME for rh in self._request_director.handlers)}')
        for plugin_type, plugins in {'Extractor': plugin_ies, 'Post-Processor': plugin_pps}.items():
            display_list = ['%s%s' % (
                klass.__name__, '' if klass.__name__ == name else f' as {name}')
                for name, klass in plugins.items()]
            if plugin_type == 'Extractor':
                display_list.extend(f'{plugins[-1].IE_NAME.partition("+")[2]} ({parent.__name__})'
                                    for parent, plugins in plugin_ie_overrides.items())
            if not display_list:
            write_debug(f'{plugin_type} Plugins: {", ".join(sorted(display_list))}')

        plugin_dirs = plugin_directories()
            write_debug(f'Plugin directories: {plugin_dirs}')

        if False and self.params.get('call_home'):
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode()
            write_debug('Public IP address: %s' % ipaddr)
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode()
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
    @functools.cached_property
        """Global proxy configuration"""
        opts_proxy = self.params.get('proxy')
        if opts_proxy is not None:
            if opts_proxy == '':
                opts_proxy = '__noproxy__'
            proxies = {'all': opts_proxy}
            proxies = urllib.request.getproxies()
            # compat. Set HTTPS_PROXY to __noproxy__ to revert
            if 'http' in proxies and 'https' not in proxies:
                proxies['https'] = proxies['http']

    @functools.cached_property
    def cookiejar(self):
        """Global cookiejar instance"""
        return load_cookies(
            self.params.get('cookiefile'), self.params.get('cookiesfrombrowser'), self)
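    # Hedged example (added, not in the original): both cached properties above are resolved
    # lazily on first access, e.g.
    #
    #   ydl.proxies    # e.g. {'all': <--proxy value>} or the environment's getproxies() dict
    #   ydl.cookiejar  # cookiejar built from the 'cookiefile' / 'cookiesfrombrowser' params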
        Get a urllib OpenerDirector from the Urllib handler (deprecated).
        self.deprecation_warning('YoutubeDL._opener() is deprecated, use YoutubeDL.urlopen()')
        handler = self._request_director.handlers['Urllib']
        return handler._get_instance(cookiejar=self.cookiejar, proxies=self.proxies)
    def urlopen(self, req):
        """ Start an HTTP download """
        if isinstance(req, str):
        elif isinstance(req, urllib.request.Request):
            self.deprecation_warning(
                'Passing a urllib.request.Request object to YoutubeDL.urlopen() is deprecated. '
                'Use yt_dlp.networking.common.Request instead.')
            req = urllib_req_to_req(req)
        assert isinstance(req, Request)

        # compat: Assume user:pass url params are basic auth
        url, basic_auth_header = extract_basic_auth(req.url)
        if basic_auth_header:
            req.headers['Authorization'] = basic_auth_header
        req.url = sanitize_url(url)

        clean_proxies(proxies=req.proxies, headers=req.headers)
        clean_headers(req.headers)

            return self._request_director.send(req)
        except NoSupportingHandlers as e:
            for ue in e.unsupported_errors:
                if not (ue.handler and ue.msg):
                if ue.handler.RH_KEY == 'Urllib' and 'unsupported url scheme: "file"' in ue.msg.lower():
                        'file:// URLs are disabled by default in yt-dlp for security reasons. '
                        'Use --enable-file-urls to enable at your own risk.', cause=ue) from ue
        except SSLError as e:
            if 'UNSAFE_LEGACY_RENEGOTIATION_DISABLED' in str(e):
                raise RequestError('UNSAFE_LEGACY_RENEGOTIATION_DISABLED: Try using --legacy-server-connect', cause=e) from e
            elif 'SSLV3_ALERT_HANDSHAKE_FAILURE' in str(e):
                    'SSLV3_ALERT_HANDSHAKE_FAILURE: The server may not support the current cipher list. '
                    'Try using --legacy-server-connect', cause=e) from e
        except HTTPError as e:  # TODO: Remove in a future release
            raise _CompatHTTPError(e) from e
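    # Hedged usage sketch (illustrative, not from the original): urlopen() accepts a URL string
    # or a yt_dlp.networking.Request and routes it through the RequestDirector, e.g.
    #
    #   response = ydl.urlopen(Request('https://example.com', headers={'User-Agent': 'test'}))
    #   data = response.read()
    #
    # urllib.request.Request objects still work but emit a deprecation warning.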
    def build_request_director(self, handlers):
        logger = _YDLLogger(self)
        headers = self.params.get('http_headers').copy()
        proxies = self.proxies.copy()
        clean_headers(headers)
        clean_proxies(proxies, headers)

        director = RequestDirector(logger=logger, verbose=self.params.get('debug_printtraffic'))
        for handler in handlers:
            director.add_handler(handler(
                cookiejar=self.cookiejar,
                prefer_system_certs='no-certifi' in self.params['compat_opts'],
                verify=not self.params.get('nocheckcertificate'),
                **traverse_obj(self.params, {
                    'verbose': 'debug_printtraffic',
                    'source_address': 'source_address',
                    'timeout': 'socket_timeout',
                    'legacy_ssl_support': 'legacy_server_connect',
                    'enable_file_urls': 'enable_file_urls',
                    'client_certificate': 'client_certificate',
                    'client_certificate_key': 'client_certificate_key',
                    'client_certificate_password': 'client_certificate_password',
    def encode(self, s):
        if isinstance(s, bytes):
            return s  # Already encoded

            return s.encode(self.get_encoding())
        except UnicodeEncodeError as err:
            err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'

    def get_encoding(self):
        encoding = self.params.get('encoding')
        if encoding is None:
            encoding = preferredencoding()
    def _write_info_json(self, label, ie_result, infofn, overwrite=None):
        ''' Write infojson and return True = written, 'exists' = already exists, False = skip, None = error '''
        if overwrite is None:
            overwrite = self.params.get('overwrites', True)
        if not self.params.get('writeinfojson'):
            self.write_debug(f'Skipping writing {label} infojson')
        elif not self._ensure_dir_exists(infofn):
        elif not overwrite and os.path.exists(infofn):
            self.to_screen(f'[info] {label.title()} metadata is already present')

        self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
            write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
            self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
    def _write_description(self, label, ie_result, descfn):
        ''' Write the description and return True = written, False = skip, None = error '''
        if not self.params.get('writedescription'):
            self.write_debug(f'Skipping writing {label} description')
        elif not self._ensure_dir_exists(descfn):
        elif not self.params.get('overwrites', True) and os.path.exists(descfn):
            self.to_screen(f'[info] {label.title()} description is already present')
        elif ie_result.get('description') is None:
            self.to_screen(f'[info] There\'s no {label} description to write')
            self.to_screen(f'[info] Writing {label} description to: {descfn}')
            with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                descfile.write(ie_result['description'])
            self.report_error(f'Cannot write {label} description file {descfn}')
    def _write_subtitles(self, info_dict, filename):
        ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
        subtitles = info_dict.get('requested_subtitles')
        if not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
            # subtitles download errors are already managed as troubles in relevant IE
            # that way it will silently go on when used with unsupporting IE
            self.to_screen('[info] There are no subtitles for the requested languages')

        sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
        if not sub_filename_base:
            self.to_screen('[info] Skipping writing video subtitles')

        for sub_lang, sub_info in subtitles.items():
            sub_format = sub_info['ext']
            sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
            sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
            existing_sub = self.existing_file((sub_filename_final, sub_filename))
                self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
                sub_info['filepath'] = existing_sub
                ret.append((existing_sub, sub_filename_final))

            self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
            if sub_info.get('data') is not None:
                    # Use newline='' to prevent conversion of newline characters
                    # See https://github.com/ytdl-org/youtube-dl/issues/10268
                    with open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
                        subfile.write(sub_info['data'])
                    sub_info['filepath'] = sub_filename
                    ret.append((sub_filename, sub_filename_final))

                sub_copy = sub_info.copy()
                sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
                self.dl(sub_filename, sub_copy, subtitle=True)
                sub_info['filepath'] = sub_filename
                ret.append((sub_filename, sub_filename_final))
            except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
                msg = f'Unable to download video subtitles for {sub_lang!r}: {err}'
                if self.params.get('ignoreerrors') is not True:  # False or 'only_download'
                    if not self.params.get('ignoreerrors'):
                        self.report_error(msg)
                    raise DownloadError(msg)
                self.report_warning(msg)
    def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
        ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
        write_all = self.params.get('write_all_thumbnails', False)
        thumbnails, ret = [], []
        if write_all or self.params.get('writethumbnail', False):
            thumbnails = info_dict.get('thumbnails') or []
            self.to_screen(f'[info] There are no {label} thumbnails to download')
        multiple = write_all and len(thumbnails) > 1

        if thumb_filename_base is None:
            thumb_filename_base = filename
        if thumbnails and not thumb_filename_base:
            self.write_debug(f'Skipping writing {label} thumbnail')

        for idx, t in list(enumerate(thumbnails))[::-1]:
            thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
            thumb_display_id = f'{label} thumbnail {t["id"]}'
            thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
            thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))

            existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
                self.to_screen('[info] %s is already present' % (
                    thumb_display_id if multiple else f'{label} thumbnail').capitalize())
                t['filepath'] = existing_thumb
                ret.append((existing_thumb, thumb_filename_final))
                self.to_screen(f'[info] Downloading {thumb_display_id} ...')
                    uf = self.urlopen(Request(t['url'], headers=t.get('http_headers', {})))
                    self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                    ret.append((thumb_filename, thumb_filename_final))
                    t['filepath'] = thumb_filename
                except network_exceptions as err:
                    if isinstance(err, HTTPError) and err.status == 404:
                        self.to_screen(f'[info] {thumb_display_id.title()} does not exist')
                        self.report_warning(f'Unable to download {thumb_display_id}: {err}')
            if ret and not write_all: