from .cache import Cache
from .compat import urllib  # isort: split
from .compat import compat_os_name, compat_shlex_quote
from .cookies import LenientSimpleCookie, load_cookies
from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
from .downloader.rtmp import rtmpdump_version
from .extractor import gen_extractor_classes, get_info_extractor
from .extractor.common import UnsupportedURLIE
from .extractor.openload import PhantomJSwrapper
from .minicurses import format_text
from .plugins import directories as plugin_directories
from .postprocessor import _PLUGIN_CLASSES as plugin_pps
from .postprocessor import (
    FFmpegFixupDuplicateMoovPP,
    FFmpegFixupDurationPP,
    FFmpegFixupStretchedPP,
    FFmpegFixupTimestampPP,
    FFmpegVideoConvertorPP,
    MoveFilesAfterDownloadPP,
    get_postprocessor,
)
from .postprocessor.ffmpeg import resolve_mapping as resolve_recode_mapping
from .update import REPOSITORY, current_git_head, detect_variant
from .utils import (
    PerRequestProxyHandler,
    UnavailableVideoError,
    YoutubeDLCookieProcessor,
    YoutubeDLRedirectHandler,
    format_decimal_suffix,
    orderedSet_from_options,
    remove_terminal_sequences,
    supports_terminal_sequences,
    windows_enable_vt_mode,
)
from .utils.networking import clean_headers
from .version import CHANNEL, RELEASE_GIT_HEAD, VARIANT, __version__

if compat_os_name == 'nt':
    import ctypes


class YoutubeDL:
    """YoutubeDL class.
    YoutubeDL objects are the ones responsible for downloading the
    actual video file and writing it to disk if the user has requested
    it, among some other tasks. In most cases there should be one per
    program. Since, given a video URL, the downloader doesn't know how to
    extract all the needed information (a task that InfoExtractors do),
    it has to pass the URL to one of them.

    For this, YoutubeDL objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    YoutubeDL processes the extracted information, possibly using a File
    Downloader to download the video.

    YoutubeDL objects accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The YoutubeDL also
    registers itself as the downloader in charge of the InfoExtractors
    that are added to it, so this is a "mutual registration".
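
    A minimal usage sketch (the format selector and URL are illustrative):

        from yt_dlp import YoutubeDL

        with YoutubeDL({'format': 'bestvideo+bestaudio/best'}) as ydl:
            ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])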

    Available options:

    username:          Username for authentication purposes.
    password:          Password for authentication purposes.
    videopassword:     Password for accessing a video.
    ap_mso:            Adobe Pass multiple-system operator identifier.
    ap_username:       Multiple-system operator account username.
    ap_password:       Multiple-system operator account password.
    usenetrc:          Use netrc for authentication instead.
    netrc_location:    Location of the netrc file. Defaults to ~/.netrc.
    netrc_cmd:         Use a shell command to get credentials
    verbose:           Print additional info to stdout.
    quiet:             Do not print messages to stdout.
    no_warnings:       Do not print out anything for warnings.
    forceprint:        A dict with keys WHEN mapped to a list of templates to
                       print to stdout. The allowed keys are video or any of the
                       items in utils.POSTPROCESS_WHEN.
                       For compatibility, a single list is also accepted
    print_to_file:     A dict with keys WHEN (same as forceprint) mapped to
                       a list of tuples with (template, filename)
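                       An illustrative sketch (the templates are examples, not defaults):
                           forceprint={'video': ['%(title)s - %(id)s']}
                           print_to_file={'video': [('%(title)s', 'titles.txt')]}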
    forcejson:         Force printing info_dict as JSON.
    dump_single_json:  Force printing the info_dict of the whole playlist
                       (or video) as a single JSON line.
    force_write_download_archive: Force writing download archive regardless
                       of 'skip_download' or 'simulate'.
    simulate:          Do not download the video files. If unset (or None),
                       simulate only if listsubtitles, listformats or list_thumbnails is used
    format:            Video format code. See "FORMAT SELECTION" for more details.
                       You can also pass a function. The function takes 'ctx' as
                       argument and returns the formats to download.
                       See "build_format_selector" for an implementation
    allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
    ignore_no_formats_error: Ignore "No video formats" error. Useful for
                       extracting metadata even if the video is not actually
                       available for download (experimental)
    format_sort:       A list of fields by which to sort the video formats.
                       See "Sorting Formats" for more details.
    format_sort_force: Force the given format_sort. See "Sorting Formats"
                       for more details.
    prefer_free_formats: Whether to prefer video formats with free containers
                       over non-free ones of the same quality.
    allow_multiple_video_streams: Allow multiple video streams to be merged
                       into a single file
    allow_multiple_audio_streams: Allow multiple audio streams to be merged
                       into a single file
    check_formats:     Whether to test if the formats are downloadable.
                       Can be True (check all), False (check none),
                       'selected' (check selected formats),
                       or None (check only if requested by extractor)
    paths:             Dictionary of output paths. The allowed keys are 'home',
                       'temp' and the keys of OUTTMPL_TYPES (in utils.py)
    outtmpl:           Dictionary of templates for output names. Allowed keys
                       are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
                       For compatibility with youtube-dl, a single string can also be used
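                       An illustrative sketch (the template itself is an example):
                           outtmpl={'default': '%(title)s [%(id)s].%(ext)s'}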
    outtmpl_na_placeholder: Placeholder for unavailable meta fields.
    restrictfilenames: Do not allow "&" and spaces in file names
    trim_file_name:    Limit the length of the filename (extension excluded)
    windowsfilenames:  Force filenames to be Windows-compatible
    ignoreerrors:      Do not stop on download/postprocessing errors.
                       Can be 'only_download' to ignore only download errors.
                       Default is 'only_download' for CLI, but False for API
    skip_playlist_after_errors: Number of allowed failures until the rest of
                       the playlist is skipped
    allowed_extractors: List of regexes to match against extractor names that are allowed
    overwrites:        Overwrite all video and metadata files if True,
                       overwrite only non-video files if None
                       and don't overwrite any file if False
                       For compatibility with youtube-dl,
                       "nooverwrites" may also be used instead
    playlist_items:    Specific indices of playlist to download.
    playlistrandom:    Download playlist items in random order.
    lazy_playlist:     Process playlist entries as they are received.
    matchtitle:        Download only matching titles.
    rejecttitle:       Reject downloads for matching titles.
    logger:            Log messages to a logging.Logger instance.
    logtostderr:       Print everything to stderr instead of stdout.
    consoletitle:      Display progress in the console window's titlebar.
    writedescription:  Write the video description to a .description file
    writeinfojson:     Write the video metadata to a .info.json file
    clean_infojson:    Remove internal metadata from the infojson
    getcomments:       Extract video comments. These will not be written to disk
                       unless writeinfojson is also given
    writeannotations:  Write the video annotations to a .annotations.xml file
    writethumbnail:    Write the thumbnail image to a file
    allow_playlist_files: Whether to write playlists' description, infojson etc.
                       also to disk when using the 'write*' options
    write_all_thumbnails: Write all thumbnail formats to files
    writelink:         Write an internet shortcut file, depending on the
                       current platform (.url/.webloc/.desktop)
    writeurllink:      Write a Windows internet shortcut file (.url)
    writewebloclink:   Write a macOS internet shortcut file (.webloc)
    writedesktoplink:  Write a Linux internet shortcut file (.desktop)
    writesubtitles:    Write the video subtitles to a file
    writeautomaticsub: Write the automatically generated subtitles to a file
    listsubtitles:     Lists all available subtitles for the video
    subtitlesformat:   The format code for subtitles
    subtitleslangs:    List of languages of the subtitles to download (can be regex).
                       The list may contain "all" to refer to all the available
                       subtitles. The language can be prefixed with a "-" to
                       exclude it from the requested languages, e.g. ['all', '-live_chat']
    keepvideo:         Keep the video file after post-processing
    daterange:         A utils.DateRange object; download only if the upload_date is in the range.
    skip_download:     Skip the actual download of the video file
    cachedir:          Location of the cache files in the filesystem.
                       False to disable filesystem cache.
    noplaylist:        Download single video instead of a playlist if in doubt.
    age_limit:         An integer representing the user's age in years.
                       Unsuitable videos for the given age are skipped.
    min_views:         An integer representing the minimum view count the video
                       must have in order to not be skipped.
                       Videos without view count information are always
                       downloaded. None for no limit.
    max_views:         An integer representing the maximum view count.
                       Videos that are more popular than that are not
                       downloaded.
                       Videos without view count information are always
                       downloaded. None for no limit.
    download_archive:  A set, or the name of a file where all downloads are recorded.
                       Videos already present in the file are not downloaded again.
    break_on_existing: Stop the download process after attempting to download a
                       file that is in the archive.
    break_per_url:     Whether break_on_reject and break_on_existing
                       should act on each input URL as opposed to for the entire queue
    cookiefile:        File name or text stream from where cookies should be read and dumped to
    cookiesfrombrowser: A tuple containing the name of the browser, the profile
                       name/path from where cookies are loaded, the name of the keyring,
                       and the container name, e.g. ('chrome', ) or
                       ('vivaldi', 'default', 'BASICTEXT') or ('firefox', 'default', None, 'Meta')
    legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
                       support RFC 5746 secure renegotiation
    nocheckcertificate: Do not verify SSL certificates
    client_certificate: Path to client certificate file in PEM format. May include the private key
    client_certificate_key: Path to private key file for client certificate
    client_certificate_password: Password for client certificate private key, if encrypted.
                       If not provided and the key is encrypted, yt-dlp will ask interactively
    prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
                       (Only supported by some extractors)
    enable_file_urls:  Enable file:// URLs. This is disabled by default for security reasons.
    http_headers:      A dictionary of custom headers to be used for all requests
    proxy:             URL of the proxy server to use
    geo_verification_proxy: URL of the proxy to use for IP address verification
                       on geo-restricted sites.
    socket_timeout:    Time to wait for unresponsive hosts, in seconds
    bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
    debug_printtraffic: Print out sent and received HTTP traffic
    default_search:    Prepend this string if an input URL is not valid.
                       'auto' for elaborate guessing
    encoding:          Use this encoding instead of the system-specified.
    extract_flat:      Whether to resolve and process url_results further
                       * False: Always process. Default for API
                       * True: Never process
                       * 'in_playlist': Do not process inside playlist/multi_video
                       * 'discard': Always process, but don't return the result
                                    from inside playlist/multi_video
                       * 'discard_in_playlist': Same as "discard", but only for
                                    playlists (not multi_video). Default for CLI
    wait_for_video:    If given, wait for scheduled streams to become available.
                       The value should be a tuple containing the range
                       (min_secs, max_secs) to wait between retries
    postprocessors:    A list of dictionaries, each with an entry
                       * key: The name of the postprocessor. See
                         yt_dlp/postprocessor/__init__.py for a list.
                       * when: When to run the postprocessor. Allowed values are
                         the entries of utils.POSTPROCESS_WHEN
                         Assumed to be 'post_process' if not given
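                       An illustrative sketch (key and arguments are examples):
                           postprocessors=[{
                               'key': 'FFmpegExtractAudio',
                               'preferredcodec': 'mp3',
                           }]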
    progress_hooks:    A list of functions that get called on download
                       progress, with a dictionary with the entries
                       * status: One of "downloading", "error", or "finished".
                                 Check this first and ignore unknown values.
                       * info_dict: The extracted info_dict

                       If status is one of "downloading", or "finished", the
                       following properties may also be present:
                       * filename: The final filename (always present)
                       * tmpfilename: The filename we're currently writing to
                       * downloaded_bytes: Bytes on disk
                       * total_bytes: Size of the whole file, None if unknown
                       * total_bytes_estimate: Guess of the eventual file size,
                                               None if unavailable.
                       * elapsed: The number of seconds since download started.
                       * eta: The estimated time in seconds, None if unknown
                       * speed: The download speed in bytes/second, None if
                                unknown
                       * fragment_index: The counter of the currently
                                         downloaded video fragment.
                       * fragment_count: The number of fragments (= individual
                                         files that will be merged)

                       Progress hooks are guaranteed to be called at least once
                       (with status "finished") if the download is successful.
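                       An illustrative sketch of such a hook:
                           def my_hook(d):
                               if d['status'] == 'finished':
                                   print('Done downloading', d['filename'])
                       Then pass progress_hooks=[my_hook].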
    postprocessor_hooks: A list of functions that get called on postprocessing
                       progress, with a dictionary with the entries
                       * status: One of "started", "processing", or "finished".
                                 Check this first and ignore unknown values.
                       * postprocessor: Name of the postprocessor
                       * info_dict: The extracted info_dict

                       Progress hooks are guaranteed to be called at least twice
                       (with status "started" and "finished") if the processing is successful.
    merge_output_format: "/" separated list of extensions to use when merging formats.
    final_ext:         Expected final extension; used to detect when the file was
                       already downloaded and converted
    fixup:             Automatically correct known faults of the file.
                       One of:
                       - "never": do nothing
                       - "warn": only emit a warning
                       - "detect_or_warn": check whether we can do anything
                                           about it, warn otherwise (default)
    source_address:    Client-side IP address to bind to.
    sleep_interval_requests: Number of seconds to sleep between requests
                       during extraction
    sleep_interval:    Number of seconds to sleep before each download when
                       used alone or a lower bound of a range for randomized
                       sleep before each download (minimum possible number
                       of seconds to sleep) when used along with
                       max_sleep_interval.
    max_sleep_interval: Upper bound of a range for randomized sleep before each
                       download (maximum possible number of seconds to sleep).
                       Must only be used along with sleep_interval.
                       Actual sleep time will be a random float from range
                       [sleep_interval; max_sleep_interval].
    sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
    listformats:       Print an overview of available video formats and exit.
    list_thumbnails:   Print a table of all thumbnails and exit.
    match_filter:      A function that gets called for every video with the signature
                       (info_dict, *, incomplete: bool) -> Optional[str]
                       For backward compatibility with youtube-dl, the signature
                       (info_dict) -> Optional[str] is also allowed.
                       - If it returns a message, the video is ignored.
                       - If it returns None, the video is downloaded.
                       - If it returns utils.NO_DEFAULT, the user is interactively
                         asked whether to download the video.
                       - Raise utils.DownloadCancelled(msg) to abort remaining
                         downloads when a video is rejected.
                       match_filter_func in utils.py is one example of this.
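                       An illustrative sketch (the condition is an example):
                           def my_filter(info_dict, *, incomplete):
                               if info_dict.get('is_live'):
                                   return 'Skipping live stream'
                               return None  # proceed with download
                       Then pass match_filter=my_filter.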
    color:             A dictionary with output stream names as keys
                       and their respective color policy as values.
                       Can also just be a single color policy,
                       in which case it applies to all outputs.
                       Valid stream names are 'stdout' and 'stderr'.
                       Valid color policies are one of 'always', 'auto', 'no_color' or 'never'.
    geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
                       HTTP header
    geo_bypass_country:
                       Two-letter ISO 3166-2 country code that will be used for
                       explicit geographic restriction bypassing via faking
                       X-Forwarded-For HTTP header
    geo_bypass_ip_block:
                       IP range in CIDR notation that will be used similarly to
                       geo_bypass_country
    external_downloader: A dictionary of protocol keys and the executable of the
                       external downloader to use for it. The allowed protocols
                       are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
                       Set the value to 'native' to use the native downloader
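                       An illustrative sketch (the executables are examples):
                           external_downloader={'m3u8': 'native', 'default': 'aria2c'}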
    compat_opts:       Compatibility options. See "Differences in default behavior".
                       The following options do not work when used through the API:
                       filename, abort-on-error, multistreams, no-live-chat, format-sort
                       no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
                       Refer to __init__.py for their implementation
    progress_template: Dictionary of templates for progress outputs.
                       Allowed keys are 'download', 'postprocess',
                       'download-title' (console title) and 'postprocess-title'.
                       The template is mapped on a dictionary with keys 'progress' and 'info'
    retry_sleep_functions: Dictionary of functions that take the number of attempts
                       as argument and return the time to sleep in seconds.
                       Allowed keys are 'http', 'fragment', 'file_access'
    download_ranges:   A callback function that gets called for every video with
                       the signature (info_dict, ydl) -> Iterable[Section].
                       Only the returned sections will be downloaded.
                       Each Section is a dict with the following keys:
                       * start_time: Start time of the section in seconds
                       * end_time: End time of the section in seconds
                       * title: Section title (Optional)
                       * index: Section number (Optional)
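                       An illustrative sketch that downloads only the first minute:
                           download_ranges=lambda info_dict, ydl: [
                               {'start_time': 0, 'end_time': 60}]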
    force_keyframes_at_cuts: Re-encode the video when downloading ranges to get precise cuts
    noprogress:        Do not print the progress bar
    live_from_start:   Whether to download livestream videos from the start

    The following parameters are not used by YoutubeDL itself, they are used by
    the downloader (see yt_dlp/downloader/common.py):
    nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
    max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
    continuedl, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
    external_downloader_args, concurrent_fragment_downloads.

    The following options are used by the post processors:
    ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
                       to the binary or its containing directory.
    postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
                       and a list of additional command-line arguments for the
                       postprocessor/executable. The dict can also have "PP+EXE" keys
                       which are used when the given exe is used by the given PP.
                       Use 'default' as the name for arguments to be passed to all PP
                       For compatibility with youtube-dl, a single list of args
                       can also be used

    The following options are used by the extractors:
    extractor_retries: Number of times to retry for known errors (default: 3)
    dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
    hls_split_discontinuity: Split HLS playlists into different formats at
                       discontinuities such as ad breaks (default: False)
    extractor_args:    A dictionary of arguments to be passed to the extractors.
                       See "EXTRACTOR ARGUMENTS" for details.
                       E.g. {'youtube': {'skip': ['dash', 'hls']}}
    mark_watched:      Mark videos watched (even with --simulate). Only for YouTube

    The following options are deprecated and may be removed in the future:

    break_on_reject:   Stop the download process when encountering a video that
                       has been filtered out.
                       - `raise DownloadCancelled(msg)` in match_filter instead
    force_generic_extractor: Force downloader to use the generic extractor
                       - Use allowed_extractors = ['generic', 'default']
    playliststart:     - Use playlist_items
                       Playlist item to start at.
    playlistend:       - Use playlist_items
                       Playlist item to end at.
    playlistreverse:   - Use playlist_items
                       Download playlist items in reverse order.
    forceurl:          - Use forceprint
                       Force printing final URL.
    forcetitle:        - Use forceprint
                       Force printing title.
    forceid:           - Use forceprint
                       Force printing ID.
    forcethumbnail:    - Use forceprint
                       Force printing thumbnail URL.
    forcedescription:  - Use forceprint
                       Force printing description.
    forcefilename:     - Use forceprint
                       Force printing final filename.
    forceduration:     - Use forceprint
                       Force printing duration.
    allsubtitles:      - Use subtitleslangs = ['all']
                       Downloads all the subtitles of the video
                       (requires writesubtitles or writeautomaticsub)
    include_ads:       - Doesn't work
                       Download ads as well
    call_home:         - Not implemented
                       Boolean, true iff we are allowed to contact the
                       yt-dlp servers for debugging.
    post_hooks:        - Register a custom postprocessor
                       A list of functions that get called as the final step
                       for each video file, after all postprocessors have been
                       called. The filename will be passed as the only argument.
    hls_prefer_native: - Use external_downloader = {'m3u8': 'native'} or {'m3u8': 'ffmpeg'}.
                       Use the native HLS downloader instead of ffmpeg/avconv
                       if True, otherwise use ffmpeg/avconv if False, otherwise
                       use downloader suggested by extractor if None.
    prefer_ffmpeg:     - avconv support is deprecated
                       If False, use avconv instead of ffmpeg if both are available,
                       otherwise prefer ffmpeg.
    youtube_include_dash_manifest: - Use extractor_args
                       If True (default), DASH manifests and related
                       data will be downloaded and processed by the extractor.
                       You can reduce network I/O by disabling it if you don't
                       care about DASH. (only for youtube)
    youtube_include_hls_manifest: - Use extractor_args
                       If True (default), HLS manifests and related
                       data will be downloaded and processed by the extractor.
                       You can reduce network I/O by disabling it if you don't
                       care about HLS. (only for youtube)
    no_color:          Same as `color='no_color'`
    """

    _NUMERIC_FIELDS = {
        'width', 'height', 'asr', 'audio_channels', 'fps',
        'tbr', 'abr', 'vbr', 'filesize', 'filesize_approx',
        'timestamp', 'release_timestamp',
        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
        'average_rating', 'comment_count', 'age_limit',
        'start_time', 'end_time',
        'chapter_number', 'season_number', 'episode_number',
        'track_number', 'disc_number', 'release_year',
    }

    _format_fields = {
        # NB: Keep in sync with the docstring of extractor/common.py
        'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note',
        'width', 'height', 'aspect_ratio', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr', 'audio_channels',
        'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx', 'rows', 'columns',
        'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start',
        'preference', 'language', 'language_preference', 'quality', 'source_preference',
        'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'hls_aes', 'downloader_options',
        'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time',
    }

    _format_selection_exts = {
        'audio': set(MEDIA_EXTENSIONS.common_audio),
        'video': set(MEDIA_EXTENSIONS.common_video + ('3gp', )),
        'storyboards': set(MEDIA_EXTENSIONS.storyboards),
    }

    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options.
        @param auto_init    Whether to load the default extractors and print header (if verbose).
                            Set to 'no_verbose_header' to not print the header
        """
        if params is None:
            params = {}
        self.params = params
        self._ies = {}
        self._ies_instances = {}
        self._pps = {k: [] for k in POSTPROCESS_WHEN}
        self._printed_messages = set()
        self._first_webpage_request = True
        self._post_hooks = []
        self._progress_hooks = []
        self._postprocessor_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        self._num_videos = 0
        self._playlist_level = 0
        self._playlist_urls = set()
        self.cache = Cache(self)

        stdout = sys.stderr if self.params.get('logtostderr') else sys.stdout
        self._out_files = Namespace(
            out=stdout,
            error=sys.stderr,
            screen=sys.stderr if self.params.get('quiet') else stdout,
            console=None if compat_os_name == 'nt' else next(
                filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None)
        )

        try:
            windows_enable_vt_mode()
        except Exception as e:
            self.write_debug(f'Failed to enable VT mode: {e}')

        if self.params.get('no_color'):
            if self.params.get('color') is not None:
                self.report_warning('Overwriting params from "color" with "no_color"')
            self.params['color'] = 'no_color'

        term_allow_color = os.environ.get('TERM', '').lower() != 'dumb'

        def process_color_policy(stream):
            stream_name = {sys.stdout: 'stdout', sys.stderr: 'stderr'}[stream]
            policy = traverse_obj(self.params, ('color', (stream_name, None), {str}), get_all=False)
            if policy in ('auto', None):
                return term_allow_color and supports_terminal_sequences(stream)
            assert policy in ('always', 'never', 'no_color')
            return {'always': True, 'never': False}.get(policy, policy)

        self._allow_colors = Namespace(**{
            name: process_color_policy(stream)
            for name, stream in self._out_files.items_ if name != 'console'
        })

        # The code is left like this to be reused for future deprecations
        MIN_SUPPORTED, MIN_RECOMMENDED = (3, 7), (3, 7)
        current_version = sys.version_info[:2]
        if current_version < MIN_RECOMMENDED:
            msg = ('Support for Python version %d.%d has been deprecated. '
                   'See https://github.com/yt-dlp/yt-dlp/issues/3764 for more details.'
                   '\nYou will no longer receive updates on this version')
            if current_version < MIN_SUPPORTED:
                msg = 'Python version %d.%d is no longer supported'
            self.deprecated_feature(
                f'{msg}! Please update to Python %d.%d or above' % (*current_version, *MIN_RECOMMENDED))

        if self.params.get('allow_unplayable_formats'):
            self.report_warning(
                f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
                'This is a developer option intended for debugging. \n'
                '         If you experience any issues while using this option, '
                f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')

        if self.params.get('bidi_workaround', False):
            try:
                import pty
                master, slave = pty.openpty()
                width = shutil.get_terminal_size().columns
                width_args = [] if width is None else ['-w', str(width)]
                sp_kwargs = {'stdin': subprocess.PIPE, 'stdout': slave, 'stderr': self._out_files.error}
                try:
                    self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
                except OSError:
                    self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == errno.ENOENT:
                    self.report_warning(
                        'Could not find fribidi executable, ignoring --bidi-workaround. '
                        'Make sure that fribidi is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        self.params['compat_opts'] = set(self.params.get('compat_opts', ()))
        self.params['http_headers'] = merge_headers(std_headers, self.params.get('http_headers', {}))
        if auto_init and auto_init != 'no_verbose_header':
            self.print_debug_header()

        self.__header_cookies = []
        self._load_cookies(traverse_obj(self.params.get('http_headers'), 'cookie', casesense=False))  # compat

        def check_deprecated(param, option, suggestion):
            if self.params.get(param) is not None:
                self.report_warning(f'{option} is deprecated. Use {suggestion} instead')
                return True
            return False

        if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
        check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
        check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

        for msg in self.params.get('_warnings', []):
            self.report_warning(msg)
        for msg in self.params.get('_deprecation_warnings', []):
            self.deprecated_feature(msg)

        if 'list-formats' in self.params['compat_opts']:
            self.params['listformats_table'] = False

        if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
            # nooverwrites was unnecessarily changed to overwrites
            # in 0c3d0f51778b153f65c21906031c2e091fcfb641
            # This ensures compatibility with both keys
            self.params['overwrites'] = not self.params['nooverwrites']
        elif self.params.get('overwrites') is None:
            self.params.pop('overwrites', None)
        else:
            self.params['nooverwrites'] = not self.params['overwrites']

        if self.params.get('simulate') is None and any((
            self.params.get('list_thumbnails'),
            self.params.get('listformats'),
            self.params.get('listsubtitles'),
        )):
            self.params['simulate'] = 'list_only'

        self.params.setdefault('forceprint', {})
        self.params.setdefault('print_to_file', {})

        # Compatibility with older syntax
        if not isinstance(params['forceprint'], dict):
            self.params['forceprint'] = {'video': params['forceprint']}

        if auto_init:
            self.add_default_info_extractors()

        if (sys.platform != 'win32'
                and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not self.params.get('restrictfilenames', False)):
            # Unicode filesystem API will throw errors (#1474, #13027)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        self._parse_outtmpl()

        # Creating format selector here allows us to catch syntax errors before the extraction
        self.format_selector = (
            self.params.get('format') if self.params.get('format') in (None, '-')
            else self.params['format'] if callable(self.params['format'])
            else self.build_format_selector(self.params['format']))

        hooks = {
            'post_hooks': self.add_post_hook,
            'progress_hooks': self.add_progress_hook,
            'postprocessor_hooks': self.add_postprocessor_hook,
        }
        for opt, fn in hooks.items():
            for ph in self.params.get(opt, []):
                fn(ph)

        for pp_def_raw in self.params.get('postprocessors', []):
            pp_def = dict(pp_def_raw)
            when = pp_def.pop('when', 'post_process')
            self.add_post_processor(
                get_postprocessor(pp_def.pop('key'))(self, **pp_def),
                when=when)

        def preload_download_archive(fn):
            """Preload the archive, if any is specified"""
            archive = set()
            if fn is None:
                return archive
            elif not is_path_like(fn):
                return fn

            self.write_debug(f'Loading archive file {fn!r}')
            try:
                with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                    for line in archive_file:
                        archive.add(line.strip())
            except OSError as ioe:
                if ioe.errno != errno.ENOENT:
                    raise
            return archive

        self.archive = preload_download_archive(self.params.get('download_archive'))

    def warn_if_short_id(self, argv):
        # short YouTube ID starting with dash?
        idxs = [
            i for i, a in enumerate(argv)
            if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
        if idxs:
            correct_argv = (
                ['yt-dlp']
                + [a for i, a in enumerate(argv) if i not in idxs]
                + ['--'] + [argv[i] for i in idxs]
            )
            self.report_warning(
                'Long argument string detected. '
                'Use -- to separate parameters and URLs, like this:\n%s' %
                args_to_str(correct_argv))

    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        ie_key = ie.ie_key()
        self._ies[ie_key] = ie
        if not isinstance(ie, type):
            self._ies_instances[ie_key] = ie
            ie.set_downloader(self)

    def get_info_extractor(self, ie_key):
        """
        Get an instance of an IE with name ie_key, it will try to get one from
        the _ies list, if there's no instance it will create a new one and add
        it to the extractor list.
        """
        ie = self._ies_instances.get(ie_key)
        if ie is None:
            ie = get_info_extractor(ie_key)()
            self.add_info_extractor(ie)
        return ie

    def add_default_info_extractors(self):
        """
        Add the InfoExtractors returned by gen_extractors to the end of the list
        """
        all_ies = {ie.IE_NAME.lower(): ie for ie in gen_extractor_classes()}
        all_ies['end'] = UnsupportedURLIE()
        try:
            ie_names = orderedSet_from_options(
                self.params.get('allowed_extractors', ['default']), {
                    'all': list(all_ies),
                    'default': [name for name, ie in all_ies.items() if ie._ENABLED],
                }, use_regex=True)
        except re.error as e:
            raise ValueError(f'Wrong regex for allowed_extractors: {e.pattern}')
        for name in ie_names:
            self.add_info_extractor(all_ies[name])
        self.write_debug(f'Loaded {len(ie_names)} extractors')

    def add_post_processor(self, pp, when='post_process'):
        """Add a PostProcessor object to the end of the chain."""
        assert when in POSTPROCESS_WHEN, f'Invalid when={when}'
        self._pps[when].append(pp)
        pp.set_downloader(self)

    def add_post_hook(self, ph):
        """Add the post hook"""
        self._post_hooks.append(ph)

    def add_progress_hook(self, ph):
        """Add the download progress hook"""
        self._progress_hooks.append(ph)

    def add_postprocessor_hook(self, ph):
        """Add the postprocessing progress hook"""
        self._postprocessor_hooks.append(ph)
        for pps in self._pps.values():
            for pp in pps:
                pp.add_progress_hook(ph)

    def _bidi_workaround(self, message):
        if not hasattr(self, '_output_channel'):
            return message

        assert hasattr(self, '_output_process')
        assert isinstance(message, str)
        line_count = message.count('\n') + 1
        self._output_process.stdin.write((message + '\n').encode())
        self._output_process.stdin.flush()
        res = ''.join(self._output_channel.readline().decode()
                      for _ in range(line_count))
        return res[:-len('\n')]

    def _write_string(self, message, out=None, only_once=False):
        if only_once:
            if message in self._printed_messages:
                return
            self._printed_messages.add(message)
        write_string(message, out=out, encoding=self.params.get('encoding'))

    def to_stdout(self, message, skip_eol=False, quiet=None):
        """Print message to stdout"""
        if quiet is not None:
            self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument quiet. '
                                     'Use "YoutubeDL.to_screen" instead')
        if skip_eol is not False:
            self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument skip_eol. '
                                     'Use "YoutubeDL.to_screen" instead')
        self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.out)

    def to_screen(self, message, skip_eol=False, quiet=None, only_once=False):
        """Print message to screen if not in quiet mode"""
        if self.params.get('logger'):
            self.params['logger'].debug(message)
            return
        if (self.params.get('quiet') if quiet is None else quiet) and not self.params.get('verbose'):
            return
        self._write_string(
            '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
            self._out_files.screen, only_once=only_once)

    def to_stderr(self, message, only_once=False):
        """Print message to stderr"""
        assert isinstance(message, str)
        if self.params.get('logger'):
            self.params['logger'].error(message)
        else:
            self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.error, only_once=only_once)

    def _send_console_code(self, code):
        if compat_os_name == 'nt' or not self._out_files.console:
            return
        self._write_string(code, self._out_files.console)

    def to_console_title(self, message):
        if not self.params.get('consoletitle', False):
            return
        message = remove_terminal_sequences(message)
        if compat_os_name == 'nt':
            if ctypes.windll.kernel32.GetConsoleWindow():
                # c_wchar_p() might not be necessary if `message` is
                # already of type unicode()
                ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        else:
            self._send_console_code(f'\033]0;{message}\007')

    def save_console_title(self):
        if not self.params.get('consoletitle') or self.params.get('simulate'):
            return
        self._send_console_code('\033[22;0t')  # Save the title on stack

    def restore_console_title(self):
        if not self.params.get('consoletitle') or self.params.get('simulate'):
            return
        self._send_console_code('\033[23;0t')  # Restore the title from stack

    def __enter__(self):
        self.save_console_title()
        return self

    def save_cookies(self):
        if self.params.get('cookiefile') is not None:
            self.cookiejar.save(ignore_discard=True, ignore_expires=True)

    def __exit__(self, *args):
        self.restore_console_title()
        self.close()

    def trouble(self, message=None, tb=None, is_error=True):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        @param tb          If given, is additional traceback information
        @param is_error    Whether to raise error according to ignoreerrors
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += encode_compat_str(traceback.format_exc())
                else:
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            if tb:
                self.to_stderr(tb)
        if not is_error:
            return
        if not self.params.get('ignoreerrors'):
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1

    Styles = Namespace(
        HEADERS='yellow',
        EMPHASIS='light blue',
        FILENAME='green',
        ID='green',
        DELIM='blue',
        ERROR='red',
        BAD_FORMAT='light red',
        WARNING='yellow',
        SUPPRESS='light black',
    )

    def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
        text = str(text)
        if test_encoding:
            original_text = text
            # handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711
            encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii'
            text = text.encode(encoding, 'ignore').decode(encoding)
            if fallback is not None and text != original_text:
                text = fallback
        return format_text(text, f) if allow_colors is True else text if fallback is None else fallback

    def _format_out(self, *args, **kwargs):
        return self._format_text(self._out_files.out, self._allow_colors.out, *args, **kwargs)

    def _format_screen(self, *args, **kwargs):
        return self._format_text(self._out_files.screen, self._allow_colors.screen, *args, **kwargs)

    def _format_err(self, *args, **kwargs):
        return self._format_text(self._out_files.error, self._allow_colors.error, *args, **kwargs)

    def report_warning(self, message, only_once=False):
        '''
        Print the message to stderr, it will be prefixed with 'WARNING:'
        If stderr is a tty file the 'WARNING:' will be colored
        '''
        if self.params.get('logger') is not None:
            self.params['logger'].warning(message)
        else:
            if self.params.get('no_warnings'):
                return
            self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)

    def deprecation_warning(self, message, *, stacklevel=0):
        deprecation_warning(
            message, stacklevel=stacklevel + 1, printer=self.report_error, is_error=False)

    def deprecated_feature(self, message):
        if self.params.get('logger') is not None:
            self.params['logger'].warning(f'Deprecated Feature: {message}')
        self.to_stderr(f'{self._format_err("Deprecated Feature:", self.Styles.ERROR)} {message}', True)

    def report_error(self, message, *args, **kwargs):
        '''
        Do the same as trouble, but prefixes the message with 'ERROR:', colored
        in red if stderr is a tty file.
        '''
        self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)

    def write_debug(self, message, only_once=False):
        '''Log debug message or Print message to stderr'''
        if not self.params.get('verbose', False):
            return
        message = f'[debug] {message}'
        if self.params.get('logger'):
            self.params['logger'].debug(message)
        else:
            self.to_stderr(message, only_once)

    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        try:
            self.to_screen('[download] %s has already been downloaded' % file_name)
        except UnicodeEncodeError:
            self.to_screen('[download] The file has already been downloaded')

    def report_file_delete(self, file_name):
        """Report that existing file will be deleted."""
        try:
            self.to_screen('Deleting existing file %s' % file_name)
        except UnicodeEncodeError:
            self.to_screen('Deleting existing file')

    def raise_no_formats(self, info, forced=False, *, msg=None):
        has_drm = info.get('_has_drm')
        ignored, expected = self.params.get('ignore_no_formats_error'), bool(msg)
        msg = msg or (has_drm and 'This video is DRM protected') or 'No video formats found!'
        if forced or not ignored:
            raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
                                 expected=has_drm or ignored or expected)
        else:
            self.report_warning(msg)

    def parse_outtmpl(self):
        self.deprecation_warning('"YoutubeDL.parse_outtmpl" is deprecated and may be removed in a future version')
        self._parse_outtmpl()
        return self.params['outtmpl']

    def _parse_outtmpl(self):
        sanitize = IDENTITY
        if self.params.get('restrictfilenames'):  # Remove spaces in the default template
            sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')

        outtmpl = self.params.setdefault('outtmpl', {})
        if not isinstance(outtmpl, dict):
            self.params['outtmpl'] = outtmpl = {'default': outtmpl}
        outtmpl.update({k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items() if outtmpl.get(k) is None})

    def get_output_path(self, dir_type='', filename=None):
        paths = self.params.get('paths', {})
        assert isinstance(paths, dict), '"paths" parameter must be a dictionary'
        path = os.path.join(
            expand_path(paths.get('home', '').strip()),
            expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
            filename or '')
        return sanitize_path(path, force=self.params.get('windowsfilenames'))

    @staticmethod
    def _outtmpl_expandpath(outtmpl):
        # expand_path translates '%%' into '%' and '$$' into '$'
        # correspondingly that is not what we want since we need to keep
        # '%%' intact for template dict substitution step. Working around
        # with boundary-alike separator hack.
        sep = ''.join(random.choices(string.ascii_letters, k=32))
        outtmpl = outtmpl.replace('%%', f'%{sep}%').replace('$$', f'${sep}$')

        # outtmpl should be expand_path'ed before template dict substitution
        # because meta fields may contain env variables we don't want to
        # be expanded. E.g. for outtmpl "%(title)s.%(ext)s" and
        # title "Hello $PATH", we don't want `$PATH` to be expanded.
        return expand_path(outtmpl).replace(sep, '')

    @staticmethod
    def escape_outtmpl(outtmpl):
        ''' Escape any remaining strings like %s, %abc% etc. '''
        return re.sub(
            STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
            lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
            outtmpl)

    @classmethod
    def validate_outtmpl(cls, outtmpl):
        ''' @return None or Exception object '''
        outtmpl = re.sub(
            STR_FORMAT_RE_TMPL.format('[^)]*', '[ljhqBUDS]'),
            lambda mobj: f'{mobj.group(0)[:-1]}s',
            cls._outtmpl_expandpath(outtmpl))
        try:
            cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
            return None
        except ValueError as err:
            return err

    @staticmethod
    def _copy_infodict(info_dict):
        info_dict = dict(info_dict)
        info_dict.pop('__postprocessors', None)
        info_dict.pop('__pending_error', None)
        return info_dict

    def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
        """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
        @param sanitize    Whether to sanitize the output as a filename.
                           For backward compatibility, a function can also be passed
        """

        info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

        info_dict = self._copy_infodict(info_dict)
        info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
            formatSeconds(info_dict['duration'], '-' if sanitize else ':')
            if info_dict.get('duration', None) is not None
            else None)
        info_dict['autonumber'] = int(self.params.get('autonumber_start', 1) - 1 + self._num_downloads)
        info_dict['video_autonumber'] = self._num_videos
        if info_dict.get('resolution') is None:
            info_dict['resolution'] = self.format_resolution(info_dict, default=None)

        # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
        # of %(field)s to %(field)0Nd for backward compatibility
        field_size_compat_map = {
            'playlist_index': number_of_digits(info_dict.get('__last_playlist_index') or 0),
            'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
            'autonumber': self.params.get('autonumber_size') or 5,
        }

        TMPL_DICT = {}
        EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljhqBUDS]'))
        MATH_FUNCTIONS = {
            '+': float.__add__,
            '-': float.__sub__,
        }
        # Field is of the form key1.key2...
        # where keys (except first) can be string, int, slice or "{field, ...}"
        FIELD_INNER_RE = r'(?:\w+|%(num)s|%(num)s?(?::%(num)s?){1,2})' % {'num': r'(?:-?\d+)'}
        FIELD_RE = r'\w*(?:\.(?:%(inner)s|{%(field)s(?:,%(field)s)*}))*' % {
            'inner': FIELD_INNER_RE,
            'field': rf'\w*(?:\.{FIELD_INNER_RE})*'
        }
        MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})'
        MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
        INTERNAL_FORMAT_RE = re.compile(rf'''(?xs)
            (?P<negate>-)?
            (?P<fields>{FIELD_RE})
            (?P<maths>(?:{MATH_OPERATORS_RE}{MATH_FIELD_RE})*)
            (?:>(?P<strf_format>.+?))?
            (?P<remaining>
                (?P<alternate>(?<!\\),[^|&)]+)?
                (?:&(?P<replacement>.*?))?
                (?:\|(?P<default>.*?))?
            )$''')

        def _traverse_infodict(fields):
            fields = [f for x in re.split(r'\.({.+?})\.?', fields)
                      for f in ([x] if x.startswith('{') else x.split('.'))]
            for i in (0, -1):
                if fields and not fields[i]:
                    fields.pop(i)

            for i, f in enumerate(fields):
                if not f.startswith('{'):
                    continue
                assert f.endswith('}'), f'No closing brace for {f} in {fields}'
                fields[i] = {k: k.split('.') for k in f[1:-1].split(',')}

            return traverse_obj(info_dict, fields, is_user_input=True, traverse_string=True)

        def get_value(mdict):
            # Object traversal
            value = _traverse_infodict(mdict['fields'])
            # Negative
            if mdict['negate']:
                value = float_or_none(value)
                if value is not None:
                    value *= -1
            # Do maths
            offset_key = mdict['maths']
            if offset_key:
                value = float_or_none(value)
                operator = None
                while offset_key:
                    item = re.match(
                        MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                        offset_key).group(0)
                    offset_key = offset_key[len(item):]
                    if operator is None:
                        operator = MATH_FUNCTIONS[item]
                        continue
                    item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                    offset = float_or_none(item)
                    if offset is None:
                        offset = float_or_none(_traverse_infodict(item))
                    try:
                        value = operator(value, multiplier * offset)
                    except (TypeError, ZeroDivisionError):
                        return None
                    operator = None
            # Datetime formatting
            if mdict['strf_format']:
                value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

            # XXX: Workaround for https://github.com/yt-dlp/yt-dlp/issues/4485
            if sanitize and value == '':
                value = None
            return value

        na = self.params.get('outtmpl_na_placeholder', 'NA')

        def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
            return sanitize_filename(str(value), restricted=restricted, is_id=(
                bool(re.search(r'(^|[_.])id(\.|$)', key))
                if 'filename-sanitization' in self.params['compat_opts']
                else NO_DEFAULT))

        sanitizer = sanitize if callable(sanitize) else filename_sanitizer
        sanitize = bool(sanitize)

        def _dumpjson_default(obj):
            if isinstance(obj, (set, LazyList)):
                return list(obj)
            return repr(obj)

        class _ReplacementFormatter(string.Formatter):
            def get_field(self, field_name, args, kwargs):
                if field_name.isdigit():
                    return args[0], -1
                raise ValueError('Unsupported field')

        replacement_formatter = _ReplacementFormatter()

        def create_key(outer_mobj):
            if not outer_mobj.group('has_key'):
                return outer_mobj.group(0)
            key = outer_mobj.group('key')
            mobj = re.match(INTERNAL_FORMAT_RE, key)
            value, replacement, default, last_field = None, None, na, ''
            while mobj:
                mobj = mobj.groupdict()
                default = mobj['default'] if mobj['default'] is not None else default
                value = get_value(mobj)
                last_field, replacement = mobj['fields'], mobj['replacement']
                if value is None and mobj['alternate']:
                    mobj = re.match(INTERNAL_FORMAT_RE, mobj['remaining'][1:])
                else:
                    break

            fmt = outer_mobj.group('format')
            if fmt == 's' and value is not None and last_field in field_size_compat_map.keys():
                fmt = f'0{field_size_compat_map[last_field]:d}d'

            if None not in (value, replacement):
                try:
                    value = replacement_formatter.format(replacement, value)
                except ValueError:
                    value, default = None, na

            flags = outer_mobj.group('conversion') or ''
            str_fmt = f'{fmt[:-1]}s'
            if value is None:
                value, fmt = default, 's'
            elif fmt[-1] == 'l':  # list
                delim = '\n' if '#' in flags else ', '
                value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
            elif fmt[-1] == 'j':  # json
                value, fmt = json.dumps(
                    value, default=_dumpjson_default,
                    indent=4 if '#' in flags else None, ensure_ascii='+' not in flags), str_fmt
            elif fmt[-1] == 'h':  # html
                value, fmt = escapeHTML(str(value)), str_fmt
            elif fmt[-1] == 'q':  # quoted
                value = map(str, variadic(value) if '#' in flags else [value])
                value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
            elif fmt[-1] == 'B':  # bytes
                value = f'%{str_fmt}'.encode() % str(value).encode()
                value, fmt = value.decode('utf-8', 'ignore'), 's'
            elif fmt[-1] == 'U':  # unicode normalized
                value, fmt = unicodedata.normalize(
                    # "+" = compatibility equivalence, "#" = NFD
                    'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
                    value), str_fmt
            elif fmt[-1] == 'D':  # decimal suffix
                num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
                value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
                                              factor=1024 if '#' in flags else 1000)
            elif fmt[-1] == 'S':  # filename sanitization
                value, fmt = filename_sanitizer(last_field, value, restricted='#' in flags), str_fmt
            elif fmt[-1] == 'c':
                if value:
                    value = str(value)[0]
                else:
                    fmt = str_fmt
            elif fmt[-1] not in 'rsa':  # numeric
                value = float_or_none(value)
                if value is None:
                    value, fmt = default, 's'

            if sanitize:
                # If value is an object, sanitize might convert it to a string
                # So we convert it to repr first
                if fmt[-1] == 'r':
                    value, fmt = repr(value), str_fmt
                elif fmt[-1] == 'a':
                    value, fmt = ascii(value), str_fmt
                if fmt[-1] in 'csra':
                    value = sanitizer(last_field, value)

            key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
            TMPL_DICT[key] = value
            return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

        return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT

    def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
        outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
        return self.escape_outtmpl(outtmpl) % info_dict

    def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None):
        assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive'
        if outtmpl is None:
            outtmpl = self.params['outtmpl'].get(tmpl_type or 'default', self.params['outtmpl']['default'])
        try:
            outtmpl = self._outtmpl_expandpath(outtmpl)
            filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
            if not filename:
                return None

            if tmpl_type in ('', 'temp'):
                final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')
                if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):
                    filename = replace_extension(filename, ext, final_ext)
            elif tmpl_type:
                force_ext = OUTTMPL_TYPES[tmpl_type]
                if force_ext:
                    filename = replace_extension(filename, force_ext, info_dict.get('ext'))

            # https://github.com/blackjack4494/youtube-dlc/issues/85
            trim_file_name = self.params.get('trim_file_name', False)
            if trim_file_name:
                no_ext, *ext = filename.rsplit('.', 2)
                filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')

            return filename
        except ValueError as err:
            self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
            return None

    def prepare_filename(self, info_dict, dir_type='', *, outtmpl=None, warn=False):
        """Generate the output filename"""
        if outtmpl:
            assert not dir_type, 'outtmpl and dir_type are mutually exclusive'
            dir_type = None
        filename = self._prepare_filename(info_dict, tmpl_type=dir_type, outtmpl=outtmpl)
        if not filename and dir_type not in ('', 'temp'):
            return ''

        if warn:
            if not self.params.get('paths'):
                pass
            elif filename == '-':
                self.report_warning('--paths is ignored when outputting to stdout', only_once=True)
            elif os.path.isabs(filename):
                self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
        if filename == '-' or not filename:
            return filename

        return self.get_output_path(dir_type, filename)
    def _match_entry(self, info_dict, incomplete=False, silent=False):
        """Returns None if the file should be downloaded"""
        _type = 'video' if 'playlist-match-filter' in self.params['compat_opts'] else info_dict.get('_type', 'video')
        assert incomplete or _type == 'video', 'Only video result can be considered complete'

        video_title = info_dict.get('title', info_dict.get('id', 'entry'))

        def check_filter():
            if _type in ('playlist', 'multi_video'):
                return None
            elif _type in ('url', 'url_transparent') and not try_call(
                    lambda: self.get_info_extractor(info_dict['ie_key']).is_single_video(info_dict['url'])):
                return None

            if 'title' in info_dict:
                # This can happen when we're just evaluating the playlist
                title = info_dict['title']
                matchtitle = self.params.get('matchtitle', False)
                if matchtitle:
                    if not re.search(matchtitle, title, re.IGNORECASE):
                        return '"' + title + '" title did not match pattern "' + matchtitle + '"'
                rejecttitle = self.params.get('rejecttitle', False)
                if rejecttitle:
                    if re.search(rejecttitle, title, re.IGNORECASE):
                        return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'

            date = info_dict.get('upload_date')
            if date is not None:
                dateRange = self.params.get('daterange', DateRange())
                if date not in dateRange:
                    return f'{date_from_str(date).isoformat()} upload date is not in range {dateRange}'
            view_count = info_dict.get('view_count')
            if view_count is not None:
                min_views = self.params.get('min_views')
                if min_views is not None and view_count < min_views:
                    return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
                max_views = self.params.get('max_views')
                if max_views is not None and view_count > max_views:
                    return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
            if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
                return 'Skipping "%s" because it is age restricted' % video_title

            match_filter = self.params.get('match_filter')
            if match_filter is None:
                return None

            cancelled = None
            try:
                try:
                    ret = match_filter(info_dict, incomplete=incomplete)
                except TypeError:
                    # For backward compatibility
                    ret = None if incomplete else match_filter(info_dict)
            except DownloadCancelled as err:
                if err.msg is not NO_DEFAULT:
                    raise
                ret, cancelled = err.msg, err

            if ret is NO_DEFAULT:
                while True:
                    filename = self._format_screen(self.prepare_filename(info_dict), self.Styles.FILENAME)
                    reply = input(self._format_screen(
                        f'Download "{filename}"? (Y/n): ', self.Styles.EMPHASIS)).lower().strip()
                    if reply in {'y', ''}:
                        return None
                    elif reply == 'n':
                        if cancelled:
                            raise type(cancelled)(f'Skipping {video_title}')
                        return f'Skipping {video_title}'
            return ret

        if self.in_download_archive(info_dict):
            reason = '%s has already been recorded in the archive' % video_title
            break_opt, break_err = 'break_on_existing', ExistingVideoReached
        else:
            try:
                reason = check_filter()
            except DownloadCancelled as e:
                reason, break_opt, break_err = e.msg, 'match_filter', type(e)
            else:
                break_opt, break_err = 'break_on_reject', RejectedVideoReached
        if reason is not None:
            if not silent:
                self.to_screen('[download] ' + reason)
            if self.params.get(break_opt, False):
                raise break_err()
        return reason

    @staticmethod
    def add_extra_info(info_dict, extra_info):
        '''Set the keys from extra_info in info dict if they are missing'''
        for key, value in extra_info.items():
            info_dict.setdefault(key, value)

    def extract_info(self, url, download=True, ie_key=None, extra_info=None,
                     process=True, force_generic_extractor=False):
        """
        Extract and return the information dictionary of the URL

        Arguments:
        @param url          URL to extract

        Keyword arguments:
        @param download     Whether to download videos
        @param process      Whether to resolve all unresolved references (URLs, playlist items).
                            Must be True for download to work
        @param ie_key       Use only the extractor with this key

        @param extra_info   Dictionary containing the extra values to add to the info (For internal use only)
        @param force_generic_extractor
                            Force using the generic extractor (Deprecated; use ie_key='Generic')
        """

        if extra_info is None:
            extra_info = {}

        if not ie_key and force_generic_extractor:
            ie_key = 'Generic'

        if ie_key:
            ies = {ie_key: self._ies[ie_key]} if ie_key in self._ies else {}
        else:
            ies = self._ies

        for key, ie in ies.items():
            if not ie.suitable(url):
                continue

            if not ie.working():
                self.report_warning('The program functionality for this site has been marked as broken, '
                                    'and will probably not work.')

            temp_id = ie.get_temp_id(url)
            if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': key}):
                self.to_screen(f'[{key}] {temp_id}: has already been recorded in the archive')
                if self.params.get('break_on_existing', False):
                    raise ExistingVideoReached()
                break
            return self.__extract_info(url, self.get_info_extractor(key), download, extra_info, process)
        else:
            extractors_restricted = self.params.get('allowed_extractors') not in (None, ['default'])
            self.report_error(f'No suitable extractor{format_field(ie_key, None, " (%s)")} found for URL {url}',
                              tb=False if extractors_restricted else None)

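    # Usage sketch (hypothetical instance and URL): restricting extraction to a
    # single extractor short-circuits the `ies` loop above.
    #
    #     info = ydl.extract_info('https://example.com/watch?v=xyz',
    #                             download=False, ie_key='Generic')
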
    def _handle_extraction_exceptions(func):
        @functools.wraps(func)
        def wrapper(self, *args, **kwargs):
            while True:
                try:
                    return func(self, *args, **kwargs)
                except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
                    raise
                except ReExtractInfo as e:
                    if e.expected:
                        self.to_screen(f'{e}; Re-extracting data')
                    else:
                        self.to_stderr('\r')
                        self.report_warning(f'{e}; Re-extracting data')
                    continue
                except GeoRestrictedError as e:
                    msg = e.msg
                    if e.countries:
                        msg += '\nThis video is available in %s.' % ', '.join(
                            map(ISO3166Utils.short2full, e.countries))
                    msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to work around this.'
                    self.report_error(msg)
                except ExtractorError as e:  # An error we somewhat expected
                    self.report_error(str(e), e.format_traceback())
                except Exception as e:
                    if self.params.get('ignoreerrors'):
                        self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
                    else:
                        raise
                break
        return wrapper

    def _wait_for_video(self, ie_result={}):
        if (not self.params.get('wait_for_video')
                or ie_result.get('_type', 'video') != 'video'
                or ie_result.get('formats') or ie_result.get('url')):
            return

        format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
        last_msg = ''

        def progress(msg):
            nonlocal last_msg
            full_msg = f'{msg}\n'
            if not self.params.get('noprogress'):
                full_msg = msg + ' ' * (len(last_msg) - len(msg)) + '\r'
            elif self.params.get('quiet'):
                return
            self.to_screen(full_msg, skip_eol=True)
            last_msg = msg

        min_wait, max_wait = self.params.get('wait_for_video')
        diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
        if diff is None and ie_result.get('live_status') == 'is_upcoming':
            diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0)
            self.report_warning('Release time of video is not known')
        elif ie_result and (diff or 0) <= 0:
            self.report_warning('Video should already be available according to extracted info')
        diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
        self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')

        wait_till = time.time() + diff
        try:
            while True:
                diff = wait_till - time.time()
                if diff <= 0:
                    progress('')
                    raise ReExtractInfo('[wait] Wait period ended', expected=True)
                progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
                time.sleep(1)
        except KeyboardInterrupt:
            progress('')
            raise ReExtractInfo('[wait] Interrupted by user', expected=True)
        except BaseException as e:
            if not isinstance(e, ReExtractInfo):
                self.to_screen('')
            raise

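    # Parameter-shape sketch: `wait_for_video` is a (min, max) tuple of seconds
    # bounding the polling delay computed above, e.g.
    #     YoutubeDL({'wait_for_video': (60, 600)})  # hypothetical values
    # waits between 1 and 10 minutes before re-extracting a scheduled video.
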
    def _load_cookies(self, data, *, from_headers=True):
        """Loads cookies from a `Cookie` header

        This tries to work around the security vulnerability of passing cookies to every domain.
        See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj
        The unscoped cookies are saved for later to be stored in the jar with a limited scope.

        @param data         The Cookie header as string to load the cookies from
        @param from_headers If `False`, allows Set-Cookie syntax in the cookie string (at least a domain will be required)
        """
        for cookie in LenientSimpleCookie(data).values():
            if from_headers and any(cookie.values()):
                raise ValueError('Invalid syntax in Cookie Header')

            domain = cookie.get('domain') or ''
            expiry = cookie.get('expires')
            if expiry == '':  # 0 is valid
                expiry = None
            prepared_cookie = http.cookiejar.Cookie(
                cookie.get('version') or 0, cookie.key, cookie.value, None, False,
                domain, True, True, cookie.get('path') or '', bool(cookie.get('path')),
                cookie.get('secure') or False, expiry, False, None, None, {})

            if domain:
                self.cookiejar.set_cookie(prepared_cookie)
            elif from_headers:
                self.deprecated_feature(
                    'Passing cookies as a header is a potential security risk; '
                    'they will be scoped to the domain of the downloaded urls. '
                    'Please consider loading cookies from a file or browser instead.')
                self.__header_cookies.append(prepared_cookie)
            else:
                self.report_error('Unscoped cookies are not allowed; please specify some sort of scoping',
                                  tb=False, is_error=False)

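    # Example (illustrative values): a header-style string such as
    # 'SID=abc123; HSID=xyz' parses into two unscoped cookies; they are held in
    # self.__header_cookies until _apply_header_cookies() scopes them to a
    # concrete domain:
    #     ydl._load_cookies('SID=abc123; HSID=xyz', from_headers=True)
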
    def _apply_header_cookies(self, url):
        """Applies stray header cookies to the provided url

        This loads header cookies and scopes them to the domain provided in `url`.
        While this is not ideal, it helps reduce the risk of them being sent
        to an unintended destination while mostly maintaining compatibility.
        """
        parsed = urllib.parse.urlparse(url)
        if not parsed.hostname:
            return

        for cookie in map(copy.copy, self.__header_cookies):
            cookie.domain = f'.{parsed.hostname}'
            self.cookiejar.set_cookie(cookie)

    @_handle_extraction_exceptions
    def __extract_info(self, url, ie, download, extra_info, process):
        self._apply_header_cookies(url)

        try:
            ie_result = ie.extract(url)
        except UserNotLive as e:
            if process:
                if self.params.get('wait_for_video'):
                    self.report_warning(e)
                self._wait_for_video()
            raise
        if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
            self.report_warning(f'Extractor {ie.IE_NAME} returned nothing{bug_reports_message()}')
            return
        if isinstance(ie_result, list):
            # Backwards compatibility: old IE result format
            ie_result = {
                '_type': 'compat_list',
                'entries': ie_result,
            }
        if extra_info.get('original_url'):
            ie_result.setdefault('original_url', extra_info['original_url'])
        self.add_default_extra_info(ie_result, ie, url)
        if process:
            self._wait_for_video(ie_result)
            return self.process_ie_result(ie_result, download, extra_info)
        else:
            return ie_result

    def add_default_extra_info(self, ie_result, ie, url):
        if url is not None:
            self.add_extra_info(ie_result, {
                'webpage_url': url,
                'original_url': url,
            })
        webpage_url = ie_result.get('webpage_url')
        if webpage_url:
            self.add_extra_info(ie_result, {
                'webpage_url_basename': url_basename(webpage_url),
                'webpage_url_domain': get_domain(webpage_url),
            })
        if ie is not None:
            self.add_extra_info(ie_result, {
                'extractor': ie.IE_NAME,
                'extractor_key': ie.ie_key(),
            })

    def process_ie_result(self, ie_result, download=True, extra_info=None):
        """
        Take the result of the ie (may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download'.
        Returns the resolved ie_result.
        """
        if extra_info is None:
            extra_info = {}
        result_type = ie_result.get('_type', 'video')

        if result_type in ('url', 'url_transparent'):
            ie_result['url'] = sanitize_url(
                ie_result['url'], scheme='http' if self.params.get('prefer_insecure') else 'https')
            if ie_result.get('original_url') and not extra_info.get('original_url'):
                extra_info = {'original_url': ie_result['original_url'], **extra_info}

            extract_flat = self.params.get('extract_flat', False)
            if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
                    or extract_flat is True):
                info_copy = ie_result.copy()
                ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
                if ie and not ie_result.get('id'):
                    info_copy['id'] = ie.get_temp_id(ie_result['url'])
                self.add_default_extra_info(info_copy, ie, ie_result['url'])
                self.add_extra_info(info_copy, extra_info)
                info_copy, _ = self.pre_process(info_copy)
                self._fill_common_fields(info_copy, False)
                self.__forced_printings(info_copy)
                self._raise_pending_errors(info_copy)
                if self.params.get('force_write_download_archive', False):
                    self.record_download_archive(info_copy)
                return ie_result

        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            ie_result = self.process_video_result(ie_result, download=download)
            self._raise_pending_errors(ie_result)
            additional_urls = (ie_result or {}).get('additional_urls')
            if additional_urls:
                # TODO: Improve MetadataParserPP to allow setting a list
                if isinstance(additional_urls, str):
                    additional_urls = [additional_urls]
                self.to_screen(
                    '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
                self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
                ie_result['additional_entries'] = [
                    self.extract_info(
                        url, download, extra_info=extra_info,
                        force_generic_extractor=self.params.get('force_generic_extractor'))
                    for url in additional_urls
                ]
            return ie_result
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            return self.extract_info(
                ie_result['url'], download,
                ie_key=ie_result.get('ie_key'),
                extra_info=extra_info)
        elif result_type == 'url_transparent':
            # Use the information from the embedding page
            info = self.extract_info(
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)

            # extract_info may return None when ignoreerrors is enabled and
            # extraction failed with an error, don't crash and return early
            # in this case
            if not info:
                return info

            exempted_fields = {'_type', 'url', 'ie_key'}
            if not ie_result.get('section_end') and ie_result.get('section_start') is None:
                # For video clips, the id etc of the clip extractor should be used
                exempted_fields |= {'id', 'extractor', 'extractor_key'}

            new_result = info.copy()
            new_result.update(filter_dict(ie_result, lambda k, v: v is not None and k not in exempted_fields))

            # Extracted info may not be a video result (i.e.
            # info.get('_type', 'video') != video) but rather an url or
            # url_transparent. In such cases outer metadata (from ie_result)
            # should be propagated to inner one (info). For this to happen
            # _type of info should be overridden with url_transparent. This
            # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
            if new_result.get('_type') == 'url':
                new_result['_type'] = 'url_transparent'

            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type in ('playlist', 'multi_video'):
            # Protect from infinite recursion due to recursively nested playlists
            # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
            webpage_url = ie_result.get('webpage_url')  # Playlists may not have webpage_url
            if webpage_url and webpage_url in self._playlist_urls:
                self.to_screen(
                    '[download] Skipping already downloaded playlist: %s'
                    % ie_result.get('title') or ie_result.get('id'))
                return

            self._playlist_level += 1
            self._playlist_urls.add(webpage_url)
            self._fill_common_fields(ie_result, False)
            self._sanitize_thumbnails(ie_result)
            try:
                return self.__process_playlist(ie_result, download)
            finally:
                self._playlist_level -= 1
                if not self._playlist_level:
                    self._playlist_urls.clear()
        elif result_type == 'compat_list':
            self.report_warning(
                'Extractor %s returned a compat_list result. '
                'It needs to be updated.' % ie_result.get('extractor'))

            def _fixup(r):
                self.add_extra_info(r, {
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'webpage_url_domain': get_domain(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                })
                return r
            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
            ]
            return ie_result
        else:
            raise Exception('Invalid result type: %s' % result_type)

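    # Worked example (hypothetical data) of the 'url_transparent' merge above:
    # non-None outer fields win, except the exempted ones.
    #     outer = {'_type': 'url_transparent', 'url': '...', 'title': 'Nice title'}
    #     inner = {'id': '42', 'title': 'Ugly title', 'formats': [...]}
    # After resolution, the result keeps the inner 'id' and 'formats' but takes
    # the outer 'title'.
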
    def _ensure_dir_exists(self, path):
        return make_dir(path, self.report_error)

    @staticmethod
    def _playlist_infodict(ie_result, strict=False, **kwargs):
        info = {
            'playlist_count': ie_result.get('playlist_count'),
            'playlist': ie_result.get('title') or ie_result.get('id'),
            'playlist_id': ie_result.get('id'),
            'playlist_title': ie_result.get('title'),
            'playlist_uploader': ie_result.get('uploader'),
            'playlist_uploader_id': ie_result.get('uploader_id'),
            **kwargs,
        }
        if strict:
            return info
        if ie_result.get('webpage_url'):
            info.update({
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'webpage_url_domain': get_domain(ie_result['webpage_url']),
            })
        return {
            **info,
            'playlist_index': 0,
            '__last_playlist_index': max(ie_result.get('requested_entries') or (0, 0)),
            'extractor': ie_result['extractor'],
            'extractor_key': ie_result['extractor_key'],
        }

    def __process_playlist(self, ie_result, download):
        """Process each entry in the playlist"""
        assert ie_result['_type'] in ('playlist', 'multi_video')

        common_info = self._playlist_infodict(ie_result, strict=True)
        title = common_info.get('playlist') or '<Untitled>'
        if self._match_entry(common_info, incomplete=True) is not None:
            return
        self.to_screen(f'[download] Downloading {ie_result["_type"]}: {title}')

        all_entries = PlaylistEntries(self, ie_result)
        entries = orderedSet(all_entries.get_requested_items(), lazy=True)

        lazy = self.params.get('lazy_playlist')
        if lazy:
            resolved_entries, n_entries = [], 'N/A'
            ie_result['requested_entries'], ie_result['entries'] = None, None
        else:
            entries = resolved_entries = list(entries)
            n_entries = len(resolved_entries)
            ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*resolved_entries)) or ([], [])
            if not ie_result.get('playlist_count'):
                # Better to do this after potentially exhausting entries
                ie_result['playlist_count'] = all_entries.get_full_count()

        extra = self._playlist_infodict(ie_result, n_entries=int_or_none(n_entries))
        ie_copy = collections.ChainMap(ie_result, extra)

        _infojson_written = False
        write_playlist_files = self.params.get('allow_playlist_files', True)
        if write_playlist_files and self.params.get('list_thumbnails'):
            self.list_thumbnails(ie_result)
        if write_playlist_files and not self.params.get('simulate'):
            _infojson_written = self._write_info_json(
                'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
            if _infojson_written is None:
                return
            if self._write_description('playlist', ie_result,
                                       self.prepare_filename(ie_copy, 'pl_description')) is None:
                return
            # TODO: This should be passed to ThumbnailsConvertor if necessary
            self._write_thumbnails('playlist', ie_result, self.prepare_filename(ie_copy, 'pl_thumbnail'))

        if lazy:
            if self.params.get('playlistreverse') or self.params.get('playlistrandom'):
                self.report_warning('playlistreverse and playlistrandom are not supported with lazy_playlist', only_once=True)
        elif self.params.get('playlistreverse'):
            entries.reverse()
        elif self.params.get('playlistrandom'):
            random.shuffle(entries)

        self.to_screen(f'[{ie_result["extractor"]}] Playlist {title}: Downloading {n_entries} items'
                       f'{format_field(ie_result, "playlist_count", " of %s")}')

        keep_resolved_entries = self.params.get('extract_flat') != 'discard'
        if self.params.get('extract_flat') == 'discard_in_playlist':
            keep_resolved_entries = ie_result['_type'] != 'playlist'
        if keep_resolved_entries:
            self.write_debug('The information of all playlist entries will be held in memory')

        failures = 0
        max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
        for i, (playlist_index, entry) in enumerate(entries):
            if lazy:
                resolved_entries.append((playlist_index, entry))
            if not entry:
                continue

            entry['__x_forwarded_for_ip'] = ie_result.get('__x_forwarded_for_ip')
            if not lazy and 'playlist-index' in self.params['compat_opts']:
                playlist_index = ie_result['requested_entries'][i]

            entry_copy = collections.ChainMap(entry, {
                **common_info,
                'n_entries': int_or_none(n_entries),
                'playlist_index': playlist_index,
                'playlist_autonumber': i + 1,
            })

            if self._match_entry(entry_copy, incomplete=True) is not None:
                # For compatibility with youtube-dl. See https://github.com/yt-dlp/yt-dlp/issues/4369
                resolved_entries[i] = (playlist_index, NO_DEFAULT)
                continue

            self.to_screen('[download] Downloading item %s of %s' % (
                self._format_screen(i + 1, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))

            entry_result = self.__process_iterable_entry(entry, download, collections.ChainMap({
                'playlist_index': playlist_index,
                'playlist_autonumber': i + 1,
            }, extra))
            if not entry_result:
                failures += 1
            if failures >= max_failures:
                self.report_error(
                    f'Skipping the remaining entries in playlist "{title}" since {failures} items failed extraction')
                break
            if keep_resolved_entries:
                resolved_entries[i] = (playlist_index, entry_result)

        # Update with processed data
        ie_result['entries'] = [e for _, e in resolved_entries if e is not NO_DEFAULT]
        ie_result['requested_entries'] = [i for i, e in resolved_entries if e is not NO_DEFAULT]
        if ie_result['requested_entries'] == try_call(lambda: list(range(1, ie_result['playlist_count'] + 1))):
            # Do not set for full playlist
            ie_result.pop('requested_entries')

        # Write the updated info to json
        if _infojson_written is True and self._write_info_json(
                'updated playlist', ie_result,
                self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
            return

        ie_result = self.run_all_pps('playlist', ie_result)
        self.to_screen(f'[download] Finished downloading playlist: {title}')
        return ie_result

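    # Behaviour sketch: with {'lazy_playlist': True}, entries are consumed one
    # by one and n_entries stays 'N/A', so playlistreverse/playlistrandom (which
    # need the fully resolved list) are warned about and ignored, as handled above.
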
    @_handle_extraction_exceptions
    def __process_iterable_entry(self, entry, download, extra_info):
        return self.process_ie_result(
            entry, download=download, extra_info=extra_info)

    def _build_format_filter(self, filter_spec):
        " Returns a function to filter the formats according to the filter_spec "

        OPERATORS = {
            '<': operator.lt,
            '<=': operator.le,
            '>': operator.gt,
            '>=': operator.ge,
            '=': operator.eq,
            '!=': operator.ne,
        }
        operator_rex = re.compile(r'''(?x)\s*
            (?P<key>[\w.-]+)\s*
            (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
            (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
            ''' % '|'.join(map(re.escape, OPERATORS.keys())))
        m = operator_rex.fullmatch(filter_spec)
        if m:
            try:
                comparison_value = int(m.group('value'))
            except ValueError:
                comparison_value = parse_filesize(m.group('value'))
                if comparison_value is None:
                    comparison_value = parse_filesize(m.group('value') + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid value %r in format specification %r' % (
                            m.group('value'), filter_spec))
            op = OPERATORS[m.group('op')]

        if not m:
            STR_OPERATORS = {
                '=': operator.eq,
                '^=': lambda attr, value: attr.startswith(value),
                '$=': lambda attr, value: attr.endswith(value),
                '*=': lambda attr, value: value in attr,
                '~=': lambda attr, value: value.search(attr) is not None
            }
            str_operator_rex = re.compile(r'''(?x)\s*
                (?P<key>[a-zA-Z0-9._-]+)\s*
                (?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?
                (?P<quote>["'])?
                (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
                (?(quote)(?P=quote))\s*
                ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
            m = str_operator_rex.fullmatch(filter_spec)
            if m:
                if m.group('op') == '~=':
                    comparison_value = re.compile(m.group('value'))
                else:
                    comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value'))
                str_op = STR_OPERATORS[m.group('op')]
                if m.group('negation'):
                    op = lambda attr, value: not str_op(attr, value)
                else:
                    op = str_op

        if not m:
            raise SyntaxError('Invalid filter specification %r' % filter_spec)

        def _filter(f):
            actual_value = f.get(m.group('key'))
            if actual_value is None:
                return m.group('none_inclusive')
            return op(actual_value, comparison_value)
        return _filter

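    # Examples (illustrative) of specs accepted by _build_format_filter:
    #     'filesize<100M'   - numeric comparison, parsed via parse_filesize
    #     'height>=720?'    - trailing '?' also keeps formats lacking the field
    #     'format_id*=dash' - string containment; '~=' compiles the value as a regex
    # The returned predicate can be applied directly (hypothetical instance):
    #     keep = list(filter(ydl._build_format_filter('height>=720'), formats))
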
    def _check_formats(self, formats):
        for f in formats:
            self.to_screen('[info] Testing format %s' % f['format_id'])
            path = self.get_output_path('temp')
            if not self._ensure_dir_exists(f'{path}/'):
                continue
            temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
            temp_file.close()
            try:
                success, _ = self.dl(temp_file.name, f, test=True)
            except (DownloadError, OSError, ValueError) + network_exceptions:
                success = False
            finally:
                if os.path.exists(temp_file.name):
                    try:
                        os.remove(temp_file.name)
                    except OSError:
                        self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
            if success:
                yield f
            else:
                self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])

    def _default_format_spec(self, info_dict, download=True):

        def can_merge():
            merger = FFmpegMergerPP(self)
            return merger.available and merger.can_merge()

        prefer_best = (
            not self.params.get('simulate')
            and download
            and (
                not can_merge()
                or info_dict.get('is_live') and not self.params.get('live_from_start')
                or self.params['outtmpl']['default'] == '-'))
        compat = (
            prefer_best
            or self.params.get('allow_multiple_audio_streams', False)
            or 'format-spec' in self.params['compat_opts'])

        return (
            'best/bestvideo+bestaudio' if prefer_best
            else 'bestvideo*+bestaudio/best' if not compat
            else 'bestvideo+bestaudio/best')

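    # Consequence sketch: when downloading to stdout ('-'), recording a live
    # stream without --live-from-start, or when merging is unavailable, the spec
    # falls back to 'best/bestvideo+bestaudio'; otherwise
    # 'bestvideo*+bestaudio/best' is used, unless compat options force
    # 'bestvideo+bestaudio/best'.
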
    def build_format_selector(self, format_spec):
        def syntax_error(note, start):
            message = (
                'Invalid format specification: '
                '{}\n\t{}\n\t{}^'.format(note, format_spec, ' ' * start[1]))
            return SyntaxError(message)

        PICKFIRST = 'PICKFIRST'
        MERGE = 'MERGE'
        SINGLE = 'SINGLE'
        GROUP = 'GROUP'
        FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

        allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
                                  'video': self.params.get('allow_multiple_video_streams', False)}

        def _parse_filter(tokens):
            filter_parts = []
            for type, string_, start, _, _ in tokens:
                if type == tokenize.OP and string_ == ']':
                    return ''.join(filter_parts)
                else:
                    filter_parts.append(string_)

        def _remove_unused_ops(tokens):
            # Remove operators that we don't use and join them with the surrounding strings.
            # E.g. 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
            ALLOWED_OPS = ('/', '+', ',', '(', ')')
            last_string, last_start, last_end, last_line = None, None, None, None
            for type, string_, start, end, line in tokens:
                if type == tokenize.OP and string_ == '[':
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string_, start, end, line
                    # everything inside brackets will be handled by _parse_filter
                    for type, string_, start, end, line in tokens:
                        yield type, string_, start, end, line
                        if type == tokenize.OP and string_ == ']':
                            break
                elif type == tokenize.OP and string_ in ALLOWED_OPS:
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string_, start, end, line
                elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                    if not last_string:
                        last_string = string_
                        last_start = start
                        last_end = end
                    else:
                        last_string += string_
            if last_string:
                yield tokenize.NAME, last_string, last_start, last_end, last_line

        def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
            selectors = []
            current_selector = None
            for type, string_, start, _, _ in tokens:
                # ENCODING is only defined in python 3.x
                if type == getattr(tokenize, 'ENCODING', None):
                    continue
                elif type in [tokenize.NAME, tokenize.NUMBER]:
                    current_selector = FormatSelector(SINGLE, string_, [])
                elif type == tokenize.OP:
                    if string_ == ')':
                        if not inside_group:
                            # ')' will be handled by the parentheses group
                            tokens.restore_last_token()
                        break
                    elif inside_merge and string_ in ['/', ',']:
                        tokens.restore_last_token()
                        break
                    elif inside_choice and string_ == ',':
                        tokens.restore_last_token()
                        break
                    elif string_ == ',':
                        if not current_selector:
                            raise syntax_error('"," must follow a format selector', start)
                        selectors.append(current_selector)
                        current_selector = None
                    elif string_ == '/':
                        if not current_selector:
                            raise syntax_error('"/" must follow a format selector', start)
                        first_choice = current_selector
                        second_choice = _parse_format_selection(tokens, inside_choice=True)
                        current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                    elif string_ == '[':
                        if not current_selector:
                            current_selector = FormatSelector(SINGLE, 'best', [])
                        format_filter = _parse_filter(tokens)
                        current_selector.filters.append(format_filter)
                    elif string_ == '(':
                        if current_selector:
                            raise syntax_error('Unexpected "("', start)
                        group = _parse_format_selection(tokens, inside_group=True)
                        current_selector = FormatSelector(GROUP, group, [])
                    elif string_ == '+':
                        if not current_selector:
                            raise syntax_error('Unexpected "+"', start)
                        selector_1 = current_selector
                        selector_2 = _parse_format_selection(tokens, inside_merge=True)
                        if not selector_2:
                            raise syntax_error('Expected a selector', start)
                        current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                    else:
                        raise syntax_error(f'Operator not recognized: "{string_}"', start)
                elif type == tokenize.ENDMARKER:
                    break
            if current_selector:
                selectors.append(current_selector)
            return selectors

        def _merge(formats_pair):
            format_1, format_2 = formats_pair

            formats_info = []
            formats_info.extend(format_1.get('requested_formats', (format_1,)))
            formats_info.extend(format_2.get('requested_formats', (format_2,)))

            if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
                get_no_more = {'video': False, 'audio': False}
                for (i, fmt_info) in enumerate(formats_info):
                    if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
                        formats_info.pop(i)
                        continue
                    for aud_vid in ['audio', 'video']:
                        if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
                            if get_no_more[aud_vid]:
                                formats_info.pop(i)
                                break
                            get_no_more[aud_vid] = True

            if len(formats_info) == 1:
                return formats_info[0]

            video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
            audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']

            the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
            the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None

            output_ext = get_compatible_ext(
                vcodecs=[f.get('vcodec') for f in video_fmts],
                acodecs=[f.get('acodec') for f in audio_fmts],
                vexts=[f['ext'] for f in video_fmts],
                aexts=[f['ext'] for f in audio_fmts],
                preferences=(try_call(lambda: self.params['merge_output_format'].split('/'))
                             or self.params.get('prefer_free_formats') and ('webm', 'mkv')))

            filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))

            new_dict = {
                'requested_formats': formats_info,
                'format': '+'.join(filtered('format')),
                'format_id': '+'.join(filtered('format_id')),
                'ext': output_ext,
                'protocol': '+'.join(map(determine_protocol, formats_info)),
                'language': '+'.join(orderedSet(filtered('language'))) or None,
                'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
                'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
                'tbr': sum(filtered('tbr', 'vbr', 'abr')),
            }

            if the_only_video:
                new_dict.update({
                    'width': the_only_video.get('width'),
                    'height': the_only_video.get('height'),
                    'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
                    'fps': the_only_video.get('fps'),
                    'dynamic_range': the_only_video.get('dynamic_range'),
                    'vcodec': the_only_video.get('vcodec'),
                    'vbr': the_only_video.get('vbr'),
                    'stretched_ratio': the_only_video.get('stretched_ratio'),
                    'aspect_ratio': the_only_video.get('aspect_ratio'),
                })

            if the_only_audio:
                new_dict.update({
                    'acodec': the_only_audio.get('acodec'),
                    'abr': the_only_audio.get('abr'),
                    'asr': the_only_audio.get('asr'),
                    'audio_channels': the_only_audio.get('audio_channels')
                })

            return new_dict

        def _check_formats(formats):
            if (self.params.get('check_formats') is not None
                    or self.params.get('allow_unplayable_formats')):
                yield from formats
                return
            elif self.params.get('check_formats') == 'selected':
                yield from self._check_formats(formats)
                return

            for f in formats:
                if f.get('has_drm'):
                    yield from self._check_formats([f])
                else:
                    yield f

        def _build_selector_function(selector):
            if isinstance(selector, list):  # ,
                fs = [_build_selector_function(s) for s in selector]

                def selector_function(ctx):
                    for f in fs:
                        yield from f(ctx)
                return selector_function

            elif selector.type == GROUP:  # ()
                selector_function = _build_selector_function(selector.selector)

            elif selector.type == PICKFIRST:  # /
                fs = [_build_selector_function(s) for s in selector.selector]

                def selector_function(ctx):
                    for f in fs:
                        picked_formats = list(f(ctx))
                        if picked_formats:
                            return picked_formats
                    return []

            elif selector.type == MERGE:  # +
                selector_1, selector_2 = map(_build_selector_function, selector.selector)

                def selector_function(ctx):
                    for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
                        yield _merge(pair)

            elif selector.type == SINGLE:  # atom
                format_spec = selector.selector or 'best'

                # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
                if format_spec == 'all':
                    def selector_function(ctx):
                        yield from _check_formats(ctx['formats'][::-1])
                elif format_spec == 'mergeall':
                    def selector_function(ctx):
                        formats = list(_check_formats(
                            f for f in ctx['formats'] if f.get('vcodec') != 'none' or f.get('acodec') != 'none'))
                        if not formats:
                            return
                        merged_format = formats[-1]
                        for f in formats[-2::-1]:
                            merged_format = _merge((merged_format, f))
                        yield merged_format

                else:
                    format_fallback, seperate_fallback, format_reverse, format_idx = False, None, True, 1
                    mobj = re.match(
                        r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
                        format_spec)
                    if mobj is not None:
                        format_idx = int_or_none(mobj.group('n'), default=1)
                        format_reverse = mobj.group('bw')[0] == 'b'
                        format_type = (mobj.group('type') or [None])[0]
                        not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
                        format_modified = mobj.group('mod') is not None

                        format_fallback = not format_type and not format_modified  # for b, w
                        _filter_f = (
                            (lambda f: f.get('%scodec' % format_type) != 'none')
                            if format_type and format_modified  # bv*, ba*, wv*, wa*
                            else (lambda f: f.get('%scodec' % not_format_type) == 'none')
                            if format_type  # bv, ba, wv, wa
                            else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                            if not format_modified  # b, w
                            else lambda f: True)  # b*, w*
                        filter_f = lambda f: _filter_f(f) and (
                            f.get('vcodec') != 'none' or f.get('acodec') != 'none')
                    else:
                        if format_spec in self._format_selection_exts['audio']:
                            filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
                        elif format_spec in self._format_selection_exts['video']:
                            filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
                            seperate_fallback = lambda f: f.get('ext') == format_spec and f.get('vcodec') != 'none'
                        elif format_spec in self._format_selection_exts['storyboards']:
                            filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
                        else:
                            filter_f = lambda f: f.get('format_id') == format_spec  # id

                    def selector_function(ctx):
                        formats = list(ctx['formats'])
                        matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                        if format_fallback and ctx['incomplete_formats']:
                            # for extractors with incomplete formats (audio only (soundcloud)
                            # or video only (imgur)) best/worst will fallback to
                            # best/worst {video,audio}-only format
                            matches = formats
                        elif seperate_fallback and not ctx['has_merged_format']:
                            # for compatibility with youtube-dl when there is no pre-merged format
                            matches = list(filter(seperate_fallback, formats))
                        matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
                        try:
                            yield matches[format_idx - 1]
                        except LazyList.IndexError:
                            return

            filters = [self._build_format_filter(f) for f in selector.filters]

            def final_selector(ctx):
                ctx_copy = dict(ctx)
                for _filter in filters:
                    ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
                return selector_function(ctx_copy)
            return final_selector

        stream = io.BytesIO(format_spec.encode())
        try:
            tokens = list(_remove_unused_ops(tokenize.tokenize(stream.readline)))
        except tokenize.TokenError:
            raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

        class TokenIterator:
            def __init__(self, tokens):
                self.tokens = tokens
                self.counter = 0

            def __iter__(self):
                return self

            def __next__(self):
                if self.counter >= len(self.tokens):
                    raise StopIteration()
                value = self.tokens[self.counter]
                self.counter += 1
                return value

            next = __next__

            def restore_last_token(self):
                self.counter -= 1

        parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
        return _build_selector_function(parsed_selector)

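    # Parsing sketch (illustrative spec): 'bv*[height<=1080]+ba/b' tokenizes into
    #     PICKFIRST(MERGE(SINGLE 'bv*' [height<=1080], SINGLE 'ba'), SINGLE 'b')
    # i.e. a filtered video+audio merge with a plain 'best' fallback. The returned
    # selector consumes the same ctx dict that process_video_result builds:
    #     selector = ydl.build_format_selector('bv*[height<=1080]+ba/b')
    #     chosen = list(selector({'formats': formats, 'has_merged_format': False,
    #                             'incomplete_formats': False}))
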
    def _calc_headers(self, info_dict):
        res = merge_headers(self.params['http_headers'], info_dict.get('http_headers') or {})
        clean_headers(res)
        cookies = self.cookiejar.get_cookies_for_url(info_dict['url'])
        if cookies:
            encoder = LenientSimpleCookie()
            values = []
            for cookie in cookies:
                _, value = encoder.value_encode(cookie.value)
                values.append(f'{cookie.name}={value}')
                if cookie.domain:
                    values.append(f'Domain={cookie.domain}')
                if cookie.path:
                    values.append(f'Path={cookie.path}')
                if cookie.secure:
                    values.append('Secure')
                if cookie.expires:
                    values.append(f'Expires={cookie.expires}')
                if cookie.version:
                    values.append(f'Version={cookie.version}')
            info_dict['cookies'] = '; '.join(values)

        if 'X-Forwarded-For' not in res:
            x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
            if x_forwarded_for_ip:
                res['X-Forwarded-For'] = x_forwarded_for_ip

        return res

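    # Resulting shape (hypothetical cookie): a jar entry for 'example.com' with
    # domain, path and the secure flag set becomes
    #     info_dict['cookies'] == 'SID=abc123; Domain=.example.com; Path=/; Secure'
    # while the merged HTTP headers are returned separately.
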
    def _calc_cookies(self, url):
        self.deprecation_warning('"YoutubeDL._calc_cookies" is deprecated and may be removed in a future version')
        return self.cookiejar.get_cookie_header(url)

    def _sort_thumbnails(self, thumbnails):
        thumbnails.sort(key=lambda t: (
            t.get('preference') if t.get('preference') is not None else -1,
            t.get('width') if t.get('width') is not None else -1,
            t.get('height') if t.get('height') is not None else -1,
            t.get('id') if t.get('id') is not None else '',
            t.get('url')))

    def _sanitize_thumbnails(self, info_dict):
        thumbnails = info_dict.get('thumbnails')
        if thumbnails is None:
            thumbnail = info_dict.get('thumbnail')
            if thumbnail:
                info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
        if not thumbnails:
            return

        def check_thumbnails(thumbnails):
            for t in thumbnails:
                self.to_screen(f'[info] Testing thumbnail {t["id"]}')
                try:
                    self.urlopen(HEADRequest(t['url']))
                except network_exceptions as err:
                    self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
                    continue
                yield t

        self._sort_thumbnails(thumbnails)
        for i, t in enumerate(thumbnails):
            if t.get('id') is None:
                t['id'] = '%d' % i
            if t.get('width') and t.get('height'):
                t['resolution'] = '%dx%d' % (t['width'], t['height'])
            t['url'] = sanitize_url(t['url'])

        if self.params.get('check_formats') is True:
            info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
        else:
            info_dict['thumbnails'] = thumbnails

    def _fill_common_fields(self, info_dict, final=True):
        # TODO: move sanitization here
        if final:
            title = info_dict['fulltitle'] = info_dict.get('title')
            if not title:
                if title == '':
                    self.write_debug('Extractor gave empty title. Creating a generic title')
                else:
                    self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
                info_dict['title'] = f'{info_dict["extractor"].replace(":", "-")} video #{info_dict["id"]}'

        if info_dict.get('duration') is not None:
            info_dict['duration_string'] = formatSeconds(info_dict['duration'])

        for ts_key, date_key in (
                ('timestamp', 'upload_date'),
                ('release_timestamp', 'release_date'),
                ('modified_timestamp', 'modified_date'),
        ):
            if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
                # Working around out-of-range timestamp values (e.g. negative ones on Windows,
                # see http://bugs.python.org/issue1646728)
                with contextlib.suppress(ValueError, OverflowError, OSError):
                    upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
                    info_dict[date_key] = upload_date.strftime('%Y%m%d')

        live_keys = ('is_live', 'was_live')
        live_status = info_dict.get('live_status')
        if live_status is None:
            for key in live_keys:
                if info_dict.get(key) is False:
                    continue
                if info_dict.get(key):
                    live_status = key
                break
            if all(info_dict.get(key) is False for key in live_keys):
                live_status = 'not_live'
        if live_status:
            info_dict['live_status'] = live_status
            for key in live_keys:
                if info_dict.get(key) is None:
                    info_dict[key] = (live_status == key)
        if live_status == 'post_live':
            info_dict['was_live'] = True

        # Auto generate title fields corresponding to the *_number fields when missing
        # in order to always have clean titles. This is very common for TV series.
        for field in ('chapter', 'season', 'episode'):
            if final and info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
                info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])

    def _raise_pending_errors(self, info):
        err = info.pop('__pending_error', None)
        if err:
            self.report_error(err, tb=False)

    def sort_formats(self, info_dict):
        formats = self._get_formats(info_dict)
        formats.sort(key=FormatSorter(
            self, info_dict.get('_format_sort_fields') or []).calculate_preference)

    def process_video_result(self, info_dict, download=True):
        assert info_dict.get('_type', 'video') == 'video'
        self._num_videos += 1

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
        elif not info_dict.get('id'):
            raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])

        def report_force_conversion(field, field_not, conversion):
            self.report_warning(
                '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
                % (field, field_not, conversion))

        def sanitize_string_field(info, string_field):
            field = info.get(string_field)
            if field is None or isinstance(field, str):
                return
            report_force_conversion(string_field, 'a string', 'string')
            info[string_field] = str(field)

        def sanitize_numeric_fields(info):
            for numeric_field in self._NUMERIC_FIELDS:
                field = info.get(numeric_field)
                if field is None or isinstance(field, (int, float)):
                    continue
                report_force_conversion(numeric_field, 'numeric', 'int')
                info[numeric_field] = int_or_none(field)

        sanitize_string_field(info_dict, 'id')
        sanitize_numeric_fields(info_dict)
        if info_dict.get('section_end') and info_dict.get('section_start') is not None:
            info_dict['duration'] = round(info_dict['section_end'] - info_dict['section_start'], 3)
        if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):
            self.report_warning('"duration" field is negative, there is an error in extractor')

        chapters = info_dict.get('chapters') or []
        if chapters and chapters[0].get('start_time'):
            chapters.insert(0, {'start_time': 0})

        dummy_chapter = {'end_time': 0, 'start_time': info_dict.get('duration')}
        for idx, (prev, current, next_) in enumerate(zip(
                (dummy_chapter, *chapters), chapters, (*chapters[1:], dummy_chapter)), 1):
            if current.get('start_time') is None:
                current['start_time'] = prev.get('end_time')
            if not current.get('end_time'):
                current['end_time'] = next_.get('start_time')
            if not current.get('title'):
                current['title'] = f'<Untitled Chapter {idx}>'

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        self._sanitize_thumbnails(info_dict)

        thumbnail = info_dict.get('thumbnail')
        thumbnails = info_dict.get('thumbnails')
        if thumbnail:
            info_dict['thumbnail'] = sanitize_url(thumbnail)
        elif thumbnails:
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if info_dict.get('display_id') is None and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        self._fill_common_fields(info_dict)

        for cc_kind in ('subtitles', 'automatic_captions'):
            cc = info_dict.get(cc_kind)
            if cc:
                for _, subtitle in cc.items():
                    for subtitle_format in subtitle:
                        if subtitle_format.get('url'):
                            subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                        if subtitle_format.get('ext') is None:
                            subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

        automatic_captions = info_dict.get('automatic_captions')
        subtitles = info_dict.get('subtitles')

        info_dict['requested_subtitles'] = self.process_subtitles(
            info_dict['id'], subtitles, automatic_captions)

        formats = self._get_formats(info_dict)

        # Backward compatibility with InfoExtractor._sort_formats
        field_preference = (formats or [{}])[0].pop('__sort_fields', None)
        if field_preference:
            info_dict['_format_sort_fields'] = field_preference

        info_dict['_has_drm'] = any(  # or None ensures --clean-infojson removes it
            f.get('has_drm') and f['has_drm'] != 'maybe' for f in formats) or None
        if not self.params.get('allow_unplayable_formats'):
            formats = [f for f in formats if not f.get('has_drm') or f['has_drm'] == 'maybe']

        if formats and all(f.get('acodec') == f.get('vcodec') == 'none' for f in formats):
            self.report_warning(
                f'{"This video is DRM protected and " if info_dict["_has_drm"] else ""}'
                'only images are available for download. Use --list-formats to see them'.capitalize())

        get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
        if not get_from_start:
            info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
        if info_dict.get('is_live') and formats:
            formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
            if get_from_start and not formats:
                self.raise_no_formats(info_dict, msg=(
                    '--live-from-start is passed, but there are no formats that can be downloaded from the start. '
                    'If you want to download from the current time, use --no-live-from-start'))

        def is_wellformed(f):
            url = f.get('url')
            if not url:
                self.report_warning(
                    '"url" field is missing or empty - skipping format, '
                    'there is an error in extractor')
                return False
            if isinstance(url, bytes):
                sanitize_string_field(f, 'url')
            return True

        # Filter out malformed formats for better extraction robustness
        formats = list(filter(is_wellformed, formats or []))

        if not formats:
            self.raise_no_formats(info_dict)

        for format in formats:
            sanitize_string_field(format, 'format_id')
            sanitize_numeric_fields(format)
            format['url'] = sanitize_url(format['url'])
            if format.get('ext') is None:
                format['ext'] = determine_ext(format['url']).lower()
            if format.get('protocol') is None:
                format['protocol'] = determine_protocol(format)
            if format.get('resolution') is None:
                format['resolution'] = self.format_resolution(format, default=None)
            if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
                format['dynamic_range'] = 'SDR'
            if format.get('aspect_ratio') is None:
                format['aspect_ratio'] = try_call(lambda: round(format['width'] / format['height'], 2))
            if (not format.get('manifest_url')  # For fragmented formats, "tbr" is often max bitrate and not average
                    and info_dict.get('duration') and format.get('tbr')
                    and not format.get('filesize') and not format.get('filesize_approx')):
                format['filesize_approx'] = int(info_dict['duration'] * format['tbr'] * (1024 / 8))
            format['http_headers'] = self._calc_headers(collections.ChainMap(format, info_dict))

        # This is copied to http_headers by the above _calc_headers and can now be removed
        if '__x_forwarded_for_ip' in info_dict:
            del info_dict['__x_forwarded_for_ip']

        self.sort_formats({
            'formats': formats,
            '_format_sort_fields': info_dict.get('_format_sort_fields')})

        # Sanitize and group by format_id
        formats_dict = {}
        for i, format in enumerate(formats):
            if not format.get('format_id'):
                format['format_id'] = str(i)
            else:
                # Sanitize format_id from characters used in format selector expression
                format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
            formats_dict.setdefault(format['format_id'], []).append(format)

        # Make sure all formats have unique format_id
        common_exts = set(itertools.chain(*self._format_selection_exts.values()))
        for format_id, ambiguous_formats in formats_dict.items():
            ambigious_id = len(ambiguous_formats) > 1
            for i, format in enumerate(ambiguous_formats):
                if ambigious_id:
                    format['format_id'] = '%s-%d' % (format_id, i)
                # Ensure there is no conflict between id and ext in format selection
                # See https://github.com/yt-dlp/yt-dlp/issues/1282
                if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
                    format['format_id'] = 'f%s' % format['format_id']

                if format.get('format') is None:
                    format['format'] = '{id} - {res}{note}'.format(
                        id=format['format_id'],
                        res=self.format_resolution(format),
                        note=format_field(format, 'format_note', ' (%s)'),
                    )

        if self.params.get('check_formats') is True:
            formats = LazyList(self._check_formats(formats[::-1]), reverse=True)

        if not formats or formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats

        info_dict, _ = self.pre_process(info_dict)

        if self._match_entry(info_dict, incomplete=self._format_fields) is not None:
            return info_dict

        self.post_extract(info_dict)
        info_dict, _ = self.pre_process(info_dict, 'after_filter')

        # The pre-processors may have modified the formats
        formats = self._get_formats(info_dict)

        list_only = self.params.get('simulate') == 'list_only'
        interactive_format_selection = not list_only and self.format_selector == '-'
        if self.params.get('list_thumbnails'):
            self.list_thumbnails(info_dict)
        if self.params.get('listsubtitles'):
            if 'automatic_captions' in info_dict:
                self.list_subtitles(
                    info_dict['id'], automatic_captions, 'automatic captions')
            self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
        if self.params.get('listformats') or interactive_format_selection:
            self.list_formats(info_dict)
        if list_only:
            # Without this printing, -F --print-json will not work
            self.__forced_printings(info_dict)
            return info_dict

        format_selector = self.format_selector
        while True:
            if interactive_format_selection:
                req_format = input(self._format_screen('\nEnter format selector ', self.Styles.EMPHASIS)
                                   + '(Press ENTER for default, or Ctrl+C to quit)'
                                   + self._format_screen(': ', self.Styles.EMPHASIS))
                try:
                    format_selector = self.build_format_selector(req_format) if req_format else None
                except SyntaxError as err:
                    self.report_error(err, tb=False, is_error=False)
                    continue

            if format_selector is None:
                req_format = self._default_format_spec(info_dict, download=download)
                self.write_debug(f'Default format spec: {req_format}')
                format_selector = self.build_format_selector(req_format)

            formats_to_download = list(format_selector({
                'formats': formats,
                'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
                'incomplete_formats': (all(f.get('vcodec') == 'none' for f in formats)  # No formats with video
                                       or all(f.get('acodec') == 'none' for f in formats)),  # OR, No formats with audio
            }))
            if interactive_format_selection and not formats_to_download:
                self.report_error('Requested format is not available', tb=False, is_error=False)
                continue
            break

        if not formats_to_download:
            if not self.params.get('ignore_no_formats_error'):
                raise ExtractorError(
                    'Requested format is not available. Use --list-formats for a list of available formats',
                    expected=True, video_id=info_dict['id'], ie=info_dict['extractor'])
            self.report_warning('Requested format is not available')
            # Process what we can, even without any available formats.
            formats_to_download = [{}]

        requested_ranges = tuple(self.params.get('download_ranges', lambda *_: [{}])(info_dict, self))
        best_format, downloaded_formats = formats_to_download[-1], []
        if download:
            if best_format and requested_ranges:
                def to_screen(*msg):
                    self.to_screen(f'[info] {info_dict["id"]}: {" ".join(", ".join(variadic(m)) for m in msg)}')

                to_screen(f'Downloading {len(formats_to_download)} format(s):',
                          (f['format_id'] for f in formats_to_download))
                if requested_ranges != ({}, ):
                    to_screen(f'Downloading {len(requested_ranges)} time ranges:',
                              (f'{c["start_time"]:.1f}-{c["end_time"]:.1f}' for c in requested_ranges))
            max_downloads_reached = False

            for fmt, chapter in itertools.product(formats_to_download, requested_ranges):
                new_info = self._copy_infodict(info_dict)
                new_info.update(fmt)
                offset, duration = info_dict.get('section_start') or 0, info_dict.get('duration') or float('inf')
                end_time = offset + min(chapter.get('end_time', duration), duration)
                # duration may not be accurate. So allow deviations <1sec
                if end_time == float('inf') or end_time > offset + duration + 1:
                    end_time = None
                if chapter or offset:
                    new_info.update({
                        'section_start': offset + chapter.get('start_time', 0),
                        'section_end': end_time,
                        'section_title': chapter.get('title'),
                        'section_number': chapter.get('index'),
                    })
                downloaded_formats.append(new_info)
                try:
                    self.process_info(new_info)
                except MaxDownloadsReached:
                    max_downloads_reached = True
                self._raise_pending_errors(new_info)
                # Remove copied info
                for key, val in tuple(new_info.items()):
                    if info_dict.get(key) == val:
                        new_info.pop(key)
                if max_downloads_reached:
                    break

            write_archive = {f.get('__write_download_archive', False) for f in downloaded_formats}
            assert write_archive.issubset({True, False, 'ignore'})
            if True in write_archive and False not in write_archive:
                self.record_download_archive(info_dict)

            info_dict['requested_downloads'] = downloaded_formats
            info_dict = self.run_all_pps('after_video', info_dict)
            if max_downloads_reached:
                raise MaxDownloadsReached()

        # We update the info dict with the selected best quality format (backwards compatibility)
        info_dict.update(best_format)
        return info_dict

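    # Sketch of the range mechanics above (hypothetical chapter): `download_ranges`
    # is a callable returning dicts such as {'start_time': 30, 'end_time': 90};
    # each (format, range) pair becomes one process_info() call with
    # section_start=30 and section_end=90 set on a copy of the info dict.
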
    def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
        """Select the requested subtitles and their format"""
        available_subs, normal_sub_langs = {}, []
        if normal_subtitles and self.params.get('writesubtitles'):
            available_subs.update(normal_subtitles)
            normal_sub_langs = tuple(normal_subtitles.keys())
        if automatic_captions and self.params.get('writeautomaticsub'):
            for lang, cap_info in automatic_captions.items():
                if lang not in available_subs:
                    available_subs[lang] = cap_info

        if not available_subs or (
                not self.params.get('writesubtitles')
                and not self.params.get('writeautomaticsub')):
            return None

        all_sub_langs = tuple(available_subs.keys())
        if self.params.get('allsubtitles', False):
            requested_langs = all_sub_langs
        elif self.params.get('subtitleslangs', False):
            try:
                requested_langs = orderedSet_from_options(
                    self.params.get('subtitleslangs'), {'all': all_sub_langs}, use_regex=True)
            except re.error as e:
                raise ValueError(f'Wrong regex for subtitleslangs: {e.pattern}')
        else:
            requested_langs = LazyList(itertools.chain(
                ['en'] if 'en' in normal_sub_langs else [],
                filter(lambda f: f.startswith('en'), normal_sub_langs),
                ['en'] if 'en' in all_sub_langs else [],
                filter(lambda f: f.startswith('en'), all_sub_langs),
                normal_sub_langs, all_sub_langs,
            ))[:1]
        if requested_langs:
            self.to_screen(f'[info] {video_id}: Downloading subtitles: {", ".join(requested_langs)}')

        formats_query = self.params.get('subtitlesformat', 'best')
        formats_preference = formats_query.split('/') if formats_query else []
        subs = {}
        for lang in requested_langs:
            formats = available_subs.get(lang)
            if formats is None:
                self.report_warning(f'{lang} subtitles not available for {video_id}')
                continue
            for ext in formats_preference:
                if ext == 'best':
                    f = formats[-1]
                    break
                matches = list(filter(lambda f: f['ext'] == ext, formats))
                if matches:
                    f = matches[-1]
                    break
            else:
                f = formats[-1]
                self.report_warning(
                    'No subtitle format found matching "%s" for language %s, '
                    'using %s' % (formats_query, lang, f['ext']))
            subs[lang] = f
        return subs

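    # Selection example (illustrative): with available languages ('en', 'en-GB',
    # 'de') and no explicit request, the LazyList above prefers plain 'en'; with
    # {'subtitleslangs': ['de', 'en.*']} both 'de' and the English variants are
    # matched, in that order.
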

    def _forceprint(self, key, info_dict):
        if info_dict is None:
            return
        info_copy = info_dict.copy()
        info_copy.setdefault('filename', self.prepare_filename(info_dict))
        if info_dict.get('requested_formats') is not None:
            # For RTMP URLs, also include the playpath
            info_copy['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
        elif info_dict.get('url'):
            info_copy['urls'] = info_dict['url'] + info_dict.get('play_path', '')
        info_copy['formats_table'] = self.render_formats_table(info_dict)
        info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)
        info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
        info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))

        def format_tmpl(tmpl):
            mobj = re.fullmatch(r'([\w.:,]|-\d|(?P<dict>{([\w.:,]|-\d)+}))+=?', tmpl)
            if not mobj:
                return tmpl

            fmt = '%({})s'
            if tmpl.startswith('{'):
                tmpl, fmt = f'.{tmpl}', '%({})j'
            if tmpl.endswith('='):
                tmpl, fmt = tmpl[:-1], '{0} = %({0})#j'
            return '\n'.join(map(fmt.format, [tmpl] if mobj.group('dict') else tmpl.split(',')))

        for tmpl in self.params['forceprint'].get(key, []):
            self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))

        for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):
            filename = self.prepare_filename(info_dict, outtmpl=file_tmpl)
            tmpl = format_tmpl(tmpl)
            self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
            if self._ensure_dir_exists(filename):
                with open(filename, 'a', encoding='utf-8', newline='') as f:
                    f.write(self.evaluate_outtmpl(tmpl, info_copy) + os.linesep)

        return info_copy
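
    # A few illustrative format_tmpl conversions (worked out from the regex
    # above; these examples are not in the original source):
    #   format_tmpl('title,id')   -> '%(title)s\n%(id)s'
    #   format_tmpl('duration=')  -> 'duration = %(duration)#j'
    #   format_tmpl('{id,title}') -> '%(.{id,title})j'
    #   format_tmpl('%(title)s')  -> '%(title)s'  (no fullmatch; returned as-is)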

    def __forced_printings(self, info_dict, filename=None, incomplete=True):
        if (self.params.get('forcejson')
                or self.params['forceprint'].get('video')
                or self.params['print_to_file'].get('video')):
            self.post_extract(info_dict)
        if filename:
            info_dict['filename'] = filename
        info_copy = self._forceprint('video', info_dict)

        def print_field(field, actual_field=None, optional=False):
            if actual_field is None:
                actual_field = field
            if self.params.get(f'force{field}') and (
                    info_copy.get(field) is not None or (not optional and not incomplete)):
                self.to_stdout(info_copy[actual_field])

        print_field('title')
        print_field('id')
        print_field('url', 'urls')
        print_field('thumbnail', optional=True)
        print_field('description', optional=True)
        print_field('filename')
        if self.params.get('forceduration') and info_copy.get('duration') is not None:
            self.to_stdout(formatSeconds(info_copy['duration']))
        print_field('format')

        if self.params.get('forcejson'):
            self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
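
    # For instance (an illustrative mapping, assuming params built from the
    # CLI): `--print title` populates params['forceprint']['video'] with
    # ['title'], so _forceprint evaluates the '%(title)s' template against the
    # resolved info_dict and writes the result to stdout.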

    def dl(self, name, info, subtitle=False, test=False):
        if not info.get('url'):
            self.raise_no_formats(info, True)

        if test:
            verbose = self.params.get('verbose')
            params = {
                'test': True,
                'quiet': self.params.get('quiet') or not verbose,
                'verbose': verbose,
                'noprogress': not verbose,
                'nopart': True,
                'skip_unavailable_fragments': False,
                'keep_fragments': False,
                'overwrites': True,
                '_no_ytdl_file': True,
            }
        else:
            params = self.params
        fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
        if not test:
            for ph in self._progress_hooks:
                fd.add_progress_hook(ph)
            urls = '", "'.join(
                (f['url'].split(',')[0] + ',<data>' if f['url'].startswith('data:') else f['url'])
                for f in info.get('requested_formats', []) or [info])
            self.write_debug(f'Invoking {fd.FD_NAME} downloader on "{urls}"')

        # Note: Ideally info should be a deep-copied so that hooks cannot modify it.
        # But it may contain objects that are not deep-copyable
        new_info = self._copy_infodict(info)
        if new_info.get('http_headers') is None:
            new_info['http_headers'] = self._calc_headers(new_info)
        return fd.download(name, new_info, subtitle)
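
    # Minimal usage sketch (assumed values; `ydl` is a configured YoutubeDL):
    #
    #   success, real_download = ydl.dl(filename, info_dict)
    #
    # real_download is False when the downloader found an already-complete
    # file, which is why process_info() tracks it under '__real_download'.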

    def existing_file(self, filepaths, *, default_overwrite=True):
        existing_files = list(filter(os.path.exists, orderedSet(filepaths)))
        if existing_files and not self.params.get('overwrites', default_overwrite):
            return existing_files[0]

        for file in existing_files:
            self.report_file_delete(file)
            os.remove(file)
        return None

    def process_info(self, info_dict):
        """Process a single resolved IE result. (Modifies it in-place)"""

        assert info_dict.get('_type', 'video') == 'video'
        original_infodict = info_dict

        if 'format' not in info_dict and 'ext' in info_dict:
            info_dict['format'] = info_dict['ext']

        if self._match_entry(info_dict) is not None:
            info_dict['__write_download_archive'] = 'ignore'
            return

        # Does nothing under normal operation - for backward compatibility of process_info
        self.post_extract(info_dict)

        def replace_info_dict(new_info):
            nonlocal info_dict
            if new_info == info_dict:
                return
            info_dict.clear()
            info_dict.update(new_info)

        new_info, _ = self.pre_process(info_dict, 'video')
        replace_info_dict(new_info)
        self._num_downloads += 1

        # info_dict['_filename'] needs to be set for backward compatibility
        info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
        temp_filename = self.prepare_filename(info_dict, 'temp')
        files_to_move = {}

        # Forced printings
        self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))

        def check_max_downloads():
            if self._num_downloads >= float(self.params.get('max_downloads') or 'inf'):
                raise MaxDownloadsReached()

        if self.params.get('simulate'):
            info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
            check_max_downloads()
            return

        if full_filename is None:
            return
        if not self._ensure_dir_exists(encodeFilename(full_filename)):
            return
        if not self._ensure_dir_exists(encodeFilename(temp_filename)):
            return

        if self._write_description('video', info_dict,
                                   self.prepare_filename(info_dict, 'description')) is None:
            return

        sub_files = self._write_subtitles(info_dict, temp_filename)
        if sub_files is None:
            return
        files_to_move.update(dict(sub_files))

        thumb_files = self._write_thumbnails(
            'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
        if thumb_files is None:
            return
        files_to_move.update(dict(thumb_files))

        infofn = self.prepare_filename(info_dict, 'infojson')
        _infojson_written = self._write_info_json('video', info_dict, infofn)
        if _infojson_written:
            info_dict['infojson_filename'] = infofn
            # For backward compatibility, even though it was a private field
            info_dict['__infojson_filename'] = infofn
        elif _infojson_written is None:
            return

        # Note: Annotations are deprecated
        annofn = None
        if self.params.get('writeannotations', False):
            annofn = self.prepare_filename(info_dict, 'annotation')
        if annofn:
            if not self._ensure_dir_exists(encodeFilename(annofn)):
                return
            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
                self.to_screen('[info] Video annotations are already present')
            elif not info_dict.get('annotations'):
                self.report_warning('There are no annotations to write.')
            else:
                try:
                    self.to_screen('[info] Writing video annotations to: ' + annofn)
                    with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                        annofile.write(info_dict['annotations'])
                except (KeyError, TypeError):
                    self.report_warning('There are no annotations to write.')
                except OSError:
                    self.report_error('Cannot write annotations file: ' + annofn)
                    return

        # Write internet shortcut files
        def _write_link_file(link_type):
            url = try_get(info_dict['webpage_url'], iri_to_uri)
            if not url:
                self.report_warning(
                    f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
                return True
            linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
            if not self._ensure_dir_exists(encodeFilename(linkfn)):
                return False
            if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
                self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
                return True
            try:
                self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
                with open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
                          newline='\r\n' if link_type == 'url' else '\n') as linkfile:
                    template_vars = {'url': url}
                    if link_type == 'desktop':
                        template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
                    linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
            except OSError:
                self.report_error(f'Cannot write internet shortcut {linkfn}')
                return False
            return True

        write_links = {
            'url': self.params.get('writeurllink'),
            'webloc': self.params.get('writewebloclink'),
            'desktop': self.params.get('writedesktoplink'),
        }
        if self.params.get('writelink'):
            link_type = ('webloc' if sys.platform == 'darwin'
                         else 'desktop' if sys.platform.startswith('linux')
                         else 'url')
            write_links[link_type] = True

        if any(should_write and not _write_link_file(link_type)
               for link_type, should_write in write_links.items()):
            return

        new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
        replace_info_dict(new_info)

        if self.params.get('skip_download'):
            info_dict['filepath'] = temp_filename
            info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
            info_dict['__files_to_move'] = files_to_move
            replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
            info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
        else:
            # Download
            info_dict.setdefault('__postprocessors', [])
            try:

                def existing_video_file(*filepaths):
                    ext = info_dict.get('ext')
                    converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
                    file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
                                              default_overwrite=False)
                    if file:
                        info_dict['ext'] = os.path.splitext(file)[1][1:]
                    return file

                fd, success = None, True
                if info_dict.get('protocol') or info_dict.get('url'):
                    fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
                    if fd is not FFmpegFD and 'no-direct-merge' not in self.params['compat_opts'] and (
                            info_dict.get('section_start') or info_dict.get('section_end')):
                        msg = ('This format cannot be partially downloaded' if FFmpegFD.available()
                               else 'You have requested downloading the video partially, but ffmpeg is not installed')
                        self.report_error(f'{msg}. Aborting')
                        return

                if info_dict.get('requested_formats') is not None:
                    old_ext = info_dict['ext']
                    if self.params.get('merge_output_format') is None:
                        if (info_dict['ext'] == 'webm'
                                and info_dict.get('thumbnails')
                                # check with type instead of pp_key, __name__, or isinstance
                                # since we dont want any custom PPs to trigger this
                                and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):  # noqa: E721
                            info_dict['ext'] = 'mkv'
                            self.report_warning(
                                'webm doesn\'t support embedding a thumbnail, mkv will be used')
                    new_ext = info_dict['ext']

                    def correct_ext(filename, ext=new_ext):
                        if filename == '-':
                            return filename
                        filename_real_ext = os.path.splitext(filename)[1][1:]
                        filename_wo_ext = (
                            os.path.splitext(filename)[0]
                            if filename_real_ext in (old_ext, new_ext)
                            else filename)
                        return f'{filename_wo_ext}.{ext}'

                    # Ensure filename always has a correct extension for successful merge
                    full_filename = correct_ext(full_filename)
                    temp_filename = correct_ext(temp_filename)
                    dl_filename = existing_video_file(full_filename, temp_filename)

                    info_dict['__real_download'] = False
                    # NOTE: Copy so that original format dicts are not modified
                    info_dict['requested_formats'] = list(map(dict, info_dict['requested_formats']))

                    merger = FFmpegMergerPP(self)
                    downloaded = []
                    if dl_filename is not None:
                        self.report_file_already_downloaded(dl_filename)
                    elif fd:
                        for f in info_dict['requested_formats'] if fd != FFmpegFD else []:
                            f['filepath'] = fname = prepend_extension(
                                correct_ext(temp_filename, info_dict['ext']),
                                'f%s' % f['format_id'], info_dict['ext'])
                            downloaded.append(fname)
                        info_dict['url'] = '\n'.join(f['url'] for f in info_dict['requested_formats'])
                        success, real_download = self.dl(temp_filename, info_dict)
                        info_dict['__real_download'] = real_download
                    else:
                        if self.params.get('allow_unplayable_formats'):
                            self.report_warning(
                                'You have requested merging of multiple formats '
                                'while also allowing unplayable formats to be downloaded. '
                                'The formats won\'t be merged to prevent data corruption.')
                        elif not merger.available:
                            msg = 'You have requested merging of multiple formats but ffmpeg is not installed'
                            if not self.params.get('ignoreerrors'):
                                self.report_error(f'{msg}. Aborting due to --abort-on-error')
                                return
                            self.report_warning(f'{msg}. The formats won\'t be merged')

                        if temp_filename == '-':
                            reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
                                      else 'but the formats are incompatible for simultaneous download' if merger.available
                                      else 'but ffmpeg is not installed')
                            self.report_warning(
                                f'You have requested downloading multiple formats to stdout {reason}. '
                                'The formats will be streamed one after the other')
                            fname = temp_filename
                        for f in info_dict['requested_formats']:
                            new_info = dict(info_dict)
                            del new_info['requested_formats']
                            new_info.update(f)
                            if temp_filename != '-':
                                fname = prepend_extension(
                                    correct_ext(temp_filename, new_info['ext']),
                                    'f%s' % f['format_id'], new_info['ext'])
                                if not self._ensure_dir_exists(fname):
                                    return
                                f['filepath'] = fname
                                downloaded.append(fname)
                            partial_success, real_download = self.dl(fname, new_info)
                            info_dict['__real_download'] = info_dict['__real_download'] or real_download
                            success = success and partial_success

                    if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
                        info_dict['__postprocessors'].append(merger)
                        info_dict['__files_to_merge'] = downloaded
                        # Even if there were no downloads, it is being merged only now
                        info_dict['__real_download'] = True
                    else:
                        for file in downloaded:
                            files_to_move[file] = None
                else:
                    # Just a single file
                    dl_filename = existing_video_file(full_filename, temp_filename)
                    if dl_filename is None or dl_filename == temp_filename:
                        # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
                        # So we should try to resume the download
                        success, real_download = self.dl(temp_filename, info_dict)
                        info_dict['__real_download'] = real_download
                    else:
                        self.report_file_already_downloaded(dl_filename)

                dl_filename = dl_filename or temp_filename
                info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))

            except network_exceptions as err:
                self.report_error('unable to download video data: %s' % error_to_compat_str(err))
                return
            except OSError as err:
                raise UnavailableVideoError(err)
            except (ContentTooShortError, ) as err:
                self.report_error(f'content too short (expected {err.expected} bytes and served {err.downloaded})')
                return

            self._raise_pending_errors(info_dict)
            if success and full_filename != '-':

                def fixup():
                    do_fixup = True
                    fixup_policy = self.params.get('fixup')
                    vid = info_dict['id']

                    if fixup_policy in ('ignore', 'never'):
                        do_fixup = False
                    elif fixup_policy == 'warn':
                        do_fixup = 'warn'
                    elif fixup_policy != 'force':
                        assert fixup_policy in ('detect_or_warn', None)
                        if not info_dict.get('__real_download'):
                            do_fixup = False

                    def ffmpeg_fixup(cndn, msg, cls):
                        if not (do_fixup and cndn):
                            return
                        elif do_fixup == 'warn':
                            self.report_warning(f'{vid}: {msg}')
                            return
                        pp = cls(self)
                        if pp.available:
                            info_dict['__postprocessors'].append(pp)
                        else:
                            self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')

                    stretched_ratio = info_dict.get('stretched_ratio')
                    ffmpeg_fixup(stretched_ratio not in (1, None),
                                 f'Non-uniform pixel ratio {stretched_ratio}',
                                 FFmpegFixupStretchedPP)

                    downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
                    downloader = downloader.FD_NAME if downloader else None

                    ext = info_dict.get('ext')
                    postprocessed_by_ffmpeg = info_dict.get('requested_formats') or any((
                        isinstance(pp, FFmpegVideoConvertorPP)
                        and resolve_recode_mapping(ext, pp.mapping)[0] not in (ext, None)
                    ) for pp in self._pps['post_process'])

                    if not postprocessed_by_ffmpeg:
                        ffmpeg_fixup(ext == 'm4a' and info_dict.get('container') == 'm4a_dash',
                                     'writing DASH m4a. Only some players support this container',
                                     FFmpegFixupM4aPP)
                        ffmpeg_fixup(downloader == 'hlsnative' and not self.params.get('hls_use_mpegts')
                                     or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None,
                                     'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
                                     FFmpegFixupM3u8PP)
                        ffmpeg_fixup(info_dict.get('is_live') and downloader == 'dashsegments',
                                     'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)

                    ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
                    ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed duration detected', FFmpegFixupDurationPP)

                fixup()
                try:
                    replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
                except PostProcessingError as err:
                    self.report_error('Postprocessing: %s' % str(err))
                    return
                try:
                    for ph in self._post_hooks:
                        ph(info_dict['filepath'])
                except Exception as err:
                    self.report_error('post hooks: %s' % str(err))
                    return
                info_dict['__write_download_archive'] = True

        assert info_dict is original_infodict  # Make sure the info_dict was modified in-place
        if self.params.get('force_write_download_archive'):
            info_dict['__write_download_archive'] = True
        check_max_downloads()

    def __download_wrapper(self, func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            try:
                res = func(*args, **kwargs)
            except UnavailableVideoError as e:
                self.report_error(e)
            except DownloadCancelled as e:
                self.to_screen(f'[info] {e}')
                if not self.params.get('break_per_url'):
                    raise
                self._num_downloads = 0
            else:
                if self.params.get('dump_single_json', False):
                    self.post_extract(res)
                    self.to_stdout(json.dumps(self.sanitize_info(res)))
        return wrapper

    def download(self, url_list):
        """Download a given list of URLs."""
        url_list = variadic(url_list)  # Passing a single URL is a common mistake
        outtmpl = self.params['outtmpl']['default']
        if (len(url_list) > 1
                and outtmpl != '-'
                and '%' not in outtmpl
                and self.params.get('max_downloads') != 1):
            raise SameFileError(outtmpl)

        for url in url_list:
            self.__download_wrapper(self.extract_info)(
                url, force_generic_extractor=self.params.get('force_generic_extractor', False))

        return self._download_retcode
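
    # Typical API usage (a sketch, assuming a params dict as documented above):
    #
    #   with YoutubeDL({'outtmpl': '%(title)s.%(ext)s'}) as ydl:
    #       retcode = ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
    #
    # Passing a bare string also works, since variadic() wraps it into a list.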

    def download_with_info_file(self, info_filename):
        with contextlib.closing(fileinput.FileInput(
                [info_filename], mode='r',
                openhook=fileinput.hook_encoded('utf-8'))) as f:
            # FileInput doesn't have a read method, we can't call json.load
            infos = [self.sanitize_info(info, self.params.get('clean_infojson', True))
                     for info in variadic(json.loads('\n'.join(f)))]
        for info in infos:
            self._load_cookies(info.get('cookies'), from_headers=False)
            self._load_cookies(traverse_obj(info.get('http_headers'), 'Cookie', casesense=False))  # compat
            try:
                self.__download_wrapper(self.process_ie_result)(info, download=True)
            except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
                if not isinstance(e, EntryNotInPlaylist):
                    self.to_stderr('\r')
                webpage_url = info.get('webpage_url')
                if webpage_url is None:
                    raise
                self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
                self.download([webpage_url])
        return self._download_retcode
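
    # Illustrative round-trip (hypothetical filename): an info JSON produced
    # via the 'writeinfojson' option can be re-downloaded without re-extraction:
    #
    #   ydl.download_with_info_file('video.info.json')
    #
    # On failure it falls back to re-extracting from 'webpage_url', as above.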

    @staticmethod
    def sanitize_info(info_dict, remove_private_keys=False):
        ''' Sanitize the infodict for converting to json '''
        if info_dict is None:
            return info_dict
        info_dict.setdefault('epoch', int(time.time()))
        info_dict.setdefault('_type', 'video')
        info_dict.setdefault('_version', {
            'version': __version__,
            'current_git_head': current_git_head(),
            'release_git_head': RELEASE_GIT_HEAD,
            'repository': REPOSITORY,
        })

        if remove_private_keys:
            reject = lambda k, v: v is None or k.startswith('__') or k in {
                'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
                'entries', 'filepath', '_filename', 'filename', 'infojson_filename', 'original_url',
                'playlist_autonumber', '_format_sort_fields',
            }
        else:
            reject = lambda k, v: False

        def filter_fn(obj):
            if isinstance(obj, dict):
                return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
            elif isinstance(obj, (list, tuple, set, LazyList)):
                return list(map(filter_fn, obj))
            elif obj is None or isinstance(obj, (str, int, float, bool)):
                return obj
            else:
                return repr(obj)

        return filter_fn(info_dict)
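
    # Example behaviour (illustrative values): with remove_private_keys=True,
    #
    #   YoutubeDL.sanitize_info({'id': 'x', '__real_download': True,
    #                            'filepath': '/tmp/x.mp4', 'title': 't'}, True)
    #
    # drops '__real_download' (double-underscore prefix) and 'filepath' (listed
    # above), keeps 'id'/'title', and adds the 'epoch', '_type' and '_version'
    # defaults set at the top of the function.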

    @staticmethod
    def filter_requested_info(info_dict, actually_filter=True):
        ''' Alias of sanitize_info for backward compatibility '''
        return YoutubeDL.sanitize_info(info_dict, actually_filter)

    def _delete_downloaded_files(self, *files_to_delete, info={}, msg=None):
        for filename in set(filter(None, files_to_delete)):
            if msg:
                self.to_screen(msg % filename)
            try:
                os.remove(filename)
            except OSError:
                self.report_warning(f'Unable to delete file {filename}')
            if filename in info.get('__files_to_move', []):  # NB: Delete even if None
                del info['__files_to_move'][filename]

    @staticmethod
    def post_extract(info_dict):
        def actual_post_extract(info_dict):
            if info_dict.get('_type') in ('playlist', 'multi_video'):
                for video_dict in info_dict.get('entries', {}):
                    actual_post_extract(video_dict or {})
                return

            post_extractor = info_dict.pop('__post_extractor', None) or (lambda: {})
            info_dict.update(post_extractor())

        actual_post_extract(info_dict or {})

    def run_pp(self, pp, infodict):
        files_to_delete = []
        if '__files_to_move' not in infodict:
            infodict['__files_to_move'] = {}
        try:
            files_to_delete, infodict = pp.run(infodict)
        except PostProcessingError as e:
            # Must be True and not 'only_download'
            if self.params.get('ignoreerrors') is True:
                self.report_error(e)
                return infodict
            raise

        if not files_to_delete:
            return infodict
        if self.params.get('keepvideo', False):
            for f in files_to_delete:
                infodict['__files_to_move'].setdefault(f, '')
        else:
            self._delete_downloaded_files(
                *files_to_delete, info=infodict, msg='Deleting original file %s (pass -k to keep)')
        return infodict

    def run_all_pps(self, key, info, *, additional_pps=None):
        if key != 'video':
            self._forceprint(key, info)
        for pp in (additional_pps or []) + self._pps[key]:
            info = self.run_pp(pp, info)
        return info

    def pre_process(self, ie_info, key='pre_process', files_to_move=None):
        info = dict(ie_info)
        info['__files_to_move'] = files_to_move or {}
        try:
            info = self.run_all_pps(key, info)
        except PostProcessingError as err:
            msg = f'Preprocessing: {err}'
            info.setdefault('__pending_error', msg)
            self.report_error(msg, is_error=False)
        return info, info.pop('__files_to_move', None)

    def post_process(self, filename, info, files_to_move=None):
        """Run all the postprocessors on the given file."""
        info['filepath'] = filename
        info['__files_to_move'] = files_to_move or {}
        info = self.run_all_pps('post_process', info, additional_pps=info.get('__postprocessors'))
        info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
        del info['__files_to_move']
        return self.run_all_pps('after_move', info)
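
    # Ordering sketch (as implemented above): post_process() first runs the
    # 'post_process' PPs, including any '__postprocessors' queued during
    # download (e.g. the ffmpeg merger), then MoveFilesAfterDownloadPP
    # relocates temp files, so the 'after_move' PPs see the file at its
    # final 'filepath'.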

    def _make_archive_id(self, info_dict):
        video_id = info_dict.get('id')
        if video_id is None:
            return
        # Future-proof against any change in case
        # and backwards compatibility with prior versions
        extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
        if extractor is None:
            url = str_or_none(info_dict.get('url'))
            if not url:
                return
            # Try to find matching extractor for the URL and take its ie_key
            for ie_key, ie in self._ies.items():
                if ie.suitable(url):
                    extractor = ie_key
                    break
            else:
                return
        return make_archive_id(extractor, video_id)

    def in_download_archive(self, info_dict):
        if not self.archive:
            return False

        vid_ids = [self._make_archive_id(info_dict)]
        vid_ids.extend(info_dict.get('_old_archive_ids') or [])
        return any(id_ in self.archive for id_ in vid_ids)

    def record_download_archive(self, info_dict):
        fn = self.params.get('download_archive')
        if fn is None:
            return
        vid_id = self._make_archive_id(info_dict)
        assert vid_id

        self.write_debug(f'Adding to archive: {vid_id}')
        if is_path_like(fn):
            with locked_file(fn, 'a', encoding='utf-8') as archive_file:
                archive_file.write(vid_id + '\n')
        self.archive.add(vid_id)
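
    # Archive entries are plain '<extractor> <id>' lines (see make_archive_id
    # in utils); e.g. a line like 'youtube BaW_jenozKc' in the file passed as
    # the 'download_archive' param makes in_download_archive() return True
    # for that video.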

    @staticmethod
    def format_resolution(format, default='unknown'):
        if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
            return 'audio only'
        if format.get('resolution') is not None:
            return format['resolution']
        if format.get('width') and format.get('height'):
            return '%dx%d' % (format['width'], format['height'])
        elif format.get('height'):
            return '%sp' % format['height']
        elif format.get('width'):
            return '%dx?' % format['width']
        return default
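
    # Illustrative outputs: {'width': 1920, 'height': 1080} -> '1920x1080',
    # {'height': 720} -> '720p', {'width': 640} -> '640x?', and an audio-only
    # format ('vcodec' == 'none', 'acodec' != 'none') -> 'audio only'.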

    def _list_format_headers(self, *headers):
        if self.params.get('listformats_table', True) is not False:
            return [self._format_out(header, self.Styles.HEADERS) for header in headers]
        return headers

    def _format_note(self, fdict):
        res = ''
        if fdict.get('ext') in ['f4f', 'f4m']:
            res += '(unsupported)'
        if fdict.get('language'):
            if res:
                res += ' '
            res += '[%s]' % fdict['language']
        if fdict.get('format_note') is not None:
            if res:
                res += ' '
            res += fdict['format_note']
        if fdict.get('tbr') is not None:
            if res:
                res += ', '
            res += '%4dk' % fdict['tbr']
        if fdict.get('container') is not None:
            if res:
                res += ', '
            res += '%s container' % fdict['container']
        if (fdict.get('vcodec') is not None
                and fdict.get('vcodec') != 'none'):
            if res:
                res += ', '
            res += fdict['vcodec']
            if fdict.get('vbr') is not None:
                res += '@'
        elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
            res += 'video@'
        if fdict.get('vbr') is not None:
            res += '%4dk' % fdict['vbr']
        if fdict.get('fps') is not None:
            if res:
                res += ', '
            res += '%sfps' % fdict['fps']
        if fdict.get('acodec') is not None:
            if res:
                res += ', '
            if fdict['acodec'] == 'none':
                res += 'video only'
            else:
                res += '%-5s' % fdict['acodec']
        elif fdict.get('abr') is not None:
            if res:
                res += ', '
            res += 'audio'
        if fdict.get('abr') is not None:
            res += '@%3dk' % fdict['abr']
        if fdict.get('asr') is not None:
            res += ' (%5dHz)' % fdict['asr']
        if fdict.get('filesize') is not None:
            if res:
                res += ', '
            res += format_bytes(fdict['filesize'])
        elif fdict.get('filesize_approx') is not None:
            if res:
                res += ', '
            res += '~' + format_bytes(fdict['filesize_approx'])
        return res

    def _get_formats(self, info_dict):
        if info_dict.get('formats') is None:
            if info_dict.get('url') and info_dict.get('_type', 'video') == 'video':
                return [info_dict]
            return []
        return info_dict['formats']

    def render_formats_table(self, info_dict):
        formats = self._get_formats(info_dict)
        if not formats:
            return
        if not self.params.get('listformats_table', True) is not False:
            table = [
                [
                    format_field(f, 'format_id'),
                    format_field(f, 'ext'),
                    self.format_resolution(f),
                    self._format_note(f)
                ] for f in formats if (f.get('preference') or 0) >= -1000]
            return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)

        def simplified_codec(f, field):
            assert field in ('acodec', 'vcodec')
            codec = f.get(field)
            if not codec:
                return 'unknown'
            elif codec != 'none':
                return '.'.join(codec.split('.')[:4])

            if field == 'vcodec' and f.get('acodec') == 'none':
                return 'images'
            elif field == 'acodec' and f.get('vcodec') == 'none':
                return ''
            return self._format_out('audio only' if field == 'vcodec' else 'video only',
                                    self.Styles.SUPPRESS)

        delim = self._format_out('\u2502', self.Styles.DELIM, '|', test_encoding=True)
        table = [
            [
                self._format_out(format_field(f, 'format_id'), self.Styles.ID),
                format_field(f, 'ext'),
                format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
                format_field(f, 'fps', '\t%d', func=round),
                format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
                format_field(f, 'audio_channels', '\t%s'),
                delim, (
                    format_field(f, 'filesize', ' \t%s', func=format_bytes)
                    or format_field(f, 'filesize_approx', '≈\t%s', func=format_bytes)
                    or format_field(try_call(lambda: format_bytes(int(info_dict['duration'] * f['tbr'] * (1024 / 8)))),
                                    None, self._format_out('~\t%s', self.Styles.SUPPRESS))),
                format_field(f, 'tbr', '\t%dk', func=round),
                shorten_protocol_name(f.get('protocol', '')),
                delim,
                simplified_codec(f, 'vcodec'),
                format_field(f, 'vbr', '\t%dk', func=round),
                simplified_codec(f, 'acodec'),
                format_field(f, 'abr', '\t%dk', func=round),
                format_field(f, 'asr', '\t%s', func=format_decimal_suffix),
                join_nonempty(format_field(f, 'language', '[%s]'), join_nonempty(
                    self._format_out('UNSUPPORTED', self.Styles.BAD_FORMAT) if f.get('ext') in ('f4f', 'f4m') else None,
                    (self._format_out('Maybe DRM', self.Styles.WARNING) if f.get('has_drm') == 'maybe'
                     else self._format_out('DRM', self.Styles.BAD_FORMAT) if f.get('has_drm') else None),
                    format_field(f, 'format_note'),
                    format_field(f, 'container', ignore=(None, f.get('ext'))),
                    delim=', '), delim=' '),
            ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
        header_line = self._list_format_headers(
            'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', 'CH', delim, '\tFILESIZE', '\tTBR', 'PROTO',
            delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')

        return render_table(
            header_line, table, hide_empty=True,
            delim=self._format_out('\u2500', self.Styles.DELIM, '-', test_encoding=True))

    def render_thumbnails_table(self, info_dict):
        thumbnails = list(info_dict.get('thumbnails') or [])
        if not thumbnails:
            return None
        return render_table(
            self._list_format_headers('ID', 'Width', 'Height', 'URL'),
            [[t.get('id'), t.get('width') or 'unknown', t.get('height') or 'unknown', t['url']] for t in thumbnails])

    def render_subtitles_table(self, video_id, subtitles):
        def _row(lang, formats):
            exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
            if len(set(names)) == 1:
                names = [] if names[0] == 'unknown' else names[:1]
            return [lang, ', '.join(names), ', '.join(exts)]

        if not subtitles:
            return None
        return render_table(
            self._list_format_headers('Language', 'Name', 'Formats'),
            [_row(lang, formats) for lang, formats in subtitles.items()],
            hide_empty=True)

    def __list_table(self, video_id, name, func, *args):
        table = func(*args)
        if not table:
            self.to_screen(f'{video_id} has no {name}')
            return
        self.to_screen(f'[info] Available {name} for {video_id}:')
        self.to_stdout(table)

    def list_formats(self, info_dict):
        self.__list_table(info_dict['id'], 'formats', self.render_formats_table, info_dict)

    def list_thumbnails(self, info_dict):
        self.__list_table(info_dict['id'], 'thumbnails', self.render_thumbnails_table, info_dict)

    def list_subtitles(self, video_id, subtitles, name='subtitles'):
        self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles)

    def print_debug_header(self):
        if not self.params.get('verbose'):
            return

        from . import _IN_CLI  # Must be delayed import

        # These imports can be slow. So import them only as needed
        from .extractor.extractors import _LAZY_LOADER
        from .extractor.extractors import (
            _PLUGIN_CLASSES as plugin_ies,
            _PLUGIN_OVERRIDES as plugin_ie_overrides,
        )

        def get_encoding(stream):
            ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
            additional_info = []
            if os.environ.get('TERM', '').lower() == 'dumb':
                additional_info.append('dumb')
            if not supports_terminal_sequences(stream):
                from .utils import WINDOWS_VT_MODE  # Must be imported locally
                additional_info.append('No VT' if WINDOWS_VT_MODE is False else 'No ANSI')
            if additional_info:
                ret = f'{ret} ({",".join(additional_info)})'
            return ret

        encoding_str = 'Encodings: locale %s, fs %s, pref %s, %s' % (
            locale.getpreferredencoding(),
            sys.getfilesystemencoding(),
            self.get_encoding(),
            ', '.join(
                f'{key} {get_encoding(stream)}' for key, stream in self._out_files.items_
                if stream is not None and key != 'console')
        )

        logger = self.params.get('logger')
        if logger:
            write_debug = lambda msg: logger.debug(f'[debug] {msg}')
            write_debug(encoding_str)
        else:
            write_string(f'[debug] {encoding_str}\n', encoding=None)
            write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')

        source = detect_variant()
        if VARIANT not in (None, 'pip'):
            source += '*'
        klass = type(self)
        write_debug(join_nonempty(
            f'{"yt-dlp" if REPOSITORY == "yt-dlp/yt-dlp" else REPOSITORY} version',
            f'{CHANNEL}@{__version__}',
            f'[{RELEASE_GIT_HEAD[:9]}]' if RELEASE_GIT_HEAD else '',
            '' if source == 'unknown' else f'({source})',
            '' if _IN_CLI else 'API' if klass == YoutubeDL else f'API:{self.__module__}.{klass.__qualname__}',
            delim=' '))

        if not _IN_CLI:
            write_debug(f'params: {self.params}')

        if not _LAZY_LOADER:
            if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
                write_debug('Lazy loading extractors is forcibly disabled')
            else:
                write_debug('Lazy loading extractors is disabled')
        if self.params['compat_opts']:
            write_debug('Compatibility options: %s' % ', '.join(self.params['compat_opts']))

        if current_git_head():
            write_debug(f'Git HEAD: {current_git_head()}')
        write_debug(system_identifier())

        exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
        ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
        if ffmpeg_features:
            exe_versions['ffmpeg'] += ' (%s)' % ','.join(sorted(ffmpeg_features))

        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_versions['phantomjs'] = PhantomJSwrapper._version()
        exe_str = ', '.join(
            f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
        ) or 'none'
        write_debug('exe versions: %s' % exe_str)

        from .compat.compat_utils import get_package_info
        from .dependencies import available_dependencies

        write_debug('Optional libraries: %s' % (', '.join(sorted({
            join_nonempty(*get_package_info(m)) for m in available_dependencies.values()
        })) or 'none'))

        self._setup_opener()
        proxy_map = {}
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        write_debug(f'Proxy map: {proxy_map}')

        for plugin_type, plugins in {'Extractor': plugin_ies, 'Post-Processor': plugin_pps}.items():
            display_list = ['%s%s' % (
                klass.__name__, '' if klass.__name__ == name else f' as {name}')
                for name, klass in plugins.items()]
            if plugin_type == 'Extractor':
                display_list.extend(f'{plugins[-1].IE_NAME.partition("+")[2]} ({parent.__name__})'
                                    for parent, plugins in plugin_ie_overrides.items())
            if not display_list:
                continue
            write_debug(f'{plugin_type} Plugins: {", ".join(sorted(display_list))}')

        plugin_dirs = plugin_directories()
        if plugin_dirs:
            write_debug(f'Plugin directories: {plugin_dirs}')

        # Not implemented
        if False and self.params.get('call_home'):
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode()
            write_debug('Public IP address: %s' % ipaddr)
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode()
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
                    latest_version)

    def _setup_opener(self):
        if hasattr(self, '_opener'):
            return
        timeout_val = self.params.get('socket_timeout')
        self._socket_timeout = 20 if timeout_val is None else float(timeout_val)

        opts_proxy = self.params.get('proxy')

        cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
        if opts_proxy is not None:
            if opts_proxy == '':
                proxies = {}
            else:
                proxies = {'http': opts_proxy, 'https': opts_proxy}
        else:
            proxies = urllib.request.getproxies()
            # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
            if 'http' in proxies and 'https' not in proxies:
                proxies['https'] = proxies['http']
        proxy_handler = PerRequestProxyHandler(proxies)

        debuglevel = 1 if self.params.get('debug_printtraffic') else 0
        https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
        ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
        redirect_handler = YoutubeDLRedirectHandler()
        data_handler = urllib.request.DataHandler()

        # When passing our own FileHandler instance, build_opener won't add the
        # default FileHandler and allows us to disable the file protocol, which
        # can be used for malicious purposes (see
        # https://github.com/ytdl-org/youtube-dl/issues/8227)
        file_handler = urllib.request.FileHandler()

        if not self.params.get('enable_file_urls'):
            def file_open(*args, **kwargs):
                raise urllib.error.URLError(
                    'file:// URLs are explicitly disabled in yt-dlp for security reasons. '
                    'Use --enable-file-urls to enable at your own risk.')
            file_handler.file_open = file_open

        opener = urllib.request.build_opener(
            proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)

        # Delete the default user-agent header, which would otherwise apply in
        # cases where our custom HTTP handler doesn't come into play
        # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener

    @functools.cached_property
    def cookiejar(self):
        """Global cookiejar instance"""
        return load_cookies(
            self.params.get('cookiefile'), self.params.get('cookiesfrombrowser'), self)

    def urlopen(self, req):
        """ Start an HTTP download """
        if isinstance(req, str):
            req = sanitized_Request(req)
        return self._opener.open(req, timeout=self._socket_timeout)
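
    # Usage sketch (assumes _setup_opener() has already run, e.g. via a prior
    # download; hypothetical URL):
    #
    #   data = ydl.urlopen('https://example.com/robots.txt').read()
    #
    # Note that file:// requests raise URLError here unless the
    # 'enable_file_urls' param is set (see _setup_opener above).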

    def encode(self, s):
        if isinstance(s, bytes):
            return s  # Already encoded

        try:
            return s.encode(self.get_encoding())
        except UnicodeEncodeError as err:
            err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
            raise

    def get_encoding(self):
        encoding = self.params.get('encoding')
        if encoding is None:
            encoding = preferredencoding()
        return encoding

    def _write_info_json(self, label, ie_result, infofn, overwrite=None):
        ''' Write infojson and returns True = written, 'exists' = Already exists, False = skip, None = error '''
        if overwrite is None:
            overwrite = self.params.get('overwrites', True)
        if not self.params.get('writeinfojson'):
            return False
        elif not infofn:
            self.write_debug(f'Skipping writing {label} infojson')
            return False
        elif not self._ensure_dir_exists(infofn):
            return None
        elif not overwrite and os.path.exists(infofn):
            self.to_screen(f'[info] {label.title()} metadata is already present')
            return 'exists'

        self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
        try:
            write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
            return True
        except OSError:
            self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
            return None

    def _write_description(self, label, ie_result, descfn):
        ''' Write description and returns True = written, False = skip, None = error '''
        if not self.params.get('writedescription'):
            return False
        elif not descfn:
            self.write_debug(f'Skipping writing {label} description')
            return False
        elif not self._ensure_dir_exists(descfn):
            return None
        elif not self.params.get('overwrites', True) and os.path.exists(descfn):
            self.to_screen(f'[info] {label.title()} description is already present')
        elif ie_result.get('description') is None:
            self.to_screen(f'[info] There\'s no {label} description to write')
            return False
        else:
            try:
                self.to_screen(f'[info] Writing {label} description to: {descfn}')
                with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                    descfile.write(ie_result['description'])
            except OSError:
                self.report_error(f'Cannot write {label} description file {descfn}')
                return None
        return True

    def _write_subtitles(self, info_dict, filename):
        ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
        ret = []
        subtitles = info_dict.get('requested_subtitles')
        if not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
            # subtitles download errors are already managed as troubles in relevant IE
            # that way it will silently go on when used with unsupporting IE
            return ret
        elif not subtitles:
            self.to_screen('[info] There are no subtitles for the requested languages')
            return ret
        sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
        if not sub_filename_base:
            self.to_screen('[info] Skipping writing video subtitles')
            return ret

        for sub_lang, sub_info in subtitles.items():
            sub_format = sub_info['ext']
            sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
            sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
            existing_sub = self.existing_file((sub_filename_final, sub_filename))
            if existing_sub:
                self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
                sub_info['filepath'] = existing_sub
                ret.append((existing_sub, sub_filename_final))
                continue

            self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
            if sub_info.get('data') is not None:
                try:
                    # Use newline='' to prevent conversion of newline characters
                    # See https://github.com/ytdl-org/youtube-dl/issues/10268
                    with open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
                        subfile.write(sub_info['data'])
                    sub_info['filepath'] = sub_filename
                    ret.append((sub_filename, sub_filename_final))
                    continue
                except OSError:
                    self.report_error(f'Cannot write video subtitles file {sub_filename}')
                    return None

            try:
                sub_copy = sub_info.copy()
                sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
                self.dl(sub_filename, sub_copy, subtitle=True)
                sub_info['filepath'] = sub_filename
                ret.append((sub_filename, sub_filename_final))
            except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
                msg = f'Unable to download video subtitles for {sub_lang!r}: {err}'
                if self.params.get('ignoreerrors') is not True:  # False or 'only_download'
                    if not self.params.get('ignoreerrors'):
                        self.report_error(msg)
                    raise DownloadError(msg)
                self.report_warning(msg)
        return ret

    def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
        ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
        write_all = self.params.get('write_all_thumbnails', False)
        thumbnails, ret = [], []
        if write_all or self.params.get('writethumbnail', False):
            thumbnails = info_dict.get('thumbnails') or []
            if not thumbnails:
                self.to_screen(f'[info] There are no {label} thumbnails to download')
                return ret
        multiple = write_all and len(thumbnails) > 1

        if thumb_filename_base is None:
            thumb_filename_base = filename
        if thumbnails and not thumb_filename_base:
            self.write_debug(f'Skipping writing {label} thumbnail')
            return ret

        for idx, t in list(enumerate(thumbnails))[::-1]:
            thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
            thumb_display_id = f'{label} thumbnail {t["id"]}'
            thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
            thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))

            existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
            if existing_thumb:
                self.to_screen('[info] %s is already present' % (
                    thumb_display_id if multiple else f'{label} thumbnail').capitalize())
                t['filepath'] = existing_thumb
                ret.append((existing_thumb, thumb_filename_final))
            else:
                self.to_screen(f'[info] Downloading {thumb_display_id} ...')
                try:
                    uf = self.urlopen(sanitized_Request(t['url'], headers=t.get('http_headers', {})))
                    self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                    ret.append((thumb_filename, thumb_filename_final))
                    t['filepath'] = thumb_filename
                except network_exceptions as err:
                    if isinstance(err, urllib.error.HTTPError) and err.code == 404:
                        self.to_screen(f'[info] {thumb_display_id.title()} does not exist')
                    else:
                        self.report_warning(f'Unable to download {thumb_display_id}: {err}')
            if ret and not write_all: