26 from .cache
import Cache
27 from .compat
import functools
, urllib
# isort: split
28 from .compat
import compat_os_name
, compat_shlex_quote
, urllib_req_to_req
29 from .cookies
import LenientSimpleCookie
, load_cookies
30 from .downloader
import FFmpegFD
, get_suitable_downloader
, shorten_protocol_name
31 from .downloader
.rtmp
import rtmpdump_version
32 from .extractor
import gen_extractor_classes
, get_info_extractor
33 from .extractor
.common
import UnsupportedURLIE
34 from .extractor
.openload
import PhantomJSwrapper
35 from .minicurses
import format_text
36 from .networking
import HEADRequest
, Request
, RequestDirector
37 from .networking
.common
import _REQUEST_HANDLERS
38 from .networking
.exceptions
import (
46 from .plugins
import directories
as plugin_directories
47 from .postprocessor
import _PLUGIN_CLASSES
as plugin_pps
48 from .postprocessor
import (
50 FFmpegFixupDuplicateMoovPP
,
51 FFmpegFixupDurationPP
,
54 FFmpegFixupStretchedPP
,
55 FFmpegFixupTimestampPP
,
58 FFmpegVideoConvertorPP
,
59 MoveFilesAfterDownloadPP
,
62 from .postprocessor
.ffmpeg
import resolve_mapping
as resolve_recode_mapping
63 from .update
import REPOSITORY
, current_git_head
, detect_variant
95 UnavailableVideoError
,
113 format_decimal_suffix
,
127 orderedSet_from_options
,
131 remove_terminal_sequences
,
140 supports_terminal_sequences
,
150 windows_enable_vt_mode
,
154 from .utils
._utils
import _YDLLogger
155 from .utils
.networking
import (
161 from .version
import CHANNEL
, RELEASE_GIT_HEAD
, VARIANT
, __version__
163 if compat_os_name
== 'nt':
170 YoutubeDL objects are the ones responsible for downloading the
171 actual video file and writing it to disk if the user has requested
172 it, among some other tasks. In most cases there should be one per
173 program. As, given a video URL, the downloader doesn't know how to
174 extract all the needed information, task that InfoExtractors do, it
175 has to pass the URL to one of them.
177 For this, YoutubeDL objects have a method that allows
178 InfoExtractors to be registered in a given order. When it is passed
179 a URL, the YoutubeDL object hands it to the first InfoExtractor it
180 finds that reports being able to handle it. The InfoExtractor extracts
181 all the information about the video or videos the URL refers to, and
182 YoutubeDL processes the extracted information, possibly using a File
183 Downloader to download the video.
185 YoutubeDL objects accept a lot of parameters. In order not to saturate
186 the object constructor with arguments, it receives a dictionary of
187 options instead. These options are available through the params
188 attribute for the InfoExtractors to use. The YoutubeDL also
189 registers itself as the downloader in charge for the InfoExtractors
190 that are added to it, so this is a "mutual registration".
194 username: Username for authentication purposes.
195 password: Password for authentication purposes.
196 videopassword: Password for accessing a video.
197 ap_mso: Adobe Pass multiple-system operator identifier.
198 ap_username: Multiple-system operator account username.
199 ap_password: Multiple-system operator account password.
200 usenetrc: Use netrc for authentication instead.
201 netrc_location: Location of the netrc file. Defaults to ~/.netrc.
202 netrc_cmd: Use a shell command to get credentials
203 verbose: Print additional info to stdout.
204 quiet: Do not print messages to stdout.
205 no_warnings: Do not print out anything for warnings.
206 forceprint: A dict with keys WHEN mapped to a list of templates to
207 print to stdout. The allowed keys are video or any of the
208 items in utils.POSTPROCESS_WHEN.
209 For compatibility, a single list is also accepted
210 print_to_file: A dict with keys WHEN (same as forceprint) mapped to
211 a list of tuples with (template, filename)
212 forcejson: Force printing info_dict as JSON.
213 dump_single_json: Force printing the info_dict of the whole playlist
214 (or video) as a single JSON line.
215 force_write_download_archive: Force writing download archive regardless
216 of 'skip_download' or 'simulate'.
217 simulate: Do not download the video files. If unset (or None),
218 simulate only if listsubtitles, listformats or list_thumbnails is used
219 format: Video format code. see "FORMAT SELECTION" for more details.
220 You can also pass a function. The function takes 'ctx' as
221 argument and returns the formats to download.
222 See "build_format_selector" for an implementation
223 allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
224 ignore_no_formats_error: Ignore "No video formats" error. Useful for
225 extracting metadata even if the video is not actually
226 available for download (experimental)
227 format_sort: A list of fields by which to sort the video formats.
228 See "Sorting Formats" for more details.
229 format_sort_force: Force the given format_sort. see "Sorting Formats"
231 prefer_free_formats: Whether to prefer video formats with free containers
232 over non-free ones of same quality.
233 allow_multiple_video_streams: Allow multiple video streams to be merged
235 allow_multiple_audio_streams: Allow multiple audio streams to be merged
237 check_formats: Whether to test if the formats are downloadable.
238 Can be True (check all), False (check none),
239 'selected' (check selected formats),
240 or None (check only if requested by extractor)
241 paths: Dictionary of output paths. The allowed keys are 'home',
242 'temp' and the keys of OUTTMPL_TYPES (in utils.py)
243 outtmpl: Dictionary of templates for output names. Allowed keys
244 are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
245 For compatibility with youtube-dl, a single string can also be used
246 outtmpl_na_placeholder: Placeholder for unavailable meta fields.
247 restrictfilenames: Do not allow "&" and spaces in file names
248 trim_file_name: Limit length of filename (extension excluded)
249 windowsfilenames: Force the filenames to be windows compatible
250 ignoreerrors: Do not stop on download/postprocessing errors.
251 Can be 'only_download' to ignore only download errors.
252 Default is 'only_download' for CLI, but False for API
253 skip_playlist_after_errors: Number of allowed failures until the rest of
254 the playlist is skipped
255 allowed_extractors: List of regexes to match against extractor names that are allowed
256 overwrites: Overwrite all video and metadata files if True,
257 overwrite only non-video files if None
258 and don't overwrite any file if False
259 For compatibility with youtube-dl,
260 "nooverwrites" may also be used instead
261 playlist_items: Specific indices of playlist to download.
262 playlistrandom: Download playlist items in random order.
263 lazy_playlist: Process playlist entries as they are received.
264 matchtitle: Download only matching titles.
265 rejecttitle: Reject downloads for matching titles.
266 logger: Log messages to a logging.Logger instance.
267 logtostderr: Print everything to stderr instead of stdout.
268 consoletitle: Display progress in console window's titlebar.
269 writedescription: Write the video description to a .description file
270 writeinfojson: Write the video metadata to a .info.json file
271 clean_infojson: Remove internal metadata from the infojson
272 getcomments: Extract video comments. This will not be written to disk
273 unless writeinfojson is also given
274 writeannotations: Write the video annotations to a .annotations.xml file
275 writethumbnail: Write the thumbnail image to a file
276 allow_playlist_files: Whether to write playlists' description, infojson etc
277 also to disk when using the 'write*' options
278 write_all_thumbnails: Write all thumbnail formats to files
279 writelink: Write an internet shortcut file, depending on the
280 current platform (.url/.webloc/.desktop)
281 writeurllink: Write a Windows internet shortcut file (.url)
282 writewebloclink: Write a macOS internet shortcut file (.webloc)
283 writedesktoplink: Write a Linux internet shortcut file (.desktop)
284 writesubtitles: Write the video subtitles to a file
285 writeautomaticsub: Write the automatically generated subtitles to a file
286 listsubtitles: Lists all available subtitles for the video
287 subtitlesformat: The format code for subtitles
288 subtitleslangs: List of languages of the subtitles to download (can be regex).
289 The list may contain "all" to refer to all the available
290 subtitles. The language can be prefixed with a "-" to
291 exclude it from the requested languages, e.g. ['all', '-live_chat']
292 keepvideo: Keep the video file after post-processing
293 daterange: A utils.DateRange object, download only if the upload_date is in the range.
294 skip_download: Skip the actual download of the video file
295 cachedir: Location of the cache files in the filesystem.
296 False to disable filesystem cache.
297 noplaylist: Download single video instead of a playlist if in doubt.
298 age_limit: An integer representing the user's age in years.
299 Unsuitable videos for the given age are skipped.
300 min_views: An integer representing the minimum view count the video
301 must have in order to not be skipped.
302 Videos without view count information are always
303 downloaded. None for no limit.
304 max_views: An integer representing the maximum view count.
305 Videos that are more popular than that are not
307 Videos without view count information are always
308 downloaded. None for no limit.
309 download_archive: A set, or the name of a file where all downloads are recorded.
310 Videos already present in the file are not downloaded again.
311 break_on_existing: Stop the download process after attempting to download a
312 file that is in the archive.
313 break_per_url: Whether break_on_reject and break_on_existing
314 should act on each input URL as opposed to for the entire queue
315 cookiefile: File name or text stream from where cookies should be read and dumped to
316 cookiesfrombrowser: A tuple containing the name of the browser, the profile
317 name/path from where cookies are loaded, the name of the keyring,
318 and the container name, e.g. ('chrome', ) or
319 ('vivaldi', 'default', 'BASICTEXT') or ('firefox', 'default', None, 'Meta')
320 legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
321 support RFC 5746 secure renegotiation
322 nocheckcertificate: Do not verify SSL certificates
323 client_certificate: Path to client certificate file in PEM format. May include the private key
324 client_certificate_key: Path to private key file for client certificate
325 client_certificate_password: Password for client certificate private key, if encrypted.
326 If not provided and the key is encrypted, yt-dlp will ask interactively
327 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
328 (Only supported by some extractors)
329 enable_file_urls: Enable file:// URLs. This is disabled by default for security reasons.
330 http_headers: A dictionary of custom headers to be used for all requests
331 proxy: URL of the proxy server to use
332 geo_verification_proxy: URL of the proxy to use for IP address verification
333 on geo-restricted sites.
334 socket_timeout: Time to wait for unresponsive hosts, in seconds
335 bidi_workaround: Work around buggy terminals without bidirectional text
336 support, using fribidi
337 debug_printtraffic:Print out sent and received HTTP traffic
338 default_search: Prepend this string if an input url is not valid.
339 'auto' for elaborate guessing
340 encoding: Use this encoding instead of the system-specified.
341 extract_flat: Whether to resolve and process url_results further
342 * False: Always process. Default for API
343 * True: Never process
344 * 'in_playlist': Do not process inside playlist/multi_video
345 * 'discard': Always process, but don't return the result
346 from inside playlist/multi_video
347 * 'discard_in_playlist': Same as "discard", but only for
348 playlists (not multi_video). Default for CLI
349 wait_for_video: If given, wait for scheduled streams to become available.
350 The value should be a tuple containing the range
351 (min_secs, max_secs) to wait between retries
352 postprocessors: A list of dictionaries, each with an entry
353 * key: The name of the postprocessor. See
354 yt_dlp/postprocessor/__init__.py for a list.
355 * when: When to run the postprocessor. Allowed values are
356 the entries of utils.POSTPROCESS_WHEN
357 Assumed to be 'post_process' if not given
358 progress_hooks: A list of functions that get called on download
359 progress, with a dictionary with the entries
360 * status: One of "downloading", "error", or "finished".
361 Check this first and ignore unknown values.
362 * info_dict: The extracted info_dict
364 If status is one of "downloading", or "finished", the
365 following properties may also be present:
366 * filename: The final filename (always present)
367 * tmpfilename: The filename we're currently writing to
368 * downloaded_bytes: Bytes on disk
369 * total_bytes: Size of the whole file, None if unknown
370 * total_bytes_estimate: Guess of the eventual file size,
372 * elapsed: The number of seconds since download started.
373 * eta: The estimated time in seconds, None if unknown
374 * speed: The download speed in bytes/second, None if
376 * fragment_index: The counter of the currently
377 downloaded video fragment.
378 * fragment_count: The number of fragments (= individual
379 files that will be merged)
381 Progress hooks are guaranteed to be called at least once
382 (with status "finished") if the download is successful.
383 postprocessor_hooks: A list of functions that get called on postprocessing
384 progress, with a dictionary with the entries
385 * status: One of "started", "processing", or "finished".
386 Check this first and ignore unknown values.
387 * postprocessor: Name of the postprocessor
388 * info_dict: The extracted info_dict
390 Postprocessor hooks are guaranteed to be called at least twice
391 (with status "started" and "finished") if the processing is successful.
392 merge_output_format: "/" separated list of extensions to use when merging formats.
393 final_ext: Expected final extension; used to detect when the file was
394 already downloaded and converted
395 fixup: Automatically correct known faults of the file.
397 - "never": do nothing
398 - "warn": only emit a warning
399 - "detect_or_warn": check whether we can do anything
400 about it, warn otherwise (default)
401 source_address: Client-side IP address to bind to.
402 sleep_interval_requests: Number of seconds to sleep between requests
404 sleep_interval: Number of seconds to sleep before each download when
405 used alone or a lower bound of a range for randomized
406 sleep before each download (minimum possible number
407 of seconds to sleep) when used along with
409 max_sleep_interval:Upper bound of a range for randomized sleep before each
410 download (maximum possible number of seconds to sleep).
411 Must only be used along with sleep_interval.
412 Actual sleep time will be a random float from range
413 [sleep_interval; max_sleep_interval].
414 sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
415 listformats: Print an overview of available video formats and exit.
416 list_thumbnails: Print a table of all thumbnails and exit.
417 match_filter: A function that gets called for every video with the signature
418 (info_dict, *, incomplete: bool) -> Optional[str]
419 For backward compatibility with youtube-dl, the signature
420 (info_dict) -> Optional[str] is also allowed.
421 - If it returns a message, the video is ignored.
422 - If it returns None, the video is downloaded.
423 - If it returns utils.NO_DEFAULT, the user is interactively
424 asked whether to download the video.
425 - Raise utils.DownloadCancelled(msg) to abort remaining
426 downloads when a video is rejected.
427 match_filter_func in utils.py is one example for this.
428 color: A Dictionary with output stream names as keys
429 and their respective color policy as values.
430 Can also just be a single color policy,
431 in which case it applies to all outputs.
432 Valid stream names are 'stdout' and 'stderr'.
433 Valid color policies are one of 'always', 'auto', 'no_color' or 'never'.
434 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
437 Two-letter ISO 3166-1 alpha-2 country code that will be used for
438 explicit geographic restriction bypassing via faking
439 X-Forwarded-For HTTP header
441 IP range in CIDR notation that will be used similarly to
443 external_downloader: A dictionary of protocol keys and the executable of the
444 external downloader to use for it. The allowed protocols
445 are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
446 Set the value to 'native' to use the native downloader
447 compat_opts: Compatibility options. See "Differences in default behavior".
448 The following options do not work when used through the API:
449 filename, abort-on-error, multistreams, no-live-chat, format-sort
450 no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
451 Refer to __init__.py for their implementation
452 progress_template: Dictionary of templates for progress outputs.
453 Allowed keys are 'download', 'postprocess',
454 'download-title' (console title) and 'postprocess-title'.
455 The template is mapped on a dictionary with keys 'progress' and 'info'
456 retry_sleep_functions: Dictionary of functions that takes the number of attempts
457 as argument and returns the time to sleep in seconds.
458 Allowed keys are 'http', 'fragment', 'file_access'
459 download_ranges: A callback function that gets called for every video with
460 the signature (info_dict, ydl) -> Iterable[Section].
461 Only the returned sections will be downloaded.
462 Each Section is a dict with the following keys:
463 * start_time: Start time of the section in seconds
464 * end_time: End time of the section in seconds
465 * title: Section title (Optional)
466 * index: Section number (Optional)
467 force_keyframes_at_cuts: Re-encode the video when downloading ranges to get precise cuts
468 noprogress: Do not print the progress bar
469 live_from_start: Whether to download livestream videos from the start
471 The following parameters are not used by YoutubeDL itself, they are used by
472 the downloader (see yt_dlp/downloader/common.py):
473 nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
474 max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
475 continuedl, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
476 external_downloader_args, concurrent_fragment_downloads.
478 The following options are used by the post processors:
479 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
480 to the binary or its containing directory.
481 postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
482 and a list of additional command-line arguments for the
483 postprocessor/executable. The dict can also have "PP+EXE" keys
484 which are used when the given exe is used by the given PP.
485 Use 'default' as the name for arguments to be passed to all PPs
486 For compatibility with youtube-dl, a single list of args
489 The following options are used by the extractors:
490 extractor_retries: Number of times to retry for known errors (default: 3)
491 dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
492 hls_split_discontinuity: Split HLS playlists to different formats at
493 discontinuities such as ad breaks (default: False)
494 extractor_args: A dictionary of arguments to be passed to the extractors.
495 See "EXTRACTOR ARGUMENTS" for details.
496 E.g. {'youtube': {'skip': ['dash', 'hls']}}
497 mark_watched: Mark videos watched (even with --simulate). Only for YouTube
499 The following options are deprecated and may be removed in the future:
501 break_on_reject: Stop the download process when encountering a video that
502 has been filtered out.
503 - `raise DownloadCancelled(msg)` in match_filter instead
504 force_generic_extractor: Force downloader to use the generic extractor
505 - Use allowed_extractors = ['generic', 'default']
506 playliststart: - Use playlist_items
507 Playlist item to start at.
508 playlistend: - Use playlist_items
509 Playlist item to end at.
510 playlistreverse: - Use playlist_items
511 Download playlist items in reverse order.
512 forceurl: - Use forceprint
513 Force printing final URL.
514 forcetitle: - Use forceprint
515 Force printing title.
516 forceid: - Use forceprint
518 forcethumbnail: - Use forceprint
519 Force printing thumbnail URL.
520 forcedescription: - Use forceprint
521 Force printing description.
522 forcefilename: - Use forceprint
523 Force printing final filename.
524 forceduration: - Use forceprint
525 Force printing duration.
526 allsubtitles: - Use subtitleslangs = ['all']
527 Downloads all the subtitles of the video
528 (requires writesubtitles or writeautomaticsub)
529 include_ads: - Doesn't work
531 call_home: - Not implemented
532 Boolean, true iff we are allowed to contact the
533 yt-dlp servers for debugging.
534 post_hooks: - Register a custom postprocessor
535 A list of functions that get called as the final step
536 for each video file, after all postprocessors have been
537 called. The filename will be passed as the only argument.
538 hls_prefer_native: - Use external_downloader = {'m3u8': 'native'} or {'m3u8': 'ffmpeg'}.
539 Use the native HLS downloader instead of ffmpeg/avconv
540 if True, otherwise use ffmpeg/avconv if False, otherwise
541 use downloader suggested by extractor if None.
542 prefer_ffmpeg: - avconv support is deprecated
543 If False, use avconv instead of ffmpeg if both are available,
544 otherwise prefer ffmpeg.
545 youtube_include_dash_manifest: - Use extractor_args
546 If True (default), DASH manifests and related
547 data will be downloaded and processed by extractor.
548 You can reduce network I/O by disabling it if you don't
549 care about DASH. (only for youtube)
550 youtube_include_hls_manifest: - Use extractor_args
551 If True (default), HLS manifests and related
552 data will be downloaded and processed by extractor.
553 You can reduce network I/O by disabling it if you don't
554 care about HLS. (only for youtube)
555 no_color: Same as `color='no_color'`
559 'width', 'height', 'asr', 'audio_channels', 'fps',
560 'tbr', 'abr', 'vbr', 'filesize', 'filesize_approx',
561 'timestamp', 'release_timestamp',
562 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
563 'average_rating', 'comment_count', 'age_limit',
564 'start_time', 'end_time',
565 'chapter_number', 'season_number', 'episode_number',
566 'track_number', 'disc_number', 'release_year',
570 # NB: Keep in sync with the docstring of extractor/common.py
571 'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note',
572 'width', 'height', 'aspect_ratio', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr', 'audio_channels',
573 'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx', 'rows', 'columns',
574 'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start',
575 'preference', 'language', 'language_preference', 'quality', 'source_preference',
576 'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'hls_aes', 'downloader_options',
577 'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'
579 _format_selection_exts
= {
580 'audio': set(MEDIA_EXTENSIONS
.common_audio
),
581 'video': set(MEDIA_EXTENSIONS
.common_video
+ ('3gp', )),
582 'storyboards': set(MEDIA_EXTENSIONS
.storyboards
),
585 def __init__(self
, params
=None, auto_init
=True):
586 """Create a YoutubeDL object with the given options.
587 @param auto_init Whether to load the default extractors and print header (if verbose).
588 Set to 'no_verbose_header' to not print the header
594 self
._ies
_instances
= {}
595 self
._pps
= {k: [] for k in POSTPROCESS_WHEN}
596 self
._printed
_messages
= set()
597 self
._first
_webpage
_request
= True
598 self
._post
_hooks
= []
599 self
._progress
_hooks
= []
600 self
._postprocessor
_hooks
= []
601 self
._download
_retcode
= 0
602 self
._num
_downloads
= 0
604 self
._playlist
_level
= 0
605 self
._playlist
_urls
= set()
606 self
.cache
= Cache(self
)
608 stdout
= sys
.stderr
if self
.params
.get('logtostderr') else sys
.stdout
609 self
._out
_files
= Namespace(
612 screen
=sys
.stderr
if self
.params
.get('quiet') else stdout
,
613 console
=None if compat_os_name
== 'nt' else next(
614 filter(supports_terminal_sequences
, (sys
.stderr
, sys
.stdout
)), None)
618 windows_enable_vt_mode()
619 except Exception as e
:
620 self
.write_debug(f
'Failed to enable VT mode: {e}')
622 if self
.params
.get('no_color'):
623 if self
.params
.get('color') is not None:
624 self
.report_warning('Overwriting params from "color" with "no_color"')
625 self
.params
['color'] = 'no_color'
627 term_allow_color
= os
.environ
.get('TERM', '').lower() != 'dumb'
629 def process_color_policy(stream
):
630 stream_name
= {sys.stdout: 'stdout', sys.stderr: 'stderr'}
[stream
]
631 policy
= traverse_obj(self
.params
, ('color', (stream_name
, None), {str}
), get_all
=False)
632 if policy
in ('auto', None):
633 return term_allow_color
and supports_terminal_sequences(stream
)
634 assert policy
in ('always', 'never', 'no_color')
635 return {'always': True, 'never': False}
.get(policy
, policy
)
637 self
._allow
_colors
= Namespace(**{
638 name
: process_color_policy(stream
)
639 for name
, stream
in self
._out
_files
.items_
if name
!= 'console'
642 # The code is left like this to be reused for future deprecations
643 MIN_SUPPORTED
, MIN_RECOMMENDED
= (3, 7), (3, 7)
644 current_version
= sys
.version_info
[:2]
645 if current_version
< MIN_RECOMMENDED
:
646 msg
= ('Support for Python version %d.%d has been deprecated. '
647 'See https://github.com/yt-dlp/yt-dlp/issues/3764 for more details.'
648 '\n You will no longer receive updates on this version')
649 if current_version
< MIN_SUPPORTED
:
650 msg
= 'Python version %d.%d is no longer supported'
651 self
.deprecated_feature(
652 f
'{msg}! Please update to Python %d.%d or above' % (*current_version
, *MIN_RECOMMENDED
))
654 if self
.params
.get('allow_unplayable_formats'):
656 f
'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
657 'This is a developer option intended for debugging. \n'
658 ' If you experience any issues while using this option, '
659 f
'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')
661 if self
.params
.get('bidi_workaround', False):
664 master
, slave
= pty
.openpty()
665 width
= shutil
.get_terminal_size().columns
666 width_args
= [] if width
is None else ['-w', str(width
)]
667 sp_kwargs
= {'stdin': subprocess.PIPE, 'stdout': slave, 'stderr': self._out_files.error}
669 self
._output
_process
= Popen(['bidiv'] + width_args
, **sp_kwargs
)
671 self
._output
_process
= Popen(['fribidi', '-c', 'UTF-8'] + width_args
, **sp_kwargs
)
672 self
._output
_channel
= os
.fdopen(master
, 'rb')
673 except OSError as ose
:
674 if ose
.errno
== errno
.ENOENT
:
676 'Could not find fribidi executable, ignoring --bidi-workaround. '
677 'Make sure that fribidi is an executable file in one of the directories in your $PATH.')
681 self
.params
['compat_opts'] = set(self
.params
.get('compat_opts', ()))
682 self
.params
['http_headers'] = HTTPHeaderDict(std_headers
, self
.params
.get('http_headers'))
683 self
._request
_director
= self
.build_request_director(
684 sorted(_REQUEST_HANDLERS
.values(), key
=lambda rh
: rh
.RH_NAME
.lower()))
685 if auto_init
and auto_init
!= 'no_verbose_header':
686 self
.print_debug_header()
688 self
.__header
_cookies
= []
689 self
._load
_cookies
(traverse_obj(self
.params
.get('http_headers'), 'cookie', casesense
=False)) # compat
691 def check_deprecated(param
, option
, suggestion
):
692 if self
.params
.get(param
) is not None:
693 self
.report_warning(f
'{option} is deprecated. Use {suggestion} instead')
697 if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
698 if self
.params
.get('geo_verification_proxy') is None:
699 self
.params
['geo_verification_proxy'] = self
.params
['cn_verification_proxy']
701 check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
702 check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
703 check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
705 for msg
in self
.params
.get('_warnings', []):
706 self
.report_warning(msg
)
707 for msg
in self
.params
.get('_deprecation_warnings', []):
708 self
.deprecated_feature(msg
)
710 if 'list-formats' in self
.params
['compat_opts']:
711 self
.params
['listformats_table'] = False
713 if 'overwrites' not in self
.params
and self
.params
.get('nooverwrites') is not None:
714 # nooverwrites was unnecessarily changed to overwrites
715 # in 0c3d0f51778b153f65c21906031c2e091fcfb641
716 # This ensures compatibility with both keys
717 self
.params
['overwrites'] = not self
.params
['nooverwrites']
718 elif self
.params
.get('overwrites') is None:
719 self
.params
.pop('overwrites', None)
721 self
.params
['nooverwrites'] = not self
.params
['overwrites']
723 if self
.params
.get('simulate') is None and any((
724 self
.params
.get('list_thumbnails'),
725 self
.params
.get('listformats'),
726 self
.params
.get('listsubtitles'),
728 self
.params
['simulate'] = 'list_only'
730 self
.params
.setdefault('forceprint', {})
731 self
.params
.setdefault('print_to_file', {})
733 # Compatibility with older syntax
734 if not isinstance(params
['forceprint'], dict):
735 self
.params
['forceprint'] = {'video': params['forceprint']}
738 self
.add_default_info_extractors()
740 if (sys
.platform
!= 'win32'
741 and sys
.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
742 and not self
.params
.get('restrictfilenames', False)):
743 # Unicode filesystem API will throw errors (#1474, #13027)
745 'Assuming --restrict-filenames since file system encoding '
746 'cannot encode all characters. '
747 'Set the LC_ALL environment variable to fix this.')
748 self
.params
['restrictfilenames'] = True
750 self
._parse
_outtmpl
()
752 # Creating format selector here allows us to catch syntax errors before the extraction
753 self
.format_selector
= (
754 self
.params
.get('format') if self
.params
.get('format') in (None, '-')
755 else self
.params
['format'] if callable(self
.params
['format'])
756 else self
.build_format_selector(self
.params
['format']))
759 'post_hooks': self
.add_post_hook
,
760 'progress_hooks': self
.add_progress_hook
,
761 'postprocessor_hooks': self
.add_postprocessor_hook
,
763 for opt
, fn
in hooks
.items():
764 for ph
in self
.params
.get(opt
, []):
767 for pp_def_raw
in self
.params
.get('postprocessors', []):
768 pp_def
= dict(pp_def_raw
)
769 when
= pp_def
.pop('when', 'post_process')
770 self
.add_post_processor(
771 get_postprocessor(pp_def
.pop('key'))(self
, **pp_def
),
774 def preload_download_archive(fn
):
775 """Preload the archive, if any is specified"""
779 elif not is_path_like(fn
):
782 self
.write_debug(f
'Loading archive file {fn!r}')
784 with locked_file(fn
, 'r', encoding
='utf-8') as archive_file
:
785 for line
in archive_file
:
786 archive
.add(line
.strip())
787 except OSError as ioe
:
788 if ioe
.errno
!= errno
.ENOENT
:
792 self
.archive
= preload_download_archive(self
.params
.get('download_archive'))
794 def warn_if_short_id(self
, argv
):
795 # short YouTube ID starting with dash?
797 i
for i
, a
in enumerate(argv
)
798 if re
.match(r
'^-[0-9A-Za-z_-]{10}$', a
)]
802 + [a
for i
, a
in enumerate(argv
) if i
not in idxs
]
803 + ['--'] + [argv
[i
] for i
in idxs
]
806 'Long argument string detected. '
807 'Use -- to separate parameters and URLs, like this:\n%s' %
808 args_to_str(correct_argv
))
810 def add_info_extractor(self
, ie
):
811 """Add an InfoExtractor object to the end of the list."""
813 self
._ies
[ie_key
] = ie
814 if not isinstance(ie
, type):
815 self
._ies
_instances
[ie_key
] = ie
816 ie
.set_downloader(self
)
818 def get_info_extractor(self
, ie_key
):
820 Get an instance of an IE with name ie_key, it will try to get one from
821 the _ies list, if there's no instance it will create a new one and add
822 it to the extractor list.
824 ie
= self
._ies
_instances
.get(ie_key
)
826 ie
= get_info_extractor(ie_key
)()
827 self
.add_info_extractor(ie
)
830 def add_default_info_extractors(self
):
832 Add the InfoExtractors returned by gen_extractors to the end of the list
834 all_ies
= {ie.IE_NAME.lower(): ie for ie in gen_extractor_classes()}
835 all_ies
['end'] = UnsupportedURLIE()
837 ie_names
= orderedSet_from_options(
838 self
.params
.get('allowed_extractors', ['default']), {
839 'all': list(all_ies
),
840 'default': [name
for name
, ie
in all_ies
.items() if ie
._ENABLED
],
842 except re
.error
as e
:
843 raise ValueError(f
'Wrong regex for allowed_extractors: {e.pattern}')
844 for name
in ie_names
:
845 self
.add_info_extractor(all_ies
[name
])
846 self
.write_debug(f
'Loaded {len(ie_names)} extractors')
def add_post_processor(self, pp, when='post_process'):
    """Register ``pp`` as the last postprocessor of the ``when`` stage.

    @param pp    PostProcessor object to register
    @param when  Stage name; must be a member of POSTPROCESS_WHEN

    After being appended to the stage's list, the postprocessor is given
    this object through its ``set_downloader`` method.
    """
    assert when in POSTPROCESS_WHEN, f'Invalid when={when}'
    stage_chain = self._pps[when]
    stage_chain.append(pp)
    pp.set_downloader(self)
def add_post_hook(self, ph):
    """Register ``ph`` by appending it to the list of post hooks."""
    registered_hooks = self._post_hooks
    registered_hooks.append(ph)
def add_progress_hook(self, ph):
    """Register ``ph`` by appending it to the list of download progress hooks."""
    registered_hooks = self._progress_hooks
    registered_hooks.append(ph)
862 def add_postprocessor_hook(self
, ph
):
863 """Add the postprocessing progress hook"""
864 self
._postprocessor
_hooks
.append(ph
)
865 for pps
in self
._pps
.values():
867 pp
.add_progress_hook(ph
)
869 def _bidi_workaround(self
, message
):
870 if not hasattr(self
, '_output_channel'):
873 assert hasattr(self
, '_output_process')
874 assert isinstance(message
, str)
875 line_count
= message
.count('\n') + 1
876 self
._output
_process
.stdin
.write((message
+ '\n').encode())
877 self
._output
_process
.stdin
.flush()
878 res
= ''.join(self
._output
_channel
.readline().decode()
879 for _
in range(line_count
))
880 return res
[:-len('\n')]
882 def _write_string(self
, message
, out
=None, only_once
=False):
884 if message
in self
._printed
_messages
:
886 self
._printed
_messages
.add(message
)
887 write_string(message
, out
=out
, encoding
=self
.params
.get('encoding'))
def to_stdout(self, message, skip_eol=False, quiet=None):
    """Write ``message`` followed by a newline to the ``out`` stream.

    ``skip_eol`` and ``quiet`` are no longer honoured: passing either one
    only triggers a deprecation warning pointing at ``to_screen``.
    """
    quiet_was_passed = quiet is not None
    skip_eol_was_passed = skip_eol is not False

    if quiet_was_passed:
        self.deprecation_warning(
            '"YoutubeDL.to_stdout" no longer accepts the argument quiet. '
            'Use "YoutubeDL.to_screen" instead')
    if skip_eol_was_passed:
        self.deprecation_warning(
            '"YoutubeDL.to_stdout" no longer accepts the argument skip_eol. '
            'Use "YoutubeDL.to_screen" instead')

    # The message goes through the bidi workaround before being written
    line = f'{self._bidi_workaround(message)}\n'
    self._write_string(line, self._out_files.out)
899 def to_screen(self
, message
, skip_eol
=False, quiet
=None, only_once
=False):
900 """Print message to screen if not in quiet mode"""
901 if self
.params
.get('logger'):
902 self
.params
['logger'].debug(message
)
904 if (self
.params
.get('quiet') if quiet
is None else quiet
) and not self
.params
.get('verbose'):
907 '%s%s' % (self
._bidi
_workaround
(message
), ('' if skip_eol
else '\n')),
908 self
._out
_files
.screen
, only_once
=only_once
)
910 def to_stderr(self
, message
, only_once
=False):
911 """Print message to stderr"""
912 assert isinstance(message
, str)
913 if self
.params
.get('logger'):
914 self
.params
['logger'].error(message
)
916 self
._write
_string
(f
'{self._bidi_workaround(message)}\n', self
._out
_files
.error
, only_once
=only_once
)
918 def _send_console_code(self
, code
):
919 if compat_os_name
== 'nt' or not self
._out
_files
.console
:
921 self
._write
_string
(code
, self
._out
_files
.console
)
923 def to_console_title(self
, message
):
924 if not self
.params
.get('consoletitle', False):
926 message
= remove_terminal_sequences(message
)
927 if compat_os_name
== 'nt':
928 if ctypes
.windll
.kernel32
.GetConsoleWindow():
929 # c_wchar_p() might not be necessary if `message` is
930 # already of type unicode()
931 ctypes
.windll
.kernel32
.SetConsoleTitleW(ctypes
.c_wchar_p(message
))
933 self
._send
_console
_code
(f
'\033]0;{message}\007')
935 def save_console_title(self
):
936 if not self
.params
.get('consoletitle') or self
.params
.get('simulate'):
938 self
._send
_console
_code
('\033[22;0t') # Save the title on stack
940 def restore_console_title(self
):
941 if not self
.params
.get('consoletitle') or self
.params
.get('simulate'):
943 self
._send
_console
_code
('\033[23;0t') # Restore the title from stack
946 self
.save_console_title()
def save_cookies(self):
    """Save the cookie jar, but only when the 'cookiefile' param is set.

    The jar's ``save`` is called with ``ignore_discard=True`` and
    ``ignore_expires=True``.
    """
    if self.params.get('cookiefile') is None:
        # No cookie file configured: nothing to write
        return
    self.cookiejar.save(ignore_discard=True, ignore_expires=True)
953 def __exit__(self
, *args
):
954 self
.restore_console_title()
959 self
._request
_director
.close()
961 def trouble(self
, message
=None, tb
=None, is_error
=True):
962 """Determine action to take when a download problem appears.
964 Depending on if the downloader has been configured to ignore
965 download errors or not, this method may throw an exception or
966 not when errors are found, after printing the message.
968 @param tb If given, is additional traceback information
969 @param is_error Whether to raise error according to ignorerrors
971 if message
is not None:
972 self
.to_stderr(message
)
973 if self
.params
.get('verbose'):
975 if sys
.exc_info()[0]: # if .trouble has been called from an except block
977 if hasattr(sys
.exc_info()[1], 'exc_info') and sys
.exc_info()[1].exc_info
[0]:
978 tb
+= ''.join(traceback
.format_exception(*sys
.exc_info()[1].exc_info
))
979 tb
+= encode_compat_str(traceback
.format_exc())
981 tb_data
= traceback
.format_list(traceback
.extract_stack())
982 tb
= ''.join(tb_data
)
987 if not self
.params
.get('ignoreerrors'):
988 if sys
.exc_info()[0] and hasattr(sys
.exc_info()[1], 'exc_info') and sys
.exc_info()[1].exc_info
[0]:
989 exc_info
= sys
.exc_info()[1].exc_info
991 exc_info
= sys
.exc_info()
992 raise DownloadError(message
, exc_info
)
993 self
._download
_retcode
= 1
997 EMPHASIS
='light blue',
1002 BAD_FORMAT
='light red',
1004 SUPPRESS
='light black',
1007 def _format_text(self
, handle
, allow_colors
, text
, f
, fallback
=None, *, test_encoding
=False):
1010 original_text
= text
1011 # handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711
1012 encoding
= self
.params
.get('encoding') or getattr(handle
, 'encoding', None) or 'ascii'
1013 text
= text
.encode(encoding
, 'ignore').decode(encoding
)
1014 if fallback
is not None and text
!= original_text
:
1016 return format_text(text
, f
) if allow_colors
is True else text
if fallback
is None else fallback
def _format_out(self, *args, **kwargs):
    """Call ``_format_text`` with the ``out`` file handle and its color setting.

    All positional and keyword arguments are passed through unchanged.
    """
    handle = self._out_files.out
    use_colors = self._allow_colors.out
    return self._format_text(handle, use_colors, *args, **kwargs)
def _format_screen(self, *args, **kwargs):
    """Call ``_format_text`` with the ``screen`` file handle and its color setting.

    All positional and keyword arguments are passed through unchanged.
    """
    handle = self._out_files.screen
    use_colors = self._allow_colors.screen
    return self._format_text(handle, use_colors, *args, **kwargs)
def _format_err(self, *args, **kwargs):
    """Call ``_format_text`` with the ``error`` file handle and its color setting.

    All positional and keyword arguments are passed through unchanged.
    """
    handle = self._out_files.error
    use_colors = self._allow_colors.error
    return self._format_text(handle, use_colors, *args, **kwargs)
1027 def report_warning(self
, message
, only_once
=False):
1029 Print the message to stderr, it will be prefixed with 'WARNING:'
1030 If stderr is a tty file the 'WARNING:' will be colored
1032 if self
.params
.get('logger') is not None:
1033 self
.params
['logger'].warning(message
)
1035 if self
.params
.get('no_warnings'):
1037 self
.to_stderr(f
'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once
)
def deprecation_warning(self, message, *, stacklevel=0):
    """Emit a deprecation warning through the module-level helper.

    The helper is given ``report_error`` as its printer together with
    ``is_error=False``. ``stacklevel`` is increased by one before being
    forwarded (presumably to account for this wrapper's own frame).
    """
    forwarded_level = stacklevel + 1
    deprecation_warning(
        message,
        stacklevel=forwarded_level,
        printer=self.report_error,
        is_error=False,
    )
def deprecated_feature(self, message):
    """Announce that a deprecated feature is being used.

    The notice is sent to the 'logger' param as a warning when one is
    configured, and is always written to stderr with ``only_once`` enabled.
    """
    logger = self.params.get('logger')
    if logger is not None:
        logger.warning(f'Deprecated Feature: {message}')

    prefix = self._format_err("Deprecated Feature:", self.Styles.ERROR)
    self.to_stderr(f'{prefix} {message}', True)
def report_error(self, message, *args, **kwargs):
    '''
    Behaves like trouble, except that the message is prefixed with 'ERROR:'
    (colored in red if stderr is a tty file). Any further arguments are
    handed to trouble unchanged.
    '''
    prefix = self._format_err("ERROR:", self.Styles.ERROR)
    self.trouble(f'{prefix} {message}', *args, **kwargs)
1055 def write_debug(self
, message
, only_once
=False):
1056 '''Log debug message or Print message to stderr'''
1057 if not self
.params
.get('verbose', False):
1059 message
= f
'[debug] {message}'
1060 if self
.params
.get('logger'):
1061 self
.params
['logger'].debug(message
)
1063 self
.to_stderr(message
, only_once
)
def report_file_already_downloaded(self, file_name):
    """Tell the user that ``file_name`` was already downloaded completely."""
    try:
        named_notice = '[download] %s has already been downloaded' % file_name
        self.to_screen(named_notice)
    except UnicodeEncodeError:
        # Fall back to a message that does not contain the file name
        self.to_screen('[download] The file has already been downloaded')
def report_file_delete(self, file_name):
    """Tell the user that the existing file ``file_name`` is going to be deleted."""
    try:
        named_notice = 'Deleting existing file %s' % file_name
        self.to_screen(named_notice)
    except UnicodeEncodeError:
        # Fall back to a message that does not contain the file name
        self.to_screen('Deleting existing file')
def raise_no_formats(self, info, forced=False, *, msg=None):
    """Raise or warn about a video for which no formats are available.

    @param info    Info dict; '_has_drm' is read, and 'id' and 'extractor'
                   are used when raising
    @param forced  Raise even if the 'ignore_no_formats_error' param is set
    @param msg     Custom message; when given, the error counts as expected

    An ExtractorError is raised unless 'ignore_no_formats_error' is set and
    ``forced`` is false, in which case only a warning is reported.
    """
    has_drm = info.get('_has_drm')
    ignored = self.params.get('ignore_no_formats_error')
    # Must be computed before the default message is filled in below
    expected = bool(msg)

    if not msg:
        msg = 'This video is DRM protected' if has_drm else 'No video formats found!'

    if ignored and not forced:
        self.report_warning(msg)
        return

    raise ExtractorError(
        msg, video_id=info['id'], ie=info['extractor'],
        expected=has_drm or ignored or expected)
def parse_outtmpl(self):
    """Deprecated public wrapper around ``_parse_outtmpl``.

    Issues a deprecation warning, runs ``_parse_outtmpl`` and returns the
    'outtmpl' param afterwards.
    """
    self.deprecation_warning('"YoutubeDL.parse_outtmpl" is deprecated and may be removed in a future version')
    self._parse_outtmpl()
    parsed_templates = self.params['outtmpl']
    return parsed_templates
1094 def _parse_outtmpl(self
):
1096 if self
.params
.get('restrictfilenames'): # Remove spaces in the default template
1097 sanitize
= lambda x
: x
.replace(' - ', ' ').replace(' ', '-')
1099 outtmpl
= self
.params
.setdefault('outtmpl', {})
1100 if not isinstance(outtmpl
, dict):
1101 self
.params
['outtmpl'] = outtmpl
= {'default': outtmpl}
1102 outtmpl
.update({k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items() if outtmpl.get(k) is None}
)
1104 def get_output_path(self
, dir_type
='', filename
=None):
1105 paths
= self
.params
.get('paths', {})
1106 assert isinstance(paths
, dict), '"paths" parameter must be a dictionary'
1107 path
= os
.path
.join(
1108 expand_path(paths
.get('home', '').strip()),
1109 expand_path(paths
.get(dir_type
, '').strip()) if dir_type
else '',
1111 return sanitize_path(path
, force
=self
.params
.get('windowsfilenames'))
def _outtmpl_expandpath(outtmpl):
    """Run ``expand_path`` on ``outtmpl`` while keeping '%%' and '$$' intact."""
    # expand_path would turn '%%' into '%' and '$$' into '$'. The doubled
    # characters must survive for the later template dict substitution, so
    # a random marker is wedged between them and removed again afterwards.
    marker = ''.join(random.choices(string.ascii_letters, k=32))
    guarded = outtmpl.replace('%%', f'%{marker}%')
    guarded = guarded.replace('$$', f'${marker}$')

    # Expansion has to happen before the template dict substitution:
    # meta fields may contain env variables that must not be expanded.
    # E.g. for outtmpl "%(title)s.%(ext)s" and title "Hello $PATH",
    # `$PATH` is to be left alone.
    expanded = expand_path(guarded)
    return expanded.replace(marker, '')
1129 def escape_outtmpl(outtmpl
):
1130 ''' Escape any remaining strings like %s, %abc% etc. '''
1132 STR_FORMAT_RE_TMPL
.format('', '(?![%(\0])'),
1133 lambda mobj
: ('' if mobj
.group('has_key') else '%') + mobj
.group(0),
1137 def validate_outtmpl(cls
, outtmpl
):
1138 ''' @return None or Exception object '''
1140 STR_FORMAT_RE_TMPL
.format('[^)]*', '[ljhqBUDS]'),
1141 lambda mobj
: f
'{mobj.group(0)[:-1]}s',
1142 cls
._outtmpl
_expandpath
(outtmpl
))
1144 cls
.escape_outtmpl(outtmpl
) % collections
.defaultdict(int)
1146 except ValueError as err
:
1150 def _copy_infodict(info_dict
):
1151 info_dict
= dict(info_dict
)
1152 info_dict
.pop('__postprocessors', None)
1153 info_dict
.pop('__pending_error', None)
1156 def prepare_outtmpl(self
, outtmpl
, info_dict
, sanitize
=False):
1157 """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
1158 @param sanitize Whether to sanitize the output as a filename.
1159 For backward compatibility, a function can also be passed
1162 info_dict
.setdefault('epoch', int(time
.time())) # keep epoch consistent once set
1164 info_dict
= self
._copy
_infodict
(info_dict
)
1165 info_dict
['duration_string'] = ( # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
1166 formatSeconds(info_dict
['duration'], '-' if sanitize
else ':')
1167 if info_dict
.get('duration', None) is not None
1169 info_dict
['autonumber'] = int(self
.params
.get('autonumber_start', 1) - 1 + self
._num
_downloads
)
1170 info_dict
['video_autonumber'] = self
._num
_videos
1171 if info_dict
.get('resolution') is None:
1172 info_dict
['resolution'] = self
.format_resolution(info_dict
, default
=None)
1174 # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
1175 # of %(field)s to %(field)0Nd for backward compatibility
1176 field_size_compat_map
= {
1177 'playlist_index': number_of_digits(info_dict
.get('__last_playlist_index') or 0),
1178 'playlist_autonumber': number_of_digits(info_dict
.get('n_entries') or 0),
1179 'autonumber': self
.params
.get('autonumber_size') or 5,
1183 EXTERNAL_FORMAT_RE
= re
.compile(STR_FORMAT_RE_TMPL
.format('[^)]*', f
'[{STR_FORMAT_TYPES}ljhqBUDS]'))
1188 # Field is of the form key1.key2...
1189 # where keys (except first) can be string, int, slice or "{field, ...}"
1190 FIELD_INNER_RE
= r
'(?:\w+|%(num)s|%(num)s?(?::%(num)s?){1,2})' % {'num': r'(?:-?\d+)'}
1191 FIELD_RE
= r
'\w*(?:\.(?:%(inner)s|{%(field)s(?:,%(field)s)*}))*' % {
1192 'inner': FIELD_INNER_RE
,
1193 'field': rf
'\w*(?:\.{FIELD_INNER_RE})*'
1195 MATH_FIELD_RE
= rf
'(?:{FIELD_RE}|-?{NUMBER_RE})'
1196 MATH_OPERATORS_RE
= r
'(?:%s)' % '|'.join(map(re
.escape
, MATH_FUNCTIONS
.keys()))
1197 INTERNAL_FORMAT_RE
= re
.compile(rf
'''(?xs)
1199 (?P<fields>{FIELD_RE})
1200 (?P<maths>(?:{MATH_OPERATORS_RE}{MATH_FIELD_RE})*)
1201 (?:>(?P<strf_format>.+?))?
1203 (?P<alternate>(?<!\\),[^|&)]+)?
1204 (?:&(?P<replacement>.*?))?
1205 (?:\|(?P<default>.*?))?
1208 def _traverse_infodict(fields
):
1209 fields
= [f
for x
in re
.split(r
'\.({.+?})\.?', fields
)
1210 for f
in ([x
] if x
.startswith('{') else x
.split('.'))]
1212 if fields
and not fields
[i
]:
1215 for i
, f
in enumerate(fields
):
1216 if not f
.startswith('{'):
1218 assert f
.endswith('}'), f
'No closing brace for {f} in {fields}'
1219 fields
[i
] = {k: k.split('.') for k in f[1:-1].split(',')}
1221 return traverse_obj(info_dict
, fields
, is_user_input
=True, traverse_string
=True)
1223 def get_value(mdict
):
1225 value
= _traverse_infodict(mdict
['fields'])
1228 value
= float_or_none(value
)
1229 if value
is not None:
1232 offset_key
= mdict
['maths']
1234 value
= float_or_none(value
)
1238 MATH_FIELD_RE
if operator
else MATH_OPERATORS_RE
,
1239 offset_key
).group(0)
1240 offset_key
= offset_key
[len(item
):]
1241 if operator
is None:
1242 operator
= MATH_FUNCTIONS
[item
]
1244 item
, multiplier
= (item
[1:], -1) if item
[0] == '-' else (item
, 1)
1245 offset
= float_or_none(item
)
1247 offset
= float_or_none(_traverse_infodict(item
))
1249 value
= operator(value
, multiplier
* offset
)
1250 except (TypeError, ZeroDivisionError):
1253 # Datetime formatting
1254 if mdict
['strf_format']:
1255 value
= strftime_or_none(value
, mdict
['strf_format'].replace('\\,', ','))
1257 # XXX: Workaround for https://github.com/yt-dlp/yt-dlp/issues/4485
1258 if sanitize
and value
== '':
1262 na
= self
.params
.get('outtmpl_na_placeholder', 'NA')
1264 def filename_sanitizer(key
, value
, restricted
=self
.params
.get('restrictfilenames')):
1265 return sanitize_filename(str(value
), restricted
=restricted
, is_id
=(
1266 bool(re
.search(r
'(^|[_.])id(\.|$)', key
))
1267 if 'filename-sanitization' in self
.params
['compat_opts']
1270 sanitizer
= sanitize
if callable(sanitize
) else filename_sanitizer
1271 sanitize
= bool(sanitize
)
1273 def _dumpjson_default(obj
):
1274 if isinstance(obj
, (set, LazyList
)):
1278 class _ReplacementFormatter(string
.Formatter
):
1279 def get_field(self
, field_name
, args
, kwargs
):
1280 if field_name
.isdigit():
1282 raise ValueError('Unsupported field')
1284 replacement_formatter
= _ReplacementFormatter()
1286 def create_key(outer_mobj
):
1287 if not outer_mobj
.group('has_key'):
1288 return outer_mobj
.group(0)
1289 key
= outer_mobj
.group('key')
1290 mobj
= re
.match(INTERNAL_FORMAT_RE
, key
)
1291 value
, replacement
, default
, last_field
= None, None, na
, ''
1293 mobj
= mobj
.groupdict()
1294 default
= mobj
['default'] if mobj
['default'] is not None else default
1295 value
= get_value(mobj
)
1296 last_field
, replacement
= mobj
['fields'], mobj
['replacement']
1297 if value
is None and mobj
['alternate']:
1298 mobj
= re
.match(INTERNAL_FORMAT_RE
, mobj
['remaining'][1:])
1302 fmt
= outer_mobj
.group('format')
1303 if fmt
== 's' and value
is not None and last_field
in field_size_compat_map
.keys():
1304 fmt
= f
'0{field_size_compat_map[last_field]:d}d'
1306 if None not in (value
, replacement
):
1308 value
= replacement_formatter
.format(replacement
, value
)
1310 value
, default
= None, na
1312 flags
= outer_mobj
.group('conversion') or ''
1313 str_fmt
= f
'{fmt[:-1]}s'
1315 value
, fmt
= default
, 's'
1316 elif fmt
[-1] == 'l': # list
1317 delim
= '\n' if '#' in flags
else ', '
1318 value
, fmt
= delim
.join(map(str, variadic(value
, allowed_types
=(str, bytes)))), str_fmt
1319 elif fmt
[-1] == 'j': # json
1320 value
, fmt
= json
.dumps(
1321 value
, default
=_dumpjson_default
,
1322 indent
=4 if '#' in flags
else None, ensure_ascii
='+' not in flags
), str_fmt
1323 elif fmt
[-1] == 'h': # html
1324 value
, fmt
= escapeHTML(str(value
)), str_fmt
1325 elif fmt
[-1] == 'q': # quoted
1326 value
= map(str, variadic(value
) if '#' in flags
else [value
])
1327 value
, fmt
= ' '.join(map(compat_shlex_quote
, value
)), str_fmt
1328 elif fmt
[-1] == 'B': # bytes
1329 value
= f
'%{str_fmt}'.encode() % str(value
).encode()
1330 value
, fmt
= value
.decode('utf-8', 'ignore'), 's'
1331 elif fmt
[-1] == 'U': # unicode normalized
1332 value
, fmt
= unicodedata
.normalize(
1333 # "+" = compatibility equivalence, "#" = NFD
1334 'NF%s%s' % ('K' if '+' in flags
else '', 'D' if '#' in flags
else 'C'),
1336 elif fmt
[-1] == 'D': # decimal suffix
1337 num_fmt
, fmt
= fmt
[:-1].replace('#', ''), 's'
1338 value
= format_decimal_suffix(value
, f
'%{num_fmt}f%s' if num_fmt
else '%d%s',
1339 factor
=1024 if '#' in flags
else 1000)
1340 elif fmt
[-1] == 'S': # filename sanitization
1341 value
, fmt
= filename_sanitizer(last_field
, value
, restricted
='#' in flags
), str_fmt
1342 elif fmt
[-1] == 'c':
1344 value
= str(value
)[0]
1347 elif fmt
[-1] not in 'rsa': # numeric
1348 value
= float_or_none(value
)
1350 value
, fmt
= default
, 's'
1353 # If value is an object, sanitize might convert it to a string
1354 # So we convert it to repr first
1356 value
, fmt
= repr(value
), str_fmt
1357 elif fmt
[-1] == 'a':
1358 value
, fmt
= ascii(value
), str_fmt
1359 if fmt
[-1] in 'csra':
1360 value
= sanitizer(last_field
, value
)
1362 key
= '%s\0%s' % (key
.replace('%', '%\0'), outer_mobj
.group('format'))
1363 TMPL_DICT
[key
] = value
1364 return '{prefix}%({key}){fmt}'.format(key
=key
, fmt
=fmt
, prefix
=outer_mobj
.group('prefix'))
1366 return EXTERNAL_FORMAT_RE
.sub(create_key
, outtmpl
), TMPL_DICT
def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
    """Evaluate ``outtmpl`` against ``info_dict`` and return the result.

    The template and dict are first passed through ``prepare_outtmpl``
    (which also receives any extra arguments); the prepared template is
    then escaped with ``escape_outtmpl`` and %-substituted with the
    prepared dict.
    """
    prepared_tmpl, tmpl_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
    escaped_tmpl = self.escape_outtmpl(prepared_tmpl)
    return escaped_tmpl % tmpl_dict
1372 def _prepare_filename(self
, info_dict
, *, outtmpl
=None, tmpl_type
=None):
1373 assert None in (outtmpl
, tmpl_type
), 'outtmpl and tmpl_type are mutually exclusive'
1375 outtmpl
= self
.params
['outtmpl'].get(tmpl_type
or 'default', self
.params
['outtmpl']['default'])
1377 outtmpl
= self
._outtmpl
_expandpath
(outtmpl
)
1378 filename
= self
.evaluate_outtmpl(outtmpl
, info_dict
, True)
1382 if tmpl_type
in ('', 'temp'):
1383 final_ext
, ext
= self
.params
.get('final_ext'), info_dict
.get('ext')
1384 if final_ext
and ext
and final_ext
!= ext
and filename
.endswith(f
'.{final_ext}'):
1385 filename
= replace_extension(filename
, ext
, final_ext
)
1387 force_ext
= OUTTMPL_TYPES
[tmpl_type
]
1389 filename
= replace_extension(filename
, force_ext
, info_dict
.get('ext'))
1391 # https://github.com/blackjack4494/youtube-dlc/issues/85
1392 trim_file_name
= self
.params
.get('trim_file_name', False)
1394 no_ext
, *ext
= filename
.rsplit('.', 2)
1395 filename
= join_nonempty(no_ext
[:trim_file_name
], *ext
, delim
='.')
1398 except ValueError as err
:
1399 self
.report_error('Error in output template: ' + str(err
) + ' (encoding: ' + repr(preferredencoding()) + ')')
1402 def prepare_filename(self
, info_dict
, dir_type
='', *, outtmpl
=None, warn
=False):
1403 """Generate the output filename"""
1405 assert not dir_type
, 'outtmpl and dir_type are mutually exclusive'
1407 filename
= self
._prepare
_filename
(info_dict
, tmpl_type
=dir_type
, outtmpl
=outtmpl
)
1408 if not filename
and dir_type
not in ('', 'temp'):
1412 if not self
.params
.get('paths'):
1414 elif filename
== '-':
1415 self
.report_warning('--paths is ignored when an outputting to stdout', only_once
=True)
1416 elif os
.path
.isabs(filename
):
1417 self
.report_warning('--paths is ignored since an absolute path is given in output template', only_once
=True)
1418 if filename
== '-' or not filename
:
1421 return self
.get_output_path(dir_type
, filename
)
1423 def _match_entry(self
, info_dict
, incomplete
=False, silent
=False):
1424 """Returns None if the file should be downloaded"""
1425 _type
= 'video' if 'playlist-match-filter' in self
.params
['compat_opts'] else info_dict
.get('_type', 'video')
1426 assert incomplete
or _type
== 'video', 'Only video result can be considered complete'
1428 video_title
= info_dict
.get('title', info_dict
.get('id', 'entry'))
1431 if _type
in ('playlist', 'multi_video'):
1433 elif _type
in ('url', 'url_transparent') and not try_call(
1434 lambda: self
.get_info_extractor(info_dict
['ie_key']).is_single_video(info_dict
['url'])):
1437 if 'title' in info_dict
:
1438 # This can happen when we're just evaluating the playlist
1439 title
= info_dict
['title']
1440 matchtitle
= self
.params
.get('matchtitle', False)
1442 if not re
.search(matchtitle
, title
, re
.IGNORECASE
):
1443 return '"' + title
+ '" title did not match pattern "' + matchtitle
+ '"'
1444 rejecttitle
= self
.params
.get('rejecttitle', False)
1446 if re
.search(rejecttitle
, title
, re
.IGNORECASE
):
1447 return '"' + title
+ '" title matched reject pattern "' + rejecttitle
+ '"'
1449 date
= info_dict
.get('upload_date')
1450 if date
is not None:
1451 dateRange
= self
.params
.get('daterange', DateRange())
1452 if date
not in dateRange
:
1453 return f
'{date_from_str(date).isoformat()} upload date is not in range {dateRange}'
1454 view_count
= info_dict
.get('view_count')
1455 if view_count
is not None:
1456 min_views
= self
.params
.get('min_views')
1457 if min_views
is not None and view_count
< min_views
:
1458 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title
, view_count
, min_views
)
1459 max_views
= self
.params
.get('max_views')
1460 if max_views
is not None and view_count
> max_views
:
1461 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title
, view_count
, max_views
)
1462 if age_restricted(info_dict
.get('age_limit'), self
.params
.get('age_limit')):
1463 return 'Skipping "%s" because it is age restricted' % video_title
1465 match_filter
= self
.params
.get('match_filter')
1466 if match_filter
is None:
1472 ret
= match_filter(info_dict
, incomplete
=incomplete
)
1474 # For backward compatibility
1475 ret
= None if incomplete
else match_filter(info_dict
)
1476 except DownloadCancelled
as err
:
1477 if err
.msg
is not NO_DEFAULT
:
1479 ret
, cancelled
= err
.msg
, err
1481 if ret
is NO_DEFAULT
:
1483 filename
= self
._format
_screen
(self
.prepare_filename(info_dict
), self
.Styles
.FILENAME
)
1484 reply
= input(self
._format
_screen
(
1485 f
'Download "{filename}"? (Y/n): ', self
.Styles
.EMPHASIS
)).lower().strip()
1486 if reply
in {'y', ''}
:
1490 raise type(cancelled
)(f
'Skipping {video_title}')
1491 return f
'Skipping {video_title}'
1494 if self
.in_download_archive(info_dict
):
1495 reason
= '%s has already been recorded in the archive' % video_title
1496 break_opt
, break_err
= 'break_on_existing', ExistingVideoReached
1499 reason
= check_filter()
1500 except DownloadCancelled
as e
:
1501 reason
, break_opt
, break_err
= e
.msg
, 'match_filter', type(e
)
1503 break_opt
, break_err
= 'break_on_reject', RejectedVideoReached
1504 if reason
is not None:
1506 self
.to_screen('[download] ' + reason
)
1507 if self
.params
.get(break_opt
, False):
def add_extra_info(info_dict, extra_info):
    '''Copy into info_dict every key of extra_info that it does not have yet'''
    absent = {
        key: value
        for key, value in extra_info.items()
        if key not in info_dict
    }
    # Keys already present (even with a None value) are left untouched
    info_dict.update(absent)
1517 def extract_info(self
, url
, download
=True, ie_key
=None, extra_info
=None,
1518 process
=True, force_generic_extractor
=False):
1520 Extract and return the information dictionary of the URL
1523 @param url URL to extract
1526 @param download Whether to download videos
1527 @param process Whether to resolve all unresolved references (URLs, playlist items).
1528 Must be True for download to work
1529 @param ie_key Use only the extractor with this key
1531 @param extra_info Dictionary containing the extra values to add to the info (For internal use only)
1532 @force_generic_extractor Force using the generic extractor (Deprecated; use ie_key='Generic')
1535 if extra_info
is None:
1538 if not ie_key
and force_generic_extractor
:
1542 ies
= {ie_key: self._ies[ie_key]}
if ie_key
in self
._ies
else {}
1546 for key
, ie
in ies
.items():
1547 if not ie
.suitable(url
):
1550 if not ie
.working():
1551 self
.report_warning('The program functionality for this site has been marked as broken, '
1552 'and will probably not work.')
1554 temp_id
= ie
.get_temp_id(url
)
1555 if temp_id
is not None and self
.in_download_archive({'id': temp_id, 'ie_key': key}
):
1556 self
.to_screen(f
'[{key}] {temp_id}: has already been recorded in the archive')
1557 if self
.params
.get('break_on_existing', False):
1558 raise ExistingVideoReached()
1560 return self
.__extract
_info
(url
, self
.get_info_extractor(key
), download
, extra_info
, process
)
1562 extractors_restricted
= self
.params
.get('allowed_extractors') not in (None, ['default'])
1563 self
.report_error(f
'No suitable extractor{format_field(ie_key, None, " (%s)")} found for URL {url}',
1564 tb
=False if extractors_restricted
else None)
1566 def _handle_extraction_exceptions(func
):
1567 @functools.wraps(func
)
1568 def wrapper(self
, *args
, **kwargs
):
1571 return func(self
, *args
, **kwargs
)
1572 except (DownloadCancelled
, LazyList
.IndexError, PagedList
.IndexError):
1574 except ReExtractInfo
as e
:
1576 self
.to_screen(f
'{e}; Re-extracting data')
1578 self
.to_stderr('\r')
1579 self
.report_warning(f
'{e}; Re-extracting data')
1581 except GeoRestrictedError
as e
:
1584 msg
+= '\nThis video is available in %s.' % ', '.join(
1585 map(ISO3166Utils
.short2full
, e
.countries
))
1586 msg
+= '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1587 self
.report_error(msg
)
1588 except ExtractorError
as e
: # An error we somewhat expected
1589 self
.report_error(str(e
), e
.format_traceback())
1590 except Exception as e
:
1591 if self
.params
.get('ignoreerrors'):
1592 self
.report_error(str(e
), tb
=encode_compat_str(traceback
.format_exc()))
1598 def _wait_for_video(self
, ie_result
={}):
1599 if (not self
.params
.get('wait_for_video')
1600 or ie_result
.get('_type', 'video') != 'video'
1601 or ie_result
.get('formats') or ie_result
.get('url')):
1604 format_dur
= lambda dur
: '%02d:%02d:%02d' % timetuple_from_msec(dur
* 1000)[:-1]
1609 full_msg
= f
'{msg}\n'
1610 if not self
.params
.get('noprogress'):
1611 full_msg
= msg
+ ' ' * (len(last_msg
) - len(msg
)) + '\r'
1614 self
.to_screen(full_msg
, skip_eol
=True)
1617 min_wait
, max_wait
= self
.params
.get('wait_for_video')
1618 diff
= try_get(ie_result
, lambda x
: x
['release_timestamp'] - time
.time())
1619 if diff
is None and ie_result
.get('live_status') == 'is_upcoming':
1620 diff
= round(random
.uniform(min_wait
, max_wait
) if (max_wait
and min_wait
) else (max_wait
or min_wait
), 0)
1621 self
.report_warning('Release time of video is not known')
1622 elif ie_result
and (diff
or 0) <= 0:
1623 self
.report_warning('Video should already be available according to extracted info')
1624 diff
= min(max(diff
or 0, min_wait
or 0), max_wait
or float('inf'))
1625 self
.to_screen(f
'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1627 wait_till
= time
.time() + diff
1630 diff
= wait_till
- time
.time()
1633 raise ReExtractInfo('[wait] Wait period ended', expected
=True)
1634 progress(f
'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1636 except KeyboardInterrupt:
1638 raise ReExtractInfo('[wait] Interrupted by user', expected
=True)
1639 except BaseException
as e
:
1640 if not isinstance(e
, ReExtractInfo
):
1644 def _load_cookies(self
, data
, *, from_headers
=True):
1645 """Loads cookies from a `Cookie` header
1647 This tries to work around the security vulnerability of passing cookies to every domain.
1648 See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj
1649 The unscoped cookies are saved for later to be stored in the jar with a limited scope.
1651 @param data The Cookie header as string to load the cookies from
1652 @param from_headers If `False`, allows Set-Cookie syntax in the cookie string (at least a domain will be required)
1654 for cookie
in LenientSimpleCookie(data
).values():
1655 if from_headers
and any(cookie
.values()):
1656 raise ValueError('Invalid syntax in Cookie Header')
1658 domain
= cookie
.get('domain') or ''
1659 expiry
= cookie
.get('expires')
1660 if expiry
== '': # 0 is valid
1662 prepared_cookie
= http
.cookiejar
.Cookie(
1663 cookie
.get('version') or 0, cookie
.key
, cookie
.value
, None, False,
1664 domain
, True, True, cookie
.get('path') or '', bool(cookie
.get('path')),
1665 cookie
.get('secure') or False, expiry
, False, None, None, {})
1668 self
.cookiejar
.set_cookie(prepared_cookie
)
1670 self
.deprecated_feature(
1671 'Passing cookies as a header is a potential security risk; '
1672 'they will be scoped to the domain of the downloaded urls. '
1673 'Please consider loading cookies from a file or browser instead.')
1674 self
.__header
_cookies
.append(prepared_cookie
)
1676 self
.report_error('Unscoped cookies are not allowed; please specify some sort of scoping',
1677 tb
=False, is_error
=False)
def _apply_header_cookies(self, url):
    """Applies stray header cookies to the provided url

    This loads header cookies and scopes them to the domain provided in `url`.
    While this is not ideal, it helps reduce the risk of them being sent
    to an unintended destination while mostly maintaining compatibility.

    @param url  The url whose hostname is used as the cookie domain.
                Nothing is done if no hostname can be parsed from it
    """
    parsed = urllib.parse.urlparse(url)
    if not parsed.hostname:
        return

    # Work on copies so the stored unscoped cookies can be re-scoped for other urls
    for cookie in map(copy.copy, self.__header_cookies):
        cookie.domain = f'.{parsed.hostname}'
        self.cookiejar.set_cookie(cookie)
@_handle_extraction_exceptions
def __extract_info(self, url, ie, download, extra_info, process):
    """Run the extractor `ie` on `url` and optionally process its result

    @param url         The url to extract
    @param ie          The info extractor instance to use
    @param download    Passed on to `process_ie_result`
    @param extra_info  Dict of extra fields; its 'original_url' is copied to the result if unset
    @param process     If false, the raw extractor result is returned unprocessed
    @returns           The (processed) info dict, or None if the extractor returned nothing
    """
    self._apply_header_cookies(url)

    try:
        ie_result = ie.extract(url)
    except UserNotLive as e:
        if process:
            if self.params.get('wait_for_video'):
                self.report_warning(e)
                self._wait_for_video()
        raise
    if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
        self.report_warning(f'Extractor {ie.IE_NAME} returned nothing{bug_reports_message()}')
        return
    if isinstance(ie_result, list):
        # Backwards compatibility: old IE result format
        ie_result = {
            '_type': 'compat_list',
            'entries': ie_result,
        }
    if extra_info.get('original_url'):
        ie_result.setdefault('original_url', extra_info['original_url'])
    self.add_default_extra_info(ie_result, ie, url)
    if process:
        self._wait_for_video(ie_result)
        return self.process_ie_result(ie_result, download, extra_info)
    else:
        return ie_result
def add_default_extra_info(self, ie_result, ie, url):
    """Add the url and extractor derived default fields to `ie_result`

    @param ie_result  The info dict to update (through `add_extra_info`)
    @param ie         The info extractor that produced the result, or None
    @param url        The url that was extracted, or None
    """
    if url is not None:
        self.add_extra_info(ie_result, {
            'webpage_url': url,
            'original_url': url,
        })
    # Read back from the result: it may already have had its own webpage_url
    webpage_url = ie_result.get('webpage_url')
    if webpage_url:
        self.add_extra_info(ie_result, {
            'webpage_url_basename': url_basename(webpage_url),
            'webpage_url_domain': get_domain(webpage_url),
        })
    if ie is not None:
        self.add_extra_info(ie_result, {
            'extractor': ie.IE_NAME,
            'extractor_key': ie.ie_key(),
        })
def process_ie_result(self, ie_result, download=True, extra_info=None):
    """
    Take the result of the ie(may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    Returns the resolved ie_result.

    @param ie_result   The info dict; its '_type' (default 'video') selects the handling
    @param download    Whether to download the resolved videos
    @param extra_info  Dict of extra fields to add to the results
    @raises Exception  If the '_type' is not recognized
    """
    if extra_info is None:
        extra_info = {}
    result_type = ie_result.get('_type', 'video')

    if result_type in ('url', 'url_transparent'):
        ie_result['url'] = sanitize_url(
            ie_result['url'], scheme='http' if self.params.get('prefer_insecure') else 'https')
        if ie_result.get('original_url') and not extra_info.get('original_url'):
            extra_info = {'original_url': ie_result['original_url'], **extra_info}

        extract_flat = self.params.get('extract_flat', False)
        if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
                or extract_flat is True):
            # Flat extraction: report the reference itself without resolving it
            info_copy = ie_result.copy()
            ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
            if ie and not ie_result.get('id'):
                info_copy['id'] = ie.get_temp_id(ie_result['url'])
            self.add_default_extra_info(info_copy, ie, ie_result['url'])
            self.add_extra_info(info_copy, extra_info)
            info_copy, _ = self.pre_process(info_copy)
            self._fill_common_fields(info_copy, False)
            self.__forced_printings(info_copy)
            self._raise_pending_errors(info_copy)
            if self.params.get('force_write_download_archive', False):
                self.record_download_archive(info_copy)
            return ie_result

    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        ie_result = self.process_video_result(ie_result, download=download)
        self._raise_pending_errors(ie_result)
        additional_urls = (ie_result or {}).get('additional_urls')
        if additional_urls:
            # TODO: Improve MetadataParserPP to allow setting a list
            if isinstance(additional_urls, str):
                additional_urls = [additional_urls]
            self.to_screen(
                '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
            self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
            ie_result['additional_entries'] = [
                self.extract_info(
                    url, download, extra_info=extra_info,
                    force_generic_extractor=self.params.get('force_generic_extractor'))
                for url in additional_urls
            ]
        return ie_result
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(
            ie_result['url'], download,
            ie_key=ie_result.get('ie_key'),
            extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        # extract_info may return None when ignoreerrors is enabled and
        # extraction failed with an error, don't crash and return early
        # in this case
        if not info:
            return info

        exempted_fields = {'_type', 'url', 'ie_key'}
        if not ie_result.get('section_end') and ie_result.get('section_start') is None:
            # For video clips, the id etc of the clip extractor should be used
            exempted_fields |= {'id', 'extractor', 'extractor_key'}

        new_result = info.copy()
        new_result.update(filter_dict(ie_result, lambda k, v: v is not None and k not in exempted_fields))

        # Extracted info may not be a video result (i.e.
        # info.get('_type', 'video') != video) but rather an url or
        # url_transparent. In such cases outer metadata (from ie_result)
        # should be propagated to inner one (info). For this to happen
        # _type of info should be overridden with url_transparent. This
        # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
        if new_result.get('_type') == 'url':
            new_result['_type'] = 'url_transparent'

        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    elif result_type in ('playlist', 'multi_video'):
        # Protect from infinite recursion due to recursively nested playlists
        # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
        webpage_url = ie_result.get('webpage_url')  # Playlists may not have webpage_url
        if webpage_url and webpage_url in self._playlist_urls:
            # The parentheses matter: `%` binds tighter than `or`, so without
            # them the id fallback is never used and "None" gets printed
            self.to_screen(
                '[download] Skipping already downloaded playlist: %s'
                % (ie_result.get('title') or ie_result.get('id')))
            return

        self._playlist_level += 1
        self._playlist_urls.add(webpage_url)
        self._fill_common_fields(ie_result, False)
        self._sanitize_thumbnails(ie_result)
        try:
            return self.__process_playlist(ie_result, download)
        finally:
            # Forget the seen urls once the outermost playlist is done
            self._playlist_level -= 1
            if not self._playlist_level:
                self._playlist_urls.clear()
    elif result_type == 'compat_list':
        self.report_warning(
            'Extractor %s returned a compat_list result. '
            'It needs to be updated.' % ie_result.get('extractor'))

        def _fixup(r):
            self.add_extra_info(r, {
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'webpage_url_domain': get_domain(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            })
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)
def _ensure_dir_exists(self, path):
    """Call `make_dir` for `path` with `self.report_error` as error callback and return its result"""
    return make_dir(path, self.report_error)
@staticmethod
def _playlist_infodict(ie_result, strict=False, **kwargs):
    """Build the dict of playlist level fields for the playlist `ie_result`

    @param ie_result  The playlist info dict
    @param strict     If true, only the `playlist*` fields (and `kwargs`) are returned
    @param kwargs     Additional fields to include
    @returns          A new dict; `ie_result` is not modified
    """
    info = {
        'playlist_count': ie_result.get('playlist_count'),
        'playlist': ie_result.get('title') or ie_result.get('id'),
        'playlist_id': ie_result.get('id'),
        'playlist_title': ie_result.get('title'),
        'playlist_uploader': ie_result.get('uploader'),
        'playlist_uploader_id': ie_result.get('uploader_id'),
        **kwargs,
    }
    if strict:
        return info
    if ie_result.get('webpage_url'):
        info.update({
            'webpage_url': ie_result['webpage_url'],
            'webpage_url_basename': url_basename(ie_result['webpage_url']),
            'webpage_url_domain': get_domain(ie_result['webpage_url']),
        })
    return {
        **info,
        'playlist_index': 0,
        # (0, 0) keeps max() from failing when there are no requested entries
        '__last_playlist_index': max(ie_result.get('requested_entries') or (0, 0)),
        'extractor': ie_result['extractor'],
        'extractor_key': ie_result['extractor_key'],
    }
def __process_playlist(self, ie_result, download):
    """Process each entry in the playlist

    @param ie_result  The playlist info dict ('_type' must be 'playlist' or 'multi_video')
    @param download   Passed on when processing every entry
    @returns          The updated playlist info dict, or None if processing was aborted
    """
    assert ie_result['_type'] in ('playlist', 'multi_video')

    common_info = self._playlist_infodict(ie_result, strict=True)
    title = common_info.get('playlist') or '<Untitled>'
    if self._match_entry(common_info, incomplete=True) is not None:
        return
    self.to_screen(f'[download] Downloading {ie_result["_type"]}: {title}')

    all_entries = PlaylistEntries(self, ie_result)
    entries = orderedSet(all_entries.get_requested_items(), lazy=True)

    lazy = self.params.get('lazy_playlist')
    if lazy:
        # Entries are resolved one by one inside the loop below
        resolved_entries, n_entries = [], 'N/A'
        ie_result['requested_entries'], ie_result['entries'] = None, None
    else:
        entries = resolved_entries = list(entries)
        n_entries = len(resolved_entries)
        ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*resolved_entries)) or ([], [])
    if not ie_result.get('playlist_count'):
        # Better to do this after potentially exhausting entries
        ie_result['playlist_count'] = all_entries.get_full_count()

    extra = self._playlist_infodict(ie_result, n_entries=int_or_none(n_entries))
    ie_copy = collections.ChainMap(ie_result, extra)

    _infojson_written = False
    write_playlist_files = self.params.get('allow_playlist_files', True)
    if write_playlist_files and self.params.get('list_thumbnails'):
        self.list_thumbnails(ie_result)
    if write_playlist_files and not self.params.get('simulate'):
        _infojson_written = self._write_info_json(
            'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
        if _infojson_written is None:
            return
        if self._write_description('playlist', ie_result,
                                   self.prepare_filename(ie_copy, 'pl_description')) is None:
            return
        # TODO: This should be passed to ThumbnailsConvertor if necessary
        self._write_thumbnails('playlist', ie_result, self.prepare_filename(ie_copy, 'pl_thumbnail'))

    if lazy:
        if self.params.get('playlistreverse') or self.params.get('playlistrandom'):
            self.report_warning('playlistreverse and playlistrandom are not supported with lazy_playlist', only_once=True)
    elif self.params.get('playlistreverse'):
        entries.reverse()
    elif self.params.get('playlistrandom'):
        random.shuffle(entries)

    self.to_screen(f'[{ie_result["extractor"]}] Playlist {title}: Downloading {n_entries} items'
                   f'{format_field(ie_result, "playlist_count", " of %s")}')

    keep_resolved_entries = self.params.get('extract_flat') != 'discard'
    if self.params.get('extract_flat') == 'discard_in_playlist':
        keep_resolved_entries = ie_result['_type'] != 'playlist'
    if keep_resolved_entries:
        self.write_debug('The information of all playlist entries will be held in memory')

    failures = 0
    max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
    for i, (playlist_index, entry) in enumerate(entries):
        if lazy:
            resolved_entries.append((playlist_index, entry))
        if not entry:
            continue

        entry['__x_forwarded_for_ip'] = ie_result.get('__x_forwarded_for_ip')
        if not lazy and 'playlist-index' in self.params['compat_opts']:
            playlist_index = ie_result['requested_entries'][i]

        entry_copy = collections.ChainMap(entry, {
            **common_info,
            'n_entries': int_or_none(n_entries),
            'playlist_index': playlist_index,
            'playlist_autonumber': i + 1,
        })

        if self._match_entry(entry_copy, incomplete=True) is not None:
            # For compatibility with youtube-dl. See https://github.com/yt-dlp/yt-dlp/issues/4369
            resolved_entries[i] = (playlist_index, NO_DEFAULT)
            continue

        self.to_screen('[download] Downloading item %s of %s' % (
            self._format_screen(i + 1, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))

        entry_result = self.__process_iterable_entry(entry, download, collections.ChainMap({
            'playlist_index': playlist_index,
            'playlist_autonumber': i + 1,
        }, extra))
        if not entry_result:
            failures += 1
        if failures >= max_failures:
            self.report_error(
                f'Skipping the remaining entries in playlist "{title}" since {failures} items failed extraction')
            break
        if keep_resolved_entries:
            resolved_entries[i] = (playlist_index, entry_result)

    # Update with processed data
    ie_result['entries'] = [e for _, e in resolved_entries if e is not NO_DEFAULT]
    ie_result['requested_entries'] = [i for i, e in resolved_entries if e is not NO_DEFAULT]
    if ie_result['requested_entries'] == try_call(lambda: list(range(1, ie_result['playlist_count'] + 1))):
        # Do not set for full playlist
        ie_result.pop('requested_entries')

    # Write the updated info to json
    if _infojson_written is True and self._write_info_json(
            'updated playlist', ie_result,
            self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
        return

    ie_result = self.run_all_pps('playlist', ie_result)
    self.to_screen(f'[download] Finished downloading playlist: {title}')
    return ie_result
@_handle_extraction_exceptions
def __process_iterable_entry(self, entry, download, extra_info):
    """Process a single playlist entry through `process_ie_result`, under `_handle_extraction_exceptions`"""
    return self.process_ie_result(
        entry, download=download, extra_info=extra_info)
def _build_format_filter(self, filter_spec):
    """Returns a function to filter the formats according to the filter_spec

    @param filter_spec   A single filter such as 'height>=720' or 'ext!=webm'
    @returns             A function taking a format dict. For a field that is missing
                         in the format, it returns the matched "?" marker of the filter
                         (None if the filter has none); otherwise the comparison result
    @raises ValueError   If a numeric filter has a value that can not be parsed
    @raises SyntaxError  If `filter_spec` is neither a numeric nor a string filter
    """

    OPERATORS = {
        '<': operator.lt,
        '<=': operator.le,
        '>': operator.gt,
        '>=': operator.ge,
        '=': operator.eq,
        '!=': operator.ne,
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<key>[\w.-]+)\s*
        (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
        ''' % '|'.join(map(re.escape, OPERATORS.keys())))
    m = operator_rex.fullmatch(filter_spec)
    if m:
        try:
            comparison_value = int(m.group('value'))
        except ValueError:
            # Not a plain integer: try it as a size, with and without the "B" suffix
            comparison_value = parse_filesize(m.group('value'))
            if comparison_value is None:
                comparison_value = parse_filesize(m.group('value') + 'B')
            if comparison_value is None:
                raise ValueError(
                    'Invalid value %r in format specification %r' % (
                        m.group('value'), filter_spec))
        op = OPERATORS[m.group('op')]

    if not m:
        STR_OPERATORS = {
            '=': operator.eq,
            '^=': lambda attr, value: attr.startswith(value),
            '$=': lambda attr, value: attr.endswith(value),
            '*=': lambda attr, value: value in attr,
            '~=': lambda attr, value: value.search(attr) is not None
        }
        str_operator_rex = re.compile(r'''(?x)\s*
            (?P<key>[a-zA-Z0-9._-]+)\s*
            (?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?
            (?P<quote>["'])?
            (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
            (?(quote)(?P=quote))\s*
            ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
        m = str_operator_rex.fullmatch(filter_spec)
        if m:
            if m.group('op') == '~=':
                comparison_value = re.compile(m.group('value'))
            else:
                # Unescape backslash-escaped quotes and backslashes
                comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value'))
            str_op = STR_OPERATORS[m.group('op')]
            if m.group('negation'):
                op = lambda attr, value: not str_op(attr, value)
            else:
                op = str_op

    if not m:
        raise SyntaxError('Invalid filter specification %r' % filter_spec)

    def _filter(f):
        actual_value = f.get(m.group('key'))
        if actual_value is None:
            return m.group('none_inclusive')
        return op(actual_value, comparison_value)
    return _filter
def _check_formats(self, formats):
    """Yield the formats out of `formats` that pass a test download

    Every format is downloaded with `test=True` into a temporary file inside the
    'temp' output path. The temporary file is removed afterwards.

    @param formats  Iterable of format dicts (each must have a 'format_id')
    """
    for f in formats:
        self.to_screen('[info] Testing format %s' % f['format_id'])
        path = self.get_output_path('temp')
        if not self._ensure_dir_exists(f'{path}/'):
            continue
        # Only the name is needed: the handle is closed right away so the downloader can write to it
        temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
        temp_file.close()
        try:
            success, _ = self.dl(temp_file.name, f, test=True)
        except (DownloadError, OSError, ValueError) + network_exceptions:
            success = False
        finally:
            if os.path.exists(temp_file.name):
                try:
                    os.remove(temp_file.name)
                except OSError:
                    self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
        if success:
            yield f
        else:
            self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
def _default_format_spec(self, info_dict, download=True):
    """Return the format selector string to use when the user gave none

    @param info_dict  The info dict of the video (its 'is_live' is consulted)
    @param download   Whether the video is going to be downloaded
    """

    def can_merge():
        merger = FFmpegMergerPP(self)
        return merger.available and merger.can_merge()

    # A single pre-merged file is preferred when merging is impossible or not sensible.
    # `can_merge` is only evaluated when actually downloading
    prefer_best = (
        not self.params.get('simulate')
        and download
        and (
            not can_merge()
            or info_dict.get('is_live') and not self.params.get('live_from_start')
            or self.params['outtmpl']['default'] == '-'))
    compat = (
        prefer_best
        or self.params.get('allow_multiple_audio_streams', False)
        or 'format-spec' in self.params['compat_opts'])

    return (
        'best/bestvideo+bestaudio' if prefer_best
        else 'bestvideo*+bestaudio/best' if not compat
        else 'bestvideo+bestaudio/best')
def build_format_selector(self, format_spec):
    """Compile the format specification string into a selector function

    @param format_spec   The format selection string, e.g. 'bv*+ba/b'
    @returns             A function that takes a context dict (with 'formats',
                         'incomplete_formats' and 'has_merged_format') and
                         returns an iterable of the selected format dicts
    @raises SyntaxError  If `format_spec` can not be parsed
    """
    def syntax_error(note, start):
        message = (
            'Invalid format specification: '
            '{}\n\t{}\n\t{}^'.format(note, format_spec, ' ' * start[1]))
        return SyntaxError(message)

    PICKFIRST = 'PICKFIRST'
    MERGE = 'MERGE'
    SINGLE = 'SINGLE'
    GROUP = 'GROUP'
    FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

    allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
                              'video': self.params.get('allow_multiple_video_streams', False)}

    def _parse_filter(tokens):
        filter_parts = []
        for type, string_, start, _, _ in tokens:
            if type == tokenize.OP and string_ == ']':
                return ''.join(filter_parts)
            else:
                filter_parts.append(string_)

    def _remove_unused_ops(tokens):
        # Remove operators that we don't use and join them with the surrounding strings.
        # E.g. 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
        ALLOWED_OPS = ('/', '+', ',', '(', ')')
        last_string, last_start, last_end, last_line = None, None, None, None
        for type, string_, start, end, line in tokens:
            if type == tokenize.OP and string_ == '[':
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield type, string_, start, end, line
                # everything inside brackets will be handled by _parse_filter
                for type, string_, start, end, line in tokens:
                    yield type, string_, start, end, line
                    if type == tokenize.OP and string_ == ']':
                        break
            elif type == tokenize.OP and string_ in ALLOWED_OPS:
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield type, string_, start, end, line
            elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                if not last_string:
                    last_string = string_
                    last_start = start
                    last_end = end
                else:
                    last_string += string_
        if last_string:
            yield tokenize.NAME, last_string, last_start, last_end, last_line

    def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
        selectors = []
        current_selector = None
        for type, string_, start, _, _ in tokens:
            # ENCODING is only defined in python 3.x
            if type == getattr(tokenize, 'ENCODING', None):
                continue
            elif type in [tokenize.NAME, tokenize.NUMBER]:
                current_selector = FormatSelector(SINGLE, string_, [])
            elif type == tokenize.OP:
                if string_ == ')':
                    if not inside_group:
                        # ')' will be handled by the parentheses group
                        tokens.restore_last_token()
                    break
                elif inside_merge and string_ in ['/', ',']:
                    tokens.restore_last_token()
                    break
                elif inside_choice and string_ == ',':
                    tokens.restore_last_token()
                    break
                elif string_ == ',':
                    if not current_selector:
                        raise syntax_error('"," must follow a format selector', start)
                    selectors.append(current_selector)
                    current_selector = None
                elif string_ == '/':
                    if not current_selector:
                        raise syntax_error('"/" must follow a format selector', start)
                    first_choice = current_selector
                    second_choice = _parse_format_selection(tokens, inside_choice=True)
                    current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                elif string_ == '[':
                    if not current_selector:
                        current_selector = FormatSelector(SINGLE, 'best', [])
                    format_filter = _parse_filter(tokens)
                    current_selector.filters.append(format_filter)
                elif string_ == '(':
                    if current_selector:
                        raise syntax_error('Unexpected "("', start)
                    group = _parse_format_selection(tokens, inside_group=True)
                    current_selector = FormatSelector(GROUP, group, [])
                elif string_ == '+':
                    if not current_selector:
                        raise syntax_error('Unexpected "+"', start)
                    selector_1 = current_selector
                    selector_2 = _parse_format_selection(tokens, inside_merge=True)
                    if not selector_2:
                        raise syntax_error('Expected a selector', start)
                    current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                else:
                    raise syntax_error(f'Operator not recognized: "{string_}"', start)
            elif type == tokenize.ENDMARKER:
                break
        if current_selector:
            selectors.append(current_selector)
        return selectors

    def _merge(formats_pair):
        format_1, format_2 = formats_pair

        formats_info = []
        formats_info.extend(format_1.get('requested_formats', (format_1,)))
        formats_info.extend(format_2.get('requested_formats', (format_2,)))

        if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
            get_no_more = {'video': False, 'audio': False}
            # NOTE(review): popping while enumerating makes the loop skip the
            # element that follows a removed one; kept as is to preserve behavior
            for (i, fmt_info) in enumerate(formats_info):
                if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
                    formats_info.pop(i)
                    continue
                for aud_vid in ['audio', 'video']:
                    if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
                        if get_no_more[aud_vid]:
                            formats_info.pop(i)
                            break
                        get_no_more[aud_vid] = True

        if len(formats_info) == 1:
            return formats_info[0]

        video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
        audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']

        the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
        the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None

        output_ext = get_compatible_ext(
            vcodecs=[f.get('vcodec') for f in video_fmts],
            acodecs=[f.get('acodec') for f in audio_fmts],
            vexts=[f['ext'] for f in video_fmts],
            aexts=[f['ext'] for f in audio_fmts],
            preferences=(try_call(lambda: self.params['merge_output_format'].split('/'))
                         or self.params.get('prefer_free_formats') and ('webm', 'mkv')))

        filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))

        new_dict = {
            'requested_formats': formats_info,
            'format': '+'.join(filtered('format')),
            'format_id': '+'.join(filtered('format_id')),
            'ext': output_ext,
            'protocol': '+'.join(map(determine_protocol, formats_info)),
            'language': '+'.join(orderedSet(filtered('language'))) or None,
            'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
            'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
            'tbr': sum(filtered('tbr', 'vbr', 'abr')),
        }

        if the_only_video:
            new_dict.update({
                'width': the_only_video.get('width'),
                'height': the_only_video.get('height'),
                'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
                'fps': the_only_video.get('fps'),
                'dynamic_range': the_only_video.get('dynamic_range'),
                'vcodec': the_only_video.get('vcodec'),
                'vbr': the_only_video.get('vbr'),
                'stretched_ratio': the_only_video.get('stretched_ratio'),
                'aspect_ratio': the_only_video.get('aspect_ratio'),
            })

        if the_only_audio:
            new_dict.update({
                'acodec': the_only_audio.get('acodec'),
                'abr': the_only_audio.get('abr'),
                'asr': the_only_audio.get('asr'),
                'audio_channels': the_only_audio.get('audio_channels')
            })

        return new_dict

    def _check_formats(formats):
        # 'selected' has to be tested first. It is also "not None", so the
        # pass-through branch below would otherwise make it unreachable
        if self.params.get('check_formats') == 'selected':
            yield from self._check_formats(formats)
            return
        elif (self.params.get('check_formats') is not None
                or self.params.get('allow_unplayable_formats')):
            yield from formats
            return

        for f in formats:
            if f.get('has_drm'):
                yield from self._check_formats([f])
            else:
                yield f

    def _build_selector_function(selector):
        if isinstance(selector, list):  # ,
            fs = [_build_selector_function(s) for s in selector]

            def selector_function(ctx):
                for f in fs:
                    yield from f(ctx)
            return selector_function

        elif selector.type == GROUP:  # ()
            selector_function = _build_selector_function(selector.selector)

        elif selector.type == PICKFIRST:  # /
            fs = [_build_selector_function(s) for s in selector.selector]

            def selector_function(ctx):
                for f in fs:
                    picked_formats = list(f(ctx))
                    if picked_formats:
                        return picked_formats
                return []

        elif selector.type == MERGE:  # +
            selector_1, selector_2 = map(_build_selector_function, selector.selector)

            def selector_function(ctx):
                for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
                    yield _merge(pair)

        elif selector.type == SINGLE:  # atom
            format_spec = selector.selector or 'best'

            # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
            if format_spec == 'all':
                def selector_function(ctx):
                    yield from _check_formats(ctx['formats'][::-1])
            elif format_spec == 'mergeall':
                def selector_function(ctx):
                    formats = list(_check_formats(
                        f for f in ctx['formats'] if f.get('vcodec') != 'none' or f.get('acodec') != 'none'))
                    if not formats:
                        return
                    merged_format = formats[-1]
                    for f in formats[-2::-1]:
                        merged_format = _merge((merged_format, f))
                    yield merged_format

            else:
                format_fallback, seperate_fallback, format_reverse, format_idx = False, None, True, 1
                mobj = re.match(
                    r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
                    format_spec)
                if mobj is not None:
                    format_idx = int_or_none(mobj.group('n'), default=1)
                    format_reverse = mobj.group('bw')[0] == 'b'
                    format_type = (mobj.group('type') or [None])[0]
                    not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
                    format_modified = mobj.group('mod') is not None

                    format_fallback = not format_type and not format_modified  # for b, w
                    _filter_f = (
                        (lambda f: f.get('%scodec' % format_type) != 'none')
                        if format_type and format_modified  # bv*, ba*, wv*, wa*
                        else (lambda f: f.get('%scodec' % not_format_type) == 'none')
                        if format_type  # bv, ba, wv, wa
                        else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                        if not format_modified  # b, w
                        else lambda f: True)  # b*, w*
                    filter_f = lambda f: _filter_f(f) and (
                        f.get('vcodec') != 'none' or f.get('acodec') != 'none')
                else:
                    if format_spec in self._format_selection_exts['audio']:
                        filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
                    elif format_spec in self._format_selection_exts['video']:
                        filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
                        seperate_fallback = lambda f: f.get('ext') == format_spec and f.get('vcodec') != 'none'
                    elif format_spec in self._format_selection_exts['storyboards']:
                        filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
                    else:
                        filter_f = lambda f: f.get('format_id') == format_spec  # id

                def selector_function(ctx):
                    formats = list(ctx['formats'])
                    matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                    if not matches:
                        if format_fallback and ctx['incomplete_formats']:
                            # for extractors with incomplete formats (audio only (soundcloud)
                            # or video only (imgur)) best/worst will fallback to
                            # best/worst {video,audio}-only format
                            matches = list(filter(lambda f: f.get('vcodec') != 'none' or f.get('acodec') != 'none', formats))
                        elif seperate_fallback and not ctx['has_merged_format']:
                            # for compatibility with youtube-dl when there is no pre-merged format
                            matches = list(filter(seperate_fallback, formats))
                    matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
                    try:
                        yield matches[format_idx - 1]
                    except LazyList.IndexError:
                        return

        filters = [self._build_format_filter(f) for f in selector.filters]

        def final_selector(ctx):
            # Filter a copy so that sibling selectors still see all the formats
            ctx_copy = dict(ctx)
            for _filter in filters:
                ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
            return selector_function(ctx_copy)
        return final_selector

    # The python tokenizer is (ab)used to split the format specification
    stream = io.BytesIO(format_spec.encode())
    try:
        tokens = list(_remove_unused_ops(tokenize.tokenize(stream.readline)))
    except tokenize.TokenError:
        raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

    class TokenIterator:
        """Iterator over the tokens that allows stepping back by one token"""

        def __init__(self, tokens):
            self.tokens = tokens
            self.counter = 0

        def __iter__(self):
            return self

        def __next__(self):
            if self.counter >= len(self.tokens):
                raise StopIteration()
            value = self.tokens[self.counter]
            self.counter += 1
            return value

        next = __next__

        def restore_last_token(self):
            self.counter -= 1

    parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
    return _build_selector_function(parsed_selector)
def _calc_headers(self, info_dict):
    """Build the HTTP headers to use for the url of `info_dict`

    The cookies that the cookiejar holds for `info_dict['url']` are additionally
    serialized into `info_dict['cookies']`.

    @param info_dict  The info/format dict. Must have a 'url'
    @returns          The headers: the 'http_headers' param overlaid with those of `info_dict`
    """
    res = HTTPHeaderDict(self.params['http_headers'], info_dict.get('http_headers'))
    clean_headers(res)
    cookies = self.cookiejar.get_cookies_for_url(info_dict['url'])
    if cookies:
        encoder = LenientSimpleCookie()
        values = []
        for cookie in cookies:
            _, value = encoder.value_encode(cookie.value)
            values.append(f'{cookie.name}={value}')
            if cookie.domain:
                values.append(f'Domain={cookie.domain}')
            if cookie.path:
                values.append(f'Path={cookie.path}')
            if cookie.secure:
                values.append('Secure')
            if cookie.expires:
                values.append(f'Expires={cookie.expires}')
            if cookie.version:
                values.append(f'Version={cookie.version}')
        info_dict['cookies'] = '; '.join(values)

    if 'X-Forwarded-For' not in res:
        x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
        if x_forwarded_for_ip:
            res['X-Forwarded-For'] = x_forwarded_for_ip

    return res
def _calc_cookies(self, url):
    """Deprecated: emit a deprecation warning and return `self.cookiejar.get_cookie_header(url)`"""
    self.deprecation_warning('"YoutubeDL._calc_cookies" is deprecated and may be removed in a future version')
    return self.cookiejar.get_cookie_header(url)
def _sort_thumbnails(self, thumbnails):
    """Sort the list of thumbnail dicts in-place in ascending order

    The order is decided by preference, width, height, id and url, in that order.
    A missing preference, width or height counts as -1 and a missing id as ''.
    """
    thumbnails.sort(key=lambda t: (
        t.get('preference') if t.get('preference') is not None else -1,
        t.get('width') if t.get('width') is not None else -1,
        t.get('height') if t.get('height') is not None else -1,
        t.get('id') if t.get('id') is not None else '',
        t.get('url')))
def _sanitize_thumbnails(self, info_dict):
    """Normalize the 'thumbnails' of `info_dict` in-place

    A lone 'thumbnail' url is converted into a 'thumbnails' list. The thumbnails are
    then sorted and get their 'id', 'resolution' and sanitized 'url' filled in.
    If the 'check_formats' param is `True`, the list is replaced by a `LazyList`
    of only those thumbnails whose url can be opened with a HEAD request.
    """
    thumbnails = info_dict.get('thumbnails')
    if thumbnails is None:
        thumbnail = info_dict.get('thumbnail')
        if thumbnail:
            info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
    if not thumbnails:
        return

    def check_thumbnails(thumbnails):
        for t in thumbnails:
            self.to_screen(f'[info] Testing thumbnail {t["id"]}')
            try:
                self.urlopen(HEADRequest(t['url']))
            except network_exceptions as err:
                self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
                continue
            yield t

    self._sort_thumbnails(thumbnails)
    for i, t in enumerate(thumbnails):
        if t.get('id') is None:
            t['id'] = '%d' % i
        if t.get('width') and t.get('height'):
            t['resolution'] = '%dx%d' % (t['width'], t['height'])
        t['url'] = sanitize_url(t['url'])

    if self.params.get('check_formats') is True:
        # Test from the end of the sorted list; `reverse=True` restores the ascending order
        info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
    else:
        info_dict['thumbnails'] = thumbnails
def _fill_common_fields(self, info_dict, final=True):
    """Fill in the fields of `info_dict` that can be derived from its other fields

    @param info_dict  The info dict to update in-place
    @param final      If true, a missing title is replaced by a generic one and the
                      chapter/season/episode names are generated from their numbers
    """
    # TODO: move sanitization here
    if final:
        title = info_dict['fulltitle'] = info_dict.get('title')
        if not title:
            if title == '':
                self.write_debug('Extractor gave empty title. Creating a generic title')
            else:
                self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
            info_dict['title'] = f'{info_dict["extractor"].replace(":", "-")} video #{info_dict["id"]}'

    if info_dict.get('duration') is not None:
        info_dict['duration_string'] = formatSeconds(info_dict['duration'])

    for ts_key, date_key in (
            ('timestamp', 'upload_date'),
            ('release_timestamp', 'release_date'),
            ('modified_timestamp', 'modified_date'),
    ):
        if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
            # Working around out-of-range timestamp values (e.g. negative ones on Windows,
            # see http://bugs.python.org/issue1646728)
            with contextlib.suppress(ValueError, OverflowError, OSError):
                # Timezone-aware replacement for the deprecated `utcfromtimestamp`; gives the same UTC date
                upload_date = datetime.datetime.fromtimestamp(info_dict[ts_key], datetime.timezone.utc)
                info_dict[date_key] = upload_date.strftime('%Y%m%d')

    live_keys = ('is_live', 'was_live')
    live_status = info_dict.get('live_status')
    if live_status is None:
        # Use the first key that is not explicitly False, but only if it is truthy
        for key in live_keys:
            if info_dict.get(key) is False:
                continue
            if info_dict.get(key):
                live_status = key
            break
        if all(info_dict.get(key) is False for key in live_keys):
            live_status = 'not_live'
    if live_status:
        info_dict['live_status'] = live_status
        for key in live_keys:
            if info_dict.get(key) is None:
                info_dict[key] = (live_status == key)
    if live_status == 'post_live':
        info_dict['was_live'] = True

    # Auto generate title fields corresponding to the *_number fields when missing
    # in order to always have clean titles. This is very common for TV series.
    for field in ('chapter', 'season', 'episode'):
        if final and info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
            info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2604 def _raise_pending_errors(self
, info
):
2605 err
= info
.pop('__pending_error', None)
2607 self
.report_error(err
, tb
=False)
def sort_formats(self, info_dict):
    """Sort the formats of ``info_dict`` in place.

    The sort key is ``FormatSorter.calculate_preference``, built from the
    ``'_format_sort_fields'`` entry of ``info_dict`` (an empty list when the
    entry is missing or falsy).
    """
    fmts = self._get_formats(info_dict)
    sorter = FormatSorter(self, info_dict.get('_format_sort_fields') or [])
    fmts.sort(key=sorter.calculate_preference)
def process_video_result(self, info_dict, download=True):
    """Sanitize a single video result, select its formats and optionally download them.

    @param info_dict  The video info dict; it is modified in place and may be
                      replaced by the result of the pre-/post-processors.
    @param download   When false, formats are selected but ``process_info``
                      is not called.
    @returns          The processed info dict, updated with the best selected
                      format.
    Raises ExtractorError when the "id" field is missing or empty, or when no
    requested format is available (unless 'ignore_no_formats_error' is set),
    and MaxDownloadsReached when a download signalled that limit.
    """
    assert info_dict.get('_type', 'video') == 'video'
    self._num_videos += 1

    if 'id' not in info_dict:
        raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
    elif not info_dict.get('id'):
        raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])

    def report_force_conversion(field, field_not, conversion):
        self.report_warning(
            '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
            % (field, field_not, conversion))

    def sanitize_string_field(info, string_field):
        field = info.get(string_field)
        if field is None or isinstance(field, str):
            return
        report_force_conversion(string_field, 'a string', 'string')
        info[string_field] = str(field)

    def sanitize_numeric_fields(info):
        for numeric_field in self._NUMERIC_FIELDS:
            field = info.get(numeric_field)
            if field is None or isinstance(field, (int, float)):
                continue
            report_force_conversion(numeric_field, 'numeric', 'int')
            info[numeric_field] = int_or_none(field)

    sanitize_string_field(info_dict, 'id')
    sanitize_numeric_fields(info_dict)
    if info_dict.get('section_end') and info_dict.get('section_start') is not None:
        info_dict['duration'] = round(info_dict['section_end'] - info_dict['section_start'], 3)
    # A zero/None duration is dropped silently; only a non-zero (negative) one warns
    if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):
        self.report_warning('"duration" field is negative, there is an error in extractor')

    chapters = info_dict.get('chapters') or []
    if chapters and chapters[0].get('start_time'):
        chapters.insert(0, {'start_time': 0})

    # Fill missing chapter boundaries from the neighbouring chapters;
    # the dummy chapter stands in before the first and after the last one
    dummy_chapter = {'end_time': 0, 'start_time': info_dict.get('duration')}
    for idx, (prev, current, next_) in enumerate(zip(
            (dummy_chapter, *chapters), chapters, (*chapters[1:], dummy_chapter)), 1):
        if current.get('start_time') is None:
            current['start_time'] = prev.get('end_time')
        if not current.get('end_time'):
            current['end_time'] = next_.get('start_time')
        if not current.get('title'):
            current['title'] = f'<Untitled Chapter {idx}>'

    if 'playlist' not in info_dict:
        # It isn't part of a playlist
        info_dict['playlist'] = None
        info_dict['playlist_index'] = None

    self._sanitize_thumbnails(info_dict)

    thumbnail = info_dict.get('thumbnail')
    thumbnails = info_dict.get('thumbnails')
    if thumbnail:
        info_dict['thumbnail'] = sanitize_url(thumbnail)
    elif thumbnails:
        info_dict['thumbnail'] = thumbnails[-1]['url']

    if info_dict.get('display_id') is None and 'id' in info_dict:
        info_dict['display_id'] = info_dict['id']

    self._fill_common_fields(info_dict)

    for cc_kind in ('subtitles', 'automatic_captions'):
        cc = info_dict.get(cc_kind)
        if cc:
            for _, subtitle in cc.items():
                for subtitle_format in subtitle:
                    if subtitle_format.get('url'):
                        subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                    if subtitle_format.get('ext') is None:
                        subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

    automatic_captions = info_dict.get('automatic_captions')
    subtitles = info_dict.get('subtitles')

    info_dict['requested_subtitles'] = self.process_subtitles(
        info_dict['id'], subtitles, automatic_captions)

    formats = self._get_formats(info_dict)

    # Backward compatibility with InfoExtractor._sort_formats
    field_preference = (formats or [{}])[0].pop('__sort_fields', None)
    if field_preference:
        info_dict['_format_sort_fields'] = field_preference

    info_dict['_has_drm'] = any(  # or None ensures --clean-infojson removes it
        f.get('has_drm') and f['has_drm'] != 'maybe' for f in formats) or None
    if not self.params.get('allow_unplayable_formats'):
        formats = [f for f in formats if not f.get('has_drm') or f['has_drm'] == 'maybe']

    if formats and all(f.get('acodec') == f.get('vcodec') == 'none' for f in formats):
        self.report_warning(
            f'{"This video is DRM protected and " if info_dict["_has_drm"] else ""}'
            'only images are available for download. Use --list-formats to see them'.capitalize())

    get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
    if not get_from_start:
        info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
    if info_dict.get('is_live') and formats:
        formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
        if get_from_start and not formats:
            self.raise_no_formats(info_dict, msg=(
                '--live-from-start is passed, but there are no formats that can be downloaded from the start. '
                'If you want to download from the current time, use --no-live-from-start'))

    def is_wellformed(f):
        url = f.get('url')
        if not url:
            self.report_warning(
                '"url" field is missing or empty - skipping format, '
                'there is an error in extractor')
            return False
        if isinstance(url, bytes):
            sanitize_string_field(f, 'url')
        return True

    # Filter out malformed formats for better extraction robustness
    formats = list(filter(is_wellformed, formats or []))

    if not formats:
        self.raise_no_formats(info_dict)

    for format in formats:
        sanitize_string_field(format, 'format_id')
        sanitize_numeric_fields(format)
        format['url'] = sanitize_url(format['url'])
        if format.get('ext') is None:
            format['ext'] = determine_ext(format['url']).lower()
        if format.get('protocol') is None:
            format['protocol'] = determine_protocol(format)
        if format.get('resolution') is None:
            format['resolution'] = self.format_resolution(format, default=None)
        if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
            format['dynamic_range'] = 'SDR'
        if format.get('aspect_ratio') is None:
            format['aspect_ratio'] = try_call(lambda: round(format['width'] / format['height'], 2))
        if (not format.get('manifest_url')  # For fragmented formats, "tbr" is often max bitrate and not average
                and info_dict.get('duration') and format.get('tbr')
                and not format.get('filesize') and not format.get('filesize_approx')):
            format['filesize_approx'] = int(info_dict['duration'] * format['tbr'] * (1024 / 8))
        format['http_headers'] = self._calc_headers(collections.ChainMap(format, info_dict))

    # This is copied to http_headers by the above _calc_headers and can now be removed
    if '__x_forwarded_for_ip' in info_dict:
        del info_dict['__x_forwarded_for_ip']

    self.sort_formats({
        'formats': formats,
        '_format_sort_fields': info_dict.get('_format_sort_fields')
    })

    # Sanitize and group by format_id
    formats_dict = {}
    for i, format in enumerate(formats):
        if not format.get('format_id'):
            format['format_id'] = str(i)
        else:
            # Sanitize format_id from characters used in format selector expression
            format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
        formats_dict.setdefault(format['format_id'], []).append(format)

    # Make sure all formats have unique format_id
    common_exts = set(itertools.chain(*self._format_selection_exts.values()))
    for format_id, ambiguous_formats in formats_dict.items():
        ambigious_id = len(ambiguous_formats) > 1
        for i, format in enumerate(ambiguous_formats):
            if ambigious_id:
                format['format_id'] = '%s-%d' % (format_id, i)
            # Ensure there is no conflict between id and ext in format selection
            # See https://github.com/yt-dlp/yt-dlp/issues/1282
            if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
                format['format_id'] = 'f%s' % format['format_id']

            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=format_field(format, 'format_note', ' (%s)'),
                )

    if self.params.get('check_formats') is True:
        formats = LazyList(self._check_formats(formats[::-1]), reverse=True)

    if not formats or formats[0] is not info_dict:
        # only set the 'formats' fields if the original info_dict list them
        # otherwise we end up with a circular reference, the first (and unique)
        # element in the 'formats' field in info_dict is info_dict itself,
        # which can't be exported to json
        info_dict['formats'] = formats

    info_dict, _ = self.pre_process(info_dict)

    if self._match_entry(info_dict, incomplete=self._format_fields) is not None:
        return info_dict

    self.post_extract(info_dict)
    info_dict, _ = self.pre_process(info_dict, 'after_filter')

    # The pre-processors may have modified the formats
    formats = self._get_formats(info_dict)

    list_only = self.params.get('simulate') == 'list_only'
    interactive_format_selection = not list_only and self.format_selector == '-'
    if self.params.get('list_thumbnails'):
        self.list_thumbnails(info_dict)
    if self.params.get('listsubtitles'):
        if 'automatic_captions' in info_dict:
            self.list_subtitles(
                info_dict['id'], automatic_captions, 'automatic captions')
        self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
    if self.params.get('listformats') or interactive_format_selection:
        self.list_formats(info_dict)
    if list_only:
        # Without this printing, -F --print-json will not work
        self.__forced_printings(info_dict)
        return info_dict

    format_selector = self.format_selector
    # Loops only in interactive mode, re-prompting until a usable selector is entered
    while True:
        if interactive_format_selection:
            req_format = input(self._format_screen('\nEnter format selector ', self.Styles.EMPHASIS)
                               + '(Press ENTER for default, or Ctrl+C to quit)'
                               + self._format_screen(': ', self.Styles.EMPHASIS))
            try:
                format_selector = self.build_format_selector(req_format) if req_format else None
            except SyntaxError as err:
                self.report_error(err, tb=False, is_error=False)
                continue

        if format_selector is None:
            req_format = self._default_format_spec(info_dict, download=download)
            self.write_debug(f'Default format spec: {req_format}')
            format_selector = self.build_format_selector(req_format)

        formats_to_download = list(format_selector({
            'formats': formats,
            'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
            'incomplete_formats': (all(f.get('vcodec') == 'none' for f in formats)  # No formats with video
                                   or all(f.get('acodec') == 'none' for f in formats)),  # OR, No formats with audio
        }))
        if interactive_format_selection and not formats_to_download:
            self.report_error('Requested format is not available', tb=False, is_error=False)
            continue
        break

    if not formats_to_download:
        if not self.params.get('ignore_no_formats_error'):
            raise ExtractorError(
                'Requested format is not available. Use --list-formats for a list of available formats',
                expected=True, video_id=info_dict['id'], ie=info_dict['extractor'])
        self.report_warning('Requested format is not available')
        # Process what we can, even without any available formats.
        formats_to_download = [{}]

    requested_ranges = tuple(self.params.get('download_ranges', lambda *_: [{}])(info_dict, self))
    best_format, downloaded_formats = formats_to_download[-1], []
    if download:
        if best_format and requested_ranges:
            def to_screen(*msg):
                self.to_screen(f'[info] {info_dict["id"]}: {" ".join(", ".join(variadic(m)) for m in msg)}')

            to_screen(f'Downloading {len(formats_to_download)} format(s):',
                      (f['format_id'] for f in formats_to_download))
            if requested_ranges != ({}, ):
                to_screen(f'Downloading {len(requested_ranges)} time ranges:',
                          (f'{c["start_time"]:.1f}-{c["end_time"]:.1f}' for c in requested_ranges))
        max_downloads_reached = False

        # One process_info() call per (format, time range) combination
        for fmt, chapter in itertools.product(formats_to_download, requested_ranges):
            new_info = self._copy_infodict(info_dict)
            new_info.update(fmt)
            offset, duration = info_dict.get('section_start') or 0, info_dict.get('duration') or float('inf')
            end_time = offset + min(chapter.get('end_time', duration), duration)
            # duration may not be accurate. So allow deviations <1sec
            if end_time == float('inf') or end_time > offset + duration + 1:
                end_time = None
            if chapter or offset:
                new_info.update({
                    'section_start': offset + chapter.get('start_time', 0),
                    'section_end': end_time,
                    'section_title': chapter.get('title'),
                    'section_number': chapter.get('index'),
                })
            downloaded_formats.append(new_info)
            try:
                self.process_info(new_info)
            except MaxDownloadsReached:
                # Deferred so that the bookkeeping below still runs
                max_downloads_reached = True
            self._raise_pending_errors(new_info)
            # Remove copied info
            for key, val in tuple(new_info.items()):
                if info_dict.get(key) == val:
                    new_info.pop(key)
            if max_downloads_reached:
                break

        write_archive = {f.get('__write_download_archive', False) for f in downloaded_formats}
        assert write_archive.issubset({True, False, 'ignore'})
        if True in write_archive and False not in write_archive:
            self.record_download_archive(info_dict)

        info_dict['requested_downloads'] = downloaded_formats
        info_dict = self.run_all_pps('after_video', info_dict)
        if max_downloads_reached:
            raise MaxDownloadsReached()

    # We update the info dict with the selected best quality format (backwards compatibility)
    info_dict.update(best_format)
    return info_dict
def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
    """Select the requested subtitles and their format

    @param video_id           Used only in the printed messages.
    @param normal_subtitles   Mapping of language -> list of subtitle format
                              dicts (each read via its 'ext' key), or None.
    @param automatic_captions Same shape; only used for languages that have
                              no normal subtitles.
    @returns                  Mapping of language -> chosen format dict, or
                              None when nothing is available or neither
                              'writesubtitles' nor 'writeautomaticsub' is set.
    Raises ValueError when 'subtitleslangs' contains an invalid regex.
    """
    available_subs, normal_sub_langs = {}, []
    if normal_subtitles and self.params.get('writesubtitles'):
        available_subs.update(normal_subtitles)
        normal_sub_langs = tuple(normal_subtitles.keys())
    if automatic_captions and self.params.get('writeautomaticsub'):
        for lang, cap_info in automatic_captions.items():
            if lang not in available_subs:
                available_subs[lang] = cap_info

    if not available_subs or (
            not self.params.get('writesubtitles')
            and not self.params.get('writeautomaticsub')):
        return None

    all_sub_langs = tuple(available_subs.keys())
    if self.params.get('allsubtitles', False):
        requested_langs = all_sub_langs
    elif self.params.get('subtitleslangs', False):
        try:
            requested_langs = orderedSet_from_options(
                self.params.get('subtitleslangs'), {'all': all_sub_langs}, use_regex=True)
        except re.error as e:
            raise ValueError(f'Wrong regex for subtitlelangs: {e.pattern}') from e
    else:
        # Default: a single language, preferring English and normal subtitles
        requested_langs = LazyList(itertools.chain(
            ['en'] if 'en' in normal_sub_langs else [],
            filter(lambda f: f.startswith('en'), normal_sub_langs),
            ['en'] if 'en' in all_sub_langs else [],
            filter(lambda f: f.startswith('en'), all_sub_langs),
            normal_sub_langs, all_sub_langs,
        ))[:1]
    if requested_langs:
        self.to_screen(f'[info] {video_id}: Downloading subtitles: {", ".join(requested_langs)}')

    formats_query = self.params.get('subtitlesformat', 'best')
    formats_preference = formats_query.split('/') if formats_query else []
    subs = {}
    for lang in requested_langs:
        formats = available_subs.get(lang)
        if formats is None:
            self.report_warning(f'{lang} subtitles not available for {video_id}')
            continue
        for ext in formats_preference:
            if ext == 'best':
                f = formats[-1]
                break
            matches = list(filter(lambda f: f['ext'] == ext, formats))
            if matches:
                f = matches[-1]
                break
        else:
            # No preference matched: fall back to the last listed format
            f = formats[-1]
            self.report_warning(
                'No subtitle format found matching "%s" for language %s, '
                'using %s' % (formats_query, lang, f['ext']))
        subs[lang] = f
    return subs
2991 def _forceprint(self
, key
, info_dict
):
2992 if info_dict
is None:
2994 info_copy
= info_dict
.copy()
2995 info_copy
.setdefault('filename', self
.prepare_filename(info_dict
))
2996 if info_dict
.get('requested_formats') is not None:
2997 # For RTMP URLs, also include the playpath
2998 info_copy
['urls'] = '\n'.join(f
['url'] + f
.get('play_path', '') for f
in info_dict
['requested_formats'])
2999 elif info_dict
.get('url'):
3000 info_copy
['urls'] = info_dict
['url'] + info_dict
.get('play_path', '')
3001 info_copy
['formats_table'] = self
.render_formats_table(info_dict
)
3002 info_copy
['thumbnails_table'] = self
.render_thumbnails_table(info_dict
)
3003 info_copy
['subtitles_table'] = self
.render_subtitles_table(info_dict
.get('id'), info_dict
.get('subtitles'))
3004 info_copy
['automatic_captions_table'] = self
.render_subtitles_table(info_dict
.get('id'), info_dict
.get('automatic_captions'))
3006 def format_tmpl(tmpl
):
3007 mobj
= re
.fullmatch(r
'([\w.:,]|-\d|(?P<dict>{([\w.:,]|-\d)+}))+=?', tmpl
)
3012 if tmpl
.startswith('{'):
3013 tmpl
, fmt
= f
'.{tmpl}', '%({})j'
3014 if tmpl
.endswith('='):
3015 tmpl
, fmt
= tmpl
[:-1], '{0} = %({0})#j'
3016 return '\n'.join(map(fmt
.format
, [tmpl
] if mobj
.group('dict') else tmpl
.split(',')))
3018 for tmpl
in self
.params
['forceprint'].get(key
, []):
3019 self
.to_stdout(self
.evaluate_outtmpl(format_tmpl(tmpl
), info_copy
))
3021 for tmpl
, file_tmpl
in self
.params
['print_to_file'].get(key
, []):
3022 filename
= self
.prepare_filename(info_dict
, outtmpl
=file_tmpl
)
3023 tmpl
= format_tmpl(tmpl
)
3024 self
.to_screen(f
'[info] Writing {tmpl!r} to: {filename}')
3025 if self
._ensure
_dir
_exists
(filename
):
3026 with open(filename
, 'a', encoding
='utf-8', newline
='') as f
:
3027 f
.write(self
.evaluate_outtmpl(tmpl
, info_copy
) + os
.linesep
)
3031 def __forced_printings(self
, info_dict
, filename
=None, incomplete
=True):
3032 if (self
.params
.get('forcejson')
3033 or self
.params
['forceprint'].get('video')
3034 or self
.params
['print_to_file'].get('video')):
3035 self
.post_extract(info_dict
)
3037 info_dict
['filename'] = filename
3038 info_copy
= self
._forceprint
('video', info_dict
)
3040 def print_field(field
, actual_field
=None, optional
=False):
3041 if actual_field
is None:
3042 actual_field
= field
3043 if self
.params
.get(f
'force{field}') and (
3044 info_copy
.get(field
) is not None or (not optional
and not incomplete
)):
3045 self
.to_stdout(info_copy
[actual_field
])
3047 print_field('title')
3049 print_field('url', 'urls')
3050 print_field('thumbnail', optional
=True)
3051 print_field('description', optional
=True)
3052 print_field('filename')
3053 if self
.params
.get('forceduration') and info_copy
.get('duration') is not None:
3054 self
.to_stdout(formatSeconds(info_copy
['duration']))
3055 print_field('format')
3057 if self
.params
.get('forcejson'):
3058 self
.to_stdout(json
.dumps(self
.sanitize_info(info_dict
)))
def dl(self, name, info, subtitle=False, test=False):
    """Download ``info`` to ``name`` with the suitable downloader.

    @param name      Output filename; '-' selects a downloader that can
                     write to stdout.
    @param info      Info dict of the thing to download; must have a 'url'.
    @param subtitle  Passed through to the downloader's ``download``.
    @param test      When true, a fixed set of test params is used instead of
                     ``self.params`` and no progress hooks are attached.
    @returns         Whatever the downloader's ``download`` returns.
    """
    if not info.get('url'):
        self.raise_no_formats(info, True)

    if test:
        verbose = self.params.get('verbose')
        params = {
            'test': True,
            'quiet': self.params.get('quiet') or not verbose,
            'verbose': verbose,
            'noprogress': not verbose,
            'nopart': True,
            'skip_unavailable_fragments': False,
            'keep_fragments': False,
            'overwrites': True,
            '_no_ytdl_file': True,
        }
    else:
        params = self.params
    fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
    if not test:
        for ph in self._progress_hooks:
            fd.add_progress_hook(ph)
        # data: URLs are abbreviated so the payload is not dumped into the debug log
        urls = '", "'.join(
            (f['url'].split(',')[0] + ',<data>' if f['url'].startswith('data:') else f['url'])
            for f in info.get('requested_formats', []) or [info])
        self.write_debug(f'Invoking {fd.FD_NAME} downloader on "{urls}"')

    # Note: Ideally info should be a deep-copied so that hooks cannot modify it.
    # But it may contain objects that are not deep-copyable
    new_info = self._copy_infodict(info)
    if new_info.get('http_headers') is None:
        new_info['http_headers'] = self._calc_headers(new_info)
    return fd.download(name, new_info, subtitle)
def existing_file(self, filepaths, *, default_overwrite=True):
    """Return the first existing path of ``filepaths`` unless overwriting.

    When the 'overwrites' param (defaulting to ``default_overwrite``) is
    falsy and at least one path exists, the first existing path is returned.
    Otherwise every existing path is reported, deleted from disk, and None
    is returned.
    """
    existing_files = list(filter(os.path.exists, orderedSet(filepaths)))
    if existing_files and not self.params.get('overwrites', default_overwrite):
        return existing_files[0]

    for file in existing_files:
        self.report_file_delete(file)
        os.remove(file)
    return None
3105 def process_info(self
, info_dict
):
3106 """Process a single resolved IE result. (Modifies it in-place)"""
3108 assert info_dict
.get('_type', 'video') == 'video'
3109 original_infodict
= info_dict
3111 if 'format' not in info_dict
and 'ext' in info_dict
:
3112 info_dict
['format'] = info_dict
['ext']
3114 if self
._match
_entry
(info_dict
) is not None:
3115 info_dict
['__write_download_archive'] = 'ignore'
3118 # Does nothing under normal operation - for backward compatibility of process_info
3119 self
.post_extract(info_dict
)
3121 def replace_info_dict(new_info
):
3123 if new_info
== info_dict
:
3126 info_dict
.update(new_info
)
3128 new_info
, _
= self
.pre_process(info_dict
, 'video')
3129 replace_info_dict(new_info
)
3130 self
._num
_downloads
+= 1
3132 # info_dict['_filename'] needs to be set for backward compatibility
3133 info_dict
['_filename'] = full_filename
= self
.prepare_filename(info_dict
, warn
=True)
3134 temp_filename
= self
.prepare_filename(info_dict
, 'temp')
3138 self
.__forced
_printings
(info_dict
, full_filename
, incomplete
=('format' not in info_dict
))
3140 def check_max_downloads():
3141 if self
._num
_downloads
>= float(self
.params
.get('max_downloads') or 'inf'):
3142 raise MaxDownloadsReached()
3144 if self
.params
.get('simulate'):
3145 info_dict
['__write_download_archive'] = self
.params
.get('force_write_download_archive')
3146 check_max_downloads()
3149 if full_filename
is None:
3151 if not self
._ensure
_dir
_exists
(encodeFilename(full_filename
)):
3153 if not self
._ensure
_dir
_exists
(encodeFilename(temp_filename
)):
3156 if self
._write
_description
('video', info_dict
,
3157 self
.prepare_filename(info_dict
, 'description')) is None:
3160 sub_files
= self
._write
_subtitles
(info_dict
, temp_filename
)
3161 if sub_files
is None:
3163 files_to_move
.update(dict(sub_files
))
3165 thumb_files
= self
._write
_thumbnails
(
3166 'video', info_dict
, temp_filename
, self
.prepare_filename(info_dict
, 'thumbnail'))
3167 if thumb_files
is None:
3169 files_to_move
.update(dict(thumb_files
))
3171 infofn
= self
.prepare_filename(info_dict
, 'infojson')
3172 _infojson_written
= self
._write
_info
_json
('video', info_dict
, infofn
)
3173 if _infojson_written
:
3174 info_dict
['infojson_filename'] = infofn
3175 # For backward compatibility, even though it was a private field
3176 info_dict
['__infojson_filename'] = infofn
3177 elif _infojson_written
is None:
3180 # Note: Annotations are deprecated
3182 if self
.params
.get('writeannotations', False):
3183 annofn
= self
.prepare_filename(info_dict
, 'annotation')
3185 if not self
._ensure
_dir
_exists
(encodeFilename(annofn
)):
3187 if not self
.params
.get('overwrites', True) and os
.path
.exists(encodeFilename(annofn
)):
3188 self
.to_screen('[info] Video annotations are already present')
3189 elif not info_dict
.get('annotations'):
3190 self
.report_warning('There are no annotations to write.')
3193 self
.to_screen('[info] Writing video annotations to: ' + annofn
)
3194 with open(encodeFilename(annofn
), 'w', encoding
='utf-8') as annofile
:
3195 annofile
.write(info_dict
['annotations'])
3196 except (KeyError, TypeError):
3197 self
.report_warning('There are no annotations to write.')
3199 self
.report_error('Cannot write annotations file: ' + annofn
)
3202 # Write internet shortcut files
3203 def _write_link_file(link_type
):
3204 url
= try_get(info_dict
['webpage_url'], iri_to_uri
)
3206 self
.report_warning(
3207 f
'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
3209 linkfn
= replace_extension(self
.prepare_filename(info_dict
, 'link'), link_type
, info_dict
.get('ext'))
3210 if not self
._ensure
_dir
_exists
(encodeFilename(linkfn
)):
3212 if self
.params
.get('overwrites', True) and os
.path
.exists(encodeFilename(linkfn
)):
3213 self
.to_screen(f
'[info] Internet shortcut (.{link_type}) is already present')
3216 self
.to_screen(f
'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
3217 with open(encodeFilename(to_high_limit_path(linkfn
)), 'w', encoding
='utf-8',
3218 newline
='\r\n' if link_type
== 'url' else '\n') as linkfile
:
3219 template_vars
= {'url': url}
3220 if link_type
== 'desktop':
3221 template_vars
['filename'] = linkfn
[:-(len(link_type
) + 1)]
3222 linkfile
.write(LINK_TEMPLATES
[link_type
] % template_vars
)
3224 self
.report_error(f
'Cannot write internet shortcut {linkfn}')
3229 'url': self
.params
.get('writeurllink'),
3230 'webloc': self
.params
.get('writewebloclink'),
3231 'desktop': self
.params
.get('writedesktoplink'),
3233 if self
.params
.get('writelink'):
3234 link_type
= ('webloc' if sys
.platform
== 'darwin'
3235 else 'desktop' if sys
.platform
.startswith('linux')
3237 write_links
[link_type
] = True
3239 if any(should_write
and not _write_link_file(link_type
)
3240 for link_type
, should_write
in write_links
.items()):
3243 new_info
, files_to_move
= self
.pre_process(info_dict
, 'before_dl', files_to_move
)
3244 replace_info_dict(new_info
)
3246 if self
.params
.get('skip_download'):
3247 info_dict
['filepath'] = temp_filename
3248 info_dict
['__finaldir'] = os
.path
.dirname(os
.path
.abspath(encodeFilename(full_filename
)))
3249 info_dict
['__files_to_move'] = files_to_move
3250 replace_info_dict(self
.run_pp(MoveFilesAfterDownloadPP(self
, False), info_dict
))
3251 info_dict
['__write_download_archive'] = self
.params
.get('force_write_download_archive')
3254 info_dict
.setdefault('__postprocessors', [])
3257 def existing_video_file(*filepaths
):
3258 ext
= info_dict
.get('ext')
3259 converted
= lambda file: replace_extension(file, self
.params
.get('final_ext') or ext
, ext
)
3260 file = self
.existing_file(itertools
.chain(*zip(map(converted
, filepaths
), filepaths
)),
3261 default_overwrite
=False)
3263 info_dict
['ext'] = os
.path
.splitext(file)[1][1:]
3266 fd
, success
= None, True
3267 if info_dict
.get('protocol') or info_dict
.get('url'):
3268 fd
= get_suitable_downloader(info_dict
, self
.params
, to_stdout
=temp_filename
== '-')
3269 if fd
is not FFmpegFD
and 'no-direct-merge' not in self
.params
['compat_opts'] and (
3270 info_dict
.get('section_start') or info_dict
.get('section_end')):
3271 msg
= ('This format cannot be partially downloaded' if FFmpegFD
.available()
3272 else 'You have requested downloading the video partially, but ffmpeg is not installed')
3273 self
.report_error(f
'{msg}. Aborting')
3276 if info_dict
.get('requested_formats') is not None:
3277 old_ext
= info_dict
['ext']
3278 if self
.params
.get('merge_output_format') is None:
3279 if (info_dict
['ext'] == 'webm'
3280 and info_dict
.get('thumbnails')
3281 # check with type instead of pp_key, __name__, or isinstance
3282 # since we dont want any custom PPs to trigger this
3283 and any(type(pp
) == EmbedThumbnailPP
for pp
in self
._pps
['post_process'])): # noqa: E721
3284 info_dict
['ext'] = 'mkv'
3285 self
.report_warning(
3286 'webm doesn\'t support embedding a thumbnail, mkv will be used')
3287 new_ext
= info_dict
['ext']
3289 def correct_ext(filename
, ext
=new_ext
):
3292 filename_real_ext
= os
.path
.splitext(filename
)[1][1:]
3294 os
.path
.splitext(filename
)[0]
3295 if filename_real_ext
in (old_ext
, new_ext
)
3297 return f
'{filename_wo_ext}.{ext}'
3299 # Ensure filename always has a correct extension for successful merge
3300 full_filename
= correct_ext(full_filename
)
3301 temp_filename
= correct_ext(temp_filename
)
3302 dl_filename
= existing_video_file(full_filename
, temp_filename
)
3304 info_dict
['__real_download'] = False
3305 # NOTE: Copy so that original format dicts are not modified
3306 info_dict
['requested_formats'] = list(map(dict, info_dict
['requested_formats']))
3308 merger
= FFmpegMergerPP(self
)
3310 if dl_filename
is not None:
3311 self
.report_file_already_downloaded(dl_filename
)
3313 for f
in info_dict
['requested_formats'] if fd
!= FFmpegFD
else []:
3314 f
['filepath'] = fname
= prepend_extension(
3315 correct_ext(temp_filename
, info_dict
['ext']),
3316 'f%s' % f
['format_id'], info_dict
['ext'])
3317 downloaded
.append(fname
)
3318 info_dict
['url'] = '\n'.join(f
['url'] for f
in info_dict
['requested_formats'])
3319 success
, real_download
= self
.dl(temp_filename
, info_dict
)
3320 info_dict
['__real_download'] = real_download
3322 if self
.params
.get('allow_unplayable_formats'):
3323 self
.report_warning(
3324 'You have requested merging of multiple formats '
3325 'while also allowing unplayable formats to be downloaded. '
3326 'The formats won\'t be merged to prevent data corruption.')
3327 elif not merger
.available
:
3328 msg
= 'You have requested merging of multiple formats but ffmpeg is not installed'
3329 if not self
.params
.get('ignoreerrors'):
3330 self
.report_error(f
'{msg}. Aborting due to --abort-on-error')
3332 self
.report_warning(f
'{msg}. The formats won\'t be merged')
3334 if temp_filename
== '-':
3335 reason
= ('using a downloader other than ffmpeg' if FFmpegFD
.can_merge_formats(info_dict
, self
.params
)
3336 else 'but the formats are incompatible for simultaneous download' if merger
.available
3337 else 'but ffmpeg is not installed')
3338 self
.report_warning(
3339 f
'You have requested downloading multiple formats to stdout {reason}. '
3340 'The formats will be streamed one after the other')
3341 fname
= temp_filename
3342 for f
in info_dict
['requested_formats']:
3343 new_info
= dict(info_dict
)
3344 del new_info
['requested_formats']
3346 if temp_filename
!= '-':
3347 fname
= prepend_extension(
3348 correct_ext(temp_filename
, new_info
['ext']),
3349 'f%s' % f
['format_id'], new_info
['ext'])
3350 if not self
._ensure
_dir
_exists
(fname
):
3352 f
['filepath'] = fname
3353 downloaded
.append(fname
)
3354 partial_success
, real_download
= self
.dl(fname
, new_info
)
3355 info_dict
['__real_download'] = info_dict
['__real_download'] or real_download
3356 success
= success
and partial_success
3358 if downloaded
and merger
.available
and not self
.params
.get('allow_unplayable_formats'):
3359 info_dict
['__postprocessors'].append(merger
)
3360 info_dict
['__files_to_merge'] = downloaded
3361 # Even if there were no downloads, it is being merged only now
3362 info_dict
['__real_download'] = True
3364 for file in downloaded
:
3365 files_to_move
[file] = None
3367 # Just a single file
3368 dl_filename
= existing_video_file(full_filename
, temp_filename
)
3369 if dl_filename
is None or dl_filename
== temp_filename
:
3370 # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
3371 # So we should try to resume the download
3372 success
, real_download
= self
.dl(temp_filename
, info_dict
)
3373 info_dict
['__real_download'] = real_download
3375 self
.report_file_already_downloaded(dl_filename
)
3377 dl_filename
= dl_filename
or temp_filename
3378 info_dict
['__finaldir'] = os
.path
.dirname(os
.path
.abspath(encodeFilename(full_filename
)))
3380 except network_exceptions
as err
:
3381 self
.report_error('unable to download video data: %s' % error_to_compat_str(err
))
3383 except OSError as err
:
3384 raise UnavailableVideoError(err
)
3385 except (ContentTooShortError
, ) as err
:
3386 self
.report_error(f
'content too short (expected {err.expected} bytes and served {err.downloaded})')
3389 self
._raise
_pending
_errors
(info_dict
)
3390 if success
and full_filename
!= '-':
3394 fixup_policy
= self
.params
.get('fixup')
3395 vid
= info_dict
['id']
3397 if fixup_policy
in ('ignore', 'never'):
3399 elif fixup_policy
== 'warn':
3401 elif fixup_policy
!= 'force':
3402 assert fixup_policy
in ('detect_or_warn', None)
3403 if not info_dict
.get('__real_download'):
3406 def ffmpeg_fixup(cndn
, msg
, cls
):
3407 if not (do_fixup
and cndn
):
3409 elif do_fixup
== 'warn':
3410 self
.report_warning(f
'{vid}: {msg}')
3414 info_dict
['__postprocessors'].append(pp
)
3416 self
.report_warning(f
'{vid}: {msg}. Install ffmpeg to fix this automatically')
3418 stretched_ratio
= info_dict
.get('stretched_ratio')
3419 ffmpeg_fixup(stretched_ratio
not in (1, None),
3420 f
'Non-uniform pixel ratio {stretched_ratio}',
3421 FFmpegFixupStretchedPP
)
3423 downloader
= get_suitable_downloader(info_dict
, self
.params
) if 'protocol' in info_dict
else None
3424 downloader
= downloader
.FD_NAME
if downloader
else None
3426 ext
= info_dict
.get('ext')
3427 postprocessed_by_ffmpeg
= info_dict
.get('requested_formats') or any((
3428 isinstance(pp
, FFmpegVideoConvertorPP
)
3429 and resolve_recode_mapping(ext
, pp
.mapping
)[0] not in (ext
, None)
3430 ) for pp
in self
._pps
['post_process'])
3432 if not postprocessed_by_ffmpeg
:
3433 ffmpeg_fixup(ext
== 'm4a' and info_dict
.get('container') == 'm4a_dash',
3434 'writing DASH m4a. Only some players support this container',
3436 ffmpeg_fixup(downloader
== 'hlsnative' and not self
.params
.get('hls_use_mpegts')
3437 or info_dict
.get('is_live') and self
.params
.get('hls_use_mpegts') is None,
3438 'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
3440 ffmpeg_fixup(info_dict
.get('is_live') and downloader
== 'dashsegments',
3441 'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP
)
3443 ffmpeg_fixup(downloader
== 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP
)
3444 ffmpeg_fixup(downloader
== 'web_socket_fragment', 'Malformed duration detected', FFmpegFixupDurationPP
)
3448 replace_info_dict(self
.post_process(dl_filename
, info_dict
, files_to_move
))
3449 except PostProcessingError
as err
:
3450 self
.report_error('Postprocessing: %s' % str(err
))
3453 for ph
in self
._post
_hooks
:
3454 ph(info_dict
['filepath'])
3455 except Exception as err
:
3456 self
.report_error('post hooks: %s' % str(err
))
3458 info_dict
['__write_download_archive'] = True
3460 assert info_dict
is original_infodict
# Make sure the info_dict was modified in-place
3461 if self
.params
.get('force_write_download_archive'):
3462 info_dict
['__write_download_archive'] = True
3463 check_max_downloads()
def __download_wrapper(self, func):
    """Return `func` wrapped with the error handling shared by all download entry points.

    The wrapper reports UnavailableVideoError through report_error(), prints
    DownloadCancelled to the screen (re-raising it unless 'break_per_url' is
    set, in which case the download counter is reset), and on success dumps
    the sanitized result as JSON when 'dump_single_json' is enabled.
    The wrapper itself always returns None.
    """
    @functools.wraps(func)
    def guarded(*args, **kwargs):
        try:
            result = func(*args, **kwargs)
        except UnavailableVideoError as exc:
            self.report_error(exc)
            return
        except DownloadCancelled as exc:
            self.to_screen(f'[info] {exc}')
            if self.params.get('break_per_url'):
                self._num_downloads = 0
                return
            raise

        if self.params.get('dump_single_json', False):
            self.post_extract(result)
            self.to_stdout(json.dumps(self.sanitize_info(result)))

    return guarded
def download(self, url_list):
    """Download a given list of URLs."""
    urls = variadic(url_list)  # Passing a single URL is a common mistake
    template = self.params['outtmpl']['default']

    # Several URLs written to one fixed (non-templated, non-stdout) name would overwrite each other
    is_fixed_name = template != '-' and '%' not in template
    if len(urls) > 1 and is_fixed_name and self.params.get('max_downloads') != 1:
        raise SameFileError(template)

    for single_url in urls:
        extract = self.__download_wrapper(self.extract_info)
        extract(
            single_url,
            force_generic_extractor=self.params.get('force_generic_extractor', False))

    return self._download_retcode
def download_with_info_file(self, info_filename):
    """Process the info dict(s) stored as JSON in `info_filename`.

    If processing a stored info dict fails, the download is retried from
    its 'webpage_url'; when there is no such URL the error is re-raised.
    Returns the accumulated download return code.
    """
    reader = fileinput.FileInput(
        [info_filename], mode='r',
        openhook=fileinput.hook_encoded('utf-8'))
    with contextlib.closing(reader) as lines:
        # FileInput doesn't have a read method, we can't call json.load
        loaded = json.loads('\n'.join(lines))
        infos = []
        for raw_info in variadic(loaded):
            infos.append(self.sanitize_info(raw_info, self.params.get('clean_infojson', True)))

    for info in infos:
        self._load_cookies(info.get('cookies'), from_headers=False)
        self._load_cookies(traverse_obj(info.get('http_headers'), 'Cookie', casesense=False))  # compat
        try:
            self.__download_wrapper(self.process_ie_result)(info, download=True)
        except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as exc:
            if not isinstance(exc, EntryNotInPlaylist):
                self.to_stderr('\r')
            fallback_url = info.get('webpage_url')
            if fallback_url is None:
                raise
            self.report_warning(f'The info failed to download: {exc}; trying with URL {fallback_url}')
            self.download([fallback_url])

    return self._download_retcode
3522 def sanitize_info(info_dict
, remove_private_keys
=False):
3523 ''' Sanitize the infodict for converting to json '''
3524 if info_dict
is None:
3526 info_dict
.setdefault('epoch', int(time
.time()))
3527 info_dict
.setdefault('_type', 'video')
3528 info_dict
.setdefault('_version', {
3529 'version': __version__
,
3530 'current_git_head': current_git_head(),
3531 'release_git_head': RELEASE_GIT_HEAD
,
3532 'repository': REPOSITORY
,
3535 if remove_private_keys
:
3536 reject
= lambda k
, v
: v
is None or k
.startswith('__') or k
in {
3537 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
3538 'entries', 'filepath', '_filename', 'filename', 'infojson_filename', 'original_url',
3539 'playlist_autonumber', '_format_sort_fields',
3542 reject
= lambda k
, v
: False
3545 if isinstance(obj
, dict):
3546 return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
3547 elif isinstance(obj
, (list, tuple, set, LazyList
)):
3548 return list(map(filter_fn
, obj
))
3549 elif obj
is None or isinstance(obj
, (str, int, float, bool)):
3554 return filter_fn(info_dict
)
@staticmethod
def filter_requested_info(info_dict, actually_filter=True):
    ''' Alias of sanitize_info for backward compatibility '''
    # `actually_filter` maps onto sanitize_info's `remove_private_keys`
    return YoutubeDL.sanitize_info(info_dict, remove_private_keys=actually_filter)
3561 def _delete_downloaded_files(self
, *files_to_delete
, info
={}, msg
=None):
3562 for filename
in set(filter(None, files_to_delete
)):
3564 self
.to_screen(msg
% filename
)
3568 self
.report_warning(f
'Unable to delete file {filename}')
3569 if filename
in info
.get('__files_to_move', []): # NB: Delete even if None
3570 del info
['__files_to_move'][filename
]
3573 def post_extract(info_dict
):
3574 def actual_post_extract(info_dict
):
3575 if info_dict
.get('_type') in ('playlist', 'multi_video'):
3576 for video_dict
in info_dict
.get('entries', {}):
3577 actual_post_extract(video_dict
or {})
3580 post_extractor
= info_dict
.pop('__post_extractor', None) or (lambda: {})
3581 info_dict
.update(post_extractor())
3583 actual_post_extract(info_dict
or {})
def run_pp(self, pp, infodict):
    """Run a single postprocessor and handle the files it marks as obsolete.

    Returns the info dict produced by `pp.run()`. A PostProcessingError is
    reported and swallowed only when 'ignoreerrors' is exactly True;
    otherwise it propagates. Obsolete files are either kept (registered in
    '__files_to_move' when 'keepvideo' is set) or deleted.
    """
    infodict.setdefault('__files_to_move', {})
    try:
        obsolete_files, infodict = pp.run(infodict)
    except PostProcessingError as err:
        # Must be True and not 'only_download'
        if self.params.get('ignoreerrors') is not True:
            raise
        self.report_error(err)
        return infodict

    if obsolete_files:
        if self.params.get('keepvideo', False):
            for path in obsolete_files:
                infodict['__files_to_move'].setdefault(path, '')
        else:
            self._delete_downloaded_files(
                *obsolete_files, info=infodict, msg='Deleting original file %s (pass -k to keep)')
    return infodict
def run_all_pps(self, key, info, *, additional_pps=None):
    """Run `additional_pps` followed by the postprocessors registered under `key`.

    Each postprocessor receives the info dict returned by the previous one;
    the final info dict is returned. For every key except 'video' the
    forced printing for that key is done first.
    """
    if key != 'video':
        self._forceprint(key, info)

    processors = (additional_pps or []) + self._pps[key]
    for processor in processors:
        info = self.run_pp(processor, info)
    return info
def pre_process(self, ie_info, key='pre_process', files_to_move=None):
    """Run the postprocessors registered under `key` on a shallow copy of `ie_info`.

    Returns a tuple (info, files_to_move). A PostProcessingError does not
    propagate: it is recorded under '__pending_error' (unless one is already
    set) and reported as a non-error message.
    """
    working = dict(ie_info)
    working['__files_to_move'] = files_to_move or {}
    try:
        working = self.run_all_pps(key, working)
    except PostProcessingError as err:
        message = f'Preprocessing: {err}'
        working.setdefault('__pending_error', message)
        self.report_error(message, is_error=False)

    moved = working.pop('__files_to_move', None)
    return working, moved
def post_process(self, filename, info, files_to_move=None):
    """Run all the postprocessors on the given file."""
    info['filepath'] = filename
    info['__files_to_move'] = files_to_move or {}

    # 'post_process' stage, preceded by the per-download postprocessors stored in the info dict
    extra_pps = info.get('__postprocessors')
    info = self.run_all_pps('post_process', info, additional_pps=extra_pps)

    # Move the files, after which the move bookkeeping is no longer needed
    mover = MoveFilesAfterDownloadPP(self)
    info = self.run_pp(mover, info)
    del info['__files_to_move']

    return self.run_all_pps('after_move', info)
3635 def _make_archive_id(self
, info_dict
):
3636 video_id
= info_dict
.get('id')
3639 # Future-proof against any change in case
3640 # and backwards compatibility with prior versions
3641 extractor
= info_dict
.get('extractor_key') or info_dict
.get('ie_key') # key in a playlist
3642 if extractor
is None:
3643 url
= str_or_none(info_dict
.get('url'))
3646 # Try to find matching extractor for the URL and take its ie_key
3647 for ie_key
, ie
in self
._ies
.items():
3648 if ie
.suitable(url
):
3653 return make_archive_id(extractor
, video_id
)
def in_download_archive(self, info_dict):
    """Return whether the video's current or any of its old archive IDs is in the archive."""
    if not self.archive:
        return False

    candidate_ids = [self._make_archive_id(info_dict)]
    candidate_ids += info_dict.get('_old_archive_ids') or []
    for archive_id in candidate_ids:
        if archive_id in self.archive:
            return True
    return False
def record_download_archive(self, info_dict):
    """Add the video's archive ID to the in-memory archive and, if it is a path, to the archive file.

    Does nothing when the 'download_archive' param is unset.
    """
    archive_target = self.params.get('download_archive')
    if archive_target is None:
        return

    archive_id = self._make_archive_id(info_dict)
    assert archive_id

    self.write_debug(f'Adding to archive: {archive_id}')
    if is_path_like(archive_target):
        with locked_file(archive_target, 'a', encoding='utf-8') as archive_file:
            archive_file.write(archive_id + '\n')
    self.archive.add(archive_id)
3677 def format_resolution(format
, default
='unknown'):
3678 if format
.get('vcodec') == 'none' and format
.get('acodec') != 'none':
3680 if format
.get('resolution') is not None:
3681 return format
['resolution']
3682 if format
.get('width') and format
.get('height'):
3683 return '%dx%d' % (format
['width'], format
['height'])
3684 elif format
.get('height'):
3685 return '%sp' % format
['height']
3686 elif format
.get('width'):
3687 return '%dx?' % format
['width']
3690 def _list_format_headers(self
, *headers
):
3691 if self
.params
.get('listformats_table', True) is not False:
3692 return [self
._format
_out
(header
, self
.Styles
.HEADERS
) for header
in headers
]
3695 def _format_note(self
, fdict
):
3697 if fdict
.get('ext') in ['f4f', 'f4m']:
3698 res
+= '(unsupported)'
3699 if fdict
.get('language'):
3702 res
+= '[%s]' % fdict
['language']
3703 if fdict
.get('format_note') is not None:
3706 res
+= fdict
['format_note']
3707 if fdict
.get('tbr') is not None:
3710 res
+= '%4dk' % fdict
['tbr']
3711 if fdict
.get('container') is not None:
3714 res
+= '%s container' % fdict
['container']
3715 if (fdict
.get('vcodec') is not None
3716 and fdict
.get('vcodec') != 'none'):
3719 res
+= fdict
['vcodec']
3720 if fdict
.get('vbr') is not None:
3722 elif fdict
.get('vbr') is not None and fdict
.get('abr') is not None:
3724 if fdict
.get('vbr') is not None:
3725 res
+= '%4dk' % fdict
['vbr']
3726 if fdict
.get('fps') is not None:
3729 res
+= '%sfps' % fdict
['fps']
3730 if fdict
.get('acodec') is not None:
3733 if fdict
['acodec'] == 'none':
3736 res
+= '%-5s' % fdict
['acodec']
3737 elif fdict
.get('abr') is not None:
3741 if fdict
.get('abr') is not None:
3742 res
+= '@%3dk' % fdict
['abr']
3743 if fdict
.get('asr') is not None:
3744 res
+= ' (%5dHz)' % fdict
['asr']
3745 if fdict
.get('filesize') is not None:
3748 res
+= format_bytes(fdict
['filesize'])
3749 elif fdict
.get('filesize_approx') is not None:
3752 res
+= '~' + format_bytes(fdict
['filesize_approx'])
3755 def _get_formats(self
, info_dict
):
3756 if info_dict
.get('formats') is None:
3757 if info_dict
.get('url') and info_dict
.get('_type', 'video') == 'video':
3760 return info_dict
['formats']
def render_formats_table(self, info_dict):
    """Render the table of available formats for `info_dict`.

    Returns None when there are no formats. When the 'listformats_table'
    param is explicitly False, the compact four-column legacy table is
    rendered; otherwise the detailed table is. Formats whose 'preference'
    is below -1000 are left out of both tables.
    """
    formats = self._get_formats(info_dict)
    if not formats:
        return
    # `not x is not False` parses as `not (x is not False)`, i.e. "x is False"
    if not self.params.get('listformats_table', True) is not False:
        table = [
            [
                format_field(f, 'format_id'),
                format_field(f, 'ext'),
                self.format_resolution(f),
                self._format_note(f)
            ] for f in formats if (f.get('preference') or 0) >= -1000]
        return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)

    def simplified_codec(f, field):
        # Cell text for the VCODEC/ACODEC column of a single format
        assert field in ('acodec', 'vcodec')
        codec = f.get(field)
        if not codec:
            return 'unknown'
        elif codec != 'none':
            # Keep at most the first four dot-separated components of the codec string
            return '.'.join(codec.split('.')[:4])

        # From here on, this stream is explicitly absent (codec == 'none')
        if field == 'vcodec' and f.get('acodec') == 'none':
            return 'images'
        elif field == 'acodec' and f.get('vcodec') == 'none':
            return ''
        return self._format_out('audio only' if field == 'vcodec' else 'video only',
                                self.Styles.SUPPRESS)

    # Column separator; '|' is passed as the alternative to the box-drawing character
    delim = self._format_out('\u2502', self.Styles.DELIM, '|', test_encoding=True)
    table = [
        [
            self._format_out(format_field(f, 'format_id'), self.Styles.ID),
            format_field(f, 'ext'),
            format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
            format_field(f, 'fps', '\t%d', func=round),
            format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
            format_field(f, 'audio_channels', '\t%s'),
            delim, (
                # Exact size, else approximate size, else an estimate from duration * tbr
                format_field(f, 'filesize', ' \t%s', func=format_bytes)
                or format_field(f, 'filesize_approx', '≈\t%s', func=format_bytes)
                or format_field(try_call(lambda: format_bytes(int(info_dict['duration'] * f['tbr'] * (1024 / 8)))),
                                None, self._format_out('~\t%s', self.Styles.SUPPRESS))),
            format_field(f, 'tbr', '\t%dk', func=round),
            shorten_protocol_name(f.get('protocol', '')),
            delim,
            simplified_codec(f, 'vcodec'),
            format_field(f, 'vbr', '\t%dk', func=round),
            simplified_codec(f, 'acodec'),
            format_field(f, 'abr', '\t%dk', func=round),
            format_field(f, 'asr', '\t%s', func=format_decimal_suffix),
            join_nonempty(format_field(f, 'language', '[%s]'), join_nonempty(
                self._format_out('UNSUPPORTED', self.Styles.BAD_FORMAT) if f.get('ext') in ('f4f', 'f4m') else None,
                (self._format_out('Maybe DRM', self.Styles.WARNING) if f.get('has_drm') == 'maybe'
                 else self._format_out('DRM', self.Styles.BAD_FORMAT) if f.get('has_drm') else None),
                format_field(f, 'format_note'),
                format_field(f, 'container', ignore=(None, f.get('ext'))),
                delim=', '), delim=' '),
        ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
    # Header cells are listed in the same order as the row cells above
    header_line = self._list_format_headers(
        'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', 'CH', delim, '\tFILESIZE', '\tTBR', 'PROTO',
        delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')

    return render_table(
        header_line, table, hide_empty=True,
        delim=self._format_out('\u2500', self.Styles.DELIM, '-', test_encoding=True))
def render_thumbnails_table(self, info_dict):
    """Render the table of thumbnails of `info_dict`, or return None if it has none."""
    thumbnails = list(info_dict.get('thumbnails') or [])
    if not thumbnails:
        return None

    rows = []
    for thumb in thumbnails:
        rows.append([
            thumb.get('id'),
            thumb.get('width') or 'unknown',
            thumb.get('height') or 'unknown',
            thumb['url'],
        ])
    return render_table(self._list_format_headers('ID', 'Width', 'Height', 'URL'), rows)
def render_subtitles_table(self, video_id, subtitles):
    """Render the table of subtitle languages and formats, or return None if there are none.

    `subtitles` maps each language to a list of format dicts; formats are
    listed in reverse order.
    """
    def build_row(lang, formats):
        pairs = ((fmt['ext'], fmt.get('name') or 'unknown') for fmt in reversed(formats))
        exts, names = zip(*pairs)
        # When every format shares one name, show it once (or not at all if it is unknown)
        if len(set(names)) == 1:
            names = [] if names[0] == 'unknown' else names[:1]
        return [lang, ', '.join(names), ', '.join(exts)]

    if not subtitles:
        return None

    rows = [build_row(lang, formats) for lang, formats in subtitles.items()]
    return render_table(
        self._list_format_headers('Language', 'Name', 'Formats'),
        rows,
        hide_empty=True)
3851 def __list_table(self
, video_id
, name
, func
, *args
):
3854 self
.to_screen(f
'{video_id} has no {name}')
3856 self
.to_screen(f
'[info] Available {name} for {video_id}:')
3857 self
.to_stdout(table
)
def list_formats(self, info_dict):
    """Print the formats table of `info_dict`, or a notice if there is nothing to list."""
    video_id = info_dict['id']
    self.__list_table(video_id, 'formats', self.render_formats_table, info_dict)
def list_thumbnails(self, info_dict):
    """Print the thumbnails table of `info_dict`, or a notice if there is nothing to list."""
    video_id = info_dict['id']
    self.__list_table(video_id, 'thumbnails', self.render_thumbnails_table, info_dict)
def list_subtitles(self, video_id, subtitles, name='subtitles'):
    """Print the subtitles table for `video_id`; `name` is the label used in the messages."""
    renderer = self.render_subtitles_table
    self.__list_table(video_id, name, renderer, video_id, subtitles)
def print_debug_header(self):
    """Write the diagnostic header (encodings, version, executables, libraries, plugins).

    Does nothing unless the 'verbose' param is set. Output goes to the
    'logger' param's debug() when a logger is configured, and is otherwise
    written as '[debug] ...' lines.
    """
    if not self.params.get('verbose'):
        return

    from . import _IN_CLI  # Must be delayed import

    # These imports can be slow. So import them only as needed
    from .extractor.extractors import _LAZY_LOADER
    from .extractor.extractors import (
        _PLUGIN_CLASSES as plugin_ies,
        _PLUGIN_OVERRIDES as plugin_ie_overrides
    )

    def get_encoding(stream):
        # Describe the encoding of `stream`, annotated with terminal limitations
        ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
        additional_info = []
        if os.environ.get('TERM', '').lower() == 'dumb':
            additional_info.append('dumb')
        if not supports_terminal_sequences(stream):
            from .utils import WINDOWS_VT_MODE  # Must be imported locally
            additional_info.append('No VT' if WINDOWS_VT_MODE is False else 'No ANSI')
        if additional_info:
            ret = f'{ret} ({",".join(additional_info)})'
        return ret

    encoding_str = 'Encodings: locale %s, fs %s, pref %s, %s' % (
        locale.getpreferredencoding(),
        sys.getfilesystemencoding(),
        self.get_encoding(),
        ', '.join(
            f'{key} {get_encoding(stream)}' for key, stream in self._out_files.items_
            if stream is not None and key != 'console')
    )

    logger = self.params.get('logger')
    if logger:
        write_debug = lambda msg: logger.debug(f'[debug] {msg}')
        write_debug(encoding_str)
    else:
        # The encodings line is written with encoding=None, unlike the lines that follow
        write_string(f'[debug] {encoding_str}\n', encoding=None)
        write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')

    source = detect_variant()
    if VARIANT not in (None, 'pip'):
        source += '*'
    klass = type(self)
    write_debug(join_nonempty(
        f'{"yt-dlp" if REPOSITORY == "yt-dlp/yt-dlp" else REPOSITORY} version',
        f'{CHANNEL}@{__version__}',
        f'[{RELEASE_GIT_HEAD[:9]}]' if RELEASE_GIT_HEAD else '',
        '' if source == 'unknown' else f'({source})',
        '' if _IN_CLI else 'API' if klass == YoutubeDL else f'API:{self.__module__}.{klass.__qualname__}',
        delim=' '))

    # Params are only dumped here for API usage
    if not _IN_CLI:
        write_debug(f'params: {self.params}')

    if not _LAZY_LOADER:
        if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
            write_debug('Lazy loading extractors is forcibly disabled')
        else:
            write_debug('Lazy loading extractors is disabled')
    if self.params['compat_opts']:
        write_debug('Compatibility options: %s' % ', '.join(self.params['compat_opts']))

    if current_git_head():
        write_debug(f'Git HEAD: {current_git_head()}')
    write_debug(system_identifier())

    exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
    # Keep only the names of the features whose value is truthy
    ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
    if ffmpeg_features:
        exe_versions['ffmpeg'] += ' (%s)' % ','.join(sorted(ffmpeg_features))

    exe_versions['rtmpdump'] = rtmpdump_version()
    exe_versions['phantomjs'] = PhantomJSwrapper._version()
    # Executables with a falsy version are omitted from the listing
    exe_str = ', '.join(
        f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
    ) or 'none'
    write_debug('exe versions: %s' % exe_str)

    from .compat.compat_utils import get_package_info
    from .dependencies import available_dependencies

    write_debug('Optional libraries: %s' % (', '.join(sorted({
        join_nonempty(*get_package_info(m)) for m in available_dependencies.values()
    })) or 'none'))

    write_debug(f'Proxy map: {self.proxies}')
    # write_debug(f'Request Handlers: {", ".join(rh.RH_NAME for rh in self._request_director.handlers)}')
    for plugin_type, plugins in {'Extractor': plugin_ies, 'Post-Processor': plugin_pps}.items():
        # NB: `klass` and `plugins` below are local to the comprehension/generator scopes
        display_list = ['%s%s' % (
            klass.__name__, '' if klass.__name__ == name else f' as {name}')
            for name, klass in plugins.items()]
        if plugin_type == 'Extractor':
            display_list.extend(f'{plugins[-1].IE_NAME.partition("+")[2]} ({parent.__name__})'
                                for parent, plugins in plugin_ie_overrides.items())
        if not display_list:
            continue
        write_debug(f'{plugin_type} Plugins: {", ".join(sorted(display_list))}')

    plugin_dirs = plugin_directories()
    if plugin_dirs:
        write_debug(f'Plugin directories: {plugin_dirs}')

    # Not implemented
    # The `False and` guard means the block below never runs
    if False and self.params.get('call_home'):
        ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode()
        write_debug('Public IP address: %s' % ipaddr)
        latest_version = self.urlopen(
            'https://yt-dl.org/latest/version').read().decode()
        if version_tuple(latest_version) > version_tuple(__version__):
            self.report_warning(
                'You are using an outdated version (newest version: %s)! '
                'See https://yt-dl.org/update if you need help updating.' %
                latest_version)
@functools.cached_property
def proxies(self):
    """Global proxy configuration"""
    explicit = self.params.get('proxy')
    if explicit is not None:
        # An empty 'proxy' param is mapped to the '__noproxy__' marker
        return {'all': '__noproxy__' if explicit == '' else explicit}

    detected = urllib.request.getproxies()
    # compat. Set HTTPS_PROXY to __noproxy__ to revert
    if 'http' in detected and 'https' not in detected:
        detected['https'] = detected['http']
    return detected
@functools.cached_property
def cookiejar(self):
    """Global cookiejar instance"""
    cookie_file = self.params.get('cookiefile')
    browser_spec = self.params.get('cookiesfrombrowser')
    return load_cookies(cookie_file, browser_spec, self)
4010 Get a urllib OpenerDirector from the Urllib handler (deprecated).
4012 self
.deprecation_warning('YoutubeDL._opener() is deprecated, use YoutubeDL.urlopen()')
4013 handler
= self
._request
_director
.handlers
['Urllib']
4014 return handler
._get
_instance
(cookiejar
=self
.cookiejar
, proxies
=self
.proxies
)
def urlopen(self, req):
    """ Start an HTTP download

    `req` may be a URL string, a Request, or (deprecated) a
    urllib.request.Request; it is normalized to a Request and sent through
    the request director. Some lower-level errors are re-raised as
    RequestError with a hint about the relevant command-line option.
    """
    if isinstance(req, str):
        req = Request(req)
    elif isinstance(req, urllib.request.Request):
        self.deprecation_warning(
            'Passing a urllib.request.Request object to YoutubeDL.urlopen() is deprecated. '
            'Use yt_dlp.networking.common.Request instead.')
        req = urllib_req_to_req(req)
    assert isinstance(req, Request)

    # compat: Assume user:pass url params are basic auth
    url, basic_auth_header = extract_basic_auth(req.url)
    if basic_auth_header:
        req.headers['Authorization'] = basic_auth_header
    req.url = sanitize_url(url)

    clean_proxies(proxies=req.proxies, headers=req.headers)
    clean_headers(req.headers)

    try:
        return self._request_director.send(req)
    except NoSupportingHandlers as e:
        for ue in e.unsupported_errors:
            # Entries without both a handler and a message cannot be matched below
            if not (ue.handler and ue.msg):
                continue
            if ue.handler.RH_KEY == 'Urllib' and 'unsupported url scheme: "file"' in ue.msg.lower():
                raise RequestError(
                    'file:// URLs are disabled by default in yt-dlp for security reasons. '
                    'Use --enable-file-urls to enable at your own risk.', cause=ue) from ue
        # No more specific explanation applies; propagate the original error
        raise
    except SSLError as e:
        if 'UNSAFE_LEGACY_RENEGOTIATION_DISABLED' in str(e):
            raise RequestError('UNSAFE_LEGACY_RENEGOTIATION_DISABLED: Try using --legacy-server-connect', cause=e) from e
        elif 'SSLV3_ALERT_HANDSHAKE_FAILURE' in str(e):
            raise RequestError(
                'SSLV3_ALERT_HANDSHAKE_FAILURE: The server may not support the current cipher list. '
                'Try using --legacy-server-connect', cause=e) from e
        raise
    except HTTPError as e:  # TODO: Remove in a future release
        raise _CompatHTTPError(e) from e
def build_request_director(self, handlers):
    """Create a RequestDirector and register an instance of each class in `handlers`.

    Every handler is constructed with the same logger, cleaned copies of the
    'http_headers' param and of the global proxies, the global cookiejar, and
    the networking options taken from the params.
    """
    logger = _YDLLogger(self)
    # Work on copies so that cleaning does not modify the params or the cached proxies
    headers = self.params.get('http_headers').copy()
    proxies = self.proxies.copy()
    clean_headers(headers)
    clean_proxies(proxies, headers)

    director = RequestDirector(logger=logger, verbose=self.params.get('debug_printtraffic'))
    for handler in handlers:
        director.add_handler(handler(
            logger=logger,
            headers=headers,
            cookiejar=self.cookiejar,
            proxies=proxies,
            prefer_system_certs='no-certifi' in self.params['compat_opts'],
            verify=not self.params.get('nocheckcertificate'),
            # Maps handler keyword argument -> name of the param it is read from
            **traverse_obj(self.params, {
                'verbose': 'debug_printtraffic',
                'source_address': 'source_address',
                'timeout': 'socket_timeout',
                'legacy_ssl_support': 'legacy_server_connect',
                'enable_file_urls': 'enable_file_urls',
                'client_cert': {
                    'client_certificate': 'client_certificate',
                    'client_certificate_key': 'client_certificate_key',
                    'client_certificate_password': 'client_certificate_password',
                },
            }),
        ))
    return director
def encode(self, s):
    """Encode `s` with the configured encoding; bytes are returned unchanged.

    A UnicodeEncodeError is re-raised with a hint about the --encoding
    option appended to its reason.
    """
    if not isinstance(s, bytes):
        try:
            return s.encode(self.get_encoding())
        except UnicodeEncodeError as err:
            err.reason += '. Check your system encoding configuration or use the --encoding option.'
            raise
    return s  # Already encoded
def get_encoding(self):
    """Return the 'encoding' param, falling back to preferredencoding() when it is unset."""
    configured = self.params.get('encoding')
    return preferredencoding() if configured is None else configured
4105 def _write_info_json(self
, label
, ie_result
, infofn
, overwrite
=None):
4106 ''' Write infojson and returns True = written, 'exists' = Already exists, False = skip, None = error '''
4107 if overwrite
is None:
4108 overwrite
= self
.params
.get('overwrites', True)
4109 if not self
.params
.get('writeinfojson'):
4112 self
.write_debug(f
'Skipping writing {label} infojson')
4114 elif not self
._ensure
_dir
_exists
(infofn
):
4116 elif not overwrite
and os
.path
.exists(infofn
):
4117 self
.to_screen(f
'[info] {label.title()} metadata is already present')
4120 self
.to_screen(f
'[info] Writing {label} metadata as JSON to: {infofn}')
4122 write_json_file(self
.sanitize_info(ie_result
, self
.params
.get('clean_infojson', True)), infofn
)
4125 self
.report_error(f
'Cannot write {label} metadata to JSON file {infofn}')
4128 def _write_description(self
, label
, ie_result
, descfn
):
4129 ''' Write description and returns True = written, False = skip, None = error '''
4130 if not self
.params
.get('writedescription'):
4133 self
.write_debug(f
'Skipping writing {label} description')
4135 elif not self
._ensure
_dir
_exists
(descfn
):
4137 elif not self
.params
.get('overwrites', True) and os
.path
.exists(descfn
):
4138 self
.to_screen(f
'[info] {label.title()} description is already present')
4139 elif ie_result
.get('description') is None:
4140 self
.to_screen(f
'[info] There\'s no {label} description to write')
4144 self
.to_screen(f
'[info] Writing {label} description to: {descfn}')
4145 with open(encodeFilename(descfn
), 'w', encoding
='utf-8') as descfile
:
4146 descfile
.write(ie_result
['description'])
4148 self
.report_error(f
'Cannot write {label} description file {descfn}')
def _write_subtitles(self, info_dict, filename):
    ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
    ret = []
    subtitles = info_dict.get('requested_subtitles')
    if not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        return ret
    elif not subtitles:
        self.to_screen('[info] There are no subtitles for the requested languages')
        return ret
    sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
    if not sub_filename_base:
        self.to_screen('[info] Skipping writing video subtitles')
        return ret

    for sub_lang, sub_info in subtitles.items():
        sub_format = sub_info['ext']
        # `sub_filename` is derived from `filename` and is where the subtitle is written;
        # `sub_filename_final` is derived from the 'subtitle' output template
        sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
        sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
        existing_sub = self.existing_file((sub_filename_final, sub_filename))
        if existing_sub:
            self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
            sub_info['filepath'] = existing_sub
            ret.append((existing_sub, sub_filename_final))
            continue

        self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
        # Subtitle content that is already available in 'data' is written directly
        if sub_info.get('data') is not None:
            try:
                # Use newline='' to prevent conversion of newline characters
                # See https://github.com/ytdl-org/youtube-dl/issues/10268
                with open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
                    subfile.write(sub_info['data'])
                sub_info['filepath'] = sub_filename
                ret.append((sub_filename, sub_filename_final))
                continue
            except OSError:
                self.report_error(f'Cannot write video subtitles file {sub_filename}')
                return None

        # Otherwise the subtitle is downloaded, using a copy so that the
        # default headers are not stored back into `sub_info`
        try:
            sub_copy = sub_info.copy()
            sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
            self.dl(sub_filename, sub_copy, subtitle=True)
            sub_info['filepath'] = sub_filename
            ret.append((sub_filename, sub_filename_final))
        except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
            msg = f'Unable to download video subtitles for {sub_lang!r}: {err}'
            if self.params.get('ignoreerrors') is not True:  # False or 'only_download'
                if not self.params.get('ignoreerrors'):
                    self.report_error(msg)
                raise DownloadError(msg)
            self.report_warning(msg)
    return ret
4208 def _write_thumbnails(self
, label
, info_dict
, filename
, thumb_filename_base
=None):
4209 ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
4210 write_all
= self
.params
.get('write_all_thumbnails', False)
4211 thumbnails
, ret
= [], []
4212 if write_all
or self
.params
.get('writethumbnail', False):
4213 thumbnails
= info_dict
.get('thumbnails') or []
4215 self
.to_screen(f
'[info] There are no {label} thumbnails to download')
4217 multiple
= write_all
and len(thumbnails
) > 1
4219 if thumb_filename_base
is None:
4220 thumb_filename_base
= filename
4221 if thumbnails
and not thumb_filename_base
:
4222 self
.write_debug(f
'Skipping writing {label} thumbnail')
4225 for idx
, t
in list(enumerate(thumbnails
))[::-1]:
4226 thumb_ext
= (f
'{t["id"]}.' if multiple
else '') + determine_ext(t
['url'], 'jpg')
4227 thumb_display_id
= f
'{label} thumbnail {t["id"]}'
4228 thumb_filename
= replace_extension(filename
, thumb_ext
, info_dict
.get('ext'))
4229 thumb_filename_final
= replace_extension(thumb_filename_base
, thumb_ext
, info_dict
.get('ext'))
4231 existing_thumb
= self
.existing_file((thumb_filename_final
, thumb_filename
))
4233 self
.to_screen('[info] %s is already present' % (
4234 thumb_display_id
if multiple
else f
'{label} thumbnail').capitalize())
4235 t
['filepath'] = existing_thumb
4236 ret
.append((existing_thumb
, thumb_filename_final
))
4238 self
.to_screen(f
'[info] Downloading {thumb_display_id} ...')
4240 uf
= self
.urlopen(Request(t
['url'], headers
=t
.get('http_headers', {})))
4241 self
.to_screen(f
'[info] Writing {thumb_display_id} to: {thumb_filename}')
4242 with open(encodeFilename(thumb_filename
), 'wb') as thumbf
:
4243 shutil
.copyfileobj(uf
, thumbf
)
4244 ret
.append((thumb_filename
, thumb_filename_final
))
4245 t
['filepath'] = thumb_filename
4246 except network_exceptions
as err
:
4247 if isinstance(err
, HTTPError
) and err
.status
== 404:
4248 self
.to_screen(f
'[info] {thumb_display_id.title()} does not exist')
4250 self
.report_warning(f
'Unable to download {thumb_display_id}: {err}')
4252 if ret
and not write_all
: