24 from string
import ascii_letters
26 from .cache
import Cache
27 from .compat
import HAS_LEGACY
as compat_has_legacy
28 from .compat
import compat_os_name
, compat_shlex_quote
29 from .cookies
import load_cookies
30 from .downloader
import FFmpegFD
, get_suitable_downloader
, shorten_protocol_name
31 from .downloader
.rtmp
import rtmpdump_version
32 from .extractor
import gen_extractor_classes
, get_info_extractor
33 from .extractor
.openload
import PhantomJSwrapper
34 from .minicurses
import format_text
35 from .postprocessor
import _PLUGIN_CLASSES
as plugin_postprocessors
36 from .postprocessor
import (
38 FFmpegFixupDuplicateMoovPP
,
39 FFmpegFixupDurationPP
,
42 FFmpegFixupStretchedPP
,
43 FFmpegFixupTimestampPP
,
46 FFmpegVideoConvertorPP
,
47 MoveFilesAfterDownloadPP
,
50 from .postprocessor
.ffmpeg
import resolve_mapping
as resolve_recode_mapping
51 from .update
import detect_variant
76 PerRequestProxyHandler
,
83 UnavailableVideoError
,
84 YoutubeDLCookieProcessor
,
86 YoutubeDLRedirectHandler
,
101 format_decimal_suffix
,
118 register_socks_protocols
,
119 remove_terminal_sequences
,
130 supports_terminal_sequences
,
139 windows_enable_vt_mode
,
143 from .version
import RELEASE_GIT_HEAD
, __version__
145 if compat_os_name
== 'nt':
152 YoutubeDL objects are the ones responsible for downloading the
153 actual video file and writing it to disk if the user has requested
154 it, among some other tasks. In most cases there should be one per
155 program. As, given a video URL, the downloader doesn't know how to
156 extract all the needed information, task that InfoExtractors do, it
157 has to pass the URL to one of them.
159 For this, YoutubeDL objects have a method that allows
160 InfoExtractors to be registered in a given order. When it is passed
161 a URL, the YoutubeDL object hands it to the first InfoExtractor it
162 finds that reports being able to handle it. The InfoExtractor extracts
163 all the information about the video or videos the URL refers to, and
164 YoutubeDL processes the extracted information, possibly using a File
165 Downloader to download the video.
167 YoutubeDL objects accept a lot of parameters. In order not to saturate
168 the object constructor with arguments, it receives a dictionary of
169 options instead. These options are available through the params
170 attribute for the InfoExtractors to use. The YoutubeDL also
171 registers itself as the downloader in charge for the InfoExtractors
172 that are added to it, so this is a "mutual registration".
176 username: Username for authentication purposes.
177 password: Password for authentication purposes.
178 videopassword: Password for accessing a video.
179 ap_mso: Adobe Pass multiple-system operator identifier.
180 ap_username: Multiple-system operator account username.
181 ap_password: Multiple-system operator account password.
182 usenetrc: Use netrc for authentication instead.
183 verbose: Print additional info to stdout.
184 quiet: Do not print messages to stdout.
185 no_warnings: Do not print out anything for warnings.
186 forceprint: A dict with keys WHEN mapped to a list of templates to
187 print to stdout. The allowed keys are video or any of the
188 items in utils.POSTPROCESS_WHEN.
189 For compatibility, a single list is also accepted
190 print_to_file: A dict with keys WHEN (same as forceprint) mapped to
191 a list of tuples with (template, filename)
192 forcejson: Force printing info_dict as JSON.
193 dump_single_json: Force printing the info_dict of the whole playlist
194 (or video) as a single JSON line.
195 force_write_download_archive: Force writing download archive regardless
196 of 'skip_download' or 'simulate'.
197 simulate: Do not download the video files. If unset (or None),
198 simulate only if listsubtitles, listformats or list_thumbnails is used
199 format: Video format code. see "FORMAT SELECTION" for more details.
200 You can also pass a function. The function takes 'ctx' as
201 argument and returns the formats to download.
202 See "build_format_selector" for an implementation
203 allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
204 ignore_no_formats_error: Ignore "No video formats" error. Useful for
205 extracting metadata even if the video is not actually
206 available for download (experimental)
207 format_sort: A list of fields by which to sort the video formats.
208 See "Sorting Formats" for more details.
209 format_sort_force: Force the given format_sort. see "Sorting Formats"
211 prefer_free_formats: Whether to prefer video formats with free containers
212 over non-free ones of same quality.
213 allow_multiple_video_streams: Allow multiple video streams to be merged
215 allow_multiple_audio_streams: Allow multiple audio streams to be merged
217 check_formats: Whether to test if the formats are downloadable.
218 Can be True (check all), False (check none),
219 'selected' (check selected formats),
220 or None (check only if requested by extractor)
221 paths: Dictionary of output paths. The allowed keys are 'home'
222 'temp' and the keys of OUTTMPL_TYPES (in utils.py)
223 outtmpl: Dictionary of templates for output names. Allowed keys
224 are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
225 For compatibility with youtube-dl, a single string can also be used
226 outtmpl_na_placeholder: Placeholder for unavailable meta fields.
227 restrictfilenames: Do not allow "&" and spaces in file names
228 trim_file_name: Limit length of filename (extension excluded)
229 windowsfilenames: Force the filenames to be windows compatible
230 ignoreerrors: Do not stop on download/postprocessing errors.
231 Can be 'only_download' to ignore only download errors.
232 Default is 'only_download' for CLI, but False for API
233 skip_playlist_after_errors: Number of allowed failures until the rest of
234 the playlist is skipped
235 force_generic_extractor: Force downloader to use the generic extractor
236 overwrites: Overwrite all video and metadata files if True,
237 overwrite only non-video files if None
238 and don't overwrite any file if False
239 For compatibility with youtube-dl,
240 "nooverwrites" may also be used instead
241 playlist_items: Specific indices of playlist to download.
242 playlistrandom: Download playlist items in random order.
243 lazy_playlist: Process playlist entries as they are received.
244 matchtitle: Download only matching titles.
245 rejecttitle: Reject downloads for matching titles.
246 logger: Log messages to a logging.Logger instance.
247 logtostderr: Log messages to stderr instead of stdout.
248 consoletitle: Display progress in console window's titlebar.
249 writedescription: Write the video description to a .description file
250 writeinfojson: Write the video metadata to a .info.json file
251 clean_infojson: Remove private fields from the infojson
252 getcomments: Extract video comments. This will not be written to disk
253 unless writeinfojson is also given
254 writeannotations: Write the video annotations to a .annotations.xml file
255 writethumbnail: Write the thumbnail image to a file
256 allow_playlist_files: Whether to write playlists' description, infojson etc
257 also to disk when using the 'write*' options
258 write_all_thumbnails: Write all thumbnail formats to files
259 writelink: Write an internet shortcut file, depending on the
260 current platform (.url/.webloc/.desktop)
261 writeurllink: Write a Windows internet shortcut file (.url)
262 writewebloclink: Write a macOS internet shortcut file (.webloc)
263 writedesktoplink: Write a Linux internet shortcut file (.desktop)
264 writesubtitles: Write the video subtitles to a file
265 writeautomaticsub: Write the automatically generated subtitles to a file
266 listsubtitles: Lists all available subtitles for the video
267 subtitlesformat: The format code for subtitles
268 subtitleslangs: List of languages of the subtitles to download (can be regex).
269 The list may contain "all" to refer to all the available
270 subtitles. The language can be prefixed with a "-" to
271 exclude it from the requested languages. Eg: ['all', '-live_chat']
272 keepvideo: Keep the video file after post-processing
273 daterange: A DateRange object, download only if the upload_date is in the range.
274 skip_download: Skip the actual download of the video file
275 cachedir: Location of the cache files in the filesystem.
276 False to disable filesystem cache.
277 noplaylist: Download single video instead of a playlist if in doubt.
278 age_limit: An integer representing the user's age in years.
279 Unsuitable videos for the given age are skipped.
280 min_views: An integer representing the minimum view count the video
281 must have in order to not be skipped.
282 Videos without view count information are always
283 downloaded. None for no limit.
284 max_views: An integer representing the maximum view count.
285 Videos that are more popular than that are not
287 Videos without view count information are always
288 downloaded. None for no limit.
289 download_archive: File name of a file where all downloads are recorded.
290 Videos already present in the file are not downloaded
292 break_on_existing: Stop the download process after attempting to download a
293 file that is in the archive.
294 break_on_reject: Stop the download process when encountering a video that
295 has been filtered out.
296 break_per_url: Whether break_on_reject and break_on_existing
297 should act on each input URL as opposed to for the entire queue
298 cookiefile: File name or text stream from where cookies should be read and dumped to
299 cookiesfrombrowser: A tuple containing the name of the browser, the profile
300 name/path from where cookies are loaded, and the name of the
301 keyring. Eg: ('chrome', ) or ('vivaldi', 'default', 'BASICTEXT')
302 legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
303 support RFC 5746 secure renegotiation
304 nocheckcertificate: Do not verify SSL certificates
305 client_certificate: Path to client certificate file in PEM format. May include the private key
306 client_certificate_key: Path to private key file for client certificate
307 client_certificate_password: Password for client certificate private key, if encrypted.
308 If not provided and the key is encrypted, yt-dlp will ask interactively
309 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
310 At the moment, this is only supported by YouTube.
311 http_headers: A dictionary of custom headers to be used for all requests
312 proxy: URL of the proxy server to use
313 geo_verification_proxy: URL of the proxy to use for IP address verification
314 on geo-restricted sites.
315 socket_timeout: Time to wait for unresponsive hosts, in seconds
316 bidi_workaround: Work around buggy terminals without bidirectional text
317 support, using fribidi
318 debug_printtraffic:Print out sent and received HTTP traffic
319 default_search: Prepend this string if an input url is not valid.
320 'auto' for elaborate guessing
321 encoding: Use this encoding instead of the system-specified.
322 extract_flat: Do not resolve URLs, return the immediate result.
323 Pass in 'in_playlist' to only show this behavior for
325 wait_for_video: If given, wait for scheduled streams to become available.
326 The value should be a tuple containing the range
327 (min_secs, max_secs) to wait between retries
328 postprocessors: A list of dictionaries, each with an entry
329 * key: The name of the postprocessor. See
330 yt_dlp/postprocessor/__init__.py for a list.
331 * when: When to run the postprocessor. Allowed values are
332 the entries of utils.POSTPROCESS_WHEN
333 Assumed to be 'post_process' if not given
334 progress_hooks: A list of functions that get called on download
335 progress, with a dictionary with the entries
336 * status: One of "downloading", "error", or "finished".
337 Check this first and ignore unknown values.
338 * info_dict: The extracted info_dict
340 If status is one of "downloading", or "finished", the
341 following properties may also be present:
342 * filename: The final filename (always present)
343 * tmpfilename: The filename we're currently writing to
344 * downloaded_bytes: Bytes on disk
345 * total_bytes: Size of the whole file, None if unknown
346 * total_bytes_estimate: Guess of the eventual file size,
348 * elapsed: The number of seconds since download started.
349 * eta: The estimated time in seconds, None if unknown
350 * speed: The download speed in bytes/second, None if
352 * fragment_index: The counter of the currently
353 downloaded video fragment.
354 * fragment_count: The number of fragments (= individual
355 files that will be merged)
357 Progress hooks are guaranteed to be called at least once
358 (with status "finished") if the download is successful.
359 postprocessor_hooks: A list of functions that get called on postprocessing
360 progress, with a dictionary with the entries
361 * status: One of "started", "processing", or "finished".
362 Check this first and ignore unknown values.
363 * postprocessor: Name of the postprocessor
364 * info_dict: The extracted info_dict
366 Postprocessor hooks are guaranteed to be called at least twice
367 (with status "started" and "finished") if the processing is successful.
368 merge_output_format: Extension to use when merging formats.
369 final_ext: Expected final extension; used to detect when the file was
370 already downloaded and converted
371 fixup: Automatically correct known faults of the file.
373 - "never": do nothing
374 - "warn": only emit a warning
375 - "detect_or_warn": check whether we can do anything
376 about it, warn otherwise (default)
377 source_address: Client-side IP address to bind to.
378 sleep_interval_requests: Number of seconds to sleep between requests
380 sleep_interval: Number of seconds to sleep before each download when
381 used alone or a lower bound of a range for randomized
382 sleep before each download (minimum possible number
383 of seconds to sleep) when used along with
385 max_sleep_interval:Upper bound of a range for randomized sleep before each
386 download (maximum possible number of seconds to sleep).
387 Must only be used along with sleep_interval.
388 Actual sleep time will be a random float from range
389 [sleep_interval; max_sleep_interval].
390 sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
391 listformats: Print an overview of available video formats and exit.
392 list_thumbnails: Print a table of all thumbnails and exit.
393 match_filter: A function that gets called for every video with the signature
394 (info_dict, *, incomplete: bool) -> Optional[str]
395 For backward compatibility with youtube-dl, the signature
396 (info_dict) -> Optional[str] is also allowed.
397 - If it returns a message, the video is ignored.
398 - If it returns None, the video is downloaded.
399 - If it returns utils.NO_DEFAULT, the user is interactively
400 asked whether to download the video.
401 match_filter_func in utils.py is one example for this.
402 no_color: Do not emit color codes in output.
403 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
406 Two-letter ISO 3166-2 country code that will be used for
407 explicit geographic restriction bypassing via faking
408 X-Forwarded-For HTTP header
410 IP range in CIDR notation that will be used similarly to
412 external_downloader: A dictionary of protocol keys and the executable of the
413 external downloader to use for it. The allowed protocols
414 are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
415 Set the value to 'native' to use the native downloader
416 compat_opts: Compatibility options. See "Differences in default behavior".
417 The following options do not work when used through the API:
418 filename, abort-on-error, multistreams, no-live-chat, format-sort
419 no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
420 Refer __init__.py for their implementation
421 progress_template: Dictionary of templates for progress outputs.
422 Allowed keys are 'download', 'postprocess',
423 'download-title' (console title) and 'postprocess-title'.
424 The template is mapped on a dictionary with keys 'progress' and 'info'
425 retry_sleep_functions: Dictionary of functions that takes the number of attempts
426 as argument and returns the time to sleep in seconds.
427 Allowed keys are 'http', 'fragment', 'file_access'
428 download_ranges: A function that gets called for every video with the signature
429 (info_dict, *, ydl) -> Iterable[Section].
430 Only the returned sections will be downloaded. Each Section contains:
431 * start_time: Start time of the section in seconds
432 * end_time: End time of the section in seconds
433 * title: Section title (Optional)
434 * index: Section number (Optional)
436 The following parameters are not used by YoutubeDL itself, they are used by
437 the downloader (see yt_dlp/downloader/common.py):
438 nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
439 max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
440 continuedl, noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
441 external_downloader_args, concurrent_fragment_downloads.
443 The following options are used by the post processors:
444 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
445 to the binary or its containing directory.
446 postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
447 and a list of additional command-line arguments for the
448 postprocessor/executable. The dict can also have "PP+EXE" keys
449 which are used when the given exe is used by the given PP.
450 Use 'default' as the name for arguments to be passed to all PP
451 For compatibility with youtube-dl, a single list of args
454 The following options are used by the extractors:
455 extractor_retries: Number of times to retry for known errors
456 dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
457 hls_split_discontinuity: Split HLS playlists to different formats at
458 discontinuities such as ad breaks (default: False)
459 extractor_args: A dictionary of arguments to be passed to the extractors.
460 See "EXTRACTOR ARGUMENTS" for details.
461 Eg: {'youtube': {'skip': ['dash', 'hls']}}
462 mark_watched: Mark videos watched (even with --simulate). Only for YouTube
464 The following options are deprecated and may be removed in the future:
466 playliststart: - Use playlist_items
467 Playlist item to start at.
468 playlistend: - Use playlist_items
469 Playlist item to end at.
470 playlistreverse: - Use playlist_items
471 Download playlist items in reverse order.
472 forceurl: - Use forceprint
473 Force printing final URL.
474 forcetitle: - Use forceprint
475 Force printing title.
476 forceid: - Use forceprint
478 forcethumbnail: - Use forceprint
479 Force printing thumbnail URL.
480 forcedescription: - Use forceprint
481 Force printing description.
482 forcefilename: - Use forceprint
483 Force printing final filename.
484 forceduration: - Use forceprint
485 Force printing duration.
486 allsubtitles: - Use subtitleslangs = ['all']
487 Downloads all the subtitles of the video
488 (requires writesubtitles or writeautomaticsub)
489 include_ads: - Doesn't work
491 call_home: - Not implemented
492 Boolean, true iff we are allowed to contact the
493 yt-dlp servers for debugging.
494 post_hooks: - Register a custom postprocessor
495 A list of functions that get called as the final step
496 for each video file, after all postprocessors have been
497 called. The filename will be passed as the only argument.
498 hls_prefer_native: - Use external_downloader = {'m3u8': 'native'} or {'m3u8': 'ffmpeg'}.
499 Use the native HLS downloader instead of ffmpeg/avconv
500 if True, otherwise use ffmpeg/avconv if False, otherwise
501 use downloader suggested by extractor if None.
502 prefer_ffmpeg: - avconv support is deprecated
503 If False, use avconv instead of ffmpeg if both are available,
504 otherwise prefer ffmpeg.
505 youtube_include_dash_manifest: - Use extractor_args
506 If True (default), DASH manifests and related
507 data will be downloaded and processed by extractor.
508 You can reduce network I/O by disabling it if you don't
509 care about DASH. (only for youtube)
510 youtube_include_hls_manifest: - Use extractor_args
511 If True (default), HLS manifests and related
512 data will be downloaded and processed by extractor.
513 You can reduce network I/O by disabling it if you don't
514 care about HLS. (only for youtube)
518 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
519 'timestamp', 'release_timestamp',
520 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
521 'average_rating', 'comment_count', 'age_limit',
522 'start_time', 'end_time',
523 'chapter_number', 'season_number', 'episode_number',
524 'track_number', 'disc_number', 'release_year',
528 # NB: Keep in sync with the docstring of extractor/common.py
529 'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note',
530 'width', 'height', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr',
531 'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx',
532 'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start',
533 'preference', 'language', 'language_preference', 'quality', 'source_preference',
534 'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'downloader_options',
535 'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'
537 _format_selection_exts
= {
538 'audio': {'m4a', 'mp3', 'ogg', 'aac'}
,
539 'video': {'mp4', 'flv', 'webm', '3gp'}
,
540 'storyboards': {'mhtml'}
,
543 def __init__(self
, params
=None, auto_init
=True):
544 """Create a FileDownloader object with the given options.
545 @param auto_init Whether to load the default extractors and print header (if verbose).
546 Set to 'no_verbose_header' to not print the header
552 self
._ies
_instances
= {}
553 self
._pps
= {k: [] for k in POSTPROCESS_WHEN}
554 self
._printed
_messages
= set()
555 self
._first
_webpage
_request
= True
556 self
._post
_hooks
= []
557 self
._progress
_hooks
= []
558 self
._postprocessor
_hooks
= []
559 self
._download
_retcode
= 0
560 self
._num
_downloads
= 0
562 self
._playlist
_level
= 0
563 self
._playlist
_urls
= set()
564 self
.cache
= Cache(self
)
566 windows_enable_vt_mode()
567 stdout
= sys
.stderr
if self
.params
.get('logtostderr') else sys
.stdout
568 self
._out
_files
= Namespace(
571 screen
=sys
.stderr
if self
.params
.get('quiet') else stdout
,
572 console
=None if compat_os_name
== 'nt' else next(
573 filter(supports_terminal_sequences
, (sys
.stderr
, sys
.stdout
)), None)
575 self
._allow
_colors
= Namespace(**{
576 type_
: not self
.params
.get('no_color') and supports_terminal_sequences(stream
)
577 for type_
, stream
in self
._out
_files
.items_
if type_
!= 'console'
580 MIN_SUPPORTED
, MIN_RECOMMENDED
= (3, 6), (3, 7)
581 current_version
= sys
.version_info
[:2]
582 if current_version
< MIN_RECOMMENDED
:
583 msg
= ('Support for Python version %d.%d has been deprecated. '
584 'See https://github.com/yt-dlp/yt-dlp/issues/3764 for more details. '
585 'You will recieve only one more update on this version')
586 if current_version
< MIN_SUPPORTED
:
587 msg
= 'Python version %d.%d is no longer supported'
588 self
.deprecation_warning(
589 f
'{msg}! Please update to Python %d.%d or above' % (*current_version
, *MIN_RECOMMENDED
))
591 if self
.params
.get('allow_unplayable_formats'):
593 f
'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
594 'This is a developer option intended for debugging. \n'
595 ' If you experience any issues while using this option, '
596 f
'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')
598 def check_deprecated(param
, option
, suggestion
):
599 if self
.params
.get(param
) is not None:
600 self
.report_warning(f
'{option} is deprecated. Use {suggestion} instead')
604 if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
605 if self
.params
.get('geo_verification_proxy') is None:
606 self
.params
['geo_verification_proxy'] = self
.params
['cn_verification_proxy']
608 check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
609 check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
610 check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
612 for msg
in self
.params
.get('_warnings', []):
613 self
.report_warning(msg
)
614 for msg
in self
.params
.get('_deprecation_warnings', []):
615 self
.deprecation_warning(msg
)
617 self
.params
['compat_opts'] = set(self
.params
.get('compat_opts', ()))
618 if not compat_has_legacy
:
619 self
.params
['compat_opts'].add('no-compat-legacy')
620 if 'list-formats' in self
.params
['compat_opts']:
621 self
.params
['listformats_table'] = False
623 if 'overwrites' not in self
.params
and self
.params
.get('nooverwrites') is not None:
624 # nooverwrites was unnecessarily changed to overwrites
625 # in 0c3d0f51778b153f65c21906031c2e091fcfb641
626 # This ensures compatibility with both keys
627 self
.params
['overwrites'] = not self
.params
['nooverwrites']
628 elif self
.params
.get('overwrites') is None:
629 self
.params
.pop('overwrites', None)
631 self
.params
['nooverwrites'] = not self
.params
['overwrites']
633 self
.params
.setdefault('forceprint', {})
634 self
.params
.setdefault('print_to_file', {})
636 # Compatibility with older syntax
637 if not isinstance(params
['forceprint'], dict):
638 self
.params
['forceprint'] = {'video': params['forceprint']}
640 if self
.params
.get('bidi_workaround', False):
643 master
, slave
= pty
.openpty()
644 width
= shutil
.get_terminal_size().columns
645 width_args
= [] if width
is None else ['-w', str(width
)]
646 sp_kwargs
= {'stdin': subprocess.PIPE, 'stdout': slave, 'stderr': self._out_files.error}
648 self
._output
_process
= Popen(['bidiv'] + width_args
, **sp_kwargs
)
650 self
._output
_process
= Popen(['fribidi', '-c', 'UTF-8'] + width_args
, **sp_kwargs
)
651 self
._output
_channel
= os
.fdopen(master
, 'rb')
652 except OSError as ose
:
653 if ose
.errno
== errno
.ENOENT
:
655 'Could not find fribidi executable, ignoring --bidi-workaround. '
656 'Make sure that fribidi is an executable file in one of the directories in your $PATH.')
661 if auto_init
!= 'no_verbose_header':
662 self
.print_debug_header()
663 self
.add_default_info_extractors()
665 if (sys
.platform
!= 'win32'
666 and sys
.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
667 and not self
.params
.get('restrictfilenames', False)):
668 # Unicode filesystem API will throw errors (#1474, #13027)
670 'Assuming --restrict-filenames since file system encoding '
671 'cannot encode all characters. '
672 'Set the LC_ALL environment variable to fix this.')
673 self
.params
['restrictfilenames'] = True
675 self
._parse
_outtmpl
()
677 # Creating format selector here allows us to catch syntax errors before the extraction
678 self
.format_selector
= (
679 self
.params
.get('format') if self
.params
.get('format') in (None, '-')
680 else self
.params
['format'] if callable(self
.params
['format'])
681 else self
.build_format_selector(self
.params
['format']))
683 # Set http_headers defaults according to std_headers
684 self
.params
['http_headers'] = merge_headers(std_headers
, self
.params
.get('http_headers', {}))
687 'post_hooks': self
.add_post_hook
,
688 'progress_hooks': self
.add_progress_hook
,
689 'postprocessor_hooks': self
.add_postprocessor_hook
,
691 for opt
, fn
in hooks
.items():
692 for ph
in self
.params
.get(opt
, []):
695 for pp_def_raw
in self
.params
.get('postprocessors', []):
696 pp_def
= dict(pp_def_raw
)
697 when
= pp_def
.pop('when', 'post_process')
698 self
.add_post_processor(
699 get_postprocessor(pp_def
.pop('key'))(self
, **pp_def
),
703 register_socks_protocols()
705 def preload_download_archive(fn
):
706 """Preload the archive, if any is specified"""
709 self
.write_debug(f
'Loading archive file {fn!r}')
711 with locked_file(fn
, 'r', encoding
='utf-8') as archive_file
:
712 for line
in archive_file
:
713 self
.archive
.add(line
.strip())
714 except OSError as ioe
:
715 if ioe
.errno
!= errno
.ENOENT
:
721 preload_download_archive(self
.params
.get('download_archive'))
723 def warn_if_short_id(self
, argv
):
724 # short YouTube ID starting with dash?
726 i
for i
, a
in enumerate(argv
)
727 if re
.match(r
'^-[0-9A-Za-z_-]{10}$', a
)]
731 + [a
for i
, a
in enumerate(argv
) if i
not in idxs
]
732 + ['--'] + [argv
[i
] for i
in idxs
]
735 'Long argument string detected. '
736 'Use -- to separate parameters and URLs, like this:\n%s' %
737 args_to_str(correct_argv
))
739 def add_info_extractor(self
, ie
):
740 """Add an InfoExtractor object to the end of the list."""
742 self
._ies
[ie_key
] = ie
743 if not isinstance(ie
, type):
744 self
._ies
_instances
[ie_key
] = ie
745 ie
.set_downloader(self
)
747 def _get_info_extractor_class(self
, ie_key
):
748 ie
= self
._ies
.get(ie_key
)
750 ie
= get_info_extractor(ie_key
)
751 self
.add_info_extractor(ie
)
754 def get_info_extractor(self
, ie_key
):
756 Get an instance of an IE with name ie_key, it will try to get one from
757 the _ies list, if there's no instance it will create a new one and add
758 it to the extractor list.
760 ie
= self
._ies
_instances
.get(ie_key
)
762 ie
= get_info_extractor(ie_key
)()
763 self
.add_info_extractor(ie
)
def add_default_info_extractors(self):
    """
    Register every extractor class yielded by gen_extractor_classes(),
    in the order it is produced, by passing each to add_info_extractor.
    """
    for extractor_cls in gen_extractor_classes():
        self.add_info_extractor(extractor_cls)
def add_post_processor(self, pp, when='post_process'):
    """Append the PostProcessor `pp` to the chain stored under `when`.

    `when` must be one of the entries of POSTPROCESS_WHEN (checked with an
    assertion). After being queued, the postprocessor is handed this object
    through its set_downloader() method.
    """
    assert when in POSTPROCESS_WHEN, f'Invalid when={when}'
    chain = self._pps[when]
    chain.append(pp)
    pp.set_downloader(self)
def add_post_hook(self, ph):
    """Register `ph` at the end of the list of post hooks."""
    registered = self._post_hooks
    registered.append(ph)
def add_progress_hook(self, ph):
    """Register `ph` at the end of the list of download progress hooks."""
    registered = self._progress_hooks
    registered.append(ph)
def add_postprocessor_hook(self, ph):
    """Register `ph` as a postprocessing progress hook.

    The hook is remembered in the list of postprocessor hooks and is also
    attached, via add_progress_hook(), to every postprocessor that is
    already present in any of the chains in self._pps.
    """
    self._postprocessor_hooks.append(ph)
    existing = (pp for chain in self._pps.values() for pp in chain)
    for postprocessor in existing:
        postprocessor.add_progress_hook(ph)
794 def _bidi_workaround(self
, message
):
795 if not hasattr(self
, '_output_channel'):
798 assert hasattr(self
, '_output_process')
799 assert isinstance(message
, str)
800 line_count
= message
.count('\n') + 1
801 self
._output
_process
.stdin
.write((message
+ '\n').encode())
802 self
._output
_process
.stdin
.flush()
803 res
= ''.join(self
._output
_channel
.readline().decode()
804 for _
in range(line_count
))
805 return res
[:-len('\n')]
807 def _write_string(self
, message
, out
=None, only_once
=False):
809 if message
in self
._printed
_messages
:
811 self
._printed
_messages
.add(message
)
812 write_string(message
, out
=out
, encoding
=self
.params
.get('encoding'))
def to_stdout(self, message, skip_eol=False, quiet=None):
    """Print message to stdout

    `quiet` and `skip_eol` are no longer honoured: passing a non-default
    value for either only triggers a deprecation warning. The message is
    always written, followed by a newline, to self._out_files.out.
    """
    quiet_given = quiet is not None
    skip_eol_given = skip_eol is not False
    if quiet_given:
        self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument quiet. Use "YoutubeDL.to_screen" instead')
    if skip_eol_given:
        self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument skip_eol. Use "YoutubeDL.to_screen" instead')
    line = f'{self._bidi_workaround(message)}\n'
    self._write_string(line, self._out_files.out)
def to_screen(self, message, skip_eol=False, quiet=None):
    """Print message to screen if not in quiet mode

    If a 'logger' param is set, the message goes to its debug() method and
    nothing is written. Otherwise the message is suppressed when quiet mode
    applies (the `quiet` argument, or the 'quiet' param when the argument
    is None) and 'verbose' is not set. A newline is appended unless
    `skip_eol` is true.
    """
    logger = self.params.get('logger')
    if logger:
        logger.debug(message)
        return

    be_quiet = self.params.get('quiet') if quiet is None else quiet
    if be_quiet and not self.params.get('verbose'):
        return

    terminator = '' if skip_eol else '\n'
    self._write_string(
        '%s%s' % (self._bidi_workaround(message), terminator),
        self._out_files.screen)
833 def to_stderr(self
, message
, only_once
=False):
834 """Print message to stderr"""
835 assert isinstance(message
, str)
836 if self
.params
.get('logger'):
837 self
.params
['logger'].error(message
)
839 self
._write
_string
(f
'{self._bidi_workaround(message)}\n', self
._out
_files
.error
, only_once
=only_once
)
def _send_console_code(self, code):
    """Write the raw control sequence `code` to the console handle

    Nothing is written when running on Windows (`compat_os_name == 'nt'`)
    or when `self._out_files.console` is not set.
    """
    if compat_os_name == 'nt' or not self._out_files.console:
        return
    self._write_string(code, self._out_files.console)
846 def to_console_title(self
, message
):
847 if not self
.params
.get('consoletitle', False):
849 message
= remove_terminal_sequences(message
)
850 if compat_os_name
== 'nt':
851 if ctypes
.windll
.kernel32
.GetConsoleWindow():
852 # c_wchar_p() might not be necessary if `message` is
853 # already of type unicode()
854 ctypes
.windll
.kernel32
.SetConsoleTitleW(ctypes
.c_wchar_p(message
))
856 self
._send
_console
_code
(f
'\033]0;{message}\007')
def save_console_title(self):
    """Ask the terminal to remember its current title

    Does nothing unless the `consoletitle` param is set, and does nothing
    when `simulate` is set.
    """
    if not self.params.get('consoletitle') or self.params.get('simulate'):
        return
    self._send_console_code('\033[22;0t')  # Save the title on stack
def restore_console_title(self):
    """Ask the terminal to bring back the previously saved title

    Does nothing unless the `consoletitle` param is set, and does nothing
    when `simulate` is set.
    """
    if not self.params.get('consoletitle') or self.params.get('simulate'):
        return
    self._send_console_code('\033[23;0t')  # Restore the title from stack
869 self
.save_console_title()
872 def __exit__(self
, *args
):
873 self
.restore_console_title()
875 if self
.params
.get('cookiefile') is not None:
876 self
.cookiejar
.save(ignore_discard
=True, ignore_expires
=True)
878 def trouble(self
, message
=None, tb
=None, is_error
=True):
879 """Determine action to take when a download problem appears.
881 Depending on if the downloader has been configured to ignore
882 download errors or not, this method may throw an exception or
883 not when errors are found, after printing the message.
885 @param tb If given, is additional traceback information
886 @param is_error Whether to raise error according to ignorerrors
888 if message
is not None:
889 self
.to_stderr(message
)
890 if self
.params
.get('verbose'):
892 if sys
.exc_info()[0]: # if .trouble has been called from an except block
894 if hasattr(sys
.exc_info()[1], 'exc_info') and sys
.exc_info()[1].exc_info
[0]:
895 tb
+= ''.join(traceback
.format_exception(*sys
.exc_info()[1].exc_info
))
896 tb
+= encode_compat_str(traceback
.format_exc())
898 tb_data
= traceback
.format_list(traceback
.extract_stack())
899 tb
= ''.join(tb_data
)
904 if not self
.params
.get('ignoreerrors'):
905 if sys
.exc_info()[0] and hasattr(sys
.exc_info()[1], 'exc_info') and sys
.exc_info()[1].exc_info
[0]:
906 exc_info
= sys
.exc_info()[1].exc_info
908 exc_info
= sys
.exc_info()
909 raise DownloadError(message
, exc_info
)
910 self
._download
_retcode
= 1
914 EMPHASIS
='light blue',
920 SUPPRESS
='light black',
923 def _format_text(self
, handle
, allow_colors
, text
, f
, fallback
=None, *, test_encoding
=False):
927 # handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711
928 encoding
= self
.params
.get('encoding') or getattr(handle
, 'encoding', None) or 'ascii'
929 text
= text
.encode(encoding
, 'ignore').decode(encoding
)
930 if fallback
is not None and text
!= original_text
:
932 return format_text(text
, f
) if allow_colors
else text
if fallback
is None else fallback
934 def _format_out(self
, *args
, **kwargs
):
935 return self
._format
_text
(self
._out
_files
.out
, self
._allow
_colors
.out
, *args
, **kwargs
)
937 def _format_screen(self
, *args
, **kwargs
):
938 return self
._format
_text
(self
._out
_files
.screen
, self
._allow
_colors
.screen
, *args
, **kwargs
)
940 def _format_err(self
, *args
, **kwargs
):
941 return self
._format
_text
(self
._out
_files
.error
, self
._allow
_colors
.error
, *args
, **kwargs
)
943 def report_warning(self
, message
, only_once
=False):
945 Print the message to stderr, it will be prefixed with 'WARNING:'
946 If stderr is a tty file the 'WARNING:' will be colored
948 if self
.params
.get('logger') is not None:
949 self
.params
['logger'].warning(message
)
951 if self
.params
.get('no_warnings'):
953 self
.to_stderr(f
'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once
)
955 def deprecation_warning(self
, message
):
956 if self
.params
.get('logger') is not None:
957 self
.params
['logger'].warning(f
'DeprecationWarning: {message}')
959 self
.to_stderr(f
'{self._format_err("DeprecationWarning:", self.Styles.ERROR)} {message}', True)
def report_error(self, message, *args, **kwargs):
    '''
    Do the same as trouble, but prefixes the message with 'ERROR:', colored
    in red if stderr is a tty file.

    Any extra positional and keyword arguments are passed on to trouble.
    '''
    prefix = self._format_err("ERROR:", self.Styles.ERROR)
    self.trouble(f'{prefix} {message}', *args, **kwargs)
968 def write_debug(self
, message
, only_once
=False):
969 '''Log debug message or Print message to stderr'''
970 if not self
.params
.get('verbose', False):
972 message
= f
'[debug] {message}'
973 if self
.params
.get('logger'):
974 self
.params
['logger'].debug(message
)
976 self
.to_stderr(message
, only_once
)
def report_file_already_downloaded(self, file_name):
    """Report file has already been fully downloaded."""
    try:
        self.to_screen('[download] %s has already been downloaded' % file_name)
    except UnicodeEncodeError:
        # The name could not be encoded for the output; fall back to a
        # message that does not contain it
        self.to_screen('[download] The file has already been downloaded')
def report_file_delete(self, file_name):
    """Report that existing file will be deleted."""
    try:
        self.to_screen('Deleting existing file %s' % file_name)
    except UnicodeEncodeError:
        # The name could not be encoded for the output; fall back to a
        # message that does not contain it
        self.to_screen('Deleting existing file')
def raise_no_formats(self, info, forced=False, *, msg=None):
    """Raise or warn that no formats are available for `info`

    @param info    Info dict; `_has_drm` is read from it, and `id` and
                   `extractor` are required when the error is raised
    @param forced  Raise even if the `ignore_no_formats_error` param is set
    @param msg     Message to use instead of the default one. Passing a
                   message also marks the raised error as expected

    Raises ExtractorError when `forced` is set or `ignore_no_formats_error`
    is not; otherwise only a warning is reported.
    """
    has_drm = info.get('_has_drm')
    ignored = self.params.get('ignore_no_formats_error')
    expected = bool(msg)
    if not msg:
        msg = 'This video is DRM protected' if has_drm else 'No video formats found!'

    if ignored and not forced:
        self.report_warning(msg)
        return

    raise ExtractorError(
        msg, video_id=info['id'], ie=info['extractor'],
        expected=has_drm or ignored or expected)
def parse_outtmpl(self):
    """Deprecated wrapper around `_parse_outtmpl`

    Emits a deprecation warning, runs `_parse_outtmpl` and returns the
    resulting `outtmpl` param.
    """
    self.deprecation_warning('"YoutubeDL.parse_outtmpl" is deprecated and may be removed in a future version')
    self._parse_outtmpl()
    templates = self.params['outtmpl']
    return templates
1007 def _parse_outtmpl(self
):
1009 if self
.params
.get('restrictfilenames'): # Remove spaces in the default template
1010 sanitize
= lambda x
: x
.replace(' - ', ' ').replace(' ', '-')
1012 outtmpl
= self
.params
.setdefault('outtmpl', {})
1013 if not isinstance(outtmpl
, dict):
1014 self
.params
['outtmpl'] = outtmpl
= {'default': outtmpl}
1015 outtmpl
.update({k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items() if outtmpl.get(k) is None}
)
1017 def get_output_path(self
, dir_type
='', filename
=None):
1018 paths
= self
.params
.get('paths', {})
1019 assert isinstance(paths
, dict)
1020 path
= os
.path
.join(
1021 expand_path(paths
.get('home', '').strip()),
1022 expand_path(paths
.get(dir_type
, '').strip()) if dir_type
else '',
1024 return sanitize_path(path
, force
=self
.params
.get('windowsfilenames'))
def _outtmpl_expandpath(outtmpl):
    """Run `expand_path` over an output template while keeping '%%' and '$$' intact

    Takes no `self`; elsewhere in this file it is invoked through both
    `cls` and `self`.
    """
    # expand_path would collapse '%%' to '%' and '$$' to '$', which must not
    # happen because '%%' has to survive until the template dict substitution
    # step. To prevent that, a random boundary-like marker is wedged between
    # the doubled characters and stripped again after expansion.
    marker = ''.join(random.choice(ascii_letters) for _ in range(32))
    guarded = outtmpl.replace('%%', f'%{marker}%').replace('$$', f'${marker}$')

    # The template is expanded *before* the template dict substitution because
    # meta fields may contain env variables we don't want to be expanded.
    # For example, with outtmpl "%(title)s.%(ext)s" and title "Hello $PATH",
    # `$PATH` must stay untouched.
    expanded = expand_path(guarded)
    return expanded.replace(marker, '')
1042 def escape_outtmpl(outtmpl
):
1043 ''' Escape any remaining strings like %s, %abc% etc. '''
1045 STR_FORMAT_RE_TMPL
.format('', '(?![%(\0])'),
1046 lambda mobj
: ('' if mobj
.group('has_key') else '%') + mobj
.group(0),
1050 def validate_outtmpl(cls
, outtmpl
):
1051 ''' @return None or Exception object '''
1053 STR_FORMAT_RE_TMPL
.format('[^)]*', '[ljhqBUDS]'),
1054 lambda mobj
: f
'{mobj.group(0)[:-1]}s',
1055 cls
._outtmpl
_expandpath
(outtmpl
))
1057 cls
.escape_outtmpl(outtmpl
) % collections
.defaultdict(int)
1059 except ValueError as err
:
1063 def _copy_infodict(info_dict
):
1064 info_dict
= dict(info_dict
)
1065 info_dict
.pop('__postprocessors', None)
1066 info_dict
.pop('__pending_error', None)
1069 def prepare_outtmpl(self
, outtmpl
, info_dict
, sanitize
=False):
1070 """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
1071 @param sanitize Whether to sanitize the output as a filename.
1072 For backward compatibility, a function can also be passed
1075 info_dict
.setdefault('epoch', int(time
.time())) # keep epoch consistent once set
1077 info_dict
= self
._copy
_infodict
(info_dict
)
1078 info_dict
['duration_string'] = ( # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
1079 formatSeconds(info_dict
['duration'], '-' if sanitize
else ':')
1080 if info_dict
.get('duration', None) is not None
1082 info_dict
['autonumber'] = int(self
.params
.get('autonumber_start', 1) - 1 + self
._num
_downloads
)
1083 info_dict
['video_autonumber'] = self
._num
_videos
1084 if info_dict
.get('resolution') is None:
1085 info_dict
['resolution'] = self
.format_resolution(info_dict
, default
=None)
1087 # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
1088 # of %(field)s to %(field)0Nd for backward compatibility
1089 field_size_compat_map
= {
1090 'playlist_index': number_of_digits(info_dict
.get('__last_playlist_index') or 0),
1091 'playlist_autonumber': number_of_digits(info_dict
.get('n_entries') or 0),
1092 'autonumber': self
.params
.get('autonumber_size') or 5,
1096 EXTERNAL_FORMAT_RE
= re
.compile(STR_FORMAT_RE_TMPL
.format('[^)]*', f
'[{STR_FORMAT_TYPES}ljhqBUDS]'))
1101 # Field is of the form key1.key2...
1102 # where keys (except first) can be string, int or slice
1103 FIELD_RE
= r
'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num
=r
'(?:-?\d+)')
1104 MATH_FIELD_RE
= rf
'(?:{FIELD_RE}|-?{NUMBER_RE})'
1105 MATH_OPERATORS_RE
= r
'(?:%s)' % '|'.join(map(re
.escape
, MATH_FUNCTIONS
.keys()))
1106 INTERNAL_FORMAT_RE
= re
.compile(rf
'''(?x)
1108 (?P<fields>{FIELD_RE})
1109 (?P<maths>(?:{MATH_OPERATORS_RE}{MATH_FIELD_RE})*)
1110 (?:>(?P<strf_format>.+?))?
1112 (?P<alternate>(?<!\\),[^|&)]+)?
1113 (?:&(?P<replacement>.*?))?
1114 (?:\|(?P<default>.*?))?
1117 def _traverse_infodict(k
):
1121 return traverse_obj(info_dict
, k
, is_user_input
=True, traverse_string
=True)
1123 def get_value(mdict
):
1125 value
= _traverse_infodict(mdict
['fields'])
1128 value
= float_or_none(value
)
1129 if value
is not None:
1132 offset_key
= mdict
['maths']
1134 value
= float_or_none(value
)
1138 MATH_FIELD_RE
if operator
else MATH_OPERATORS_RE
,
1139 offset_key
).group(0)
1140 offset_key
= offset_key
[len(item
):]
1141 if operator
is None:
1142 operator
= MATH_FUNCTIONS
[item
]
1144 item
, multiplier
= (item
[1:], -1) if item
[0] == '-' else (item
, 1)
1145 offset
= float_or_none(item
)
1147 offset
= float_or_none(_traverse_infodict(item
))
1149 value
= operator(value
, multiplier
* offset
)
1150 except (TypeError, ZeroDivisionError):
1153 # Datetime formatting
1154 if mdict
['strf_format']:
1155 value
= strftime_or_none(value
, mdict
['strf_format'].replace('\\,', ','))
1159 na
= self
.params
.get('outtmpl_na_placeholder', 'NA')
1161 def filename_sanitizer(key
, value
, restricted
=self
.params
.get('restrictfilenames')):
1162 return sanitize_filename(str(value
), restricted
=restricted
, is_id
=(
1163 bool(re
.search(r
'(^|[_.])id(\.|$)', key
))
1164 if 'filename-sanitization' in self
.params
['compat_opts']
1167 sanitizer
= sanitize
if callable(sanitize
) else filename_sanitizer
1168 sanitize
= bool(sanitize
)
1170 def _dumpjson_default(obj
):
1171 if isinstance(obj
, (set, LazyList
)):
1175 def create_key(outer_mobj
):
1176 if not outer_mobj
.group('has_key'):
1177 return outer_mobj
.group(0)
1178 key
= outer_mobj
.group('key')
1179 mobj
= re
.match(INTERNAL_FORMAT_RE
, key
)
1180 initial_field
= mobj
.group('fields') if mobj
else ''
1181 value
, replacement
, default
= None, None, na
1183 mobj
= mobj
.groupdict()
1184 default
= mobj
['default'] if mobj
['default'] is not None else default
1185 value
= get_value(mobj
)
1186 replacement
= mobj
['replacement']
1187 if value
is None and mobj
['alternate']:
1188 mobj
= re
.match(INTERNAL_FORMAT_RE
, mobj
['remaining'][1:])
1192 fmt
= outer_mobj
.group('format')
1193 if fmt
== 's' and value
is not None and key
in field_size_compat_map
.keys():
1194 fmt
= f
'0{field_size_compat_map[key]:d}d'
1196 value
= default
if value
is None else value
if replacement
is None else replacement
1198 flags
= outer_mobj
.group('conversion') or ''
1199 str_fmt
= f
'{fmt[:-1]}s'
1200 if fmt
[-1] == 'l': # list
1201 delim
= '\n' if '#' in flags
else ', '
1202 value
, fmt
= delim
.join(map(str, variadic(value
, allowed_types
=(str, bytes)))), str_fmt
1203 elif fmt
[-1] == 'j': # json
1204 value
, fmt
= json
.dumps(value
, default
=_dumpjson_default
, indent
=4 if '#' in flags
else None), str_fmt
1205 elif fmt
[-1] == 'h': # html
1206 value
, fmt
= escapeHTML(value
), str_fmt
1207 elif fmt
[-1] == 'q': # quoted
1208 value
= map(str, variadic(value
) if '#' in flags
else [value
])
1209 value
, fmt
= ' '.join(map(compat_shlex_quote
, value
)), str_fmt
1210 elif fmt
[-1] == 'B': # bytes
1211 value
= f
'%{str_fmt}'.encode() % str(value
).encode()
1212 value
, fmt
= value
.decode('utf-8', 'ignore'), 's'
1213 elif fmt
[-1] == 'U': # unicode normalized
1214 value
, fmt
= unicodedata
.normalize(
1215 # "+" = compatibility equivalence, "#" = NFD
1216 'NF%s%s' % ('K' if '+' in flags
else '', 'D' if '#' in flags
else 'C'),
1218 elif fmt
[-1] == 'D': # decimal suffix
1219 num_fmt
, fmt
= fmt
[:-1].replace('#', ''), 's'
1220 value
= format_decimal_suffix(value
, f
'%{num_fmt}f%s' if num_fmt
else '%d%s',
1221 factor
=1024 if '#' in flags
else 1000)
1222 elif fmt
[-1] == 'S': # filename sanitization
1223 value
, fmt
= filename_sanitizer(initial_field
, value
, restricted
='#' in flags
), str_fmt
1224 elif fmt
[-1] == 'c':
1226 value
= str(value
)[0]
1229 elif fmt
[-1] not in 'rs': # numeric
1230 value
= float_or_none(value
)
1232 value
, fmt
= default
, 's'
1236 # If value is an object, sanitize might convert it to a string
1237 # So we convert it to repr first
1238 value
, fmt
= repr(value
), str_fmt
1239 if fmt
[-1] in 'csr':
1240 value
= sanitizer(initial_field
, value
)
1242 key
= '%s\0%s' % (key
.replace('%', '%\0'), outer_mobj
.group('format'))
1243 TMPL_DICT
[key
] = value
1244 return '{prefix}%({key}){fmt}'.format(key
=key
, fmt
=fmt
, prefix
=outer_mobj
.group('prefix'))
1246 return EXTERNAL_FORMAT_RE
.sub(create_key
, outtmpl
), TMPL_DICT
def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
    """Evaluate `outtmpl` against `info_dict` and return the formatted result

    The template and dict are first run through `prepare_outtmpl` (extra
    arguments are forwarded to it); the prepared template is then escaped
    with `escape_outtmpl` and %-formatted with the prepared dict.
    """
    template, values = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
    escaped = self.escape_outtmpl(template)
    return escaped % values
1252 def _prepare_filename(self
, info_dict
, *, outtmpl
=None, tmpl_type
=None):
1253 assert None in (outtmpl
, tmpl_type
), 'outtmpl and tmpl_type are mutually exclusive'
1255 outtmpl
= self
.params
['outtmpl'].get(tmpl_type
or 'default', self
.params
['outtmpl']['default'])
1257 outtmpl
= self
._outtmpl
_expandpath
(outtmpl
)
1258 filename
= self
.evaluate_outtmpl(outtmpl
, info_dict
, True)
1262 if tmpl_type
in ('', 'temp'):
1263 final_ext
, ext
= self
.params
.get('final_ext'), info_dict
.get('ext')
1264 if final_ext
and ext
and final_ext
!= ext
and filename
.endswith(f
'.{final_ext}'):
1265 filename
= replace_extension(filename
, ext
, final_ext
)
1267 force_ext
= OUTTMPL_TYPES
[tmpl_type
]
1269 filename
= replace_extension(filename
, force_ext
, info_dict
.get('ext'))
1271 # https://github.com/blackjack4494/youtube-dlc/issues/85
1272 trim_file_name
= self
.params
.get('trim_file_name', False)
1274 no_ext
, *ext
= filename
.rsplit('.', 2)
1275 filename
= join_nonempty(no_ext
[:trim_file_name
], *ext
, delim
='.')
1278 except ValueError as err
:
1279 self
.report_error('Error in output template: ' + str(err
) + ' (encoding: ' + repr(preferredencoding()) + ')')
1282 def prepare_filename(self
, info_dict
, dir_type
='', *, outtmpl
=None, warn
=False):
1283 """Generate the output filename"""
1285 assert not dir_type
, 'outtmpl and dir_type are mutually exclusive'
1287 filename
= self
._prepare
_filename
(info_dict
, tmpl_type
=dir_type
, outtmpl
=outtmpl
)
1288 if not filename
and dir_type
not in ('', 'temp'):
1292 if not self
.params
.get('paths'):
1294 elif filename
== '-':
1295 self
.report_warning('--paths is ignored when an outputting to stdout', only_once
=True)
1296 elif os
.path
.isabs(filename
):
1297 self
.report_warning('--paths is ignored since an absolute path is given in output template', only_once
=True)
1298 if filename
== '-' or not filename
:
1301 return self
.get_output_path(dir_type
, filename
)
1303 def _match_entry(self
, info_dict
, incomplete
=False, silent
=False):
1304 """ Returns None if the file should be downloaded """
1306 video_title
= info_dict
.get('title', info_dict
.get('id', 'video'))
1309 if 'title' in info_dict
:
1310 # This can happen when we're just evaluating the playlist
1311 title
= info_dict
['title']
1312 matchtitle
= self
.params
.get('matchtitle', False)
1314 if not re
.search(matchtitle
, title
, re
.IGNORECASE
):
1315 return '"' + title
+ '" title did not match pattern "' + matchtitle
+ '"'
1316 rejecttitle
= self
.params
.get('rejecttitle', False)
1318 if re
.search(rejecttitle
, title
, re
.IGNORECASE
):
1319 return '"' + title
+ '" title matched reject pattern "' + rejecttitle
+ '"'
1320 date
= info_dict
.get('upload_date')
1321 if date
is not None:
1322 dateRange
= self
.params
.get('daterange', DateRange())
1323 if date
not in dateRange
:
1324 return f
'{date_from_str(date).isoformat()} upload date is not in range {dateRange}'
1325 view_count
= info_dict
.get('view_count')
1326 if view_count
is not None:
1327 min_views
= self
.params
.get('min_views')
1328 if min_views
is not None and view_count
< min_views
:
1329 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title
, view_count
, min_views
)
1330 max_views
= self
.params
.get('max_views')
1331 if max_views
is not None and view_count
> max_views
:
1332 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title
, view_count
, max_views
)
1333 if age_restricted(info_dict
.get('age_limit'), self
.params
.get('age_limit')):
1334 return 'Skipping "%s" because it is age restricted' % video_title
1336 match_filter
= self
.params
.get('match_filter')
1337 if match_filter
is not None:
1339 ret
= match_filter(info_dict
, incomplete
=incomplete
)
1341 # For backward compatibility
1342 ret
= None if incomplete
else match_filter(info_dict
)
1343 if ret
is NO_DEFAULT
:
1345 filename
= self
._format
_screen
(self
.prepare_filename(info_dict
), self
.Styles
.FILENAME
)
1346 reply
= input(self
._format
_screen
(
1347 f
'Download "{filename}"? (Y/n): ', self
.Styles
.EMPHASIS
)).lower().strip()
1348 if reply
in {'y', ''}
:
1351 return f
'Skipping {video_title}'
1352 elif ret
is not None:
1356 if self
.in_download_archive(info_dict
):
1357 reason
= '%s has already been recorded in the archive' % video_title
1358 break_opt
, break_err
= 'break_on_existing', ExistingVideoReached
1360 reason
= check_filter()
1361 break_opt
, break_err
= 'break_on_reject', RejectedVideoReached
1362 if reason
is not None:
1364 self
.to_screen('[download] ' + reason
)
1365 if self
.params
.get(break_opt
, False):
def add_extra_info(info_dict, extra_info):
    '''Copy into info_dict every entry of extra_info whose key it does not have yet'''
    for field, fallback in extra_info.items():
        if field not in info_dict:
            info_dict[field] = fallback
1375 def extract_info(self
, url
, download
=True, ie_key
=None, extra_info
=None,
1376 process
=True, force_generic_extractor
=False):
1378 Return a list with a dictionary for each video extracted.
1381 url -- URL to extract
1384 download -- whether to download videos during extraction
1385 ie_key -- extractor key hint
1386 extra_info -- dictionary containing the extra values to add to each result
1387 process -- whether to resolve all unresolved references (URLs, playlist items),
1388 must be True for download to work.
1389 force_generic_extractor -- force using the generic extractor
1392 if extra_info
is None:
1395 if not ie_key
and force_generic_extractor
:
1399 ies
= {ie_key: self._get_info_extractor_class(ie_key)}
1403 for ie_key
, ie
in ies
.items():
1404 if not ie
.suitable(url
):
1407 if not ie
.working():
1408 self
.report_warning('The program functionality for this site has been marked as broken, '
1409 'and will probably not work.')
1411 temp_id
= ie
.get_temp_id(url
)
1412 if temp_id
is not None and self
.in_download_archive({'id': temp_id, 'ie_key': ie_key}
):
1413 self
.to_screen(f
'[{ie_key}] {temp_id}: has already been recorded in the archive')
1414 if self
.params
.get('break_on_existing', False):
1415 raise ExistingVideoReached()
1417 return self
.__extract
_info
(url
, self
.get_info_extractor(ie_key
), download
, extra_info
, process
)
1419 self
.report_error('no suitable InfoExtractor for URL %s' % url
)
1421 def _handle_extraction_exceptions(func
):
1422 @functools.wraps(func
)
1423 def wrapper(self
, *args
, **kwargs
):
1426 return func(self
, *args
, **kwargs
)
1427 except (DownloadCancelled
, LazyList
.IndexError, PagedList
.IndexError):
1429 except ReExtractInfo
as e
:
1431 self
.to_screen(f
'{e}; Re-extracting data')
1433 self
.to_stderr('\r')
1434 self
.report_warning(f
'{e}; Re-extracting data')
1436 except GeoRestrictedError
as e
:
1439 msg
+= '\nThis video is available in %s.' % ', '.join(
1440 map(ISO3166Utils
.short2full
, e
.countries
))
1441 msg
+= '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1442 self
.report_error(msg
)
1443 except ExtractorError
as e
: # An error we somewhat expected
1444 self
.report_error(str(e
), e
.format_traceback())
1445 except Exception as e
:
1446 if self
.params
.get('ignoreerrors'):
1447 self
.report_error(str(e
), tb
=encode_compat_str(traceback
.format_exc()))
1453 def _wait_for_video(self
, ie_result
):
1454 if (not self
.params
.get('wait_for_video')
1455 or ie_result
.get('_type', 'video') != 'video'
1456 or ie_result
.get('formats') or ie_result
.get('url')):
1459 format_dur
= lambda dur
: '%02d:%02d:%02d' % timetuple_from_msec(dur
* 1000)[:-1]
1464 self
.to_screen(msg
+ ' ' * (len(last_msg
) - len(msg
)) + '\r', skip_eol
=True)
1467 min_wait
, max_wait
= self
.params
.get('wait_for_video')
1468 diff
= try_get(ie_result
, lambda x
: x
['release_timestamp'] - time
.time())
1469 if diff
is None and ie_result
.get('live_status') == 'is_upcoming':
1470 diff
= round(random
.uniform(min_wait
, max_wait
) if (max_wait
and min_wait
) else (max_wait
or min_wait
), 0)
1471 self
.report_warning('Release time of video is not known')
1472 elif (diff
or 0) <= 0:
1473 self
.report_warning('Video should already be available according to extracted info')
1474 diff
= min(max(diff
or 0, min_wait
or 0), max_wait
or float('inf'))
1475 self
.to_screen(f
'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1477 wait_till
= time
.time() + diff
1480 diff
= wait_till
- time
.time()
1483 raise ReExtractInfo('[wait] Wait period ended', expected
=True)
1484 progress(f
'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1486 except KeyboardInterrupt:
1488 raise ReExtractInfo('[wait] Interrupted by user', expected
=True)
1489 except BaseException
as e
:
1490 if not isinstance(e
, ReExtractInfo
):
1494 @_handle_extraction_exceptions
1495 def __extract_info(self
, url
, ie
, download
, extra_info
, process
):
1496 ie_result
= ie
.extract(url
)
1497 if ie_result
is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
1498 self
.report_warning(f
'Extractor {ie.IE_NAME} returned nothing{bug_reports_message()}')
1500 if isinstance(ie_result
, list):
1501 # Backwards compatibility: old IE result format
1503 '_type': 'compat_list',
1504 'entries': ie_result
,
1506 if extra_info
.get('original_url'):
1507 ie_result
.setdefault('original_url', extra_info
['original_url'])
1508 self
.add_default_extra_info(ie_result
, ie
, url
)
1510 self
._wait
_for
_video
(ie_result
)
1511 return self
.process_ie_result(ie_result
, download
, extra_info
)
1515 def add_default_extra_info(self
, ie_result
, ie
, url
):
1517 self
.add_extra_info(ie_result
, {
1519 'original_url': url
,
1521 webpage_url
= ie_result
.get('webpage_url')
1523 self
.add_extra_info(ie_result
, {
1524 'webpage_url_basename': url_basename(webpage_url
),
1525 'webpage_url_domain': get_domain(webpage_url
),
1528 self
.add_extra_info(ie_result
, {
1529 'extractor': ie
.IE_NAME
,
1530 'extractor_key': ie
.ie_key(),
1533 def process_ie_result(self
, ie_result
, download
=True, extra_info
=None):
1535 Take the result of the ie(may be modified) and resolve all unresolved
1536 references (URLs, playlist items).
1538 It will also download the videos if 'download'.
1539 Returns the resolved ie_result.
1541 if extra_info
is None:
1543 result_type
= ie_result
.get('_type', 'video')
1545 if result_type
in ('url', 'url_transparent'):
1546 ie_result
['url'] = sanitize_url(ie_result
['url'])
1547 if ie_result
.get('original_url'):
1548 extra_info
.setdefault('original_url', ie_result
['original_url'])
1550 extract_flat
= self
.params
.get('extract_flat', False)
1551 if ((extract_flat
== 'in_playlist' and 'playlist' in extra_info
)
1552 or extract_flat
is True):
1553 info_copy
= ie_result
.copy()
1554 ie
= try_get(ie_result
.get('ie_key'), self
.get_info_extractor
)
1555 if ie
and not ie_result
.get('id'):
1556 info_copy
['id'] = ie
.get_temp_id(ie_result
['url'])
1557 self
.add_default_extra_info(info_copy
, ie
, ie_result
['url'])
1558 self
.add_extra_info(info_copy
, extra_info
)
1559 info_copy
, _
= self
.pre_process(info_copy
)
1560 self
.__forced
_printings
(info_copy
, self
.prepare_filename(info_copy
), incomplete
=True)
1561 self
._raise
_pending
_errors
(info_copy
)
1562 if self
.params
.get('force_write_download_archive', False):
1563 self
.record_download_archive(info_copy
)
1566 if result_type
== 'video':
1567 self
.add_extra_info(ie_result
, extra_info
)
1568 ie_result
= self
.process_video_result(ie_result
, download
=download
)
1569 self
._raise
_pending
_errors
(ie_result
)
1570 additional_urls
= (ie_result
or {}).get('additional_urls')
1572 # TODO: Improve MetadataParserPP to allow setting a list
1573 if isinstance(additional_urls
, str):
1574 additional_urls
= [additional_urls
]
1576 '[info] %s: %d additional URL(s) requested' % (ie_result
['id'], len(additional_urls
)))
1577 self
.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls
))
1578 ie_result
['additional_entries'] = [
1580 url
, download
, extra_info
=extra_info
,
1581 force_generic_extractor
=self
.params
.get('force_generic_extractor'))
1582 for url
in additional_urls
1585 elif result_type
== 'url':
1586 # We have to add extra_info to the results because it may be
1587 # contained in a playlist
1588 return self
.extract_info(
1589 ie_result
['url'], download
,
1590 ie_key
=ie_result
.get('ie_key'),
1591 extra_info
=extra_info
)
1592 elif result_type
== 'url_transparent':
1593 # Use the information from the embedding page
1594 info
= self
.extract_info(
1595 ie_result
['url'], ie_key
=ie_result
.get('ie_key'),
1596 extra_info
=extra_info
, download
=False, process
=False)
1598 # extract_info may return None when ignoreerrors is enabled and
1599 # extraction failed with an error, don't crash and return early
1604 exempted_fields
= {'_type', 'url', 'ie_key'}
1605 if not ie_result
.get('section_end') and ie_result
.get('section_start') is None:
1606 # For video clips, the id etc of the clip extractor should be used
1607 exempted_fields |
= {'id', 'extractor', 'extractor_key'}
1609 new_result
= info
.copy()
1610 new_result
.update(filter_dict(ie_result
, lambda k
, v
: v
is not None and k
not in exempted_fields
))
1612 # Extracted info may not be a video result (i.e.
1613 # info.get('_type', 'video') != video) but rather an url or
1614 # url_transparent. In such cases outer metadata (from ie_result)
1615 # should be propagated to inner one (info). For this to happen
1616 # _type of info should be overridden with url_transparent. This
1617 # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1618 if new_result
.get('_type') == 'url':
1619 new_result
['_type'] = 'url_transparent'
1621 return self
.process_ie_result(
1622 new_result
, download
=download
, extra_info
=extra_info
)
1623 elif result_type
in ('playlist', 'multi_video'):
1624 # Protect from infinite recursion due to recursively nested playlists
1625 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1626 webpage_url
= ie_result
['webpage_url']
1627 if webpage_url
in self
._playlist
_urls
:
1629 '[download] Skipping already downloaded playlist: %s'
1630 % ie_result
.get('title') or ie_result
.get('id'))
1633 self
._playlist
_level
+= 1
1634 self
._playlist
_urls
.add(webpage_url
)
1635 self
._fill
_common
_fields
(ie_result
, False)
1636 self
._sanitize
_thumbnails
(ie_result
)
1638 return self
.__process
_playlist
(ie_result
, download
)
1640 self
._playlist
_level
-= 1
1641 if not self
._playlist
_level
:
1642 self
._playlist
_urls
.clear()
1643 elif result_type
== 'compat_list':
1644 self
.report_warning(
1645 'Extractor %s returned a compat_list result. '
1646 'It needs to be updated.' % ie_result
.get('extractor'))
1649 self
.add_extra_info(r
, {
1650 'extractor': ie_result
['extractor'],
1651 'webpage_url': ie_result
['webpage_url'],
1652 'webpage_url_basename': url_basename(ie_result
['webpage_url']),
1653 'webpage_url_domain': get_domain(ie_result
['webpage_url']),
1654 'extractor_key': ie_result
['extractor_key'],
1657 ie_result
['entries'] = [
1658 self
.process_ie_result(_fixup(r
), download
, extra_info
)
1659 for r
in ie_result
['entries']
1663 raise Exception('Invalid result type: %s' % result_type
)
def _ensure_dir_exists(self, path):
    """Hand `path` to `make_dir` together with `self.report_error`

    Returns whatever `make_dir` returns.
    NOTE(review): presumably `make_dir` uses the second argument to report
    failures - confirm against its definition.
    """
    error_reporter = self.report_error
    return make_dir(path, error_reporter)
1669 def _playlist_infodict(ie_result
, **kwargs
):
1672 'playlist': ie_result
.get('title') or ie_result
.get('id'),
1673 'playlist_id': ie_result
.get('id'),
1674 'playlist_title': ie_result
.get('title'),
1675 'playlist_uploader': ie_result
.get('uploader'),
1676 'playlist_uploader_id': ie_result
.get('uploader_id'),
1677 'playlist_index': 0,
def __process_playlist(self, ie_result, download):
    """Process each entry in the playlist

    Resolves the requested entries of ie_result, optionally writes the
    playlist-level files (info json, description, thumbnails), processes
    every entry through __process_iterable_entry and finally runs the
    'playlist' post-processors.

    Returns the post-processed ie_result, or None when writing one of the
    playlist files reported failure (a None result from the writer).
    """
    title = ie_result.get('title') or ie_result.get('id') or '<Untitled>'
    self.to_screen(f'[download] Downloading playlist: {title}')

    all_entries = PlaylistEntries(self, ie_result)
    # Items are consumed below as (playlist_index, entry) pairs
    entries = orderedSet(all_entries.get_requested_items(), lazy=True)

    lazy = self.params.get('lazy_playlist')
    if lazy:
        # Nothing is materialized up front; resolved_entries is filled inside the loop
        resolved_entries, n_entries = [], 'N/A'
        ie_result['requested_entries'], ie_result['entries'] = None, None
    else:
        entries = resolved_entries = list(entries)
        n_entries = len(resolved_entries)
        # Unzip the pairs into (indices, entries); an empty playlist yields two empty lists
        ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*resolved_entries)) or ([], [])
    if not ie_result.get('playlist_count'):
        # Better to do this after potentially exhausting entries
        ie_result['playlist_count'] = all_entries.get_full_count()

    _infojson_written = False
    write_playlist_files = self.params.get('allow_playlist_files', True)
    if write_playlist_files and self.params.get('list_thumbnails'):
        self.list_thumbnails(ie_result)
    if write_playlist_files and not self.params.get('simulate'):
        # ie_copy carries the playlist_* fields and is only used for filename templating
        ie_copy = self._playlist_infodict(ie_result, n_entries=int_or_none(n_entries))
        _infojson_written = self._write_info_json(
            'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
        # A None result from the writers aborts the whole playlist
        if _infojson_written is None:
            return
        if self._write_description('playlist', ie_result,
                                   self.prepare_filename(ie_copy, 'pl_description')) is None:
            return
        # TODO: This should be passed to ThumbnailsConvertor if necessary
        self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))

    if lazy:
        # Reordering needs the full list, which lazy mode never builds
        if self.params.get('playlistreverse') or self.params.get('playlistrandom'):
            self.report_warning('playlistreverse and playlistrandom are not supported with lazy_playlist', only_once=True)
    elif self.params.get('playlistreverse'):
        entries.reverse()
    elif self.params.get('playlistrandom'):
        random.shuffle(entries)

    self.to_screen(f'[{ie_result["extractor"]}] Playlist {title}: Downloading {n_entries} videos'
                   f'{format_field(ie_result, "playlist_count", " of %s")}')

    failures = 0
    max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
    for i, (playlist_index, entry) in enumerate(entries):
        if lazy:
            # Keep resolved_entries index-aligned with `i` so the assignment below is valid
            resolved_entries.append((playlist_index, entry))

        # TODO: Add auto-generated fields
        if not entry or self._match_entry(entry, incomplete=True) is not None:
            continue

        self.to_screen('[download] Downloading video %s of %s' % (
            self._format_screen(i + 1, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))

        entry['__x_forwarded_for_ip'] = ie_result.get('__x_forwarded_for_ip')
        if not lazy and 'playlist-index' in self.params.get('compat_opts', []):
            playlist_index = ie_result['requested_entries'][i]

        entry_result = self.__process_iterable_entry(entry, download, {
            'n_entries': int_or_none(n_entries),
            # requested_entries is None in lazy mode, hence the (0, 0) fallback
            '__last_playlist_index': max(ie_result['requested_entries'] or (0, 0)),
            'playlist_count': ie_result.get('playlist_count'),
            'playlist_index': playlist_index,
            'playlist_autonumber': i + 1,
            'playlist': title,
            'playlist_id': ie_result.get('id'),
            'playlist_title': ie_result.get('title'),
            'playlist_uploader': ie_result.get('uploader'),
            'playlist_uploader_id': ie_result.get('uploader_id'),
            'extractor': ie_result['extractor'],
            'webpage_url': ie_result['webpage_url'],
            'webpage_url_basename': url_basename(ie_result['webpage_url']),
            'webpage_url_domain': get_domain(ie_result['webpage_url']),
            'extractor_key': ie_result['extractor_key'],
        })
        # A falsy result counts as a failed entry
        if not entry_result:
            failures += 1
        if failures >= max_failures:
            self.report_error(
                f'Skipping the remaining entries in playlist "{title}" since {failures} items failed extraction')
            break
        resolved_entries[i] = (playlist_index, entry_result)

    # Update with processed data
    ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*resolved_entries)) or ([], [])

    # Write the updated info to json
    # (`_infojson_written is True` implies ie_copy was assigned above)
    if _infojson_written is True and self._write_info_json(
            'updated playlist', ie_result,
            self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
        return

    ie_result = self.run_all_pps('playlist', ie_result)
    self.to_screen(f'[download] Finished downloading playlist: {title}')
    return ie_result
@_handle_extraction_exceptions
def __process_iterable_entry(self, entry, download, extra_info):
    """Process a single playlist entry via process_ie_result.

    The call is wrapped by _handle_extraction_exceptions; the result of
    process_ie_result is passed through as-is.
    """
    forwarded = {'download': download, 'extra_info': extra_info}
    return self.process_ie_result(entry, **forwarded)
1788 def _build_format_filter(self
, filter_spec
):
1789 " Returns a function to filter the formats according to the filter_spec "
1799 operator_rex
= re
.compile(r
'''(?x)\s*
1800 (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1801 (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1802 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
1803 ''' % '|'.join(map(re
.escape
, OPERATORS
.keys())))
1804 m
= operator_rex
.fullmatch(filter_spec
)
1807 comparison_value
= int(m
.group('value'))
1809 comparison_value
= parse_filesize(m
.group('value'))
1810 if comparison_value
is None:
1811 comparison_value
= parse_filesize(m
.group('value') + 'B')
1812 if comparison_value
is None:
1814 'Invalid value %r in format specification %r' % (
1815 m
.group('value'), filter_spec
))
1816 op
= OPERATORS
[m
.group('op')]
1821 '^=': lambda attr
, value
: attr
.startswith(value
),
1822 '$=': lambda attr
, value
: attr
.endswith(value
),
1823 '*=': lambda attr
, value
: value
in attr
,
1824 '~=': lambda attr
, value
: value
.search(attr
) is not None
1826 str_operator_rex
= re
.compile(r
'''(?x)\s*
1827 (?P<key>[a-zA-Z0-9._-]+)\s*
1828 (?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?
1830 (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
1831 (?(quote)(?P=quote))\s*
1832 ''' % '|'.join(map(re
.escape
, STR_OPERATORS
.keys())))
1833 m
= str_operator_rex
.fullmatch(filter_spec
)
1835 if m
.group('op') == '~=':
1836 comparison_value
= re
.compile(m
.group('value'))
1838 comparison_value
= re
.sub(r
'''\\([\\"'])''', r
'\1', m
.group('value'))
1839 str_op
= STR_OPERATORS
[m
.group('op')]
1840 if m
.group('negation'):
1841 op
= lambda attr
, value
: not str_op(attr
, value
)
1846 raise SyntaxError('Invalid filter specification %r' % filter_spec
)
1849 actual_value
= f
.get(m
.group('key'))
1850 if actual_value
is None:
1851 return m
.group('none_inclusive')
1852 return op(actual_value
, comparison_value
)
def _check_formats(self, formats):
    """Yield only those formats for which a test download succeeds.

    Each format is probed with self.dl(..., test=True) writing to a
    temporary file in the 'temp' output path. The file is removed again
    whether or not the probe succeeded.
    """
    for f in formats:
        self.to_screen('[info] Testing format %s' % f['format_id'])
        path = self.get_output_path('temp')
        # The format is skipped when the temp directory cannot be ensured
        if not self._ensure_dir_exists(f'{path}/'):
            continue
        # delete=False and the immediate close(): only the file name is handed to self.dl
        temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
        temp_file.close()
        try:
            success, _ = self.dl(temp_file.name, f, test=True)
        except (DownloadError, OSError, ValueError) + network_exceptions:
            # Any of these errors just marks the format as not downloadable
            success = False
        finally:
            if os.path.exists(temp_file.name):
                try:
                    os.remove(temp_file.name)
                except OSError:
                    # Best effort: a leftover temp file only produces a warning
                    self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
        if success:
            yield f
        else:
            self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1878 def _default_format_spec(self
, info_dict
, download
=True):
1881 merger
= FFmpegMergerPP(self
)
1882 return merger
.available
and merger
.can_merge()
1885 not self
.params
.get('simulate')
1889 or info_dict
.get('is_live') and not self
.params
.get('live_from_start')
1890 or self
.params
['outtmpl']['default'] == '-'))
1893 or self
.params
.get('allow_multiple_audio_streams', False)
1894 or 'format-spec' in self
.params
['compat_opts'])
1897 'best/bestvideo+bestaudio' if prefer_best
1898 else 'bestvideo*+bestaudio/best' if not compat
1899 else 'bestvideo+bestaudio/best')
def build_format_selector(self, format_spec):
    """Compile format_spec into a selector function.

    The spec is tokenized with the tokenize module, parsed into a tree of
    FormatSelector tuples (',' lists, '/' fallbacks, '+' merges, '()'
    groups, '[...]' filters) and turned into nested closures.

    The returned callable takes a ctx dict. It reads ctx['formats'] and,
    for best/worst/extension atoms, ctx['incomplete_formats'] and
    ctx['has_merged_format']. It returns an iterable of chosen format dicts.

    Raises SyntaxError for a malformed format_spec.
    """
    def syntax_error(note, start):
        # `start` is a (row, col) token position; col places the caret under the offending spot
        message = (
            'Invalid format specification: '
            '{}\n\t{}\n\t{}^'.format(note, format_spec, ' ' * start[1]))
        return SyntaxError(message)

    # Node kinds of the parsed selector tree
    PICKFIRST = 'PICKFIRST'
    MERGE = 'MERGE'
    SINGLE = 'SINGLE'
    GROUP = 'GROUP'
    FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

    allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
                              'video': self.params.get('allow_multiple_video_streams', False)}

    check_formats = self.params.get('check_formats') == 'selected'

    def _parse_filter(tokens):
        # Concatenate the token strings up to the closing ']' (consumed, not included)
        filter_parts = []
        for type, string, start, _, _ in tokens:
            if type == tokenize.OP and string == ']':
                return ''.join(filter_parts)
            else:
                filter_parts.append(string)

    def _remove_unused_ops(tokens):
        # Remove operators that we don't use and join them with the surrounding strings
        # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
        ALLOWED_OPS = ('/', '+', ',', '(', ')')
        last_string, last_start, last_end, last_line = None, None, None, None
        for type, string, start, end, line in tokens:
            if type == tokenize.OP and string == '[':
                # Flush the pending joined name before the filter starts
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield type, string, start, end, line
                # everything inside brackets will be handled by _parse_filter
                for type, string, start, end, line in tokens:
                    yield type, string, start, end, line
                    if type == tokenize.OP and string == ']':
                        break
            elif type == tokenize.OP and string in ALLOWED_OPS:
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield type, string, start, end, line
            elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                # Accumulate adjacent name/number/other-operator tokens into one name
                if not last_string:
                    last_string = string
                    last_start = start
                    last_end = end
                else:
                    last_string += string
        if last_string:
            yield tokenize.NAME, last_string, last_start, last_end, last_line

    def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
        # Recursive-descent parser; returns a list of FormatSelector nodes.
        # The inside_* flags tell a nested call which operators end it.
        selectors = []
        current_selector = None
        for type, string, start, _, _ in tokens:
            # ENCODING is only defined in python 3.x
            if type == getattr(tokenize, 'ENCODING', None):
                continue
            elif type in [tokenize.NAME, tokenize.NUMBER]:
                current_selector = FormatSelector(SINGLE, string, [])
            elif type == tokenize.OP:
                if string == ')':
                    if not inside_group:
                        # ')' will be handled by the parentheses group
                        tokens.restore_last_token()
                    break
                elif inside_merge and string in ['/', ',']:
                    tokens.restore_last_token()
                    break
                elif inside_choice and string == ',':
                    tokens.restore_last_token()
                    break
                elif string == ',':
                    if not current_selector:
                        raise syntax_error('"," must follow a format selector', start)
                    selectors.append(current_selector)
                    current_selector = None
                elif string == '/':
                    if not current_selector:
                        raise syntax_error('"/" must follow a format selector', start)
                    first_choice = current_selector
                    second_choice = _parse_format_selection(tokens, inside_choice=True)
                    current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                elif string == '[':
                    # A bare filter applies to 'best'
                    if not current_selector:
                        current_selector = FormatSelector(SINGLE, 'best', [])
                    format_filter = _parse_filter(tokens)
                    current_selector.filters.append(format_filter)
                elif string == '(':
                    if current_selector:
                        raise syntax_error('Unexpected "("', start)
                    group = _parse_format_selection(tokens, inside_group=True)
                    current_selector = FormatSelector(GROUP, group, [])
                elif string == '+':
                    if not current_selector:
                        raise syntax_error('Unexpected "+"', start)
                    selector_1 = current_selector
                    selector_2 = _parse_format_selection(tokens, inside_merge=True)
                    if not selector_2:
                        raise syntax_error('Expected a selector', start)
                    current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                else:
                    raise syntax_error(f'Operator not recognized: "{string}"', start)
            elif type == tokenize.ENDMARKER:
                break
        if current_selector:
            selectors.append(current_selector)
        return selectors

    def _merge(formats_pair):
        # Combine two (possibly already merged) formats into one merged format dict
        format_1, format_2 = formats_pair

        formats_info = []
        formats_info.extend(format_1.get('requested_formats', (format_1,)))
        formats_info.extend(format_2.get('requested_formats', (format_2,)))

        if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
            get_no_more = {'video': False, 'audio': False}
            # NOTE(review): formats_info is popped from while being enumerated, so the
            # element following a removed one is never inspected - confirm this is intended
            for (i, fmt_info) in enumerate(formats_info):
                if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
                    formats_info.pop(i)
                    continue
                for aud_vid in ['audio', 'video']:
                    if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
                        if get_no_more[aud_vid]:
                            formats_info.pop(i)
                            break
                        get_no_more[aud_vid] = True

        if len(formats_info) == 1:
            return formats_info[0]

        video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
        audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']

        the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
        the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None

        # Container: explicit option first, then the single stream's ext, else mkv
        output_ext = self.params.get('merge_output_format')
        if not output_ext:
            if the_only_video:
                output_ext = the_only_video['ext']
            elif the_only_audio and not video_fmts:
                output_ext = the_only_audio['ext']
            else:
                output_ext = 'mkv'

        filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))

        new_dict = {
            'requested_formats': formats_info,
            'format': '+'.join(filtered('format')),
            'format_id': '+'.join(filtered('format_id')),
            'ext': output_ext,
            'protocol': '+'.join(map(determine_protocol, formats_info)),
            'language': '+'.join(orderedSet(filtered('language'))) or None,
            'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
            'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
            'tbr': sum(filtered('tbr', 'vbr', 'abr')),
        }

        # Stream-specific fields are only copied when exactly one such stream exists
        if the_only_video:
            new_dict.update({
                'width': the_only_video.get('width'),
                'height': the_only_video.get('height'),
                'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
                'fps': the_only_video.get('fps'),
                'dynamic_range': the_only_video.get('dynamic_range'),
                'vcodec': the_only_video.get('vcodec'),
                'vbr': the_only_video.get('vbr'),
                'stretched_ratio': the_only_video.get('stretched_ratio'),
            })

        if the_only_audio:
            new_dict.update({
                'acodec': the_only_audio.get('acodec'),
                'abr': the_only_audio.get('abr'),
                'asr': the_only_audio.get('asr'),
            })

        return new_dict

    def _check_formats(formats):
        # Formats are only test-downloaded when check_formats == 'selected'
        if not check_formats:
            yield from formats
            return
        yield from self._check_formats(formats)

    def _build_selector_function(selector):
        # Turn a parsed node (or list of nodes) into a function ctx -> iterable of formats
        if isinstance(selector, list):  # ,
            fs = [_build_selector_function(s) for s in selector]

            def selector_function(ctx):
                for f in fs:
                    yield from f(ctx)
            return selector_function

        elif selector.type == GROUP:  # ()
            selector_function = _build_selector_function(selector.selector)

        elif selector.type == PICKFIRST:  # /
            fs = [_build_selector_function(s) for s in selector.selector]

            def selector_function(ctx):
                # First alternative that yields anything wins
                for f in fs:
                    picked_formats = list(f(ctx))
                    if picked_formats:
                        return picked_formats
                return []

        elif selector.type == MERGE:  # +
            selector_1, selector_2 = map(_build_selector_function, selector.selector)

            def selector_function(ctx):
                for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
                    yield _merge(pair)

        elif selector.type == SINGLE:  # atom
            format_spec = selector.selector or 'best'

            # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
            if format_spec == 'all':
                def selector_function(ctx):
                    yield from _check_formats(ctx['formats'][::-1])
            elif format_spec == 'mergeall':
                def selector_function(ctx):
                    formats = list(_check_formats(
                        f for f in ctx['formats'] if f.get('vcodec') != 'none' or f.get('acodec') != 'none'))
                    if not formats:
                        return
                    # Fold from the last format towards the first
                    merged_format = formats[-1]
                    for f in formats[-2::-1]:
                        merged_format = _merge((merged_format, f))
                    yield merged_format

            else:
                format_fallback, seperate_fallback, format_reverse, format_idx = False, None, True, 1
                mobj = re.match(
                    r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
                    format_spec)
                if mobj is not None:
                    format_idx = int_or_none(mobj.group('n'), default=1)
                    format_reverse = mobj.group('bw')[0] == 'b'
                    # First letter of the type ('v'/'a'), or None when no type was given
                    format_type = (mobj.group('type') or [None])[0]
                    not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
                    format_modified = mobj.group('mod') is not None

                    format_fallback = not format_type and not format_modified  # for b, w
                    _filter_f = (
                        (lambda f: f.get('%scodec' % format_type) != 'none')
                        if format_type and format_modified  # bv*, ba*, wv*, wa*
                        else (lambda f: f.get('%scodec' % not_format_type) == 'none')
                        if format_type  # bv, ba, wv, wa
                        else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                        if not format_modified  # b, w
                        else lambda f: True)  # b*, w*
                    # Formats with neither audio nor video are never picked by best/worst
                    filter_f = lambda f: _filter_f(f) and (
                        f.get('vcodec') != 'none' or f.get('acodec') != 'none')
                else:
                    if format_spec in self._format_selection_exts['audio']:
                        filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
                    elif format_spec in self._format_selection_exts['video']:
                        filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
                        seperate_fallback = lambda f: f.get('ext') == format_spec and f.get('vcodec') != 'none'
                    elif format_spec in self._format_selection_exts['storyboards']:
                        filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
                    else:
                        filter_f = lambda f: f.get('format_id') == format_spec  # id

                def selector_function(ctx):
                    formats = list(ctx['formats'])
                    matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                    if not matches:
                        if format_fallback and ctx['incomplete_formats']:
                            # for extractors with incomplete formats (audio only (soundcloud)
                            # or video only (imgur)) best/worst will fallback to
                            # best/worst {video,audio}-only format
                            matches = formats
                        elif seperate_fallback and not ctx['has_merged_format']:
                            # for compatibility with youtube-dl when there is no pre-merged format
                            matches = list(filter(seperate_fallback, formats))
                    # "best" walks the list from the end, "worst" from the start
                    matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
                    try:
                        yield matches[format_idx - 1]
                    except LazyList.IndexError:
                        return

        filters = [self._build_format_filter(f) for f in selector.filters]

        def final_selector(ctx):
            # Apply the '[...]' filters to a shallow copy so the caller's ctx is not modified
            ctx_copy = dict(ctx)
            for _filter in filters:
                ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
            return selector_function(ctx_copy)
        return final_selector

    stream = io.BytesIO(format_spec.encode())
    try:
        tokens = list(_remove_unused_ops(tokenize.tokenize(stream.readline)))
    except tokenize.TokenError:
        raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

    class TokenIterator:
        # Iterator over the token list that can step back one token (used by the parser)
        def __init__(self, tokens):
            self.tokens = tokens
            self.counter = 0

        def __iter__(self):
            return self

        def __next__(self):
            if self.counter >= len(self.tokens):
                raise StopIteration()
            value = self.tokens[self.counter]
            self.counter += 1
            return value

        next = __next__

        def restore_last_token(self):
            self.counter -= 1

    parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
    return _build_selector_function(parsed_selector)
def _calc_headers(self, info_dict):
    """Build the HTTP headers for downloading info_dict.

    Starts from merge_headers(params['http_headers'], info_dict's own
    'http_headers'), adds a 'Cookie' header when _calc_cookies returns
    something truthy for info_dict['url'], and adds 'X-Forwarded-For'
    from '__x_forwarded_for_ip' unless that header is already present.
    """
    base_headers = self.params['http_headers']
    headers = merge_headers(base_headers, info_dict.get('http_headers') or {})

    cookie_header = self._calc_cookies(info_dict['url'])
    if cookie_header:
        headers['Cookie'] = cookie_header

    if 'X-Forwarded-For' in headers:
        return headers

    forwarded_ip = info_dict.get('__x_forwarded_for_ip')
    if forwarded_ip:
        headers['X-Forwarded-For'] = forwarded_ip
    return headers
def _calc_cookies(self, url):
    """Return the 'Cookie' header that self.cookiejar adds to a request for url.

    A request object is built only to let the cookiejar populate it; the
    result is whatever its get_header('Cookie') returns.
    """
    probe_request = sanitized_Request(url)
    self.cookiejar.add_cookie_header(probe_request)
    return probe_request.get_header('Cookie')
2251 def _sort_thumbnails(self
, thumbnails
):
2252 thumbnails
.sort(key
=lambda t
: (
2253 t
.get('preference') if t
.get('preference') is not None else -1,
2254 t
.get('width') if t
.get('width') is not None else -1,
2255 t
.get('height') if t
.get('height') is not None else -1,
2256 t
.get('id') if t
.get('id') is not None else '',
def _sanitize_thumbnails(self, info_dict):
    """Normalize info_dict['thumbnails'] in place.

    A lone 'thumbnail' URL is promoted to a one-element 'thumbnails' list
    when no list exists. The list is then sorted with _sort_thumbnails,
    each entry gets an 'id' (its index) if missing, a 'resolution' when
    both width and height are set, and a sanitized 'url'. When the
    'check_formats' param is exactly True, the stored list is replaced by
    a LazyList that only contains thumbnails whose URL answered a HEAD
    request.
    """
    thumbnails = info_dict.get('thumbnails')
    if thumbnails is None:
        single_url = info_dict.get('thumbnail')
        if single_url:
            thumbnails = [{'url': single_url}]
            info_dict['thumbnails'] = thumbnails
    if not thumbnails:
        return

    def reachable_thumbnails(candidates):
        # Yield only the thumbnails that can be reached over the network
        for t in candidates:
            self.to_screen(f'[info] Testing thumbnail {t["id"]}')
            try:
                self.urlopen(HEADRequest(t['url']))
            except network_exceptions as err:
                self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
            else:
                yield t

    self._sort_thumbnails(thumbnails)
    for index, thumb in enumerate(thumbnails):
        if thumb.get('id') is None:
            thumb['id'] = '%d' % index
        width, height = thumb.get('width'), thumb.get('height')
        if width and height:
            thumb['resolution'] = '%dx%d' % (width, height)
        thumb['url'] = sanitize_url(thumb['url'])

    if self.params.get('check_formats') is True:
        # Checked from the end of the sorted list, then presented in the original order
        thumbnails = LazyList(reachable_thumbnails(thumbnails[::-1]), reverse=True)
    info_dict['thumbnails'] = thumbnails
2291 def _fill_common_fields(self
, info_dict
, is_video
=True):
2292 # TODO: move sanitization here
2294 # playlists are allowed to lack "title"
2295 title
= info_dict
.get('title', NO_DEFAULT
)
2296 if title
is NO_DEFAULT
:
2297 raise ExtractorError('Missing "title" field in extractor result',
2298 video_id
=info_dict
['id'], ie
=info_dict
['extractor'])
2299 info_dict
['fulltitle'] = title
2302 self
.write_debug('Extractor gave empty title. Creating a generic title')
2304 self
.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
2305 info_dict
['title'] = f
'{info_dict["extractor"].replace(":", "-")} video #{info_dict["id"]}'
2307 if info_dict
.get('duration') is not None:
2308 info_dict
['duration_string'] = formatSeconds(info_dict
['duration'])
2310 for ts_key
, date_key
in (
2311 ('timestamp', 'upload_date'),
2312 ('release_timestamp', 'release_date'),
2313 ('modified_timestamp', 'modified_date'),
2315 if info_dict
.get(date_key
) is None and info_dict
.get(ts_key
) is not None:
2316 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2317 # see http://bugs.python.org/issue1646728)
2318 with contextlib
.suppress(ValueError, OverflowError, OSError):
2319 upload_date
= datetime
.datetime
.utcfromtimestamp(info_dict
[ts_key
])
2320 info_dict
[date_key
] = upload_date
.strftime('%Y%m%d')
2322 live_keys
= ('is_live', 'was_live')
2323 live_status
= info_dict
.get('live_status')
2324 if live_status
is None:
2325 for key
in live_keys
:
2326 if info_dict
.get(key
) is False:
2328 if info_dict
.get(key
):
2331 if all(info_dict
.get(key
) is False for key
in live_keys
):
2332 live_status
= 'not_live'
2334 info_dict
['live_status'] = live_status
2335 for key
in live_keys
:
2336 if info_dict
.get(key
) is None:
2337 info_dict
[key
] = (live_status
== key
)
2339 # Auto generate title fields corresponding to the *_number fields when missing
2340 # in order to always have clean titles. This is very common for TV series.
2341 for field
in ('chapter', 'season', 'episode'):
2342 if info_dict
.get('%s_number' % field
) is not None and not info_dict
.get(field
):
2343 info_dict
[field
] = '%s %d' % (field
.capitalize(), info_dict
['%s_number' % field
])
2345 def _raise_pending_errors(self
, info
):
2346 err
= info
.pop('__pending_error', None)
2348 self
.report_error(err
, tb
=False)
2350 def process_video_result(self
, info_dict
, download
=True):
2351 assert info_dict
.get('_type', 'video') == 'video'
2352 self
._num
_videos
+= 1
2354 if 'id' not in info_dict
:
2355 raise ExtractorError('Missing "id" field in extractor result', ie
=info_dict
['extractor'])
2356 elif not info_dict
.get('id'):
2357 raise ExtractorError('Extractor failed to obtain "id"', ie
=info_dict
['extractor'])
2359 def report_force_conversion(field
, field_not
, conversion
):
2360 self
.report_warning(
2361 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2362 % (field
, field_not
, conversion
))
2364 def sanitize_string_field(info
, string_field
):
2365 field
= info
.get(string_field
)
2366 if field
is None or isinstance(field
, str):
2368 report_force_conversion(string_field
, 'a string', 'string')
2369 info
[string_field
] = str(field
)
2371 def sanitize_numeric_fields(info
):
2372 for numeric_field
in self
._NUMERIC
_FIELDS
:
2373 field
= info
.get(numeric_field
)
2374 if field
is None or isinstance(field
, (int, float)):
2376 report_force_conversion(numeric_field
, 'numeric', 'int')
2377 info
[numeric_field
] = int_or_none(field
)
2379 sanitize_string_field(info_dict
, 'id')
2380 sanitize_numeric_fields(info_dict
)
2381 if info_dict
.get('section_end') and info_dict
.get('section_start') is not None:
2382 info_dict
['duration'] = round(info_dict
['section_end'] - info_dict
['section_start'], 3)
2383 if (info_dict
.get('duration') or 0) <= 0 and info_dict
.pop('duration', None):
2384 self
.report_warning('"duration" field is negative, there is an error in extractor')
2386 chapters
= info_dict
.get('chapters') or []
2387 if chapters
and chapters
[0].get('start_time'):
2388 chapters
.insert(0, {'start_time': 0}
)
2390 dummy_chapter
= {'end_time': 0, 'start_time': info_dict.get('duration')}
2391 for idx
, (prev
, current
, next_
) in enumerate(zip(
2392 (dummy_chapter
, *chapters
), chapters
, (*chapters
[1:], dummy_chapter
)), 1):
2393 if current
.get('start_time') is None:
2394 current
['start_time'] = prev
.get('end_time')
2395 if not current
.get('end_time'):
2396 current
['end_time'] = next_
.get('start_time')
2397 if not current
.get('title'):
2398 current
['title'] = f
'<Untitled Chapter {idx}>'
2400 if 'playlist' not in info_dict
:
2401 # It isn't part of a playlist
2402 info_dict
['playlist'] = None
2403 info_dict
['playlist_index'] = None
2405 self
._sanitize
_thumbnails
(info_dict
)
2407 thumbnail
= info_dict
.get('thumbnail')
2408 thumbnails
= info_dict
.get('thumbnails')
2410 info_dict
['thumbnail'] = sanitize_url(thumbnail
)
2412 info_dict
['thumbnail'] = thumbnails
[-1]['url']
2414 if info_dict
.get('display_id') is None and 'id' in info_dict
:
2415 info_dict
['display_id'] = info_dict
['id']
2417 self
._fill
_common
_fields
(info_dict
)
2419 for cc_kind
in ('subtitles', 'automatic_captions'):
2420 cc
= info_dict
.get(cc_kind
)
2422 for _
, subtitle
in cc
.items():
2423 for subtitle_format
in subtitle
:
2424 if subtitle_format
.get('url'):
2425 subtitle_format
['url'] = sanitize_url(subtitle_format
['url'])
2426 if subtitle_format
.get('ext') is None:
2427 subtitle_format
['ext'] = determine_ext(subtitle_format
['url']).lower()
2429 automatic_captions
= info_dict
.get('automatic_captions')
2430 subtitles
= info_dict
.get('subtitles')
2432 info_dict
['requested_subtitles'] = self
.process_subtitles(
2433 info_dict
['id'], subtitles
, automatic_captions
)
2435 if info_dict
.get('formats') is None:
2436 # There's only one format available
2437 formats
= [info_dict
]
2439 formats
= info_dict
['formats']
2441 # or None ensures --clean-infojson removes it
2442 info_dict
['_has_drm'] = any(f
.get('has_drm') for f
in formats
) or None
2443 if not self
.params
.get('allow_unplayable_formats'):
2444 formats
= [f
for f
in formats
if not f
.get('has_drm')]
2445 if info_dict
['_has_drm'] and all(
2446 f
.get('acodec') == f
.get('vcodec') == 'none' for f
in formats
):
2447 self
.report_warning(
2448 'This video is DRM protected and only images are available for download. '
2449 'Use --list-formats to see them')
2451 get_from_start
= not info_dict
.get('is_live') or bool(self
.params
.get('live_from_start'))
2452 if not get_from_start
:
2453 info_dict
['title'] += ' ' + datetime
.datetime
.now().strftime('%Y-%m-%d %H:%M')
2454 if info_dict
.get('is_live') and formats
:
2455 formats
= [f
for f
in formats
if bool(f
.get('is_from_start')) == get_from_start
]
2456 if get_from_start
and not formats
:
2457 self
.raise_no_formats(info_dict
, msg
=(
2458 '--live-from-start is passed, but there are no formats that can be downloaded from the start. '
2459 'If you want to download from the current time, use --no-live-from-start'))
2462 self
.raise_no_formats(info_dict
)
2464 def is_wellformed(f
):
2467 self
.report_warning(
2468 '"url" field is missing or empty - skipping format, '
2469 'there is an error in extractor')
2471 if isinstance(url
, bytes):
2472 sanitize_string_field(f
, 'url')
2475 # Filter out malformed formats for better extraction robustness
2476 formats
= list(filter(is_wellformed
, formats
))
2480 # We check that all the formats have the format and format_id fields
2481 for i
, format
in enumerate(formats
):
2482 sanitize_string_field(format
, 'format_id')
2483 sanitize_numeric_fields(format
)
2484 format
['url'] = sanitize_url(format
['url'])
2485 if not format
.get('format_id'):
2486 format
['format_id'] = str(i
)
2488 # Sanitize format_id from characters used in format selector expression
2489 format
['format_id'] = re
.sub(r
'[\s,/+\[\]()]', '_', format
['format_id'])
2490 format_id
= format
['format_id']
2491 if format_id
not in formats_dict
:
2492 formats_dict
[format_id
] = []
2493 formats_dict
[format_id
].append(format
)
2495 # Make sure all formats have unique format_id
2496 common_exts
= set(itertools
.chain(*self
._format
_selection
_exts
.values()))
2497 for format_id
, ambiguous_formats
in formats_dict
.items():
2498 ambigious_id
= len(ambiguous_formats
) > 1
2499 for i
, format
in enumerate(ambiguous_formats
):
2501 format
['format_id'] = '%s-%d' % (format_id
, i
)
2502 if format
.get('ext') is None:
2503 format
['ext'] = determine_ext(format
['url']).lower()
2504 # Ensure there is no conflict between id and ext in format selection
2505 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2506 if format
['format_id'] != format
['ext'] and format
['format_id'] in common_exts
:
2507 format
['format_id'] = 'f%s' % format
['format_id']
2509 for i
, format
in enumerate(formats
):
2510 if format
.get('format') is None:
2511 format
['format'] = '{id} - {res}{note}'.format(
2512 id=format
['format_id'],
2513 res
=self
.format_resolution(format
),
2514 note
=format_field(format
, 'format_note', ' (%s)'),
2516 if format
.get('protocol') is None:
2517 format
['protocol'] = determine_protocol(format
)
2518 if format
.get('resolution') is None:
2519 format
['resolution'] = self
.format_resolution(format
, default
=None)
2520 if format
.get('dynamic_range') is None and format
.get('vcodec') != 'none':
2521 format
['dynamic_range'] = 'SDR'
2522 if (info_dict
.get('duration') and format
.get('tbr')
2523 and not format
.get('filesize') and not format
.get('filesize_approx')):
2524 format
['filesize_approx'] = int(info_dict
['duration'] * format
['tbr'] * (1024 / 8))
2526 # Add HTTP headers, so that external programs can use them from the
2528 full_format_info
= info_dict
.copy()
2529 full_format_info
.update(format
)
2530 format
['http_headers'] = self
._calc
_headers
(full_format_info
)
2531 # Remove private housekeeping stuff
2532 if '__x_forwarded_for_ip' in info_dict
:
2533 del info_dict
['__x_forwarded_for_ip']
2535 if self
.params
.get('check_formats') is True:
2536 formats
= LazyList(self
._check
_formats
(formats
[::-1]), reverse
=True)
2538 if not formats
or formats
[0] is not info_dict
:
2539 # only set the 'formats' fields if the original info_dict list them
2540 # otherwise we end up with a circular reference, the first (and unique)
2541 # element in the 'formats' field in info_dict is info_dict itself,
2542 # which can't be exported to json
2543 info_dict
['formats'] = formats
2545 info_dict
, _
= self
.pre_process(info_dict
)
2547 if self
._match
_entry
(info_dict
, incomplete
=self
._format
_fields
) is not None:
2550 self
.post_extract(info_dict
)
2551 info_dict
, _
= self
.pre_process(info_dict
, 'after_filter')
2553 # The pre-processors may have modified the formats
2554 formats
= info_dict
.get('formats', [info_dict
])
2556 list_only
= self
.params
.get('simulate') is None and (
2557 self
.params
.get('list_thumbnails') or self
.params
.get('listformats') or self
.params
.get('listsubtitles'))
2558 interactive_format_selection
= not list_only
and self
.format_selector
== '-'
2559 if self
.params
.get('list_thumbnails'):
2560 self
.list_thumbnails(info_dict
)
2561 if self
.params
.get('listsubtitles'):
2562 if 'automatic_captions' in info_dict
:
2563 self
.list_subtitles(
2564 info_dict
['id'], automatic_captions
, 'automatic captions')
2565 self
.list_subtitles(info_dict
['id'], subtitles
, 'subtitles')
2566 if self
.params
.get('listformats') or interactive_format_selection
:
2567 self
.list_formats(info_dict
)
2569 # Without this printing, -F --print-json will not work
2570 self
.__forced
_printings
(info_dict
, self
.prepare_filename(info_dict
), incomplete
=True)
2573 format_selector
= self
.format_selector
2574 if format_selector
is None:
2575 req_format
= self
._default
_format
_spec
(info_dict
, download
=download
)
2576 self
.write_debug('Default format spec: %s' % req_format
)
2577 format_selector
= self
.build_format_selector(req_format
)
2580 if interactive_format_selection
:
2582 self
._format
_screen
('\nEnter format selector: ', self
.Styles
.EMPHASIS
))
2584 format_selector
= self
.build_format_selector(req_format
)
2585 except SyntaxError as err
:
2586 self
.report_error(err
, tb
=False, is_error
=False)
2589 formats_to_download
= list(format_selector({
2591 'has_merged_format': any('none' not in (f
.get('acodec'), f
.get('vcodec')) for f
in formats
),
2592 'incomplete_formats': (
2593 # All formats are video-only or
2594 all(f
.get('vcodec') != 'none' and f
.get('acodec') == 'none' for f
in formats
)
2595 # all formats are audio-only
2596 or all(f
.get('vcodec') == 'none' and f
.get('acodec') != 'none' for f
in formats
)),
2598 if interactive_format_selection
and not formats_to_download
:
2599 self
.report_error('Requested format is not available', tb
=False, is_error
=False)
2603 if not formats_to_download
:
2604 if not self
.params
.get('ignore_no_formats_error'):
2605 raise ExtractorError(
2606 'Requested format is not available. Use --list-formats for a list of available formats',
2607 expected
=True, video_id
=info_dict
['id'], ie
=info_dict
['extractor'])
2608 self
.report_warning('Requested format is not available')
2609 # Process what we can, even without any available formats.
2610 formats_to_download
= [{}]
2612 requested_ranges
= self
.params
.get('download_ranges')
2613 if requested_ranges
:
2614 requested_ranges
= tuple(requested_ranges(info_dict
, self
))
2616 best_format
, downloaded_formats
= formats_to_download
[-1], []
2619 def to_screen(*msg
):
2620 self
.to_screen(f
'[info] {info_dict["id"]}: {" ".join(", ".join(variadic(m)) for m in msg)}')
2622 to_screen(f
'Downloading {len(formats_to_download)} format(s):',
2623 (f
['format_id'] for f
in formats_to_download
))
2624 if requested_ranges
:
2625 to_screen(f
'Downloading {len(requested_ranges)} time ranges:',
2626 (f
'{int(c["start_time"])}-{int(c["end_time"])}' for c
in requested_ranges
))
2627 max_downloads_reached
= False
2629 for fmt
, chapter
in itertools
.product(formats_to_download
, requested_ranges
or [{}]):
2630 new_info
= self
._copy
_infodict
(info_dict
)
2631 new_info
.update(fmt
)
2632 offset
, duration
= info_dict
.get('section_start') or 0, info_dict
.get('duration') or float('inf')
2633 if chapter
or offset
:
2635 'section_start': offset
+ chapter
.get('start_time', 0),
2636 'section_end': offset
+ min(chapter
.get('end_time', duration
), duration
),
2637 'section_title': chapter
.get('title'),
2638 'section_number': chapter
.get('index'),
2640 downloaded_formats
.append(new_info
)
2642 self
.process_info(new_info
)
2643 except MaxDownloadsReached
:
2644 max_downloads_reached
= True
2645 self
._raise
_pending
_errors
(new_info
)
2646 # Remove copied info
2647 for key
, val
in tuple(new_info
.items()):
2648 if info_dict
.get(key
) == val
:
2650 if max_downloads_reached
:
2653 write_archive
= {f.get('__write_download_archive', False) for f in downloaded_formats}
2654 assert write_archive
.issubset({True, False, 'ignore'}
)
2655 if True in write_archive
and False not in write_archive
:
2656 self
.record_download_archive(info_dict
)
2658 info_dict
['requested_downloads'] = downloaded_formats
2659 info_dict
= self
.run_all_pps('after_video', info_dict
)
2660 if max_downloads_reached
:
2661 raise MaxDownloadsReached()
2663 # We update the info dict with the selected best quality format (backwards compatibility)
2664 info_dict
.update(best_format
)
def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
    """Select the requested subtitles and their format

    Returns a dict mapping each selected language to the single chosen
    format dict, or None when neither 'writesubtitles' nor
    'writeautomaticsub' is set or no subtitles are available.
    """
    available_subs, normal_sub_langs = {}, []
    if normal_subtitles and self.params.get('writesubtitles'):
        available_subs.update(normal_subtitles)
        normal_sub_langs = tuple(normal_subtitles.keys())
    if automatic_captions and self.params.get('writeautomaticsub'):
        # Automatic captions only fill in languages that have no normal subtitles
        for lang, cap_info in automatic_captions.items():
            if lang not in available_subs:
                available_subs[lang] = cap_info

    if (not self.params.get('writesubtitles') and not
            self.params.get('writeautomaticsub') or not
            available_subs):
        return None

    all_sub_langs = tuple(available_subs.keys())
    if self.params.get('allsubtitles', False):
        requested_langs = all_sub_langs
    elif self.params.get('subtitleslangs', False):
        # A list is used so that the order of languages will be the same as
        # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
        requested_langs = []
        for lang_re in self.params.get('subtitleslangs'):
            # startswith() instead of [0] so that an empty entry cannot raise IndexError
            discard = lang_re.startswith('-')
            if discard:
                lang_re = lang_re[1:]
            if lang_re == 'all':
                if discard:
                    requested_langs = []
                else:
                    requested_langs.extend(all_sub_langs)
                continue
            current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
            if discard:
                for lang in current_langs:
                    while lang in requested_langs:
                        requested_langs.remove(lang)
            else:
                requested_langs.extend(current_langs)
        requested_langs = orderedSet(requested_langs)
    elif normal_sub_langs:
        requested_langs = ['en'] if 'en' in normal_sub_langs else normal_sub_langs[:1]
    else:
        requested_langs = ['en'] if 'en' in all_sub_langs else all_sub_langs[:1]
    if requested_langs:
        self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))

    formats_query = self.params.get('subtitlesformat', 'best')
    formats_preference = formats_query.split('/') if formats_query else []
    subs = {}
    for lang in requested_langs:
        formats = available_subs.get(lang)
        if formats is None:
            self.report_warning(f'{lang} subtitles not available for {video_id}')
            continue
        for ext in formats_preference:
            if ext == 'best':
                f = formats[-1]
                break
            matches = list(filter(lambda f: f['ext'] == ext, formats))
            if matches:
                f = matches[-1]
                break
        else:
            # No preference matched: fall back to the last listed format
            f = formats[-1]
            self.report_warning(
                'No subtitle format found matching "%s" for language %s, '
                'using %s' % (formats_query, lang, f['ext']))
        subs[lang] = f
    return subs
def _forceprint(self, key, info_dict):
    """Evaluate the 'forceprint' and 'print_to_file' templates registered for `key`

    Does nothing when info_dict is None. The templates are evaluated against
    a copy of info_dict extended with the rendered *_table fields.
    """
    if info_dict is None:
        return
    info_copy = info_dict.copy()
    info_copy['formats_table'] = self.render_formats_table(info_dict)
    info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)
    info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
    info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))

    def format_tmpl(tmpl):
        # A bare field name becomes '%(field)s'; 'field=' becomes 'field = %(field)r';
        # anything else is passed through as a full template
        mobj = re.match(r'\w+(=?)$', tmpl)
        if mobj and mobj.group(1):
            return f'{tmpl[:-1]} = %({tmpl[:-1]})r'
        elif mobj:
            return f'%({tmpl})s'
        return tmpl

    for tmpl in self.params['forceprint'].get(key, []):
        self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))

    for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):
        filename = self.prepare_filename(info_dict, outtmpl=file_tmpl)
        tmpl = format_tmpl(tmpl)
        self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
        if self._ensure_dir_exists(filename):
            with open(filename, 'a', encoding='utf-8') as f:
                f.write(self.evaluate_outtmpl(tmpl, info_copy) + '\n')
def __forced_printings(self, info_dict, filename, incomplete):
    """Print the fields requested through the 'force*' params to stdout

    Works on a shallow copy of info_dict, so the 'filename' and 'urls' keys
    added here do not leak into the caller's dict. When `incomplete` is
    true, mandatory fields that are missing are skipped instead of raising.
    """
    def print_mandatory(field, actual_field=None):
        if actual_field is None:
            actual_field = field
        if (self.params.get('force%s' % field, False)
                and (not incomplete or info_dict.get(actual_field) is not None)):
            self.to_stdout(info_dict[actual_field])

    def print_optional(field):
        if (self.params.get('force%s' % field, False)
                and info_dict.get(field) is not None):
            self.to_stdout(info_dict[field])

    info_dict = info_dict.copy()
    if filename is not None:
        info_dict['filename'] = filename
    if info_dict.get('requested_formats') is not None:
        # For RTMP URLs, also include the playpath
        info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
    elif info_dict.get('url'):
        info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')

    if (self.params.get('forcejson')
            or self.params['forceprint'].get('video')
            or self.params['print_to_file'].get('video')):
        self.post_extract(info_dict)
    self._forceprint('video', info_dict)

    print_mandatory('title')
    print_mandatory('id')
    print_mandatory('url', 'urls')
    print_optional('thumbnail')
    print_optional('description')
    print_optional('filename')
    if self.params.get('forceduration') and info_dict.get('duration') is not None:
        self.to_stdout(formatSeconds(info_dict['duration']))
    print_mandatory('format')

    if self.params.get('forcejson'):
        self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
def dl(self, name, info, subtitle=False, test=False):
    """Download `info` to `name` with a suitable downloader and return fd.download()'s result

    With test=True a fixed, quiet parameter set is used instead of
    self.params, and no progress hooks are attached.
    """
    if not info.get('url'):
        self.raise_no_formats(info, True)

    if test:
        verbose = self.params.get('verbose')
        params = {
            'test': True,
            'quiet': self.params.get('quiet') or not verbose,
            'verbose': verbose,
            'noprogress': not verbose,
            'nopart': True,
            'skip_unavailable_fragments': False,
            'keep_fragments': False,
            'overwrites': True,
            '_no_ytdl_file': True,
        }
    else:
        params = self.params
    fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
    if not test:
        for ph in self._progress_hooks:
            fd.add_progress_hook(ph)
        # data: URLs are truncated after the media type so the debug line stays readable
        urls = '", "'.join(
            (f['url'].split(',')[0] + ',<data>' if f['url'].startswith('data:') else f['url'])
            for f in info.get('requested_formats', []) or [info])
        self.write_debug(f'Invoking {fd.FD_NAME} downloader on "{urls}"')

    # Note: Ideally info should be a deep-copied so that hooks cannot modify it.
    # But it may contain objects that are not deep-copyable
    new_info = self._copy_infodict(info)
    if new_info.get('http_headers') is None:
        new_info['http_headers'] = self._calc_headers(new_info)
    return fd.download(name, new_info, subtitle)
def existing_file(self, filepaths, *, default_overwrite=True):
    """Return the first of `filepaths` that exists, unless overwriting is enabled

    When the 'overwrites' param (falling back to `default_overwrite`) is
    truthy, every existing candidate is deleted and None is returned.
    """
    existing_files = list(filter(os.path.exists, orderedSet(filepaths)))
    if existing_files and not self.params.get('overwrites', default_overwrite):
        return existing_files[0]

    for file in existing_files:
        self.report_file_delete(file)
        os.remove(file)
    return None
def process_info(self, info_dict):
    """Process a single resolved IE result. (Modifies it in-place)

    Writes the requested side files (description, subtitles, thumbnails,
    info json, annotations, internet shortcuts), downloads the selected
    format(s), queues fixup/merge post-processors and runs post-processing.
    The outcome is recorded in info_dict['__write_download_archive'].
    """

    assert info_dict.get('_type', 'video') == 'video'
    original_infodict = info_dict

    if 'format' not in info_dict and 'ext' in info_dict:
        info_dict['format'] = info_dict['ext']

    # This is mostly just for backward compatibility of process_info
    # As a side-effect, this allows for format-specific filters
    if self._match_entry(info_dict) is not None:
        info_dict['__write_download_archive'] = 'ignore'
        return

    # Does nothing under normal operation - for backward compatibility of process_info
    self.post_extract(info_dict)
    self._num_downloads += 1

    # info_dict['_filename'] needs to be set for backward compatibility
    info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
    temp_filename = self.prepare_filename(info_dict, 'temp')
    files_to_move = {}

    # Forced printings
    self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))

    def check_max_downloads():
        if self._num_downloads >= float(self.params.get('max_downloads') or 'inf'):
            raise MaxDownloadsReached()

    if self.params.get('simulate'):
        info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
        check_max_downloads()
        return

    if full_filename is None:
        return
    if not self._ensure_dir_exists(encodeFilename(full_filename)):
        return
    if not self._ensure_dir_exists(encodeFilename(temp_filename)):
        return

    if self._write_description('video', info_dict,
                               self.prepare_filename(info_dict, 'description')) is None:
        return

    sub_files = self._write_subtitles(info_dict, temp_filename)
    if sub_files is None:
        return
    files_to_move.update(dict(sub_files))

    thumb_files = self._write_thumbnails(
        'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
    if thumb_files is None:
        return
    files_to_move.update(dict(thumb_files))

    infofn = self.prepare_filename(info_dict, 'infojson')
    _infojson_written = self._write_info_json('video', info_dict, infofn)
    if _infojson_written:
        info_dict['infojson_filename'] = infofn
        # For backward compatibility, even though it was a private field
        info_dict['__infojson_filename'] = infofn
    elif _infojson_written is None:
        return

    # Note: Annotations are deprecated
    annofn = None
    if self.params.get('writeannotations', False):
        annofn = self.prepare_filename(info_dict, 'annotation')
    if annofn:
        if not self._ensure_dir_exists(encodeFilename(annofn)):
            return
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
            self.to_screen('[info] Video annotations are already present')
        elif not info_dict.get('annotations'):
            self.report_warning('There are no annotations to write.')
        else:
            try:
                self.to_screen('[info] Writing video annotations to: ' + annofn)
                with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                    annofile.write(info_dict['annotations'])
            except (KeyError, TypeError):
                self.report_warning('There are no annotations to write.')
            except OSError:
                self.report_error('Cannot write annotations file: ' + annofn)
                return

    # Write internet shortcut files
    def _write_link_file(link_type):
        url = try_get(info_dict['webpage_url'], iri_to_uri)
        if not url:
            self.report_warning(
                f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
            return True
        linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
        if not self._ensure_dir_exists(encodeFilename(linkfn)):
            return False
        # Keep an existing shortcut only when overwriting is disabled
        # (same condition as the annotations check above)
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
            self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
            return True
        try:
            self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
            with open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
                      newline='\r\n' if link_type == 'url' else '\n') as linkfile:
                template_vars = {'url': url}
                if link_type == 'desktop':
                    template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
                linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
        except OSError:
            self.report_error(f'Cannot write internet shortcut {linkfn}')
            return False
        return True

    write_links = {
        'url': self.params.get('writeurllink'),
        'webloc': self.params.get('writewebloclink'),
        'desktop': self.params.get('writedesktoplink'),
    }
    if self.params.get('writelink'):
        link_type = ('webloc' if sys.platform == 'darwin'
                     else 'desktop' if sys.platform.startswith('linux')
                     else 'url')
        write_links[link_type] = True

    if any(should_write and not _write_link_file(link_type)
           for link_type, should_write in write_links.items()):
        return

    def replace_info_dict(new_info):
        # Update the caller's dict object itself; the assert at the end relies on this
        nonlocal info_dict
        if new_info == info_dict:
            return
        info_dict.clear()
        info_dict.update(new_info)

    new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
    replace_info_dict(new_info)

    if self.params.get('skip_download'):
        info_dict['filepath'] = temp_filename
        info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
        info_dict['__files_to_move'] = files_to_move
        replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
        info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
    else:
        # Download
        info_dict.setdefault('__postprocessors', [])
        try:

            def existing_video_file(*filepaths):
                ext = info_dict.get('ext')
                converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
                file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
                                          default_overwrite=False)
                if file:
                    info_dict['ext'] = os.path.splitext(file)[1][1:]
                return file

            fd, success = None, True
            if info_dict.get('protocol') or info_dict.get('url'):
                fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
                if fd is not FFmpegFD and (
                        info_dict.get('section_start') or info_dict.get('section_end')):
                    msg = ('This format cannot be partially downloaded' if FFmpegFD.available()
                           else 'You have requested downloading the video partially, but ffmpeg is not installed')
                    self.report_error(f'{msg}. Aborting')
                    return

            if info_dict.get('requested_formats') is not None:

                def compatible_formats(formats):
                    # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
                    video_formats = [format for format in formats if format.get('vcodec') != 'none']
                    audio_formats = [format for format in formats if format.get('acodec') != 'none']
                    if len(video_formats) > 2 or len(audio_formats) > 2:
                        return False

                    # Check extension
                    exts = {format.get('ext') for format in formats}
                    COMPATIBLE_EXTS = (
                        {'mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'},
                        {'webm'},
                    )
                    for ext_sets in COMPATIBLE_EXTS:
                        if ext_sets.issuperset(exts):
                            return True
                    # TODO: Check acodec/vcodec
                    return False

                requested_formats = info_dict['requested_formats']
                old_ext = info_dict['ext']
                if self.params.get('merge_output_format') is None:
                    if not compatible_formats(requested_formats):
                        info_dict['ext'] = 'mkv'
                        self.report_warning(
                            'Requested formats are incompatible for merge and will be merged into mkv')
                    if (info_dict['ext'] == 'webm'
                            and info_dict.get('thumbnails')
                            # check with type instead of pp_key, __name__, or isinstance
                            # since we dont want any custom PPs to trigger this
                            and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):  # noqa: E721
                        info_dict['ext'] = 'mkv'
                        self.report_warning(
                            'webm doesn\'t support embedding a thumbnail, mkv will be used')
                new_ext = info_dict['ext']

                def correct_ext(filename, ext=new_ext):
                    if filename == '-':
                        return filename
                    filename_real_ext = os.path.splitext(filename)[1][1:]
                    filename_wo_ext = (
                        os.path.splitext(filename)[0]
                        if filename_real_ext in (old_ext, new_ext)
                        else filename)
                    return f'{filename_wo_ext}.{ext}'

                # Ensure filename always has a correct extension for successful merge
                full_filename = correct_ext(full_filename)
                temp_filename = correct_ext(temp_filename)
                dl_filename = existing_video_file(full_filename, temp_filename)
                info_dict['__real_download'] = False

                merger = FFmpegMergerPP(self)
                downloaded = []
                if dl_filename is not None:
                    self.report_file_already_downloaded(dl_filename)
                elif fd:
                    for f in requested_formats if fd != FFmpegFD else []:
                        f['filepath'] = fname = prepend_extension(
                            correct_ext(temp_filename, info_dict['ext']),
                            'f%s' % f['format_id'], info_dict['ext'])
                        downloaded.append(fname)
                    info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
                    success, real_download = self.dl(temp_filename, info_dict)
                    info_dict['__real_download'] = real_download
                else:
                    if self.params.get('allow_unplayable_formats'):
                        self.report_warning(
                            'You have requested merging of multiple formats '
                            'while also allowing unplayable formats to be downloaded. '
                            'The formats won\'t be merged to prevent data corruption.')
                    elif not merger.available:
                        msg = 'You have requested merging of multiple formats but ffmpeg is not installed'
                        if not self.params.get('ignoreerrors'):
                            self.report_error(f'{msg}. Aborting due to --abort-on-error')
                            return
                        self.report_warning(f'{msg}. The formats won\'t be merged')

                    if temp_filename == '-':
                        reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
                                  else 'but the formats are incompatible for simultaneous download' if merger.available
                                  else 'but ffmpeg is not installed')
                        self.report_warning(
                            f'You have requested downloading multiple formats to stdout {reason}. '
                            'The formats will be streamed one after the other')
                        fname = temp_filename
                    for f in requested_formats:
                        new_info = dict(info_dict)
                        del new_info['requested_formats']
                        new_info.update(f)
                        if temp_filename != '-':
                            fname = prepend_extension(
                                correct_ext(temp_filename, new_info['ext']),
                                'f%s' % f['format_id'], new_info['ext'])
                            if not self._ensure_dir_exists(fname):
                                return
                            f['filepath'] = fname
                            downloaded.append(fname)
                        partial_success, real_download = self.dl(fname, new_info)
                        info_dict['__real_download'] = info_dict['__real_download'] or real_download
                        success = success and partial_success

                if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
                    info_dict['__postprocessors'].append(merger)
                    info_dict['__files_to_merge'] = downloaded
                    # Even if there were no downloads, it is being merged only now
                    info_dict['__real_download'] = True
                else:
                    for file in downloaded:
                        files_to_move[file] = None
            else:
                # Just a single file
                dl_filename = existing_video_file(full_filename, temp_filename)
                if dl_filename is None or dl_filename == temp_filename:
                    # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
                    # So we should try to resume the download
                    success, real_download = self.dl(temp_filename, info_dict)
                    info_dict['__real_download'] = real_download
                else:
                    self.report_file_already_downloaded(dl_filename)

            dl_filename = dl_filename or temp_filename
            info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))

        except network_exceptions as err:
            self.report_error('unable to download video data: %s' % error_to_compat_str(err))
            return
        except OSError as err:
            raise UnavailableVideoError(err)
        except (ContentTooShortError, ) as err:
            self.report_error(f'content too short (expected {err.expected} bytes and served {err.downloaded})')
            return

        self._raise_pending_errors(info_dict)
        if success and full_filename != '-':

            def fixup():
                do_fixup = True
                fixup_policy = self.params.get('fixup')
                vid = info_dict['id']

                if fixup_policy in ('ignore', 'never'):
                    return
                elif fixup_policy == 'warn':
                    do_fixup = 'warn'
                elif fixup_policy != 'force':
                    assert fixup_policy in ('detect_or_warn', None)
                    if not info_dict.get('__real_download'):
                        do_fixup = False

                def ffmpeg_fixup(cndn, msg, cls):
                    if not (do_fixup and cndn):
                        return
                    elif do_fixup == 'warn':
                        self.report_warning(f'{vid}: {msg}')
                        return
                    pp = cls(self)
                    if pp.available:
                        info_dict['__postprocessors'].append(pp)
                    else:
                        self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')

                stretched_ratio = info_dict.get('stretched_ratio')
                ffmpeg_fixup(stretched_ratio not in (1, None),
                             f'Non-uniform pixel ratio {stretched_ratio}',
                             FFmpegFixupStretchedPP)

                downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
                downloader = downloader.FD_NAME if downloader else None

                ext = info_dict.get('ext')
                postprocessed_by_ffmpeg = info_dict.get('requested_formats') or any((
                    isinstance(pp, FFmpegVideoConvertorPP)
                    and resolve_recode_mapping(ext, pp.mapping)[0] not in (ext, None)
                ) for pp in self._pps['post_process'])

                if not postprocessed_by_ffmpeg:
                    ffmpeg_fixup(ext == 'm4a' and info_dict.get('container') == 'm4a_dash',
                                 'writing DASH m4a. Only some players support this container',
                                 FFmpegFixupM4aPP)
                    ffmpeg_fixup(downloader == 'hlsnative' and not self.params.get('hls_use_mpegts')
                                 or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None,
                                 'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
                                 FFmpegFixupM3u8PP)
                    ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD',
                                 'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)

                ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
                ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed duration detected', FFmpegFixupDurationPP)

            fixup()
            try:
                replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
            except PostProcessingError as err:
                self.report_error('Postprocessing: %s' % str(err))
                return
            try:
                for ph in self._post_hooks:
                    ph(info_dict['filepath'])
            except Exception as err:
                self.report_error('post hooks: %s' % str(err))
                return
            info_dict['__write_download_archive'] = True

    assert info_dict is original_infodict  # Make sure the info_dict was modified in-place
    if self.params.get('force_write_download_archive'):
        info_dict['__write_download_archive'] = True
    check_max_downloads()
def __download_wrapper(self, func):
    """Wrap `func` with the error handling shared by the download entry points

    UnavailableVideoError is reported, DownloadCancelled is printed and
    re-raised unless 'break_per_url' is set. On success the result is dumped
    as JSON when 'dump_single_json' is set. The wrapper itself returns None.
    """
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        try:
            res = func(*args, **kwargs)
        except UnavailableVideoError as e:
            self.report_error(e)
        except DownloadCancelled as e:
            self.to_screen(f'[info] {e}')
            if not self.params.get('break_per_url'):
                raise
        else:
            if self.params.get('dump_single_json', False):
                self.post_extract(res)
                self.to_stdout(json.dumps(self.sanitize_info(res)))
    return wrapper
def download(self, url_list):
    """Download a given list of URLs.

    Raises SameFileError when several URLs would be written to one fixed
    output template. Returns self._download_retcode.
    """
    url_list = variadic(url_list)  # Passing a single URL is a common mistake
    outtmpl = self.params['outtmpl']['default']
    if (len(url_list) > 1
            and outtmpl != '-'
            and '%' not in outtmpl
            and self.params.get('max_downloads') != 1):
        raise SameFileError(outtmpl)

    for url in url_list:
        self.__download_wrapper(self.extract_info)(
            url, force_generic_extractor=self.params.get('force_generic_extractor', False))

    return self._download_retcode
def download_with_info_file(self, info_filename):
    """Process a previously written info JSON file as if it had just been extracted

    If processing fails and the info has a 'webpage_url', the download is
    retried from that URL; otherwise the error is re-raised.
    Returns self._download_retcode (or the result of the retry).
    """
    with contextlib.closing(fileinput.FileInput(
            [info_filename], mode='r',
            openhook=fileinput.hook_encoded('utf-8'))) as f:
        # FileInput doesn't have a read method, we can't call json.load
        info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
    try:
        self.__download_wrapper(self.process_ie_result)(info, download=True)
    except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
        if not isinstance(e, EntryNotInPlaylist):
            self.to_stderr('\r')
        webpage_url = info.get('webpage_url')
        if webpage_url is not None:
            self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
            return self.download([webpage_url])
        else:
            raise
    return self._download_retcode
def sanitize_info(info_dict, remove_private_keys=False):
    ''' Sanitize the infodict for converting to json

    Sets default 'epoch' and '_type' on info_dict itself, then returns a
    filtered copy: containers become dicts/lists, JSON scalars are kept and
    anything else is replaced by its repr(). With remove_private_keys,
    None values, keys starting with '__' and the listed bookkeeping keys
    are dropped from every dict.
    '''
    if info_dict is None:
        return info_dict
    info_dict.setdefault('epoch', int(time.time()))
    info_dict.setdefault('_type', 'video')

    if remove_private_keys:
        reject = lambda k, v: v is None or k.startswith('__') or k in {
            'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
            'entries', 'filepath', '_filename', 'infojson_filename', 'original_url', 'playlist_autonumber',
        }
    else:
        reject = lambda k, v: False

    def filter_fn(obj):
        if isinstance(obj, dict):
            return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
        elif isinstance(obj, (list, tuple, set, LazyList)):
            return list(map(filter_fn, obj))
        elif obj is None or isinstance(obj, (str, int, float, bool)):
            return obj
        else:
            return repr(obj)

    return filter_fn(info_dict)
def filter_requested_info(info_dict, actually_filter=True):
    ''' Alias of sanitize_info for backward compatibility '''
    # Note the different default: this alias removes private keys unless told otherwise
    return YoutubeDL.sanitize_info(info_dict, actually_filter)
def _delete_downloaded_files(self, *files_to_delete, info=None, msg=None):
    """Delete the given files and drop them from info['__files_to_move']

    Falsy entries and duplicates in files_to_delete are ignored. `msg`, if
    given, is a %-template printed with each filename before deletion. A
    file that cannot be removed only produces a warning.
    """
    if info is None:  # avoid sharing one mutable default dict between calls
        info = {}
    for filename in set(filter(None, files_to_delete)):
        if msg:
            self.to_screen(msg % filename)
        try:
            os.remove(filename)
        except OSError:
            self.report_warning(f'Unable to delete file {filename}')
        if filename in info.get('__files_to_move', []):  # NB: Delete even if None
            del info['__files_to_move'][filename]
def post_extract(info_dict):
    """Run and remove any pending '__post_extractor' callbacks, merging their results in-place

    For 'playlist' and 'multi_video' results only the entries are
    processed. None (as the argument or as an entry) is tolerated.
    """
    def actual_post_extract(info_dict):
        if info_dict.get('_type') in ('playlist', 'multi_video'):
            for video_dict in info_dict.get('entries', {}):
                actual_post_extract(video_dict or {})
            return

        post_extractor = info_dict.pop('__post_extractor', None) or (lambda: {})
        info_dict.update(post_extractor())

    actual_post_extract(info_dict or {})
def run_pp(self, pp, infodict):
    """Run a single postprocessor and return the resulting infodict

    Files the postprocessor marks for deletion are kept (registered in
    '__files_to_move') when 'keepvideo' is set, otherwise deleted.
    A PostProcessingError is reported and swallowed only when
    'ignoreerrors' is exactly True; otherwise it propagates.
    """
    files_to_delete = []
    if '__files_to_move' not in infodict:
        infodict['__files_to_move'] = {}
    try:
        files_to_delete, infodict = pp.run(infodict)
    except PostProcessingError as e:
        # Must be True and not 'only_download'
        if self.params.get('ignoreerrors') is True:
            self.report_error(e)
            return infodict
        raise

    if not files_to_delete:
        return infodict
    if self.params.get('keepvideo', False):
        for f in files_to_delete:
            infodict['__files_to_move'].setdefault(f, '')
    else:
        self._delete_downloaded_files(
            *files_to_delete, info=infodict, msg='Deleting original file %s (pass -k to keep)')
    return infodict
def run_all_pps(self, key, info, *, additional_pps=None):
    """Run the force-prints and every postprocessor registered under `key`

    `additional_pps` run before the registered ones. Returns the info
    produced by the last postprocessor.
    """
    self._forceprint(key, info)
    for pp in (additional_pps or []) + self._pps[key]:
        info = self.run_pp(pp, info)
    return info
def pre_process(self, ie_info, key='pre_process', files_to_move=None):
    """Run the postprocessors of stage `key` on a shallow copy of ie_info

    Returns a tuple (info, files_to_move). A PostProcessingError is
    reported as a non-fatal message and recorded in '__pending_error'
    instead of being raised.
    """
    info = dict(ie_info)
    info['__files_to_move'] = files_to_move or {}
    try:
        info = self.run_all_pps(key, info)
    except PostProcessingError as err:
        msg = f'Preprocessing: {err}'
        info.setdefault('__pending_error', msg)
        self.report_error(msg, is_error=False)
    return info, info.pop('__files_to_move', None)
def post_process(self, filename, info, files_to_move=None):
    """Run all the postprocessors on the given file.

    Order: the 'post_process' stage (preceded by info['__postprocessors']),
    then the file move, then the 'after_move' stage, whose result is returned.
    """
    info['filepath'] = filename
    info['__files_to_move'] = files_to_move or {}
    info = self.run_all_pps('post_process', info, additional_pps=info.get('__postprocessors'))
    info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
    del info['__files_to_move']
    return self.run_all_pps('after_move', info)
def _make_archive_id(self, info_dict):
    """Return the '<extractor> <video id>' archive key for info_dict, or None

    None is returned when the id is missing, or when no extractor key is
    given and none can be derived from the URL.
    """
    video_id = info_dict.get('id')
    if not video_id:
        return
    # Future-proof against any change in case
    # and backwards compatibility with prior versions
    extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
    if extractor is None:
        url = str_or_none(info_dict.get('url'))
        if not url:
            return
        # Try to find matching extractor for the URL and take its ie_key
        for ie_key, ie in self._ies.items():
            if ie.suitable(url):
                extractor = ie_key
                break
        else:
            return
    return f'{extractor.lower()} {video_id}'
def in_download_archive(self, info_dict):
    """Return whether info_dict's archive id is already in self.archive

    Always False when no 'download_archive' is configured or no archive
    id can be built for info_dict.
    """
    fn = self.params.get('download_archive')
    if fn is None:
        return False

    vid_id = self._make_archive_id(info_dict)
    if not vid_id:
        return False  # Incomplete video information

    return vid_id in self.archive
def record_download_archive(self, info_dict):
    """Append info_dict's archive id to the archive file and to self.archive

    Does nothing when no 'download_archive' is configured.
    """
    fn = self.params.get('download_archive')
    if fn is None:
        return
    vid_id = self._make_archive_id(info_dict)
    assert vid_id
    self.write_debug(f'Adding to archive: {vid_id}')
    with locked_file(fn, 'a', encoding='utf-8') as archive_file:
        archive_file.write(vid_id + '\n')
    self.archive.add(vid_id)
def format_resolution(format, default='unknown'):
    """Return a human-readable resolution for a format dict

    Audio-only formats give 'audio only'; otherwise an explicit
    'resolution' wins, then it is built from 'width'/'height', and
    `default` is returned when neither is known.
    """
    if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
        return 'audio only'
    if format.get('resolution') is not None:
        return format['resolution']
    if format.get('width') and format.get('height'):
        return '%dx%d' % (format['width'], format['height'])
    elif format.get('height'):
        return '%sp' % format['height']
    elif format.get('width'):
        return '%dx?' % format['width']
    return default
3448 def _list_format_headers(self
, *headers
):
3449 if self
.params
.get('listformats_table', True) is not False:
3450 return [self
._format
_out
(header
, self
.Styles
.HEADERS
) for header
in headers
]
3453 def _format_note(self
, fdict
):
3455 if fdict
.get('ext') in ['f4f', 'f4m']:
3456 res
+= '(unsupported)'
3457 if fdict
.get('language'):
3460 res
+= '[%s]' % fdict
['language']
3461 if fdict
.get('format_note') is not None:
3464 res
+= fdict
['format_note']
3465 if fdict
.get('tbr') is not None:
3468 res
+= '%4dk' % fdict
['tbr']
3469 if fdict
.get('container') is not None:
3472 res
+= '%s container' % fdict
['container']
3473 if (fdict
.get('vcodec') is not None
3474 and fdict
.get('vcodec') != 'none'):
3477 res
+= fdict
['vcodec']
3478 if fdict
.get('vbr') is not None:
3480 elif fdict
.get('vbr') is not None and fdict
.get('abr') is not None:
3482 if fdict
.get('vbr') is not None:
3483 res
+= '%4dk' % fdict
['vbr']
3484 if fdict
.get('fps') is not None:
3487 res
+= '%sfps' % fdict
['fps']
3488 if fdict
.get('acodec') is not None:
3491 if fdict
['acodec'] == 'none':
3494 res
+= '%-5s' % fdict
['acodec']
3495 elif fdict
.get('abr') is not None:
3499 if fdict
.get('abr') is not None:
3500 res
+= '@%3dk' % fdict
['abr']
3501 if fdict
.get('asr') is not None:
3502 res
+= ' (%5dHz)' % fdict
['asr']
3503 if fdict
.get('filesize') is not None:
3506 res
+= format_bytes(fdict
['filesize'])
3507 elif fdict
.get('filesize_approx') is not None:
3510 res
+= '~' + format_bytes(fdict
['filesize_approx'])
def render_formats_table(self, info_dict):
    """Render the list of available formats as a text table.

    Returns None when `info_dict` has neither 'formats' nor a 'url'. When the
    'listformats_table' param is explicitly False the legacy four-column
    layout is used; otherwise the detailed layout is rendered. Formats whose
    'preference' is below -1000 are left out of both layouts.
    """
    if not info_dict.get('formats') and not info_dict.get('url'):
        return None

    formats = info_dict.get('formats', [info_dict])
    if self.params.get('listformats_table', True) is False:
        table = [
            [
                format_field(f, 'format_id'),
                format_field(f, 'ext'),
                self.format_resolution(f),
                self._format_note(f)
            ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
        return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)

    def simplified_codec(f, field):
        assert field in ('acodec', 'vcodec')
        codec = f.get(field, 'unknown')
        if codec != 'none':
            # Keep at most the first four dot-separated components
            return '.'.join(codec.split('.')[:4])

        if field == 'vcodec' and f.get('acodec') == 'none':
            return 'images'
        elif field == 'acodec' and f.get('vcodec') == 'none':
            return ''
        return self._format_out('audio only' if field == 'vcodec' else 'video only',
                                self.Styles.SUPPRESS)

    delim = self._format_out('\u2502', self.Styles.DELIM, '|', test_encoding=True)
    table = [
        [
            self._format_out(format_field(f, 'format_id'), self.Styles.ID),
            format_field(f, 'ext'),
            format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
            format_field(f, 'fps', '\t%d', func=round),
            format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
            delim,
            format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
            format_field(f, 'tbr', '\t%dk', func=round),
            shorten_protocol_name(f.get('protocol', '')),
            delim,
            simplified_codec(f, 'vcodec'),
            format_field(f, 'vbr', '\t%dk', func=round),
            simplified_codec(f, 'acodec'),
            format_field(f, 'abr', '\t%dk', func=round),
            format_field(f, 'asr', '\t%s', func=format_decimal_suffix),
            join_nonempty(
                self._format_out('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
                format_field(f, 'language', '[%s]'),
                join_nonempty(format_field(f, 'format_note'),
                              format_field(f, 'container', ignore=(None, f.get('ext'))),
                              delim=', '),
                delim=' '),
        ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
    header_line = self._list_format_headers(
        'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', delim, '\tFILESIZE', '\tTBR', 'PROTO',
        delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')

    return render_table(
        header_line, table, hide_empty=True,
        delim=self._format_out('\u2500', self.Styles.DELIM, '-', test_encoding=True))
def render_thumbnails_table(self, info_dict):
    """Render the thumbnails of `info_dict` as a table, or None if there are none."""
    thumbnails = list(info_dict.get('thumbnails') or [])
    if not thumbnails:
        return None
    return render_table(
        self._list_format_headers('ID', 'Width', 'Height', 'URL'),
        [[t.get('id'), t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails])
def render_subtitles_table(self, video_id, subtitles):
    """Render `subtitles` (language -> list of format dicts) as a table.

    Returns None when `subtitles` is empty. Each row lists the language, the
    subtitle name(s) and the available extensions.
    """
    def _row(lang, formats):
        exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
        if len(set(names)) == 1:
            # All formats share one name: show it once, or not at all if unknown
            names = [] if names[0] == 'unknown' else names[:1]
        return [lang, ', '.join(names), ', '.join(exts)]

    if not subtitles:
        return None
    return render_table(
        self._list_format_headers('Language', 'Name', 'Formats'),
        [_row(lang, formats) for lang, formats in subtitles.items()],
        hide_empty=True)
def __list_table(self, video_id, name, func, *args):
    """Print the table produced by `func(*args)`, or a notice if it is empty.

    `name` is the plural noun used in the messages (e.g. 'formats').
    """
    table = func(*args)
    if not table:
        self.to_screen(f'{video_id} has no {name}')
        return
    self.to_screen(f'[info] Available {name} for {video_id}:')
    self.to_stdout(table)
def list_formats(self, info_dict):
    """Print the formats table for the video described by `info_dict`."""
    video_id = info_dict['id']
    self.__list_table(video_id, 'formats', self.render_formats_table, info_dict)
def list_thumbnails(self, info_dict):
    """Print the thumbnails table for the video described by `info_dict`."""
    video_id = info_dict['id']
    self.__list_table(video_id, 'thumbnails', self.render_thumbnails_table, info_dict)
def list_subtitles(self, video_id, subtitles, name='subtitles'):
    """Print the subtitles table for `video_id`; `name` labels the listing."""
    renderer = self.render_subtitles_table
    self.__list_table(video_id, name, renderer, video_id, subtitles)
def urlopen(self, req):
    """ Start an HTTP download """
    # A plain URL string is wrapped in a request object first
    request = sanitized_Request(req) if isinstance(req, str) else req
    return self._opener.open(request, timeout=self._socket_timeout)
def print_debug_header(self):
    """Write the verbose-mode debug header (versions, encodings, proxies).

    Does nothing unless the 'verbose' param is set. Messages go to the
    'logger' param when one is configured, otherwise to the regular output.
    """
    if not self.params.get('verbose'):
        return

    # These imports can be slow. So import them only as needed
    from .extractor.extractors import _LAZY_LOADER
    from .extractor.extractors import _PLUGIN_CLASSES as plugin_extractors

    def get_encoding(stream):
        ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
        if not supports_terminal_sequences(stream):
            from .utils import WINDOWS_VT_MODE  # Must be imported locally
            ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
        return ret

    encoding_str = 'Encodings: locale %s, fs %s, pref %s, %s' % (
        locale.getpreferredencoding(),
        sys.getfilesystemencoding(),
        self.get_encoding(),
        ', '.join(
            f'{key} {get_encoding(stream)}' for key, stream in self._out_files.items_
            if stream is not None and key != 'console')
    )

    logger = self.params.get('logger')
    if logger:
        write_debug = lambda msg: logger.debug(f'[debug] {msg}')
        write_debug(encoding_str)
    else:
        # The encoding line itself is written with encoding=None
        write_string(f'[debug] {encoding_str}\n', encoding=None)
        write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')

    source = detect_variant()
    write_debug(join_nonempty(
        'yt-dlp version', __version__,
        f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
        '' if source == 'unknown' else f'({source})',
        delim=' '))
    if not _LAZY_LOADER:
        if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
            write_debug('Lazy loading extractors is forcibly disabled')
        else:
            write_debug('Lazy loading extractors is disabled')
    if plugin_extractors or plugin_postprocessors:
        write_debug('Plugins: %s' % [
            '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
            for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
    if self.params['compat_opts']:
        write_debug('Compatibility options: %s' % ', '.join(self.params['compat_opts']))

    if source == 'source':
        # Best effort only: a missing git binary or checkout must not abort the header
        with contextlib.suppress(Exception):
            stdout, _, _ = Popen.run(
                ['git', 'rev-parse', '--short', 'HEAD'],
                text=True, cwd=os.path.dirname(os.path.abspath(__file__)),
                stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            if re.fullmatch('[0-9a-f]+', stdout.strip()):
                write_debug(f'Git HEAD: {stdout.strip()}')

    write_debug(system_identifier())

    exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
    ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
    if ffmpeg_features:
        exe_versions['ffmpeg'] += ' (%s)' % ','.join(sorted(ffmpeg_features))

    exe_versions['rtmpdump'] = rtmpdump_version()
    exe_versions['phantomjs'] = PhantomJSwrapper._version()
    exe_str = ', '.join(
        f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
    ) or 'none'
    write_debug('exe versions: %s' % exe_str)

    from .compat.compat_utils import get_package_info
    from .dependencies import available_dependencies

    write_debug('Optional libraries: %s' % (', '.join(sorted({
        join_nonempty(*get_package_info(m)) for m in available_dependencies.values()
    })) or 'none'))

    self._setup_opener()
    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    write_debug(f'Proxy map: {proxy_map}')

    # NOTE: the 'call_home' param is not implemented; the branch that used to
    # follow was guarded by `if False` and could never run, so it is omitted.
3722 def _setup_opener(self
):
3723 if hasattr(self
, '_opener'):
3725 timeout_val
= self
.params
.get('socket_timeout')
3726 self
._socket
_timeout
= 20 if timeout_val
is None else float(timeout_val
)
3728 opts_cookiesfrombrowser
= self
.params
.get('cookiesfrombrowser')
3729 opts_cookiefile
= self
.params
.get('cookiefile')
3730 opts_proxy
= self
.params
.get('proxy')
3732 self
.cookiejar
= load_cookies(opts_cookiefile
, opts_cookiesfrombrowser
, self
)
3734 cookie_processor
= YoutubeDLCookieProcessor(self
.cookiejar
)
3735 if opts_proxy
is not None:
3736 if opts_proxy
== '':
3739 proxies
= {'http': opts_proxy, 'https': opts_proxy}
3741 proxies
= urllib
.request
.getproxies()
3742 # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
3743 if 'http' in proxies
and 'https' not in proxies
:
3744 proxies
['https'] = proxies
['http']
3745 proxy_handler
= PerRequestProxyHandler(proxies
)
3747 debuglevel
= 1 if self
.params
.get('debug_printtraffic') else 0
3748 https_handler
= make_HTTPS_handler(self
.params
, debuglevel
=debuglevel
)
3749 ydlh
= YoutubeDLHandler(self
.params
, debuglevel
=debuglevel
)
3750 redirect_handler
= YoutubeDLRedirectHandler()
3751 data_handler
= urllib
.request
.DataHandler()
3753 # When passing our own FileHandler instance, build_opener won't add the
3754 # default FileHandler and allows us to disable the file protocol, which
3755 # can be used for malicious purposes (see
3756 # https://github.com/ytdl-org/youtube-dl/issues/8227)
3757 file_handler
= urllib
.request
.FileHandler()
3759 def file_open(*args
, **kwargs
):
3760 raise urllib
.error
.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
3761 file_handler
.file_open
= file_open
3763 opener
= urllib
.request
.build_opener(
3764 proxy_handler
, https_handler
, cookie_processor
, ydlh
, redirect_handler
, data_handler
, file_handler
)
3766 # Delete the default user-agent header, which would otherwise apply in
3767 # cases where our custom HTTP handler doesn't come into play
3768 # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
3769 opener
.addheaders
= []
3770 self
._opener
= opener
def encode(self, s):
    """Encode `s` with the configured encoding; bytes are returned unchanged.

    Raises UnicodeEncodeError (with a hint appended to its reason) when `s`
    cannot be represented in that encoding.
    """
    if isinstance(s, bytes):
        return s  # Already encoded

    try:
        return s.encode(self.get_encoding())
    except UnicodeEncodeError as err:
        err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
        raise
def get_encoding(self):
    """Return the 'encoding' param, falling back to `preferredencoding()`."""
    encoding = self.params.get('encoding')
    if encoding is None:
        encoding = preferredencoding()
    return encoding
3788 def _write_info_json(self
, label
, ie_result
, infofn
, overwrite
=None):
3789 ''' Write infojson and returns True = written, 'exists' = Already exists, False = skip, None = error '''
3790 if overwrite
is None:
3791 overwrite
= self
.params
.get('overwrites', True)
3792 if not self
.params
.get('writeinfojson'):
3795 self
.write_debug(f
'Skipping writing {label} infojson')
3797 elif not self
._ensure
_dir
_exists
(infofn
):
3799 elif not overwrite
and os
.path
.exists(infofn
):
3800 self
.to_screen(f
'[info] {label.title()} metadata is already present')
3803 self
.to_screen(f
'[info] Writing {label} metadata as JSON to: {infofn}')
3805 write_json_file(self
.sanitize_info(ie_result
, self
.params
.get('clean_infojson', True)), infofn
)
3808 self
.report_error(f
'Cannot write {label} metadata to JSON file {infofn}')
3811 def _write_description(self
, label
, ie_result
, descfn
):
3812 ''' Write description and returns True = written, False = skip, None = error '''
3813 if not self
.params
.get('writedescription'):
3816 self
.write_debug(f
'Skipping writing {label} description')
3818 elif not self
._ensure
_dir
_exists
(descfn
):
3820 elif not self
.params
.get('overwrites', True) and os
.path
.exists(descfn
):
3821 self
.to_screen(f
'[info] {label.title()} description is already present')
3822 elif ie_result
.get('description') is None:
3823 self
.report_warning(f
'There\'s no {label} description to write')
3827 self
.to_screen(f
'[info] Writing {label} description to: {descfn}')
3828 with open(encodeFilename(descfn
), 'w', encoding
='utf-8') as descfile
:
3829 descfile
.write(ie_result
['description'])
3831 self
.report_error(f
'Cannot write {label} description file {descfn}')
3835 def _write_subtitles(self
, info_dict
, filename
):
3836 ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
3838 subtitles
= info_dict
.get('requested_subtitles')
3839 if not subtitles
or not (self
.params
.get('writesubtitles') or self
.params
.get('writeautomaticsub')):
3840 # subtitles download errors are already managed as troubles in relevant IE
3841 # that way it will silently go on when used with unsupporting IE
3844 sub_filename_base
= self
.prepare_filename(info_dict
, 'subtitle')
3845 if not sub_filename_base
:
3846 self
.to_screen('[info] Skipping writing video subtitles')
3848 for sub_lang
, sub_info
in subtitles
.items():
3849 sub_format
= sub_info
['ext']
3850 sub_filename
= subtitles_filename(filename
, sub_lang
, sub_format
, info_dict
.get('ext'))
3851 sub_filename_final
= subtitles_filename(sub_filename_base
, sub_lang
, sub_format
, info_dict
.get('ext'))
3852 existing_sub
= self
.existing_file((sub_filename_final
, sub_filename
))
3854 self
.to_screen(f
'[info] Video subtitle {sub_lang}.{sub_format} is already present')
3855 sub_info
['filepath'] = existing_sub
3856 ret
.append((existing_sub
, sub_filename_final
))
3859 self
.to_screen(f
'[info] Writing video subtitles to: {sub_filename}')
3860 if sub_info
.get('data') is not None:
3862 # Use newline='' to prevent conversion of newline characters
3863 # See https://github.com/ytdl-org/youtube-dl/issues/10268
3864 with open(sub_filename
, 'w', encoding
='utf-8', newline
='') as subfile
:
3865 subfile
.write(sub_info
['data'])
3866 sub_info
['filepath'] = sub_filename
3867 ret
.append((sub_filename
, sub_filename_final
))
3870 self
.report_error(f
'Cannot write video subtitles file {sub_filename}')
3874 sub_copy
= sub_info
.copy()
3875 sub_copy
.setdefault('http_headers', info_dict
.get('http_headers'))
3876 self
.dl(sub_filename
, sub_copy
, subtitle
=True)
3877 sub_info
['filepath'] = sub_filename
3878 ret
.append((sub_filename
, sub_filename_final
))
3879 except (DownloadError
, ExtractorError
, IOError, OSError, ValueError) + network_exceptions
as err
:
3880 msg
= f
'Unable to download video subtitles for {sub_lang!r}: {err}'
3881 if self
.params
.get('ignoreerrors') is not True: # False or 'only_download'
3882 if not self
.params
.get('ignoreerrors'):
3883 self
.report_error(msg
)
3884 raise DownloadError(msg
)
3885 self
.report_warning(msg
)
3888 def _write_thumbnails(self
, label
, info_dict
, filename
, thumb_filename_base
=None):
3889 ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
3890 write_all
= self
.params
.get('write_all_thumbnails', False)
3891 thumbnails
, ret
= [], []
3892 if write_all
or self
.params
.get('writethumbnail', False):
3893 thumbnails
= info_dict
.get('thumbnails') or []
3894 multiple
= write_all
and len(thumbnails
) > 1
3896 if thumb_filename_base
is None:
3897 thumb_filename_base
= filename
3898 if thumbnails
and not thumb_filename_base
:
3899 self
.write_debug(f
'Skipping writing {label} thumbnail')
3902 for idx
, t
in list(enumerate(thumbnails
))[::-1]:
3903 thumb_ext
= (f
'{t["id"]}.' if multiple
else '') + determine_ext(t
['url'], 'jpg')
3904 thumb_display_id
= f
'{label} thumbnail {t["id"]}'
3905 thumb_filename
= replace_extension(filename
, thumb_ext
, info_dict
.get('ext'))
3906 thumb_filename_final
= replace_extension(thumb_filename_base
, thumb_ext
, info_dict
.get('ext'))
3908 existing_thumb
= self
.existing_file((thumb_filename_final
, thumb_filename
))
3910 self
.to_screen('[info] %s is already present' % (
3911 thumb_display_id
if multiple
else f
'{label} thumbnail').capitalize())
3912 t
['filepath'] = existing_thumb
3913 ret
.append((existing_thumb
, thumb_filename_final
))
3915 self
.to_screen(f
'[info] Downloading {thumb_display_id} ...')
3917 uf
= self
.urlopen(sanitized_Request(t
['url'], headers
=t
.get('http_headers', {})))
3918 self
.to_screen(f
'[info] Writing {thumb_display_id} to: {thumb_filename}')
3919 with open(encodeFilename(thumb_filename
), 'wb') as thumbf
:
3920 shutil
.copyfileobj(uf
, thumbf
)
3921 ret
.append((thumb_filename
, thumb_filename_final
))
3922 t
['filepath'] = thumb_filename
3923 except network_exceptions
as err
:
3925 self
.report_warning(f
'Unable to download {thumb_display_id}: {err}')
3926 if ret
and not write_all
: