24 from string
import ascii_letters
26 from .cache
import Cache
27 from .compat
import compat_os_name
, compat_shlex_quote
28 from .cookies
import load_cookies
29 from .downloader
import FFmpegFD
, get_suitable_downloader
, shorten_protocol_name
30 from .downloader
.rtmp
import rtmpdump_version
31 from .extractor
import gen_extractor_classes
, get_info_extractor
32 from .extractor
.openload
import PhantomJSwrapper
33 from .minicurses
import format_text
34 from .postprocessor
import _PLUGIN_CLASSES
as plugin_postprocessors
35 from .postprocessor
import (
37 FFmpegFixupDuplicateMoovPP
,
38 FFmpegFixupDurationPP
,
41 FFmpegFixupStretchedPP
,
42 FFmpegFixupTimestampPP
,
45 FFmpegVideoConvertorPP
,
46 MoveFilesAfterDownloadPP
,
49 from .postprocessor
.ffmpeg
import resolve_mapping
as resolve_recode_mapping
50 from .update
import detect_variant
76 PerRequestProxyHandler
,
83 UnavailableVideoError
,
85 YoutubeDLCookieProcessor
,
87 YoutubeDLRedirectHandler
,
102 format_decimal_suffix
,
119 register_socks_protocols
,
120 remove_terminal_sequences
,
131 supports_terminal_sequences
,
140 windows_enable_vt_mode
,
144 from .version
import RELEASE_GIT_HEAD
, __version__
146 if compat_os_name
== 'nt':
153 YoutubeDL objects are the ones responsible for downloading the
154 actual video file and writing it to disk if the user has requested
155 it, among some other tasks. In most cases there should be one per
156 program. As, given a video URL, the downloader doesn't know how to
157 extract all the needed information, task that InfoExtractors do, it
158 has to pass the URL to one of them.
160 For this, YoutubeDL objects have a method that allows
161 InfoExtractors to be registered in a given order. When it is passed
162 a URL, the YoutubeDL object hands it to the first InfoExtractor it
163 finds that reports being able to handle it. The InfoExtractor extracts
164 all the information about the video or videos the URL refers to, and
165 YoutubeDL processes the extracted information, possibly using a File
166 Downloader to download the video.
168 YoutubeDL objects accept a lot of parameters. In order not to saturate
169 the object constructor with arguments, it receives a dictionary of
170 options instead. These options are available through the params
171 attribute for the InfoExtractors to use. The YoutubeDL also
172 registers itself as the downloader in charge for the InfoExtractors
173 that are added to it, so this is a "mutual registration".
177 username: Username for authentication purposes.
178 password: Password for authentication purposes.
179 videopassword: Password for accessing a video.
180 ap_mso: Adobe Pass multiple-system operator identifier.
181 ap_username: Multiple-system operator account username.
182 ap_password: Multiple-system operator account password.
183 usenetrc: Use netrc for authentication instead.
184 verbose: Print additional info to stdout.
185 quiet: Do not print messages to stdout.
186 no_warnings: Do not print out anything for warnings.
187 forceprint: A dict with keys WHEN mapped to a list of templates to
188 print to stdout. The allowed keys are video or any of the
189 items in utils.POSTPROCESS_WHEN.
190 For compatibility, a single list is also accepted
191 print_to_file: A dict with keys WHEN (same as forceprint) mapped to
192 a list of tuples with (template, filename)
193 forcejson: Force printing info_dict as JSON.
194 dump_single_json: Force printing the info_dict of the whole playlist
195 (or video) as a single JSON line.
196 force_write_download_archive: Force writing download archive regardless
197 of 'skip_download' or 'simulate'.
198 simulate: Do not download the video files. If unset (or None),
199 simulate only if listsubtitles, listformats or list_thumbnails is used
200 format: Video format code. see "FORMAT SELECTION" for more details.
201 You can also pass a function. The function takes 'ctx' as
202 argument and returns the formats to download.
203 See "build_format_selector" for an implementation
204 allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
205 ignore_no_formats_error: Ignore "No video formats" error. Useful for
206 extracting metadata even if the video is not actually
207 available for download (experimental)
208 format_sort: A list of fields by which to sort the video formats.
209 See "Sorting Formats" for more details.
210 format_sort_force: Force the given format_sort. see "Sorting Formats"
212 prefer_free_formats: Whether to prefer video formats with free containers
213 over non-free ones of same quality.
214 allow_multiple_video_streams: Allow multiple video streams to be merged
216 allow_multiple_audio_streams: Allow multiple audio streams to be merged
218 check_formats: Whether to test if the formats are downloadable.
219 Can be True (check all), False (check none),
220 'selected' (check selected formats),
221 or None (check only if requested by extractor)
222 paths: Dictionary of output paths. The allowed keys are 'home'
223 'temp' and the keys of OUTTMPL_TYPES (in utils.py)
224 outtmpl: Dictionary of templates for output names. Allowed keys
225 are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
226 For compatibility with youtube-dl, a single string can also be used
227 outtmpl_na_placeholder: Placeholder for unavailable meta fields.
228 restrictfilenames: Do not allow "&" and spaces in file names
229 trim_file_name: Limit length of filename (extension excluded)
230 windowsfilenames: Force the filenames to be windows compatible
231 ignoreerrors: Do not stop on download/postprocessing errors.
232 Can be 'only_download' to ignore only download errors.
233 Default is 'only_download' for CLI, but False for API
234 skip_playlist_after_errors: Number of allowed failures until the rest of
235 the playlist is skipped
236 force_generic_extractor: Force downloader to use the generic extractor
237 overwrites: Overwrite all video and metadata files if True,
238 overwrite only non-video files if None
239 and don't overwrite any file if False
240 For compatibility with youtube-dl,
241 "nooverwrites" may also be used instead
242 playlist_items: Specific indices of playlist to download.
243 playlistrandom: Download playlist items in random order.
244 lazy_playlist: Process playlist entries as they are received.
245 matchtitle: Download only matching titles.
246 rejecttitle: Reject downloads for matching titles.
247 logger: Log messages to a logging.Logger instance.
248 logtostderr: Log messages to stderr instead of stdout.
249 consoletitle: Display progress in console window's titlebar.
250 writedescription: Write the video description to a .description file
251 writeinfojson: Write the video metadata to a .info.json file
252 clean_infojson: Remove private fields from the infojson
253 getcomments: Extract video comments. This will not be written to disk
254 unless writeinfojson is also given
255 writeannotations: Write the video annotations to a .annotations.xml file
256 writethumbnail: Write the thumbnail image to a file
257 allow_playlist_files: Whether to write playlists' description, infojson etc
258 also to disk when using the 'write*' options
259 write_all_thumbnails: Write all thumbnail formats to files
260 writelink: Write an internet shortcut file, depending on the
261 current platform (.url/.webloc/.desktop)
262 writeurllink: Write a Windows internet shortcut file (.url)
263 writewebloclink: Write a macOS internet shortcut file (.webloc)
264 writedesktoplink: Write a Linux internet shortcut file (.desktop)
265 writesubtitles: Write the video subtitles to a file
266 writeautomaticsub: Write the automatically generated subtitles to a file
267 listsubtitles: Lists all available subtitles for the video
268 subtitlesformat: The format code for subtitles
269 subtitleslangs: List of languages of the subtitles to download (can be regex).
270 The list may contain "all" to refer to all the available
271 subtitles. The language can be prefixed with a "-" to
272 exclude it from the requested languages. Eg: ['all', '-live_chat']
273 keepvideo: Keep the video file after post-processing
274 daterange: A DateRange object, download only if the upload_date is in the range.
275 skip_download: Skip the actual download of the video file
276 cachedir: Location of the cache files in the filesystem.
277 False to disable filesystem cache.
278 noplaylist: Download single video instead of a playlist if in doubt.
279 age_limit: An integer representing the user's age in years.
280 Unsuitable videos for the given age are skipped.
281 min_views: An integer representing the minimum view count the video
282 must have in order to not be skipped.
283 Videos without view count information are always
284 downloaded. None for no limit.
285 max_views: An integer representing the maximum view count.
286 Videos that are more popular than that are not
288 Videos without view count information are always
289 downloaded. None for no limit.
290 download_archive: File name of a file where all downloads are recorded.
291 Videos already present in the file are not downloaded
293 break_on_existing: Stop the download process after attempting to download a
294 file that is in the archive.
295 break_on_reject: Stop the download process when encountering a video that
296 has been filtered out.
297 break_per_url: Whether break_on_reject and break_on_existing
298 should act on each input URL as opposed to for the entire queue
299 cookiefile: File name or text stream from where cookies should be read and dumped to
300 cookiesfrombrowser: A tuple containing the name of the browser, the profile
301 name/path from where cookies are loaded, and the name of the
302 keyring. Eg: ('chrome', ) or ('vivaldi', 'default', 'BASICTEXT')
303 legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
304 support RFC 5746 secure renegotiation
305 nocheckcertificate: Do not verify SSL certificates
306 client_certificate: Path to client certificate file in PEM format. May include the private key
307 client_certificate_key: Path to private key file for client certificate
308 client_certificate_password: Password for client certificate private key, if encrypted.
309 If not provided and the key is encrypted, yt-dlp will ask interactively
310 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
311 (Only supported by some extractors)
312 http_headers: A dictionary of custom headers to be used for all requests
313 proxy: URL of the proxy server to use
314 geo_verification_proxy: URL of the proxy to use for IP address verification
315 on geo-restricted sites.
316 socket_timeout: Time to wait for unresponsive hosts, in seconds
317 bidi_workaround: Work around buggy terminals without bidirectional text
318 support, using fribidi
319 debug_printtraffic: Print out sent and received HTTP traffic
320 default_search: Prepend this string if an input url is not valid.
321 'auto' for elaborate guessing
322 encoding: Use this encoding instead of the system-specified.
323 extract_flat: Whether to resolve and process url_results further
324 * False: Always process (default)
325 * True: Never process
326 * 'in_playlist': Do not process inside playlist/multi_video
327 * 'discard': Always process, but don't return the result
328 from inside playlist/multi_video
329 * 'discard_in_playlist': Same as "discard", but only for
330 playlists (not multi_video)
331 wait_for_video: If given, wait for scheduled streams to become available.
332 The value should be a tuple containing the range
333 (min_secs, max_secs) to wait between retries
334 postprocessors: A list of dictionaries, each with an entry
335 * key: The name of the postprocessor. See
336 yt_dlp/postprocessor/__init__.py for a list.
337 * when: When to run the postprocessor. Allowed values are
338 the entries of utils.POSTPROCESS_WHEN
339 Assumed to be 'post_process' if not given
340 progress_hooks: A list of functions that get called on download
341 progress, with a dictionary with the entries
342 * status: One of "downloading", "error", or "finished".
343 Check this first and ignore unknown values.
344 * info_dict: The extracted info_dict
346 If status is one of "downloading", or "finished", the
347 following properties may also be present:
348 * filename: The final filename (always present)
349 * tmpfilename: The filename we're currently writing to
350 * downloaded_bytes: Bytes on disk
351 * total_bytes: Size of the whole file, None if unknown
352 * total_bytes_estimate: Guess of the eventual file size,
354 * elapsed: The number of seconds since download started.
355 * eta: The estimated time in seconds, None if unknown
356 * speed: The download speed in bytes/second, None if
358 * fragment_index: The counter of the currently
359 downloaded video fragment.
360 * fragment_count: The number of fragments (= individual
361 files that will be merged)
363 Progress hooks are guaranteed to be called at least once
364 (with status "finished") if the download is successful.
365 postprocessor_hooks: A list of functions that get called on postprocessing
366 progress, with a dictionary with the entries
367 * status: One of "started", "processing", or "finished".
368 Check this first and ignore unknown values.
369 * postprocessor: Name of the postprocessor
370 * info_dict: The extracted info_dict
372 Postprocessor hooks are guaranteed to be called at least twice
373 (with status "started" and "finished") if the processing is successful.
374 merge_output_format: Extension to use when merging formats.
375 final_ext: Expected final extension; used to detect when the file was
376 already downloaded and converted
377 fixup: Automatically correct known faults of the file.
379 - "never": do nothing
380 - "warn": only emit a warning
381 - "detect_or_warn": check whether we can do anything
382 about it, warn otherwise (default)
383 source_address: Client-side IP address to bind to.
384 sleep_interval_requests: Number of seconds to sleep between requests
386 sleep_interval: Number of seconds to sleep before each download when
387 used alone or a lower bound of a range for randomized
388 sleep before each download (minimum possible number
389 of seconds to sleep) when used along with
391 max_sleep_interval: Upper bound of a range for randomized sleep before each
392 download (maximum possible number of seconds to sleep).
393 Must only be used along with sleep_interval.
394 Actual sleep time will be a random float from range
395 [sleep_interval; max_sleep_interval].
396 sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
397 listformats: Print an overview of available video formats and exit.
398 list_thumbnails: Print a table of all thumbnails and exit.
399 match_filter: A function that gets called for every video with the signature
400 (info_dict, *, incomplete: bool) -> Optional[str]
401 For backward compatibility with youtube-dl, the signature
402 (info_dict) -> Optional[str] is also allowed.
403 - If it returns a message, the video is ignored.
404 - If it returns None, the video is downloaded.
405 - If it returns utils.NO_DEFAULT, the user is interactively
406 asked whether to download the video.
407 match_filter_func in utils.py is one example for this.
408 no_color: Do not emit color codes in output.
409 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
412 Two-letter ISO 3166-1 alpha-2 country code that will be used for
413 explicit geographic restriction bypassing via faking
414 X-Forwarded-For HTTP header
416 IP range in CIDR notation that will be used similarly to
418 external_downloader: A dictionary of protocol keys and the executable of the
419 external downloader to use for it. The allowed protocols
420 are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
421 Set the value to 'native' to use the native downloader
422 compat_opts: Compatibility options. See "Differences in default behavior".
423 The following options do not work when used through the API:
424 filename, abort-on-error, multistreams, no-live-chat, format-sort
425 no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
426 Refer to __init__.py for their implementation
427 progress_template: Dictionary of templates for progress outputs.
428 Allowed keys are 'download', 'postprocess',
429 'download-title' (console title) and 'postprocess-title'.
430 The template is mapped on a dictionary with keys 'progress' and 'info'
431 retry_sleep_functions: Dictionary of functions that takes the number of attempts
432 as argument and returns the time to sleep in seconds.
433 Allowed keys are 'http', 'fragment', 'file_access'
434 download_ranges: A callback function that gets called for every video with
435 the signature (info_dict, ydl) -> Iterable[Section].
436 Only the returned sections will be downloaded.
437 Each Section is a dict with the following keys:
438 * start_time: Start time of the section in seconds
439 * end_time: End time of the section in seconds
440 * title: Section title (Optional)
441 * index: Section number (Optional)
442 force_keyframes_at_cuts: Re-encode the video when downloading ranges to get precise cuts
443 noprogress: Do not print the progress bar
445 The following parameters are not used by YoutubeDL itself, they are used by
446 the downloader (see yt_dlp/downloader/common.py):
447 nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
448 max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
449 continuedl, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
450 external_downloader_args, concurrent_fragment_downloads.
452 The following options are used by the post processors:
453 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
454 to the binary or its containing directory.
455 postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
456 and a list of additional command-line arguments for the
457 postprocessor/executable. The dict can also have "PP+EXE" keys
458 which are used when the given exe is used by the given PP.
459 Use 'default' as the name for arguments to be passed to all PP
460 For compatibility with youtube-dl, a single list of args
463 The following options are used by the extractors:
464 extractor_retries: Number of times to retry for known errors
465 dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
466 hls_split_discontinuity: Split HLS playlists to different formats at
467 discontinuities such as ad breaks (default: False)
468 extractor_args: A dictionary of arguments to be passed to the extractors.
469 See "EXTRACTOR ARGUMENTS" for details.
470 Eg: {'youtube': {'skip': ['dash', 'hls']}}
471 mark_watched: Mark videos watched (even with --simulate). Only for YouTube
473 The following options are deprecated and may be removed in the future:
475 playliststart: - Use playlist_items
476 Playlist item to start at.
477 playlistend: - Use playlist_items
478 Playlist item to end at.
479 playlistreverse: - Use playlist_items
480 Download playlist items in reverse order.
481 forceurl: - Use forceprint
482 Force printing final URL.
483 forcetitle: - Use forceprint
484 Force printing title.
485 forceid: - Use forceprint
487 forcethumbnail: - Use forceprint
488 Force printing thumbnail URL.
489 forcedescription: - Use forceprint
490 Force printing description.
491 forcefilename: - Use forceprint
492 Force printing final filename.
493 forceduration: - Use forceprint
494 Force printing duration.
495 allsubtitles: - Use subtitleslangs = ['all']
496 Downloads all the subtitles of the video
497 (requires writesubtitles or writeautomaticsub)
498 include_ads: - Doesn't work
500 call_home: - Not implemented
501 Boolean, true iff we are allowed to contact the
502 yt-dlp servers for debugging.
503 post_hooks: - Register a custom postprocessor
504 A list of functions that get called as the final step
505 for each video file, after all postprocessors have been
506 called. The filename will be passed as the only argument.
507 hls_prefer_native: - Use external_downloader = {'m3u8': 'native'} or {'m3u8': 'ffmpeg'}.
508 Use the native HLS downloader instead of ffmpeg/avconv
509 if True, otherwise use ffmpeg/avconv if False, otherwise
510 use downloader suggested by extractor if None.
511 prefer_ffmpeg: - avconv support is deprecated
512 If False, use avconv instead of ffmpeg if both are available,
513 otherwise prefer ffmpeg.
514 youtube_include_dash_manifest: - Use extractor_args
515 If True (default), DASH manifests and related
516 data will be downloaded and processed by extractor.
517 You can reduce network I/O by disabling it if you don't
518 care about DASH. (only for youtube)
519 youtube_include_hls_manifest: - Use extractor_args
520 If True (default), HLS manifests and related
521 data will be downloaded and processed by extractor.
522 You can reduce network I/O by disabling it if you don't
523 care about HLS. (only for youtube)
527 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
528 'timestamp', 'release_timestamp',
529 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
530 'average_rating', 'comment_count', 'age_limit',
531 'start_time', 'end_time',
532 'chapter_number', 'season_number', 'episode_number',
533 'track_number', 'disc_number', 'release_year',
537 # NB: Keep in sync with the docstring of extractor/common.py
538 'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note',
539 'width', 'height', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr',
540 'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx',
541 'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start',
542 'preference', 'language', 'language_preference', 'quality', 'source_preference',
543 'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'downloader_options',
544 'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'
546 _format_selection_exts
= {
547 'audio': set(MEDIA_EXTENSIONS
.common_audio
),
548 'video': set(MEDIA_EXTENSIONS
.common_video
+ ('3gp', )),
549 'storyboards': set(MEDIA_EXTENSIONS
.storyboards
),
552 def __init__(self
, params
=None, auto_init
=True):
553 """Create a YoutubeDL object with the given options.
554 @param auto_init Whether to load the default extractors and print header (if verbose).
555 Set to 'no_verbose_header' to not print the header
561 self
._ies
_instances
= {}
562 self
._pps
= {k: [] for k in POSTPROCESS_WHEN}
563 self
._printed
_messages
= set()
564 self
._first
_webpage
_request
= True
565 self
._post
_hooks
= []
566 self
._progress
_hooks
= []
567 self
._postprocessor
_hooks
= []
568 self
._download
_retcode
= 0
569 self
._num
_downloads
= 0
571 self
._playlist
_level
= 0
572 self
._playlist
_urls
= set()
573 self
.cache
= Cache(self
)
575 windows_enable_vt_mode()
576 stdout
= sys
.stderr
if self
.params
.get('logtostderr') else sys
.stdout
577 self
._out
_files
= Namespace(
580 screen
=sys
.stderr
if self
.params
.get('quiet') else stdout
,
581 console
=None if compat_os_name
== 'nt' else next(
582 filter(supports_terminal_sequences
, (sys
.stderr
, sys
.stdout
)), None)
584 self
._allow
_colors
= Namespace(**{
585 type_
: not self
.params
.get('no_color') and supports_terminal_sequences(stream
)
586 for type_
, stream
in self
._out
_files
.items_
if type_
!= 'console'
589 # The code is left like this to be reused for future deprecations
590 MIN_SUPPORTED
, MIN_RECOMMENDED
= (3, 7), (3, 7)
591 current_version
= sys
.version_info
[:2]
592 if current_version
< MIN_RECOMMENDED
:
593 msg
= ('Support for Python version %d.%d has been deprecated. '
594 'See https://github.com/yt-dlp/yt-dlp/issues/3764 for more details.'
595 '\n You will no longer receive updates on this version')
596 if current_version
< MIN_SUPPORTED
:
597 msg
= 'Python version %d.%d is no longer supported'
598 self
.deprecation_warning(
599 f
'{msg}! Please update to Python %d.%d or above' % (*current_version
, *MIN_RECOMMENDED
))
601 if self
.params
.get('allow_unplayable_formats'):
603 f
'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
604 'This is a developer option intended for debugging. \n'
605 ' If you experience any issues while using this option, '
606 f
'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')
608 def check_deprecated(param
, option
, suggestion
):
609 if self
.params
.get(param
) is not None:
610 self
.report_warning(f
'{option} is deprecated. Use {suggestion} instead')
614 if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
615 if self
.params
.get('geo_verification_proxy') is None:
616 self
.params
['geo_verification_proxy'] = self
.params
['cn_verification_proxy']
618 check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
619 check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
620 check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
622 for msg
in self
.params
.get('_warnings', []):
623 self
.report_warning(msg
)
624 for msg
in self
.params
.get('_deprecation_warnings', []):
625 self
.deprecation_warning(msg
)
627 self
.params
['compat_opts'] = set(self
.params
.get('compat_opts', ()))
628 if 'list-formats' in self
.params
['compat_opts']:
629 self
.params
['listformats_table'] = False
631 if 'overwrites' not in self
.params
and self
.params
.get('nooverwrites') is not None:
632 # nooverwrites was unnecessarily changed to overwrites
633 # in 0c3d0f51778b153f65c21906031c2e091fcfb641
634 # This ensures compatibility with both keys
635 self
.params
['overwrites'] = not self
.params
['nooverwrites']
636 elif self
.params
.get('overwrites') is None:
637 self
.params
.pop('overwrites', None)
639 self
.params
['nooverwrites'] = not self
.params
['overwrites']
641 self
.params
.setdefault('forceprint', {})
642 self
.params
.setdefault('print_to_file', {})
644 # Compatibility with older syntax
645 if not isinstance(params
['forceprint'], dict):
646 self
.params
['forceprint'] = {'video': params['forceprint']}
648 if self
.params
.get('bidi_workaround', False):
651 master
, slave
= pty
.openpty()
652 width
= shutil
.get_terminal_size().columns
653 width_args
= [] if width
is None else ['-w', str(width
)]
654 sp_kwargs
= {'stdin': subprocess.PIPE, 'stdout': slave, 'stderr': self._out_files.error}
656 self
._output
_process
= Popen(['bidiv'] + width_args
, **sp_kwargs
)
658 self
._output
_process
= Popen(['fribidi', '-c', 'UTF-8'] + width_args
, **sp_kwargs
)
659 self
._output
_channel
= os
.fdopen(master
, 'rb')
660 except OSError as ose
:
661 if ose
.errno
== errno
.ENOENT
:
663 'Could not find fribidi executable, ignoring --bidi-workaround. '
664 'Make sure that fribidi is an executable file in one of the directories in your $PATH.')
669 if auto_init
!= 'no_verbose_header':
670 self
.print_debug_header()
671 self
.add_default_info_extractors()
673 if (sys
.platform
!= 'win32'
674 and sys
.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
675 and not self
.params
.get('restrictfilenames', False)):
676 # Unicode filesystem API will throw errors (#1474, #13027)
678 'Assuming --restrict-filenames since file system encoding '
679 'cannot encode all characters. '
680 'Set the LC_ALL environment variable to fix this.')
681 self
.params
['restrictfilenames'] = True
683 self
._parse
_outtmpl
()
685 # Creating format selector here allows us to catch syntax errors before the extraction
686 self
.format_selector
= (
687 self
.params
.get('format') if self
.params
.get('format') in (None, '-')
688 else self
.params
['format'] if callable(self
.params
['format'])
689 else self
.build_format_selector(self
.params
['format']))
691 # Set http_headers defaults according to std_headers
692 self
.params
['http_headers'] = merge_headers(std_headers
, self
.params
.get('http_headers', {}))
695 'post_hooks': self
.add_post_hook
,
696 'progress_hooks': self
.add_progress_hook
,
697 'postprocessor_hooks': self
.add_postprocessor_hook
,
699 for opt
, fn
in hooks
.items():
700 for ph
in self
.params
.get(opt
, []):
703 for pp_def_raw
in self
.params
.get('postprocessors', []):
704 pp_def
= dict(pp_def_raw
)
705 when
= pp_def
.pop('when', 'post_process')
706 self
.add_post_processor(
707 get_postprocessor(pp_def
.pop('key'))(self
, **pp_def
),
711 register_socks_protocols()
713 def preload_download_archive(fn
):
714 """Preload the archive, if any is specified"""
717 self
.write_debug(f
'Loading archive file {fn!r}')
719 with locked_file(fn
, 'r', encoding
='utf-8') as archive_file
:
720 for line
in archive_file
:
721 self
.archive
.add(line
.strip())
722 except OSError as ioe
:
723 if ioe
.errno
!= errno
.ENOENT
:
729 preload_download_archive(self
.params
.get('download_archive'))
def warn_if_short_id(self, argv):
    """Warn if an argument looks like a video ID that starts with a dash.

    An 11-character token such as "-abcdefghij" would be parsed as a
    command-line option rather than as an ID, so the user is shown a
    corrected command line that moves such tokens after a "--" separator.

    @param argv  Sequence of command-line argument strings
    """
    # short YouTube ID starting with dash?
    idxs = [
        i for i, a in enumerate(argv)
        if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
    if not idxs:
        return

    # NOTE(review): the leading program-name element was not visible in the
    # reviewed text; ['yt-dlp'] is assumed here -- confirm before merging.
    correct_argv = (
        ['yt-dlp']
        + [a for i, a in enumerate(argv) if i not in idxs]
        + ['--'] + [argv[i] for i in idxs]
    )
    self.report_warning(
        'Long argument string detected. '
        'Use -- to separate parameters and URLs, like this:\n%s' %
        args_to_str(correct_argv))
def add_info_extractor(self, ie):
    """Add an InfoExtractor object to the end of the list.

    @param ie  Either an InfoExtractor class or an instance of one.
               Classes are only registered in self._ies; instances are
               additionally cached in self._ies_instances and told which
               downloader they belong to via set_downloader().
    """
    # NOTE(review): the statement defining ie_key was not visible in the
    # reviewed text; ie.ie_key() is assumed -- confirm before merging.
    ie_key = ie.ie_key()
    self._ies[ie_key] = ie
    if not isinstance(ie, type):
        self._ies_instances[ie_key] = ie
        ie.set_downloader(self)
755 def _get_info_extractor_class(self
, ie_key
):
756 ie
= self
._ies
.get(ie_key
)
758 ie
= get_info_extractor(ie_key
)
759 self
.add_info_extractor(ie
)
def get_info_extractor(self, ie_key):
    """
    Get an instance of an IE with name ie_key. It will try to get one from
    the cached instances (self._ies_instances); if there's no instance it
    will create a new one and add it to the extractor list.
    """
    ie = self._ies_instances.get(ie_key)
    if ie is None:
        # The bare name below is meant to be the module-level helper imported
        # from .extractor (it returns a class, which is instantiated here),
        # not this method.
        ie = get_info_extractor(ie_key)()
        self.add_info_extractor(ie)
    return ie
def add_default_info_extractors(self):
    """
    Add the InfoExtractors returned by gen_extractor_classes to the end of the list
    """
    for ie in gen_extractor_classes():
        self.add_info_extractor(ie)
def add_post_processor(self, pp, when='post_process'):
    """Add a PostProcessor object to the end of the chain.

    @param pp    The postprocessor; it is bound to this object through
                 pp.set_downloader(self)
    @param when  Stage whose chain the postprocessor is appended to.
                 Must be one of POSTPROCESS_WHEN
    """
    assert when in POSTPROCESS_WHEN, f'Invalid when={when}'
    self._pps[when].append(pp)
    pp.set_downloader(self)
def add_post_hook(self, ph):
    """Add the post hook

    @param ph  Hook object appended to self._post_hooks
    """
    self._post_hooks.append(ph)
def add_progress_hook(self, ph):
    """Add the download progress hook

    @param ph  Hook object appended to self._progress_hooks
    """
    self._progress_hooks.append(ph)
def add_postprocessor_hook(self, ph):
    """Add the postprocessing progress hook

    The hook is recorded in self._postprocessor_hooks and also passed to
    add_progress_hook() of every postprocessor already present in self._pps.
    """
    self._postprocessor_hooks.append(ph)
    for pps in self._pps.values():
        for pp in pps:
            pp.add_progress_hook(ph)
802 def _bidi_workaround(self
, message
):
803 if not hasattr(self
, '_output_channel'):
806 assert hasattr(self
, '_output_process')
807 assert isinstance(message
, str)
808 line_count
= message
.count('\n') + 1
809 self
._output
_process
.stdin
.write((message
+ '\n').encode())
810 self
._output
_process
.stdin
.flush()
811 res
= ''.join(self
._output
_channel
.readline().decode()
812 for _
in range(line_count
))
813 return res
[:-len('\n')]
815 def _write_string(self
, message
, out
=None, only_once
=False):
817 if message
in self
._printed
_messages
:
819 self
._printed
_messages
.add(message
)
820 write_string(message
, out
=out
, encoding
=self
.params
.get('encoding'))
def to_stdout(self, message, skip_eol=False, quiet=None):
    """Print message to stdout

    The message always gets a trailing newline and is written to
    self._out_files.out. The skip_eol and quiet arguments are no longer
    honoured; passing a non-default value only emits a deprecation warning.
    """
    if quiet is not None:
        self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument quiet. Use "YoutubeDL.to_screen" instead')
    if skip_eol is not False:
        self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument skip_eol. Use "YoutubeDL.to_screen" instead')
    self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.out)
830 def to_screen(self
, message
, skip_eol
=False, quiet
=None):
831 """Print message to screen if not in quiet mode"""
832 if self
.params
.get('logger'):
833 self
.params
['logger'].debug(message
)
835 if (self
.params
.get('quiet') if quiet
is None else quiet
) and not self
.params
.get('verbose'):
838 '%s%s' % (self
._bidi
_workaround
(message
), ('' if skip_eol
else '\n')),
839 self
._out
_files
.screen
)
841 def to_stderr(self
, message
, only_once
=False):
842 """Print message to stderr"""
843 assert isinstance(message
, str)
844 if self
.params
.get('logger'):
845 self
.params
['logger'].error(message
)
847 self
._write
_string
(f
'{self._bidi_workaround(message)}\n', self
._out
_files
.error
, only_once
=only_once
)
849 def _send_console_code(self
, code
):
850 if compat_os_name
== 'nt' or not self
._out
_files
.console
:
852 self
._write
_string
(code
, self
._out
_files
.console
)
854 def to_console_title(self
, message
):
855 if not self
.params
.get('consoletitle', False):
857 message
= remove_terminal_sequences(message
)
858 if compat_os_name
== 'nt':
859 if ctypes
.windll
.kernel32
.GetConsoleWindow():
860 # c_wchar_p() might not be necessary if `message` is
861 # already of type unicode()
862 ctypes
.windll
.kernel32
.SetConsoleTitleW(ctypes
.c_wchar_p(message
))
864 self
._send
_console
_code
(f
'\033]0;{message}\007')
866 def save_console_title(self
):
867 if not self
.params
.get('consoletitle') or self
.params
.get('simulate'):
869 self
._send
_console
_code
('\033[22;0t') # Save the title on stack
871 def restore_console_title(self
):
872 if not self
.params
.get('consoletitle') or self
.params
.get('simulate'):
874 self
._send
_console
_code
('\033[23;0t') # Restore the title from stack
877 self
.save_console_title()
880 def __exit__(self
, *args
):
881 self
.restore_console_title()
883 if self
.params
.get('cookiefile') is not None:
884 self
.cookiejar
.save(ignore_discard
=True, ignore_expires
=True)
886 def trouble(self
, message
=None, tb
=None, is_error
=True):
887 """Determine action to take when a download problem appears.
889 Depending on if the downloader has been configured to ignore
890 download errors or not, this method may throw an exception or
891 not when errors are found, after printing the message.
893 @param tb If given, is additional traceback information
894 @param is_error Whether to raise error according to ignorerrors
896 if message
is not None:
897 self
.to_stderr(message
)
898 if self
.params
.get('verbose'):
900 if sys
.exc_info()[0]: # if .trouble has been called from an except block
902 if hasattr(sys
.exc_info()[1], 'exc_info') and sys
.exc_info()[1].exc_info
[0]:
903 tb
+= ''.join(traceback
.format_exception(*sys
.exc_info()[1].exc_info
))
904 tb
+= encode_compat_str(traceback
.format_exc())
906 tb_data
= traceback
.format_list(traceback
.extract_stack())
907 tb
= ''.join(tb_data
)
912 if not self
.params
.get('ignoreerrors'):
913 if sys
.exc_info()[0] and hasattr(sys
.exc_info()[1], 'exc_info') and sys
.exc_info()[1].exc_info
[0]:
914 exc_info
= sys
.exc_info()[1].exc_info
916 exc_info
= sys
.exc_info()
917 raise DownloadError(message
, exc_info
)
918 self
._download
_retcode
= 1
922 EMPHASIS
='light blue',
928 SUPPRESS
='light black',
931 def _format_text(self
, handle
, allow_colors
, text
, f
, fallback
=None, *, test_encoding
=False):
935 # handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711
936 encoding
= self
.params
.get('encoding') or getattr(handle
, 'encoding', None) or 'ascii'
937 text
= text
.encode(encoding
, 'ignore').decode(encoding
)
938 if fallback
is not None and text
!= original_text
:
940 return format_text(text
, f
) if allow_colors
else text
if fallback
is None else fallback
942 def _format_out(self
, *args
, **kwargs
):
943 return self
._format
_text
(self
._out
_files
.out
, self
._allow
_colors
.out
, *args
, **kwargs
)
945 def _format_screen(self
, *args
, **kwargs
):
946 return self
._format
_text
(self
._out
_files
.screen
, self
._allow
_colors
.screen
, *args
, **kwargs
)
948 def _format_err(self
, *args
, **kwargs
):
949 return self
._format
_text
(self
._out
_files
.error
, self
._allow
_colors
.error
, *args
, **kwargs
)
951 def report_warning(self
, message
, only_once
=False):
953 Print the message to stderr, it will be prefixed with 'WARNING:'
954 If stderr is a tty file the 'WARNING:' will be colored
956 if self
.params
.get('logger') is not None:
957 self
.params
['logger'].warning(message
)
959 if self
.params
.get('no_warnings'):
961 self
.to_stderr(f
'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once
)
963 def deprecation_warning(self
, message
):
964 if self
.params
.get('logger') is not None:
965 self
.params
['logger'].warning(f
'DeprecationWarning: {message}')
967 self
.to_stderr(f
'{self._format_err("DeprecationWarning:", self.Styles.ERROR)} {message}', True)
def report_error(self, message, *args, **kwargs):
    '''
    Prefix the message with 'ERROR:' (styled with Styles.ERROR through
    _format_err) and hand it to trouble, forwarding any extra arguments.
    '''
    label = self._format_err("ERROR:", self.Styles.ERROR)
    self.trouble(f'{label} {message}', *args, **kwargs)
976 def write_debug(self
, message
, only_once
=False):
977 '''Log debug message or Print message to stderr'''
978 if not self
.params
.get('verbose', False):
980 message
= f
'[debug] {message}'
981 if self
.params
.get('logger'):
982 self
.params
['logger'].debug(message
)
984 self
.to_stderr(message
, only_once
)
def report_file_already_downloaded(self, file_name):
    """Tell the user that `file_name` was already completely downloaded."""
    generic_notice = '[download] The file has already been downloaded'
    try:
        self.to_screen('[download] %s has already been downloaded' % file_name)
    except UnicodeEncodeError:
        # The name could not be encoded for output; report without it
        self.to_screen(generic_notice)
def report_file_delete(self, file_name):
    """Tell the user that the existing file `file_name` is about to be deleted."""
    generic_notice = 'Deleting existing file'
    try:
        self.to_screen('Deleting existing file %s' % file_name)
    except UnicodeEncodeError:
        # The name could not be encoded for output; report without it
        self.to_screen(generic_notice)
def raise_no_formats(self, info, forced=False, *, msg=None):
    """Report that no formats are available for `info`.

    @param info    Info dict; '_has_drm' is consulted, and 'id' and
                   'extractor' are required when an error is raised
    @param forced  Raise even if 'ignore_no_formats_error' is set
    @param msg     Custom message; when given, the raised error is
                   flagged as expected

    Raises ExtractorError unless the 'ignore_no_formats_error' param is
    set and `forced` is false, in which case only a warning is reported.
    """
    has_drm = info.get('_has_drm')
    ignored = self.params.get('ignore_no_formats_error')
    expected = bool(msg)
    if not msg:
        msg = 'This video is DRM protected' if has_drm else 'No video formats found!'

    if ignored and not forced:
        self.report_warning(msg)
        return
    raise ExtractorError(
        msg, video_id=info['id'], ie=info['extractor'],
        expected=has_drm or ignored or expected)
def parse_outtmpl(self):
    """Deprecated public wrapper around _parse_outtmpl.

    Emits a deprecation warning, runs _parse_outtmpl() and returns the
    resulting 'outtmpl' entry of params.
    """
    notice = '"YoutubeDL.parse_outtmpl" is deprecated and may be removed in a future version'
    self.deprecation_warning(notice)
    self._parse_outtmpl()
    templates = self.params['outtmpl']
    return templates
1015 def _parse_outtmpl(self
):
1017 if self
.params
.get('restrictfilenames'): # Remove spaces in the default template
1018 sanitize
= lambda x
: x
.replace(' - ', ' ').replace(' ', '-')
1020 outtmpl
= self
.params
.setdefault('outtmpl', {})
1021 if not isinstance(outtmpl
, dict):
1022 self
.params
['outtmpl'] = outtmpl
= {'default': outtmpl}
1023 outtmpl
.update({k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items() if outtmpl.get(k) is None}
)
1025 def get_output_path(self
, dir_type
='', filename
=None):
1026 paths
= self
.params
.get('paths', {})
1027 assert isinstance(paths
, dict)
1028 path
= os
.path
.join(
1029 expand_path(paths
.get('home', '').strip()),
1030 expand_path(paths
.get(dir_type
, '').strip()) if dir_type
else '',
1032 return sanitize_path(path
, force
=self
.params
.get('windowsfilenames'))
def _outtmpl_expandpath(outtmpl):
    """Apply expand_path to an output template while keeping '%%' and '$$' intact."""
    # expand_path would turn '%%' into '%' and '$$' into '$', but '%%' has
    # to survive for the later template dict substitution step. So each
    # pair is split apart with a random boundary-like marker, which is
    # stripped out again once the expansion is done.
    marker = ''.join(random.choice(ascii_letters) for _ in range(32))
    guarded = outtmpl.replace('%%', f'%{marker}%')
    guarded = guarded.replace('$$', f'${marker}$')

    # The expansion must happen before the template dict substitution,
    # because meta fields may contain env variables that must not be
    # expanded. E.g. with outtmpl "%(title)s.%(ext)s" and the title
    # "Hello $PATH", `$PATH` has to stay as it is.
    expanded = expand_path(guarded)
    return expanded.replace(marker, '')
1050 def escape_outtmpl(outtmpl
):
1051 ''' Escape any remaining strings like %s, %abc% etc. '''
1053 STR_FORMAT_RE_TMPL
.format('', '(?![%(\0])'),
1054 lambda mobj
: ('' if mobj
.group('has_key') else '%') + mobj
.group(0),
1058 def validate_outtmpl(cls
, outtmpl
):
1059 ''' @return None or Exception object '''
1061 STR_FORMAT_RE_TMPL
.format('[^)]*', '[ljhqBUDS]'),
1062 lambda mobj
: f
'{mobj.group(0)[:-1]}s',
1063 cls
._outtmpl
_expandpath
(outtmpl
))
1065 cls
.escape_outtmpl(outtmpl
) % collections
.defaultdict(int)
1067 except ValueError as err
:
1071 def _copy_infodict(info_dict
):
1072 info_dict
= dict(info_dict
)
1073 info_dict
.pop('__postprocessors', None)
1074 info_dict
.pop('__pending_error', None)
1077 def prepare_outtmpl(self
, outtmpl
, info_dict
, sanitize
=False):
1078 """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
1079 @param sanitize Whether to sanitize the output as a filename.
1080 For backward compatibility, a function can also be passed
1083 info_dict
.setdefault('epoch', int(time
.time())) # keep epoch consistent once set
1085 info_dict
= self
._copy
_infodict
(info_dict
)
1086 info_dict
['duration_string'] = ( # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
1087 formatSeconds(info_dict
['duration'], '-' if sanitize
else ':')
1088 if info_dict
.get('duration', None) is not None
1090 info_dict
['autonumber'] = int(self
.params
.get('autonumber_start', 1) - 1 + self
._num
_downloads
)
1091 info_dict
['video_autonumber'] = self
._num
_videos
1092 if info_dict
.get('resolution') is None:
1093 info_dict
['resolution'] = self
.format_resolution(info_dict
, default
=None)
1095 # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
1096 # of %(field)s to %(field)0Nd for backward compatibility
1097 field_size_compat_map
= {
1098 'playlist_index': number_of_digits(info_dict
.get('__last_playlist_index') or 0),
1099 'playlist_autonumber': number_of_digits(info_dict
.get('n_entries') or 0),
1100 'autonumber': self
.params
.get('autonumber_size') or 5,
1104 EXTERNAL_FORMAT_RE
= re
.compile(STR_FORMAT_RE_TMPL
.format('[^)]*', f
'[{STR_FORMAT_TYPES}ljhqBUDS]'))
1109 # Field is of the form key1.key2...
1110 # where keys (except first) can be string, int or slice
1111 FIELD_RE
= r
'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num
=r
'(?:-?\d+)')
1112 MATH_FIELD_RE
= rf
'(?:{FIELD_RE}|-?{NUMBER_RE})'
1113 MATH_OPERATORS_RE
= r
'(?:%s)' % '|'.join(map(re
.escape
, MATH_FUNCTIONS
.keys()))
1114 INTERNAL_FORMAT_RE
= re
.compile(rf
'''(?x)
1116 (?P<fields>{FIELD_RE})
1117 (?P<maths>(?:{MATH_OPERATORS_RE}{MATH_FIELD_RE})*)
1118 (?:>(?P<strf_format>.+?))?
1120 (?P<alternate>(?<!\\),[^|&)]+)?
1121 (?:&(?P<replacement>.*?))?
1122 (?:\|(?P<default>.*?))?
1125 def _traverse_infodict(k
):
1129 return traverse_obj(info_dict
, k
, is_user_input
=True, traverse_string
=True)
1131 def get_value(mdict
):
1133 value
= _traverse_infodict(mdict
['fields'])
1136 value
= float_or_none(value
)
1137 if value
is not None:
1140 offset_key
= mdict
['maths']
1142 value
= float_or_none(value
)
1146 MATH_FIELD_RE
if operator
else MATH_OPERATORS_RE
,
1147 offset_key
).group(0)
1148 offset_key
= offset_key
[len(item
):]
1149 if operator
is None:
1150 operator
= MATH_FUNCTIONS
[item
]
1152 item
, multiplier
= (item
[1:], -1) if item
[0] == '-' else (item
, 1)
1153 offset
= float_or_none(item
)
1155 offset
= float_or_none(_traverse_infodict(item
))
1157 value
= operator(value
, multiplier
* offset
)
1158 except (TypeError, ZeroDivisionError):
1161 # Datetime formatting
1162 if mdict
['strf_format']:
1163 value
= strftime_or_none(value
, mdict
['strf_format'].replace('\\,', ','))
1165 # XXX: Workaround for https://github.com/yt-dlp/yt-dlp/issues/4485
1166 if sanitize
and value
== '':
1170 na
= self
.params
.get('outtmpl_na_placeholder', 'NA')
1172 def filename_sanitizer(key
, value
, restricted
=self
.params
.get('restrictfilenames')):
1173 return sanitize_filename(str(value
), restricted
=restricted
, is_id
=(
1174 bool(re
.search(r
'(^|[_.])id(\.|$)', key
))
1175 if 'filename-sanitization' in self
.params
['compat_opts']
1178 sanitizer
= sanitize
if callable(sanitize
) else filename_sanitizer
1179 sanitize
= bool(sanitize
)
1181 def _dumpjson_default(obj
):
1182 if isinstance(obj
, (set, LazyList
)):
1186 def create_key(outer_mobj
):
1187 if not outer_mobj
.group('has_key'):
1188 return outer_mobj
.group(0)
1189 key
= outer_mobj
.group('key')
1190 mobj
= re
.match(INTERNAL_FORMAT_RE
, key
)
1191 initial_field
= mobj
.group('fields') if mobj
else ''
1192 value
, replacement
, default
= None, None, na
1194 mobj
= mobj
.groupdict()
1195 default
= mobj
['default'] if mobj
['default'] is not None else default
1196 value
= get_value(mobj
)
1197 replacement
= mobj
['replacement']
1198 if value
is None and mobj
['alternate']:
1199 mobj
= re
.match(INTERNAL_FORMAT_RE
, mobj
['remaining'][1:])
1203 fmt
= outer_mobj
.group('format')
1204 if fmt
== 's' and value
is not None and key
in field_size_compat_map
.keys():
1205 fmt
= f
'0{field_size_compat_map[key]:d}d'
1207 value
= default
if value
is None else value
if replacement
is None else replacement
1209 flags
= outer_mobj
.group('conversion') or ''
1210 str_fmt
= f
'{fmt[:-1]}s'
1211 if fmt
[-1] == 'l': # list
1212 delim
= '\n' if '#' in flags
else ', '
1213 value
, fmt
= delim
.join(map(str, variadic(value
, allowed_types
=(str, bytes)))), str_fmt
1214 elif fmt
[-1] == 'j': # json
1215 value
, fmt
= json
.dumps(value
, default
=_dumpjson_default
, indent
=4 if '#' in flags
else None), str_fmt
1216 elif fmt
[-1] == 'h': # html
1217 value
, fmt
= escapeHTML(value
), str_fmt
1218 elif fmt
[-1] == 'q': # quoted
1219 value
= map(str, variadic(value
) if '#' in flags
else [value
])
1220 value
, fmt
= ' '.join(map(compat_shlex_quote
, value
)), str_fmt
1221 elif fmt
[-1] == 'B': # bytes
1222 value
= f
'%{str_fmt}'.encode() % str(value
).encode()
1223 value
, fmt
= value
.decode('utf-8', 'ignore'), 's'
1224 elif fmt
[-1] == 'U': # unicode normalized
1225 value
, fmt
= unicodedata
.normalize(
1226 # "+" = compatibility equivalence, "#" = NFD
1227 'NF%s%s' % ('K' if '+' in flags
else '', 'D' if '#' in flags
else 'C'),
1229 elif fmt
[-1] == 'D': # decimal suffix
1230 num_fmt
, fmt
= fmt
[:-1].replace('#', ''), 's'
1231 value
= format_decimal_suffix(value
, f
'%{num_fmt}f%s' if num_fmt
else '%d%s',
1232 factor
=1024 if '#' in flags
else 1000)
1233 elif fmt
[-1] == 'S': # filename sanitization
1234 value
, fmt
= filename_sanitizer(initial_field
, value
, restricted
='#' in flags
), str_fmt
1235 elif fmt
[-1] == 'c':
1237 value
= str(value
)[0]
1240 elif fmt
[-1] not in 'rs': # numeric
1241 value
= float_or_none(value
)
1243 value
, fmt
= default
, 's'
1247 # If value is an object, sanitize might convert it to a string
1248 # So we convert it to repr first
1249 value
, fmt
= repr(value
), str_fmt
1250 if fmt
[-1] in 'csr':
1251 value
= sanitizer(initial_field
, value
)
1253 key
= '%s\0%s' % (key
.replace('%', '%\0'), outer_mobj
.group('format'))
1254 TMPL_DICT
[key
] = value
1255 return '{prefix}%({key}){fmt}'.format(key
=key
, fmt
=fmt
, prefix
=outer_mobj
.group('prefix'))
1257 return EXTERNAL_FORMAT_RE
.sub(create_key
, outtmpl
), TMPL_DICT
def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
    """Render `outtmpl` with the fields of `info_dict`.

    Extra arguments are forwarded to prepare_outtmpl. The prepared
    template is escaped with escape_outtmpl and then %-formatted with
    the prepared values.
    """
    prepared_tmpl, tmpl_values = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
    escaped_tmpl = self.escape_outtmpl(prepared_tmpl)
    return escaped_tmpl % tmpl_values
1263 def _prepare_filename(self
, info_dict
, *, outtmpl
=None, tmpl_type
=None):
1264 assert None in (outtmpl
, tmpl_type
), 'outtmpl and tmpl_type are mutually exclusive'
1266 outtmpl
= self
.params
['outtmpl'].get(tmpl_type
or 'default', self
.params
['outtmpl']['default'])
1268 outtmpl
= self
._outtmpl
_expandpath
(outtmpl
)
1269 filename
= self
.evaluate_outtmpl(outtmpl
, info_dict
, True)
1273 if tmpl_type
in ('', 'temp'):
1274 final_ext
, ext
= self
.params
.get('final_ext'), info_dict
.get('ext')
1275 if final_ext
and ext
and final_ext
!= ext
and filename
.endswith(f
'.{final_ext}'):
1276 filename
= replace_extension(filename
, ext
, final_ext
)
1278 force_ext
= OUTTMPL_TYPES
[tmpl_type
]
1280 filename
= replace_extension(filename
, force_ext
, info_dict
.get('ext'))
1282 # https://github.com/blackjack4494/youtube-dlc/issues/85
1283 trim_file_name
= self
.params
.get('trim_file_name', False)
1285 no_ext
, *ext
= filename
.rsplit('.', 2)
1286 filename
= join_nonempty(no_ext
[:trim_file_name
], *ext
, delim
='.')
1289 except ValueError as err
:
1290 self
.report_error('Error in output template: ' + str(err
) + ' (encoding: ' + repr(preferredencoding()) + ')')
1293 def prepare_filename(self
, info_dict
, dir_type
='', *, outtmpl
=None, warn
=False):
1294 """Generate the output filename"""
1296 assert not dir_type
, 'outtmpl and dir_type are mutually exclusive'
1298 filename
= self
._prepare
_filename
(info_dict
, tmpl_type
=dir_type
, outtmpl
=outtmpl
)
1299 if not filename
and dir_type
not in ('', 'temp'):
1303 if not self
.params
.get('paths'):
1305 elif filename
== '-':
1306 self
.report_warning('--paths is ignored when an outputting to stdout', only_once
=True)
1307 elif os
.path
.isabs(filename
):
1308 self
.report_warning('--paths is ignored since an absolute path is given in output template', only_once
=True)
1309 if filename
== '-' or not filename
:
1312 return self
.get_output_path(dir_type
, filename
)
1314 def _match_entry(self
, info_dict
, incomplete
=False, silent
=False):
1315 """ Returns None if the file should be downloaded """
1317 video_title
= info_dict
.get('title', info_dict
.get('id', 'entry'))
1320 if 'title' in info_dict
:
1321 # This can happen when we're just evaluating the playlist
1322 title
= info_dict
['title']
1323 matchtitle
= self
.params
.get('matchtitle', False)
1325 if not re
.search(matchtitle
, title
, re
.IGNORECASE
):
1326 return '"' + title
+ '" title did not match pattern "' + matchtitle
+ '"'
1327 rejecttitle
= self
.params
.get('rejecttitle', False)
1329 if re
.search(rejecttitle
, title
, re
.IGNORECASE
):
1330 return '"' + title
+ '" title matched reject pattern "' + rejecttitle
+ '"'
1331 date
= info_dict
.get('upload_date')
1332 if date
is not None:
1333 dateRange
= self
.params
.get('daterange', DateRange())
1334 if date
not in dateRange
:
1335 return f
'{date_from_str(date).isoformat()} upload date is not in range {dateRange}'
1336 view_count
= info_dict
.get('view_count')
1337 if view_count
is not None:
1338 min_views
= self
.params
.get('min_views')
1339 if min_views
is not None and view_count
< min_views
:
1340 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title
, view_count
, min_views
)
1341 max_views
= self
.params
.get('max_views')
1342 if max_views
is not None and view_count
> max_views
:
1343 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title
, view_count
, max_views
)
1344 if age_restricted(info_dict
.get('age_limit'), self
.params
.get('age_limit')):
1345 return 'Skipping "%s" because it is age restricted' % video_title
1347 match_filter
= self
.params
.get('match_filter')
1348 if match_filter
is not None:
1350 ret
= match_filter(info_dict
, incomplete
=incomplete
)
1352 # For backward compatibility
1353 ret
= None if incomplete
else match_filter(info_dict
)
1354 if ret
is NO_DEFAULT
:
1356 filename
= self
._format
_screen
(self
.prepare_filename(info_dict
), self
.Styles
.FILENAME
)
1357 reply
= input(self
._format
_screen
(
1358 f
'Download "{filename}"? (Y/n): ', self
.Styles
.EMPHASIS
)).lower().strip()
1359 if reply
in {'y', ''}
:
1362 return f
'Skipping {video_title}'
1363 elif ret
is not None:
1367 if self
.in_download_archive(info_dict
):
1368 reason
= '%s has already been recorded in the archive' % video_title
1369 break_opt
, break_err
= 'break_on_existing', ExistingVideoReached
1371 reason
= check_filter()
1372 break_opt
, break_err
= 'break_on_reject', RejectedVideoReached
1373 if reason
is not None:
1375 self
.to_screen('[download] ' + reason
)
1376 if self
.params
.get(break_opt
, False):
def add_extra_info(info_dict, extra_info):
    '''Copy into info_dict every key of extra_info that it does not have yet'''
    for key, value in extra_info.items():
        # Keys that are already present keep their value, even if it is None
        if key not in info_dict:
            info_dict[key] = value
1386 def extract_info(self
, url
, download
=True, ie_key
=None, extra_info
=None,
1387 process
=True, force_generic_extractor
=False):
1389 Return a list with a dictionary for each video extracted.
1392 url -- URL to extract
1395 download -- whether to download videos during extraction
1396 ie_key -- extractor key hint
1397 extra_info -- dictionary containing the extra values to add to each result
1398 process -- whether to resolve all unresolved references (URLs, playlist items),
1399 must be True for download to work.
1400 force_generic_extractor -- force using the generic extractor
1403 if extra_info
is None:
1406 if not ie_key
and force_generic_extractor
:
1410 ies
= {ie_key: self._get_info_extractor_class(ie_key)}
1414 for ie_key
, ie
in ies
.items():
1415 if not ie
.suitable(url
):
1418 if not ie
.working():
1419 self
.report_warning('The program functionality for this site has been marked as broken, '
1420 'and will probably not work.')
1422 temp_id
= ie
.get_temp_id(url
)
1423 if temp_id
is not None and self
.in_download_archive({'id': temp_id, 'ie_key': ie_key}
):
1424 self
.to_screen(f
'[{ie_key}] {temp_id}: has already been recorded in the archive')
1425 if self
.params
.get('break_on_existing', False):
1426 raise ExistingVideoReached()
1428 return self
.__extract
_info
(url
, self
.get_info_extractor(ie_key
), download
, extra_info
, process
)
1430 self
.report_error('no suitable InfoExtractor for URL %s' % url
)
1432 def _handle_extraction_exceptions(func
):
1433 @functools.wraps(func
)
1434 def wrapper(self
, *args
, **kwargs
):
1437 return func(self
, *args
, **kwargs
)
1438 except (DownloadCancelled
, LazyList
.IndexError, PagedList
.IndexError):
1440 except ReExtractInfo
as e
:
1442 self
.to_screen(f
'{e}; Re-extracting data')
1444 self
.to_stderr('\r')
1445 self
.report_warning(f
'{e}; Re-extracting data')
1447 except GeoRestrictedError
as e
:
1450 msg
+= '\nThis video is available in %s.' % ', '.join(
1451 map(ISO3166Utils
.short2full
, e
.countries
))
1452 msg
+= '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1453 self
.report_error(msg
)
1454 except ExtractorError
as e
: # An error we somewhat expected
1455 self
.report_error(str(e
), e
.format_traceback())
1456 except Exception as e
:
1457 if self
.params
.get('ignoreerrors'):
1458 self
.report_error(str(e
), tb
=encode_compat_str(traceback
.format_exc()))
1464 def _wait_for_video(self
, ie_result
={}):
1465 if (not self
.params
.get('wait_for_video')
1466 or ie_result
.get('_type', 'video') != 'video'
1467 or ie_result
.get('formats') or ie_result
.get('url')):
1470 format_dur
= lambda dur
: '%02d:%02d:%02d' % timetuple_from_msec(dur
* 1000)[:-1]
1475 full_msg
= f
'{msg}\n'
1476 if not self
.params
.get('noprogress'):
1477 full_msg
= msg
+ ' ' * (len(last_msg
) - len(msg
)) + '\r'
1480 self
.to_screen(full_msg
, skip_eol
=True)
1483 min_wait
, max_wait
= self
.params
.get('wait_for_video')
1484 diff
= try_get(ie_result
, lambda x
: x
['release_timestamp'] - time
.time())
1485 if diff
is None and ie_result
.get('live_status') == 'is_upcoming':
1486 diff
= round(random
.uniform(min_wait
, max_wait
) if (max_wait
and min_wait
) else (max_wait
or min_wait
), 0)
1487 self
.report_warning('Release time of video is not known')
1488 elif ie_result
and (diff
or 0) <= 0:
1489 self
.report_warning('Video should already be available according to extracted info')
1490 diff
= min(max(diff
or 0, min_wait
or 0), max_wait
or float('inf'))
1491 self
.to_screen(f
'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1493 wait_till
= time
.time() + diff
1496 diff
= wait_till
- time
.time()
1499 raise ReExtractInfo('[wait] Wait period ended', expected
=True)
1500 progress(f
'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1502 except KeyboardInterrupt:
1504 raise ReExtractInfo('[wait] Interrupted by user', expected
=True)
1505 except BaseException
as e
:
1506 if not isinstance(e
, ReExtractInfo
):
1510 @_handle_extraction_exceptions
1511 def __extract_info(self
, url
, ie
, download
, extra_info
, process
):
1513 ie_result
= ie
.extract(url
)
1514 except UserNotLive
as e
:
1516 if self
.params
.get('wait_for_video'):
1517 self
.report_warning(e
)
1518 self
._wait
_for
_video
()
1520 if ie_result
is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
1521 self
.report_warning(f
'Extractor {ie.IE_NAME} returned nothing{bug_reports_message()}')
1523 if isinstance(ie_result
, list):
1524 # Backwards compatibility: old IE result format
1526 '_type': 'compat_list',
1527 'entries': ie_result
,
1529 if extra_info
.get('original_url'):
1530 ie_result
.setdefault('original_url', extra_info
['original_url'])
1531 self
.add_default_extra_info(ie_result
, ie
, url
)
1533 self
._wait
_for
_video
(ie_result
)
1534 return self
.process_ie_result(ie_result
, download
, extra_info
)
1538 def add_default_extra_info(self
, ie_result
, ie
, url
):
1540 self
.add_extra_info(ie_result
, {
1542 'original_url': url
,
1544 webpage_url
= ie_result
.get('webpage_url')
1546 self
.add_extra_info(ie_result
, {
1547 'webpage_url_basename': url_basename(webpage_url
),
1548 'webpage_url_domain': get_domain(webpage_url
),
1551 self
.add_extra_info(ie_result
, {
1552 'extractor': ie
.IE_NAME
,
1553 'extractor_key': ie
.ie_key(),
1556 def process_ie_result(self
, ie_result
, download
=True, extra_info
=None):
1558 Take the result of the ie(may be modified) and resolve all unresolved
1559 references (URLs, playlist items).
1561 It will also download the videos if 'download'.
1562 Returns the resolved ie_result.
1564 if extra_info
is None:
1566 result_type
= ie_result
.get('_type', 'video')
1568 if result_type
in ('url', 'url_transparent'):
1569 ie_result
['url'] = sanitize_url(
1570 ie_result
['url'], scheme
='http' if self
.params
.get('prefer_insecure') else 'https')
1571 if ie_result
.get('original_url'):
1572 extra_info
.setdefault('original_url', ie_result
['original_url'])
1574 extract_flat
= self
.params
.get('extract_flat', False)
1575 if ((extract_flat
== 'in_playlist' and 'playlist' in extra_info
)
1576 or extract_flat
is True):
1577 info_copy
= ie_result
.copy()
1578 ie
= try_get(ie_result
.get('ie_key'), self
.get_info_extractor
)
1579 if ie
and not ie_result
.get('id'):
1580 info_copy
['id'] = ie
.get_temp_id(ie_result
['url'])
1581 self
.add_default_extra_info(info_copy
, ie
, ie_result
['url'])
1582 self
.add_extra_info(info_copy
, extra_info
)
1583 info_copy
, _
= self
.pre_process(info_copy
)
1584 self
.__forced
_printings
(info_copy
, self
.prepare_filename(info_copy
), incomplete
=True)
1585 self
._raise
_pending
_errors
(info_copy
)
1586 if self
.params
.get('force_write_download_archive', False):
1587 self
.record_download_archive(info_copy
)
1590 if result_type
== 'video':
1591 self
.add_extra_info(ie_result
, extra_info
)
1592 ie_result
= self
.process_video_result(ie_result
, download
=download
)
1593 self
._raise
_pending
_errors
(ie_result
)
1594 additional_urls
= (ie_result
or {}).get('additional_urls')
1596 # TODO: Improve MetadataParserPP to allow setting a list
1597 if isinstance(additional_urls
, str):
1598 additional_urls
= [additional_urls
]
1600 '[info] %s: %d additional URL(s) requested' % (ie_result
['id'], len(additional_urls
)))
1601 self
.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls
))
1602 ie_result
['additional_entries'] = [
1604 url
, download
, extra_info
=extra_info
,
1605 force_generic_extractor
=self
.params
.get('force_generic_extractor'))
1606 for url
in additional_urls
1609 elif result_type
== 'url':
1610 # We have to add extra_info to the results because it may be
1611 # contained in a playlist
1612 return self
.extract_info(
1613 ie_result
['url'], download
,
1614 ie_key
=ie_result
.get('ie_key'),
1615 extra_info
=extra_info
)
1616 elif result_type
== 'url_transparent':
1617 # Use the information from the embedding page
1618 info
= self
.extract_info(
1619 ie_result
['url'], ie_key
=ie_result
.get('ie_key'),
1620 extra_info
=extra_info
, download
=False, process
=False)
1622 # extract_info may return None when ignoreerrors is enabled and
1623 # extraction failed with an error, don't crash and return early
1628 exempted_fields
= {'_type', 'url', 'ie_key'}
1629 if not ie_result
.get('section_end') and ie_result
.get('section_start') is None:
1630 # For video clips, the id etc of the clip extractor should be used
1631 exempted_fields |
= {'id', 'extractor', 'extractor_key'}
1633 new_result
= info
.copy()
1634 new_result
.update(filter_dict(ie_result
, lambda k
, v
: v
is not None and k
not in exempted_fields
))
1636 # Extracted info may not be a video result (i.e.
1637 # info.get('_type', 'video') != video) but rather an url or
1638 # url_transparent. In such cases outer metadata (from ie_result)
1639 # should be propagated to inner one (info). For this to happen
1640 # _type of info should be overridden with url_transparent. This
1641 # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1642 if new_result
.get('_type') == 'url':
1643 new_result
['_type'] = 'url_transparent'
1645 return self
.process_ie_result(
1646 new_result
, download
=download
, extra_info
=extra_info
)
1647 elif result_type
in ('playlist', 'multi_video'):
1648 # Protect from infinite recursion due to recursively nested playlists
1649 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1650 webpage_url
= ie_result
['webpage_url']
1651 if webpage_url
in self
._playlist
_urls
:
1653 '[download] Skipping already downloaded playlist: %s'
1654 % ie_result
.get('title') or ie_result
.get('id'))
1657 self
._playlist
_level
+= 1
1658 self
._playlist
_urls
.add(webpage_url
)
1659 self
._fill
_common
_fields
(ie_result
, False)
1660 self
._sanitize
_thumbnails
(ie_result
)
1662 return self
.__process
_playlist
(ie_result
, download
)
1664 self
._playlist
_level
-= 1
1665 if not self
._playlist
_level
:
1666 self
._playlist
_urls
.clear()
1667 elif result_type
== 'compat_list':
1668 self
.report_warning(
1669 'Extractor %s returned a compat_list result. '
1670 'It needs to be updated.' % ie_result
.get('extractor'))
1673 self
.add_extra_info(r
, {
1674 'extractor': ie_result
['extractor'],
1675 'webpage_url': ie_result
['webpage_url'],
1676 'webpage_url_basename': url_basename(ie_result
['webpage_url']),
1677 'webpage_url_domain': get_domain(ie_result
['webpage_url']),
1678 'extractor_key': ie_result
['extractor_key'],
1681 ie_result
['entries'] = [
1682 self
.process_ie_result(_fixup(r
), download
, extra_info
)
1683 for r
in ie_result
['entries']
1687 raise Exception('Invalid result type: %s' % result_type
)
def _ensure_dir_exists(self, path):
    """Delegate to make_dir for `path`, passing report_error as its second argument.

    Returns whatever make_dir returns.
    """
    error_reporter = self.report_error
    return make_dir(path, error_reporter)
1693 def _playlist_infodict(ie_result
, strict
=False, **kwargs
):
1695 'playlist_count': ie_result
.get('playlist_count'),
1696 'playlist': ie_result
.get('title') or ie_result
.get('id'),
1697 'playlist_id': ie_result
.get('id'),
1698 'playlist_title': ie_result
.get('title'),
1699 'playlist_uploader': ie_result
.get('uploader'),
1700 'playlist_uploader_id': ie_result
.get('uploader_id'),
1707 'playlist_index': 0,
1708 '__last_playlist_index': max(ie_result
['requested_entries'] or (0, 0)),
1709 'extractor': ie_result
['extractor'],
1710 'webpage_url': ie_result
['webpage_url'],
1711 'webpage_url_basename': url_basename(ie_result
['webpage_url']),
1712 'webpage_url_domain': get_domain(ie_result
['webpage_url']),
1713 'extractor_key': ie_result
['extractor_key'],
def __process_playlist(self, ie_result, download):
    """Process each entry in the playlist

    Resolves the requested entries of a ``playlist``/``multi_video`` result,
    optionally writes the playlist-level files, processes every entry through
    ``__process_iterable_entry`` and finally runs the ``playlist``
    post-processors.

    Returns the post-processed ``ie_result``, or ``None`` when the playlist is
    rejected by ``_match_entry`` or when writing one of the playlist files
    reports ``None``.
    """
    assert ie_result['_type'] in ('playlist', 'multi_video')

    common_info = self._playlist_infodict(ie_result, strict=True)
    title = common_info.get('playlist') or '<Untitled>'
    if self._match_entry(common_info, incomplete=True) is not None:
        return
    self.to_screen(f'[download] Downloading {ie_result["_type"]}: {title}')

    all_entries = PlaylistEntries(self, ie_result)
    entries = orderedSet(all_entries.get_requested_items(), lazy=True)

    lazy = self.params.get('lazy_playlist')
    if lazy:
        # Entries are consumed one at a time in the loop below, so the count is unknown
        resolved_entries, n_entries = [], 'N/A'
        ie_result['requested_entries'], ie_result['entries'] = None, None
    else:
        entries = resolved_entries = list(entries)
        n_entries = len(resolved_entries)
        ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*resolved_entries)) or ([], [])
    if not ie_result.get('playlist_count'):
        # Better to do this after potentially exhausting entries
        ie_result['playlist_count'] = all_entries.get_full_count()

    # Lookups hit ie_result first and fall back to the playlist-level fields
    ie_copy = collections.ChainMap(
        ie_result, self._playlist_infodict(ie_result, n_entries=int_or_none(n_entries)))

    _infojson_written = False
    write_playlist_files = self.params.get('allow_playlist_files', True)
    if write_playlist_files and self.params.get('list_thumbnails'):
        self.list_thumbnails(ie_result)
    if write_playlist_files and not self.params.get('simulate'):
        _infojson_written = self._write_info_json(
            'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
        if _infojson_written is None:
            return
        if self._write_description('playlist', ie_result,
                                   self.prepare_filename(ie_copy, 'pl_description')) is None:
            return
        # TODO: This should be passed to ThumbnailsConvertor if necessary
        self._write_thumbnails('playlist', ie_result, self.prepare_filename(ie_copy, 'pl_thumbnail'))

    if lazy:
        if self.params.get('playlistreverse') or self.params.get('playlistrandom'):
            self.report_warning('playlistreverse and playlistrandom are not supported with lazy_playlist', only_once=True)
    elif self.params.get('playlistreverse'):
        entries.reverse()
    elif self.params.get('playlistrandom'):
        random.shuffle(entries)

    self.to_screen(f'[{ie_result["extractor"]}] Playlist {title}: Downloading {n_entries} videos'
                   f'{format_field(ie_result, "playlist_count", " of %s")}')

    keep_resolved_entries = self.params.get('extract_flat') != 'discard'
    if self.params.get('extract_flat') == 'discard_in_playlist':
        keep_resolved_entries = ie_result['_type'] != 'playlist'
    if keep_resolved_entries:
        self.write_debug('The information of all playlist entries will be held in memory')

    failures = 0
    max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
    for i, (playlist_index, entry) in enumerate(entries):
        if lazy:
            # In lazy mode the list is grown here so that resolved_entries[i] exists below
            resolved_entries.append((playlist_index, entry))
        if not entry:
            continue

        entry['__x_forwarded_for_ip'] = ie_result.get('__x_forwarded_for_ip')
        if not lazy and 'playlist-index' in self.params.get('compat_opts', []):
            playlist_index = ie_result['requested_entries'][i]

        extra = {
            **common_info,
            'n_entries': int_or_none(n_entries),
            'playlist_index': playlist_index,
            'playlist_autonumber': i + 1,
        }

        if self._match_entry(collections.ChainMap(entry, extra), incomplete=True) is not None:
            continue

        self.to_screen('[download] Downloading video %s of %s' % (
            self._format_screen(i + 1, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))

        entry_result = self.__process_iterable_entry(entry, download, extra)
        if not entry_result:
            failures += 1
        if failures >= max_failures:
            self.report_error(
                f'Skipping the remaining entries in playlist "{title}" since {failures} items failed extraction')
            break
        if keep_resolved_entries:
            resolved_entries[i] = (playlist_index, entry_result)

    # Update with processed data
    ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*resolved_entries)) or ([], [])

    # Write the updated info to json
    if _infojson_written is True and self._write_info_json(
            'updated playlist', ie_result,
            self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
        return

    ie_result = self.run_all_pps('playlist', ie_result)
    self.to_screen(f'[download] Finished downloading playlist: {title}')
    return ie_result
@_handle_extraction_exceptions
def __process_iterable_entry(self, entry, download, extra_info):
    """Process one playlist entry through ``process_ie_result``.

    Kept as a separate method so that the ``_handle_extraction_exceptions``
    decorator wraps each entry individually; the playlist loop treats a
    falsy return value as a failed entry.
    """
    return self.process_ie_result(
        entry, download=download, extra_info=extra_info)
1829 def _build_format_filter(self
, filter_spec
):
1830 " Returns a function to filter the formats according to the filter_spec "
1840 operator_rex
= re
.compile(r
'''(?x)\s*
1841 (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1842 (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1843 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
1844 ''' % '|'.join(map(re
.escape
, OPERATORS
.keys())))
1845 m
= operator_rex
.fullmatch(filter_spec
)
1848 comparison_value
= int(m
.group('value'))
1850 comparison_value
= parse_filesize(m
.group('value'))
1851 if comparison_value
is None:
1852 comparison_value
= parse_filesize(m
.group('value') + 'B')
1853 if comparison_value
is None:
1855 'Invalid value %r in format specification %r' % (
1856 m
.group('value'), filter_spec
))
1857 op
= OPERATORS
[m
.group('op')]
1862 '^=': lambda attr
, value
: attr
.startswith(value
),
1863 '$=': lambda attr
, value
: attr
.endswith(value
),
1864 '*=': lambda attr
, value
: value
in attr
,
1865 '~=': lambda attr
, value
: value
.search(attr
) is not None
1867 str_operator_rex
= re
.compile(r
'''(?x)\s*
1868 (?P<key>[a-zA-Z0-9._-]+)\s*
1869 (?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?
1871 (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
1872 (?(quote)(?P=quote))\s*
1873 ''' % '|'.join(map(re
.escape
, STR_OPERATORS
.keys())))
1874 m
= str_operator_rex
.fullmatch(filter_spec
)
1876 if m
.group('op') == '~=':
1877 comparison_value
= re
.compile(m
.group('value'))
1879 comparison_value
= re
.sub(r
'''\\([\\"'])''', r
'\1', m
.group('value'))
1880 str_op
= STR_OPERATORS
[m
.group('op')]
1881 if m
.group('negation'):
1882 op
= lambda attr
, value
: not str_op(attr
, value
)
1887 raise SyntaxError('Invalid filter specification %r' % filter_spec
)
1890 actual_value
= f
.get(m
.group('key'))
1891 if actual_value
is None:
1892 return m
.group('none_inclusive')
1893 return op(actual_value
, comparison_value
)
def _check_formats(self, formats):
    """Yield only those *formats* for which a test download succeeds.

    For every format a temporary ``.tmp`` file is created in the ``temp``
    output path, ``self.dl`` is called with ``test=True``, and the file is
    removed again regardless of the outcome. Formats whose directory cannot
    be ensured or whose test download fails are skipped.
    """
    for f in formats:
        self.to_screen('[info] Testing format %s' % f['format_id'])
        path = self.get_output_path('temp')
        if not self._ensure_dir_exists(f'{path}/'):
            continue
        temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
        # Only the name is needed; close the handle so the downloader can open the file itself
        temp_file.close()
        try:
            success, _ = self.dl(temp_file.name, f, test=True)
        except (DownloadError, OSError, ValueError) + network_exceptions:
            success = False
        finally:
            if os.path.exists(temp_file.name):
                try:
                    os.remove(temp_file.name)
                except OSError:
                    self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
        if success:
            yield f
        else:
            self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1919 def _default_format_spec(self
, info_dict
, download
=True):
1922 merger
= FFmpegMergerPP(self
)
1923 return merger
.available
and merger
.can_merge()
1926 not self
.params
.get('simulate')
1930 or info_dict
.get('is_live') and not self
.params
.get('live_from_start')
1931 or self
.params
['outtmpl']['default'] == '-'))
1934 or self
.params
.get('allow_multiple_audio_streams', False)
1935 or 'format-spec' in self
.params
['compat_opts'])
1938 'best/bestvideo+bestaudio' if prefer_best
1939 else 'bestvideo*+bestaudio/best' if not compat
1940 else 'bestvideo+bestaudio/best')
def build_format_selector(self, format_spec):
    """Compile *format_spec* into a callable that selects formats.

    The spec is tokenized with :mod:`tokenize`, parsed into a tree of
    ``FormatSelector`` tuples (``SINGLE`` atom, ``GROUP`` ``()``, ``PICKFIRST``
    ``/``, ``MERGE`` ``+``; a top-level list for ``,``) and turned into a
    function taking a context dict. The context keys read here are
    ``formats``, ``incomplete_formats`` and ``has_merged_format``.

    Returns:
        A function ``ctx -> iterable of format dicts``.

    Raises:
        SyntaxError: the spec is malformed.
    """
    def syntax_error(note, start):
        message = (
            'Invalid format specification: '
            '{}\n\t{}\n\t{}^'.format(note, format_spec, ' ' * start[1]))
        return SyntaxError(message)

    PICKFIRST = 'PICKFIRST'
    MERGE = 'MERGE'
    SINGLE = 'SINGLE'
    GROUP = 'GROUP'
    FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

    allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
                              'video': self.params.get('allow_multiple_video_streams', False)}

    check_formats = self.params.get('check_formats') == 'selected'

    def _parse_filter(tokens):
        # Collect raw token strings up to the closing ']' and return them joined
        filter_parts = []
        for tok_type, string, start, _, _ in tokens:
            if tok_type == tokenize.OP and string == ']':
                return ''.join(filter_parts)
            else:
                filter_parts.append(string)

    def _remove_unused_ops(tokens):
        # Remove operators that we don't use and join them with the surrounding strings
        # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
        ALLOWED_OPS = ('/', '+', ',', '(', ')')
        last_string, last_start, last_end, last_line = None, None, None, None
        for tok_type, string, start, end, line in tokens:
            if tok_type == tokenize.OP and string == '[':
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield tok_type, string, start, end, line
                # everything inside brackets will be handled by _parse_filter
                for tok_type, string, start, end, line in tokens:
                    yield tok_type, string, start, end, line
                    if tok_type == tokenize.OP and string == ']':
                        break
            elif tok_type == tokenize.OP and string in ALLOWED_OPS:
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield tok_type, string, start, end, line
            elif tok_type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                if not last_string:
                    last_string = string
                    last_start = start
                    last_end = end
                else:
                    last_string += string
        if last_string:
            yield tokenize.NAME, last_string, last_start, last_end, last_line

    def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
        selectors = []
        current_selector = None
        for tok_type, string, start, _, _ in tokens:
            # ENCODING is only defined in python 3.x
            if tok_type == getattr(tokenize, 'ENCODING', None):
                continue
            elif tok_type in [tokenize.NAME, tokenize.NUMBER]:
                current_selector = FormatSelector(SINGLE, string, [])
            elif tok_type == tokenize.OP:
                if string == ')':
                    if not inside_group:
                        # ')' will be handled by the parentheses group
                        tokens.restore_last_token()
                    break
                elif inside_merge and string in ['/', ',']:
                    tokens.restore_last_token()
                    break
                elif inside_choice and string == ',':
                    tokens.restore_last_token()
                    break
                elif string == ',':
                    if not current_selector:
                        raise syntax_error('"," must follow a format selector', start)
                    selectors.append(current_selector)
                    current_selector = None
                elif string == '/':
                    if not current_selector:
                        raise syntax_error('"/" must follow a format selector', start)
                    first_choice = current_selector
                    second_choice = _parse_format_selection(tokens, inside_choice=True)
                    current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                elif string == '[':
                    if not current_selector:
                        # A bare filter such as "[height<=720]" applies to "best"
                        current_selector = FormatSelector(SINGLE, 'best', [])
                    format_filter = _parse_filter(tokens)
                    current_selector.filters.append(format_filter)
                elif string == '(':
                    if current_selector:
                        raise syntax_error('Unexpected "("', start)
                    group = _parse_format_selection(tokens, inside_group=True)
                    current_selector = FormatSelector(GROUP, group, [])
                elif string == '+':
                    if not current_selector:
                        raise syntax_error('Unexpected "+"', start)
                    selector_1 = current_selector
                    selector_2 = _parse_format_selection(tokens, inside_merge=True)
                    if not selector_2:
                        raise syntax_error('Expected a selector', start)
                    current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                else:
                    raise syntax_error(f'Operator not recognized: "{string}"', start)
            elif tok_type == tokenize.ENDMARKER:
                break
        if current_selector:
            selectors.append(current_selector)
        return selectors

    def _merge(formats_pair):
        format_1, format_2 = formats_pair

        formats_info = []
        formats_info.extend(format_1.get('requested_formats', (format_1,)))
        formats_info.extend(format_2.get('requested_formats', (format_2,)))

        if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
            get_no_more = {'video': False, 'audio': False}
            # NOTE(review): the list is mutated while being enumerated, so the element
            # that shifts into index i after a pop() is not examined - confirm intended
            for (i, fmt_info) in enumerate(formats_info):
                if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
                    formats_info.pop(i)
                    continue
                for aud_vid in ['audio', 'video']:
                    if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
                        if get_no_more[aud_vid]:
                            formats_info.pop(i)
                            break
                        get_no_more[aud_vid] = True

        if len(formats_info) == 1:
            return formats_info[0]

        video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
        audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']

        the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
        the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None

        output_ext = self.params.get('merge_output_format')
        if not output_ext:
            if the_only_video:
                output_ext = the_only_video['ext']
            elif the_only_audio and not video_fmts:
                output_ext = the_only_audio['ext']
            else:
                output_ext = 'mkv'

        filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))

        new_dict = {
            'requested_formats': formats_info,
            'format': '+'.join(filtered('format')),
            'format_id': '+'.join(filtered('format_id')),
            'ext': output_ext,
            'protocol': '+'.join(map(determine_protocol, formats_info)),
            'language': '+'.join(orderedSet(filtered('language'))) or None,
            'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
            'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
            'tbr': sum(filtered('tbr', 'vbr', 'abr')),
        }

        if the_only_video:
            new_dict.update({
                'width': the_only_video.get('width'),
                'height': the_only_video.get('height'),
                'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
                'fps': the_only_video.get('fps'),
                'dynamic_range': the_only_video.get('dynamic_range'),
                'vcodec': the_only_video.get('vcodec'),
                'vbr': the_only_video.get('vbr'),
                'stretched_ratio': the_only_video.get('stretched_ratio'),
            })

        if the_only_audio:
            new_dict.update({
                'acodec': the_only_audio.get('acodec'),
                'abr': the_only_audio.get('abr'),
                'asr': the_only_audio.get('asr'),
            })

        return new_dict

    def _check_formats(formats):
        # Test-download formats only when check_formats == 'selected'
        if not check_formats:
            yield from formats
            return
        yield from self._check_formats(formats)

    def _build_selector_function(selector):
        if isinstance(selector, list):  # ,
            fs = [_build_selector_function(s) for s in selector]

            def selector_function(ctx):
                for f in fs:
                    yield from f(ctx)
            return selector_function

        elif selector.type == GROUP:  # ()
            selector_function = _build_selector_function(selector.selector)

        elif selector.type == PICKFIRST:  # /
            fs = [_build_selector_function(s) for s in selector.selector]

            def selector_function(ctx):
                for f in fs:
                    picked_formats = list(f(ctx))
                    if picked_formats:
                        return picked_formats
                return []

        elif selector.type == MERGE:  # +
            selector_1, selector_2 = map(_build_selector_function, selector.selector)

            def selector_function(ctx):
                for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
                    yield _merge(pair)

        elif selector.type == SINGLE:  # atom
            format_spec = selector.selector or 'best'

            # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
            if format_spec == 'all':
                def selector_function(ctx):
                    yield from _check_formats(ctx['formats'][::-1])
            elif format_spec == 'mergeall':
                def selector_function(ctx):
                    formats = list(_check_formats(
                        f for f in ctx['formats'] if f.get('vcodec') != 'none' or f.get('acodec') != 'none'))
                    if not formats:
                        return
                    merged_format = formats[-1]
                    for f in formats[-2::-1]:
                        merged_format = _merge((merged_format, f))
                    yield merged_format

            else:
                format_fallback, separate_fallback, format_reverse, format_idx = False, None, True, 1
                mobj = re.match(
                    r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
                    format_spec)
                if mobj is not None:
                    format_idx = int_or_none(mobj.group('n'), default=1)
                    format_reverse = mobj.group('bw')[0] == 'b'
                    format_type = (mobj.group('type') or [None])[0]
                    not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
                    format_modified = mobj.group('mod') is not None

                    format_fallback = not format_type and not format_modified  # for b, w
                    _filter_f = (
                        (lambda f: f.get('%scodec' % format_type) != 'none')
                        if format_type and format_modified  # bv*, ba*, wv*, wa*
                        else (lambda f: f.get('%scodec' % not_format_type) == 'none')
                        if format_type  # bv, ba, wv, wa
                        else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                        if not format_modified  # b, w
                        else lambda f: True)  # b*, w*
                    filter_f = lambda f: _filter_f(f) and (
                        f.get('vcodec') != 'none' or f.get('acodec') != 'none')
                else:
                    if format_spec in self._format_selection_exts['audio']:
                        filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
                    elif format_spec in self._format_selection_exts['video']:
                        filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
                        separate_fallback = lambda f: f.get('ext') == format_spec and f.get('vcodec') != 'none'
                    elif format_spec in self._format_selection_exts['storyboards']:
                        filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
                    else:
                        filter_f = lambda f: f.get('format_id') == format_spec  # id

                def selector_function(ctx):
                    formats = list(ctx['formats'])
                    matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                    if not matches:
                        if format_fallback and ctx['incomplete_formats']:
                            # for extractors with incomplete formats (audio only (soundcloud)
                            # or video only (imgur)) best/worst will fallback to
                            # best/worst {video,audio}-only format
                            matches = formats
                        elif separate_fallback and not ctx['has_merged_format']:
                            # for compatibility with youtube-dl when there is no pre-merged format
                            matches = list(filter(separate_fallback, formats))
                    matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
                    try:
                        yield matches[format_idx - 1]
                    except LazyList.IndexError:
                        return

        filters = [self._build_format_filter(f) for f in selector.filters]

        def final_selector(ctx):
            # Filter a shallow copy so the caller's context keeps its full format list
            ctx_copy = dict(ctx)
            for _filter in filters:
                ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
            return selector_function(ctx_copy)
        return final_selector

    stream = io.BytesIO(format_spec.encode())
    try:
        tokens = list(_remove_unused_ops(tokenize.tokenize(stream.readline)))
    except tokenize.TokenError:
        raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

    class TokenIterator:
        """Iterator over a token list that can step back by one token."""

        def __init__(self, tokens):
            self.tokens = tokens
            self.counter = 0

        def __iter__(self):
            return self

        def __next__(self):
            if self.counter >= len(self.tokens):
                raise StopIteration()
            value = self.tokens[self.counter]
            self.counter += 1
            return value

        next = __next__

        def restore_last_token(self):
            self.counter -= 1

    parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
    return _build_selector_function(parsed_selector)
def _calc_headers(self, info_dict):
    """Compute the HTTP headers to use for the URL in *info_dict*.

    Starts from ``merge_headers`` applied to the global ``http_headers``
    parameter and the headers carried by ``info_dict``, then adds a
    ``Cookie`` header when the cookie jar has cookies for ``info_dict['url']``
    and an ``X-Forwarded-For`` header from ``__x_forwarded_for_ip`` unless
    one is already present.

    Returns:
        The resulting headers mapping (whatever ``merge_headers`` returns).
    """
    res = merge_headers(self.params['http_headers'], info_dict.get('http_headers') or {})

    cookies = self._calc_cookies(info_dict['url'])
    if cookies:
        res['Cookie'] = cookies

    if 'X-Forwarded-For' not in res:
        x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
        if x_forwarded_for_ip:
            res['X-Forwarded-For'] = x_forwarded_for_ip

    return res
def _calc_cookies(self, url):
    """Return the ``Cookie`` header value the cookie jar produces for *url*.

    A throwaway request object is built only so that the cookie jar can
    attach its header to it; the request is never sent here. The result is
    whatever ``get_header('Cookie')`` reports for that request.
    """
    pr = sanitized_Request(url)
    self.cookiejar.add_cookie_header(pr)
    return pr.get_header('Cookie')
2292 def _sort_thumbnails(self
, thumbnails
):
2293 thumbnails
.sort(key
=lambda t
: (
2294 t
.get('preference') if t
.get('preference') is not None else -1,
2295 t
.get('width') if t
.get('width') is not None else -1,
2296 t
.get('height') if t
.get('height') is not None else -1,
2297 t
.get('id') if t
.get('id') is not None else '',
2300 def _sanitize_thumbnails(self
, info_dict
):
2301 thumbnails
= info_dict
.get('thumbnails')
2302 if thumbnails
is None:
2303 thumbnail
= info_dict
.get('thumbnail')
2305 info_dict
['thumbnails'] = thumbnails
= [{'url': thumbnail}
]
2309 def check_thumbnails(thumbnails
):
2310 for t
in thumbnails
:
2311 self
.to_screen(f
'[info] Testing thumbnail {t["id"]}')
2313 self
.urlopen(HEADRequest(t
['url']))
2314 except network_exceptions
as err
:
2315 self
.to_screen(f
'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2319 self
._sort
_thumbnails
(thumbnails
)
2320 for i
, t
in enumerate(thumbnails
):
2321 if t
.get('id') is None:
2323 if t
.get('width') and t
.get('height'):
2324 t
['resolution'] = '%dx%d' % (t
['width'], t
['height'])
2325 t
['url'] = sanitize_url(t
['url'])
2327 if self
.params
.get('check_formats') is True:
2328 info_dict
['thumbnails'] = LazyList(check_thumbnails(thumbnails
[::-1]), reverse
=True)
2330 info_dict
['thumbnails'] = thumbnails
2332 def _fill_common_fields(self
, info_dict
, is_video
=True):
2333 # TODO: move sanitization here
2335 # playlists are allowed to lack "title"
2336 title
= info_dict
.get('title', NO_DEFAULT
)
2337 if title
is NO_DEFAULT
:
2338 raise ExtractorError('Missing "title" field in extractor result',
2339 video_id
=info_dict
['id'], ie
=info_dict
['extractor'])
2340 info_dict
['fulltitle'] = title
2343 self
.write_debug('Extractor gave empty title. Creating a generic title')
2345 self
.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
2346 info_dict
['title'] = f
'{info_dict["extractor"].replace(":", "-")} video #{info_dict["id"]}'
2348 if info_dict
.get('duration') is not None:
2349 info_dict
['duration_string'] = formatSeconds(info_dict
['duration'])
2351 for ts_key
, date_key
in (
2352 ('timestamp', 'upload_date'),
2353 ('release_timestamp', 'release_date'),
2354 ('modified_timestamp', 'modified_date'),
2356 if info_dict
.get(date_key
) is None and info_dict
.get(ts_key
) is not None:
2357 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2358 # see http://bugs.python.org/issue1646728)
2359 with contextlib
.suppress(ValueError, OverflowError, OSError):
2360 upload_date
= datetime
.datetime
.utcfromtimestamp(info_dict
[ts_key
])
2361 info_dict
[date_key
] = upload_date
.strftime('%Y%m%d')
2363 live_keys
= ('is_live', 'was_live')
2364 live_status
= info_dict
.get('live_status')
2365 if live_status
is None:
2366 for key
in live_keys
:
2367 if info_dict
.get(key
) is False:
2369 if info_dict
.get(key
):
2372 if all(info_dict
.get(key
) is False for key
in live_keys
):
2373 live_status
= 'not_live'
2375 info_dict
['live_status'] = live_status
2376 for key
in live_keys
:
2377 if info_dict
.get(key
) is None:
2378 info_dict
[key
] = (live_status
== key
)
2380 # Auto generate title fields corresponding to the *_number fields when missing
2381 # in order to always have clean titles. This is very common for TV series.
2382 for field
in ('chapter', 'season', 'episode'):
2383 if info_dict
.get('%s_number' % field
) is not None and not info_dict
.get(field
):
2384 info_dict
[field
] = '%s %d' % (field
.capitalize(), info_dict
['%s_number' % field
])
2386 def _raise_pending_errors(self
, info
):
2387 err
= info
.pop('__pending_error', None)
2389 self
.report_error(err
, tb
=False)
2391 def process_video_result(self
, info_dict
, download
=True):
2392 assert info_dict
.get('_type', 'video') == 'video'
2393 self
._num
_videos
+= 1
2395 if 'id' not in info_dict
:
2396 raise ExtractorError('Missing "id" field in extractor result', ie
=info_dict
['extractor'])
2397 elif not info_dict
.get('id'):
2398 raise ExtractorError('Extractor failed to obtain "id"', ie
=info_dict
['extractor'])
2400 def report_force_conversion(field
, field_not
, conversion
):
2401 self
.report_warning(
2402 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2403 % (field
, field_not
, conversion
))
2405 def sanitize_string_field(info
, string_field
):
2406 field
= info
.get(string_field
)
2407 if field
is None or isinstance(field
, str):
2409 report_force_conversion(string_field
, 'a string', 'string')
2410 info
[string_field
] = str(field
)
2412 def sanitize_numeric_fields(info
):
2413 for numeric_field
in self
._NUMERIC
_FIELDS
:
2414 field
= info
.get(numeric_field
)
2415 if field
is None or isinstance(field
, (int, float)):
2417 report_force_conversion(numeric_field
, 'numeric', 'int')
2418 info
[numeric_field
] = int_or_none(field
)
2420 sanitize_string_field(info_dict
, 'id')
2421 sanitize_numeric_fields(info_dict
)
2422 if info_dict
.get('section_end') and info_dict
.get('section_start') is not None:
2423 info_dict
['duration'] = round(info_dict
['section_end'] - info_dict
['section_start'], 3)
2424 if (info_dict
.get('duration') or 0) <= 0 and info_dict
.pop('duration', None):
2425 self
.report_warning('"duration" field is negative, there is an error in extractor')
2427 chapters
= info_dict
.get('chapters') or []
2428 if chapters
and chapters
[0].get('start_time'):
2429 chapters
.insert(0, {'start_time': 0}
)
2431 dummy_chapter
= {'end_time': 0, 'start_time': info_dict.get('duration')}
2432 for idx
, (prev
, current
, next_
) in enumerate(zip(
2433 (dummy_chapter
, *chapters
), chapters
, (*chapters
[1:], dummy_chapter
)), 1):
2434 if current
.get('start_time') is None:
2435 current
['start_time'] = prev
.get('end_time')
2436 if not current
.get('end_time'):
2437 current
['end_time'] = next_
.get('start_time')
2438 if not current
.get('title'):
2439 current
['title'] = f
'<Untitled Chapter {idx}>'
2441 if 'playlist' not in info_dict
:
2442 # It isn't part of a playlist
2443 info_dict
['playlist'] = None
2444 info_dict
['playlist_index'] = None
2446 self
._sanitize
_thumbnails
(info_dict
)
2448 thumbnail
= info_dict
.get('thumbnail')
2449 thumbnails
= info_dict
.get('thumbnails')
2451 info_dict
['thumbnail'] = sanitize_url(thumbnail
)
2453 info_dict
['thumbnail'] = thumbnails
[-1]['url']
2455 if info_dict
.get('display_id') is None and 'id' in info_dict
:
2456 info_dict
['display_id'] = info_dict
['id']
2458 self
._fill
_common
_fields
(info_dict
)
2460 for cc_kind
in ('subtitles', 'automatic_captions'):
2461 cc
= info_dict
.get(cc_kind
)
2463 for _
, subtitle
in cc
.items():
2464 for subtitle_format
in subtitle
:
2465 if subtitle_format
.get('url'):
2466 subtitle_format
['url'] = sanitize_url(subtitle_format
['url'])
2467 if subtitle_format
.get('ext') is None:
2468 subtitle_format
['ext'] = determine_ext(subtitle_format
['url']).lower()
2470 automatic_captions
= info_dict
.get('automatic_captions')
2471 subtitles
= info_dict
.get('subtitles')
2473 info_dict
['requested_subtitles'] = self
.process_subtitles(
2474 info_dict
['id'], subtitles
, automatic_captions
)
2476 if info_dict
.get('formats') is None:
2477 # There's only one format available
2478 formats
= [info_dict
]
2480 formats
= info_dict
['formats']
2482 # or None ensures --clean-infojson removes it
2483 info_dict
['_has_drm'] = any(f
.get('has_drm') for f
in formats
) or None
2484 if not self
.params
.get('allow_unplayable_formats'):
2485 formats
= [f
for f
in formats
if not f
.get('has_drm')]
2486 if info_dict
['_has_drm'] and all(
2487 f
.get('acodec') == f
.get('vcodec') == 'none' for f
in formats
):
2488 self
.report_warning(
2489 'This video is DRM protected and only images are available for download. '
2490 'Use --list-formats to see them')
2492 get_from_start
= not info_dict
.get('is_live') or bool(self
.params
.get('live_from_start'))
2493 if not get_from_start
:
2494 info_dict
['title'] += ' ' + datetime
.datetime
.now().strftime('%Y-%m-%d %H:%M')
2495 if info_dict
.get('is_live') and formats
:
2496 formats
= [f
for f
in formats
if bool(f
.get('is_from_start')) == get_from_start
]
2497 if get_from_start
and not formats
:
2498 self
.raise_no_formats(info_dict
, msg
=(
2499 '--live-from-start is passed, but there are no formats that can be downloaded from the start. '
2500 'If you want to download from the current time, use --no-live-from-start'))
2503 self
.raise_no_formats(info_dict
)
2505 def is_wellformed(f
):
2508 self
.report_warning(
2509 '"url" field is missing or empty - skipping format, '
2510 'there is an error in extractor')
2512 if isinstance(url
, bytes):
2513 sanitize_string_field(f
, 'url')
2516 # Filter out malformed formats for better extraction robustness
2517 formats
= list(filter(is_wellformed
, formats
))
2521 # We check that all the formats have the format and format_id fields
2522 for i
, format
in enumerate(formats
):
2523 sanitize_string_field(format
, 'format_id')
2524 sanitize_numeric_fields(format
)
2525 format
['url'] = sanitize_url(format
['url'])
2526 if not format
.get('format_id'):
2527 format
['format_id'] = str(i
)
2529 # Sanitize format_id from characters used in format selector expression
2530 format
['format_id'] = re
.sub(r
'[\s,/+\[\]()]', '_', format
['format_id'])
2531 format_id
= format
['format_id']
2532 if format_id
not in formats_dict
:
2533 formats_dict
[format_id
] = []
2534 formats_dict
[format_id
].append(format
)
2536 # Make sure all formats have unique format_id
2537 common_exts
= set(itertools
.chain(*self
._format
_selection
_exts
.values()))
2538 for format_id
, ambiguous_formats
in formats_dict
.items():
2539 ambigious_id
= len(ambiguous_formats
) > 1
2540 for i
, format
in enumerate(ambiguous_formats
):
2542 format
['format_id'] = '%s-%d' % (format_id
, i
)
2543 if format
.get('ext') is None:
2544 format
['ext'] = determine_ext(format
['url']).lower()
2545 # Ensure there is no conflict between id and ext in format selection
2546 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2547 if format
['format_id'] != format
['ext'] and format
['format_id'] in common_exts
:
2548 format
['format_id'] = 'f%s' % format
['format_id']
2550 for i
, format
in enumerate(formats
):
2551 if format
.get('format') is None:
2552 format
['format'] = '{id} - {res}{note}'.format(
2553 id=format
['format_id'],
2554 res
=self
.format_resolution(format
),
2555 note
=format_field(format
, 'format_note', ' (%s)'),
2557 if format
.get('protocol') is None:
2558 format
['protocol'] = determine_protocol(format
)
2559 if format
.get('resolution') is None:
2560 format
['resolution'] = self
.format_resolution(format
, default
=None)
2561 if format
.get('dynamic_range') is None and format
.get('vcodec') != 'none':
2562 format
['dynamic_range'] = 'SDR'
2563 if (info_dict
.get('duration') and format
.get('tbr')
2564 and not format
.get('filesize') and not format
.get('filesize_approx')):
2565 format
['filesize_approx'] = int(info_dict
['duration'] * format
['tbr'] * (1024 / 8))
2567 # Add HTTP headers, so that external programs can use them from the
2569 full_format_info
= info_dict
.copy()
2570 full_format_info
.update(format
)
2571 format
['http_headers'] = self
._calc
_headers
(full_format_info
)
2572 # Remove private housekeeping stuff
2573 if '__x_forwarded_for_ip' in info_dict
:
2574 del info_dict
['__x_forwarded_for_ip']
2576 if self
.params
.get('check_formats') is True:
2577 formats
= LazyList(self
._check
_formats
(formats
[::-1]), reverse
=True)
2579 if not formats
or formats
[0] is not info_dict
:
2580 # only set the 'formats' fields if the original info_dict list them
2581 # otherwise we end up with a circular reference, the first (and unique)
2582 # element in the 'formats' field in info_dict is info_dict itself,
2583 # which can't be exported to json
2584 info_dict
['formats'] = formats
2586 info_dict
, _
= self
.pre_process(info_dict
)
2588 if self
._match
_entry
(info_dict
, incomplete
=self
._format
_fields
) is not None:
2591 self
.post_extract(info_dict
)
2592 info_dict
, _
= self
.pre_process(info_dict
, 'after_filter')
2594 # The pre-processors may have modified the formats
2595 formats
= info_dict
.get('formats', [info_dict
])
2597 list_only
= self
.params
.get('simulate') is None and (
2598 self
.params
.get('list_thumbnails') or self
.params
.get('listformats') or self
.params
.get('listsubtitles'))
2599 interactive_format_selection
= not list_only
and self
.format_selector
== '-'
2600 if self
.params
.get('list_thumbnails'):
2601 self
.list_thumbnails(info_dict
)
2602 if self
.params
.get('listsubtitles'):
2603 if 'automatic_captions' in info_dict
:
2604 self
.list_subtitles(
2605 info_dict
['id'], automatic_captions
, 'automatic captions')
2606 self
.list_subtitles(info_dict
['id'], subtitles
, 'subtitles')
2607 if self
.params
.get('listformats') or interactive_format_selection
:
2608 self
.list_formats(info_dict
)
2610 # Without this printing, -F --print-json will not work
2611 self
.__forced
_printings
(info_dict
, self
.prepare_filename(info_dict
), incomplete
=True)
2614 format_selector
= self
.format_selector
2615 if format_selector
is None:
2616 req_format
= self
._default
_format
_spec
(info_dict
, download
=download
)
2617 self
.write_debug('Default format spec: %s' % req_format
)
2618 format_selector
= self
.build_format_selector(req_format
)
2621 if interactive_format_selection
:
2623 self
._format
_screen
('\nEnter format selector: ', self
.Styles
.EMPHASIS
))
2625 format_selector
= self
.build_format_selector(req_format
)
2626 except SyntaxError as err
:
2627 self
.report_error(err
, tb
=False, is_error
=False)
2630 formats_to_download
= list(format_selector({
2632 'has_merged_format': any('none' not in (f
.get('acodec'), f
.get('vcodec')) for f
in formats
),
2633 'incomplete_formats': (
2634 # All formats are video-only or
2635 all(f
.get('vcodec') != 'none' and f
.get('acodec') == 'none' for f
in formats
)
2636 # all formats are audio-only
2637 or all(f
.get('vcodec') == 'none' and f
.get('acodec') != 'none' for f
in formats
)),
2639 if interactive_format_selection
and not formats_to_download
:
2640 self
.report_error('Requested format is not available', tb
=False, is_error
=False)
2644 if not formats_to_download
:
2645 if not self
.params
.get('ignore_no_formats_error'):
2646 raise ExtractorError(
2647 'Requested format is not available. Use --list-formats for a list of available formats',
2648 expected
=True, video_id
=info_dict
['id'], ie
=info_dict
['extractor'])
2649 self
.report_warning('Requested format is not available')
2650 # Process what we can, even without any available formats.
2651 formats_to_download
= [{}]
2653 requested_ranges
= self
.params
.get('download_ranges')
2654 if requested_ranges
:
2655 requested_ranges
= tuple(requested_ranges(info_dict
, self
))
2657 best_format
, downloaded_formats
= formats_to_download
[-1], []
2660 def to_screen(*msg
):
2661 self
.to_screen(f
'[info] {info_dict["id"]}: {" ".join(", ".join(variadic(m)) for m in msg)}')
2663 to_screen(f
'Downloading {len(formats_to_download)} format(s):',
2664 (f
['format_id'] for f
in formats_to_download
))
2665 if requested_ranges
:
2666 to_screen(f
'Downloading {len(requested_ranges)} time ranges:',
2667 (f
'{int(c["start_time"])}-{int(c["end_time"])}' for c
in requested_ranges
))
2668 max_downloads_reached
= False
2670 for fmt
, chapter
in itertools
.product(formats_to_download
, requested_ranges
or [{}]):
2671 new_info
= self
._copy
_infodict
(info_dict
)
2672 new_info
.update(fmt
)
2673 offset
, duration
= info_dict
.get('section_start') or 0, info_dict
.get('duration') or float('inf')
2674 if chapter
or offset
:
2676 'section_start': offset
+ chapter
.get('start_time', 0),
2677 'section_end': offset
+ min(chapter
.get('end_time', duration
), duration
),
2678 'section_title': chapter
.get('title'),
2679 'section_number': chapter
.get('index'),
2681 downloaded_formats
.append(new_info
)
2683 self
.process_info(new_info
)
2684 except MaxDownloadsReached
:
2685 max_downloads_reached
= True
2686 self
._raise
_pending
_errors
(new_info
)
2687 # Remove copied info
2688 for key
, val
in tuple(new_info
.items()):
2689 if info_dict
.get(key
) == val
:
2691 if max_downloads_reached
:
2694 write_archive
= {f.get('__write_download_archive', False) for f in downloaded_formats}
2695 assert write_archive
.issubset({True, False, 'ignore'}
)
2696 if True in write_archive
and False not in write_archive
:
2697 self
.record_download_archive(info_dict
)
2699 info_dict
['requested_downloads'] = downloaded_formats
2700 info_dict
= self
.run_all_pps('after_video', info_dict
)
2701 if max_downloads_reached
:
2702 raise MaxDownloadsReached()
2704 # We update the info dict with the selected best quality format (backwards compatibility)
2705 info_dict
.update(best_format
)
def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
    """Select the requested subtitles and their format

    Regular subtitles (when 'writesubtitles' is set) and automatic captions
    (when 'writeautomaticsub' is set) are pooled; a language present in both
    keeps its regular subtitles. The languages to keep come from
    'allsubtitles' or 'subtitleslangs', otherwise English or the first
    available language is used. For each kept language one format is picked
    following the '/'-separated 'subtitlesformat' preference.

    Returns a {language: subtitle format dict} mapping, or None when neither
    option is enabled or no subtitles are available.
    """
    want_normal = self.params.get('writesubtitles')
    want_automatic = self.params.get('writeautomaticsub')

    available_subs, normal_sub_langs = {}, []
    if normal_subtitles and want_normal:
        available_subs.update(normal_subtitles)
        normal_sub_langs = tuple(normal_subtitles.keys())
    if automatic_captions and want_automatic:
        for lang, cap_info in automatic_captions.items():
            # An automatic caption never replaces a regular subtitle
            available_subs.setdefault(lang, cap_info)

    if not (want_normal or want_automatic) or not available_subs:
        return None

    all_sub_langs = tuple(available_subs.keys())
    if self.params.get('allsubtitles', False):
        requested_langs = all_sub_langs
    elif self.params.get('subtitleslangs', False):
        # A list is used so that the order of languages will be the same as
        # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
        picked = []
        for pattern in self.params.get('subtitleslangs'):
            # A leading '-' turns the pattern into an exclusion
            discard = pattern[0] == '-'
            if discard:
                pattern = pattern[1:]
            if pattern == 'all':
                if discard:
                    picked = []
                else:
                    picked.extend(all_sub_langs)
                continue
            is_match = re.compile(pattern + '$').match
            matched = [lang for lang in all_sub_langs if is_match(lang)]
            if discard:
                picked = [lang for lang in picked if lang not in matched]
            else:
                picked.extend(matched)
        requested_langs = orderedSet(picked)
    elif normal_sub_langs:
        requested_langs = ['en'] if 'en' in normal_sub_langs else normal_sub_langs[:1]
    else:
        requested_langs = ['en'] if 'en' in all_sub_langs else all_sub_langs[:1]
    if requested_langs:
        self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))

    formats_query = self.params.get('subtitlesformat', 'best')
    formats_preference = formats_query.split('/') if formats_query else []

    subs = {}
    for lang in requested_langs:
        candidates = available_subs.get(lang)
        if candidates is None:
            self.report_warning(f'{lang} subtitles not available for {video_id}')
            continue
        for ext in formats_preference:
            if ext == 'best':
                chosen = candidates[-1]
                break
            same_ext = [sub for sub in candidates if sub['ext'] == ext]
            if same_ext:
                chosen = same_ext[-1]
                break
        else:
            # No preference matched: fall back to the last listed format
            chosen = candidates[-1]
            self.report_warning(
                'No subtitle format found matching "%s" for language %s, '
                'using %s' % (formats_query, lang, chosen['ext']))
        subs[lang] = chosen
    return subs
def _forceprint(self, key, info_dict):
    """Emit the print templates registered under `key`.

    Templates in params['forceprint'][key] are evaluated and written to stdout;
    (template, file template) pairs in params['print_to_file'][key] are
    evaluated and appended to the file named by the file template.
    Does nothing when info_dict is None.
    """
    if info_dict is None:
        return

    video_id = info_dict.get('id')
    # The rendered tables are only exposed to the templates, never stored
    # back into the caller's info_dict
    info_copy = info_dict.copy()
    info_copy.update({
        'formats_table': self.render_formats_table(info_dict),
        'thumbnails_table': self.render_thumbnails_table(info_dict),
        'subtitles_table': self.render_subtitles_table(video_id, info_dict.get('subtitles')),
        'automatic_captions_table': self.render_subtitles_table(
            video_id, info_dict.get('automatic_captions')),
    })

    def format_tmpl(tmpl):
        # A bare field name (optionally suffixed with '=') is shorthand for a template
        mobj = re.match(r'\w+(=?)$', tmpl)
        if not mobj:
            return tmpl
        if mobj.group(1):
            field = tmpl[:-1]
            return f'{field} = %({field})r'
        return f'%({tmpl})s'

    for tmpl in self.params['forceprint'].get(key, []):
        rendered = self.evaluate_outtmpl(format_tmpl(tmpl), info_copy)
        self.to_stdout(rendered)

    for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):
        filename = self.prepare_filename(info_dict, outtmpl=file_tmpl)
        tmpl = format_tmpl(tmpl)
        self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
        if not self._ensure_dir_exists(filename):
            continue
        with open(filename, 'a', encoding='utf-8') as f:
            f.write(self.evaluate_outtmpl(tmpl, info_copy) + '\n')
def __forced_printings(self, info_dict, filename, incomplete):
    """Write the fields selected by the 'force*' params to stdout.

    Works on a shallow copy of info_dict, extended with 'filename' (when given)
    and a 'urls' entry built from the requested formats or the single 'url'.
    With `incomplete` set, a mandatory field that is missing is skipped
    instead of being looked up.
    """
    info = info_dict.copy()
    if filename is not None:
        info['filename'] = filename

    requested_formats = info.get('requested_formats')
    if requested_formats is not None:
        # For RTMP URLs, also include the playpath
        info['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in requested_formats)
    elif info.get('url'):
        info['urls'] = info['url'] + info.get('play_path', '')

    def is_forced(field):
        return self.params.get('force%s' % field, False)

    def print_mandatory(field, actual_field=None):
        source = field if actual_field is None else actual_field
        if not is_forced(field):
            return
        if not incomplete or info.get(source) is not None:
            self.to_stdout(info[source])

    def print_optional(field):
        if is_forced(field) and info.get(field) is not None:
            self.to_stdout(info[field])

    params = self.params
    if (params.get('forcejson')
            or params['forceprint'].get('video')
            or params['print_to_file'].get('video')):
        self.post_extract(info)
    self._forceprint('video', info)

    print_mandatory('title')
    print_mandatory('id')
    print_mandatory('url', 'urls')
    for optional_field in ('thumbnail', 'description', 'filename'):
        print_optional(optional_field)
    if params.get('forceduration') and info.get('duration') is not None:
        self.to_stdout(formatSeconds(info['duration']))
    print_mandatory('format')

    if params.get('forcejson'):
        self.to_stdout(json.dumps(self.sanitize_info(info)))
def dl(self, name, info, subtitle=False, test=False):
    """Download `info` to the file `name` with a suitable downloader.

    name:     output file name; '-' selects a downloader that can write to stdout
    info:     info dict of the format (or subtitle) to download
    subtitle: passed through unchanged to the downloader's download()
    test:     when true, use a reduced parameter set instead of self.params
              and do not attach the progress hooks

    Returns whatever the downloader's download() returns.
    """
    if not info.get('url'):
        self.raise_no_formats(info, True)

    if test:
        # Test downloads use their own parameter set; output stays quiet
        # unless verbose mode is on
        verbose = self.params.get('verbose')
        params = {
            'test': True,
            'quiet': self.params.get('quiet') or not verbose,
            'verbose': verbose,
            'noprogress': not verbose,
            'nopart': True,
            'skip_unavailable_fragments': False,
            'keep_fragments': False,
            'overwrites': True,
            '_no_ytdl_file': True,
        }
    else:
        params = self.params
    # get_suitable_downloader returns a downloader class, instantiated here
    fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
    if not test:
        for ph in self._progress_hooks:
            fd.add_progress_hook(ph)
        # For the debug message, cut data: URLs down to their header part
        urls = '", "'.join(
            (f['url'].split(',')[0] + ',<data>' if f['url'].startswith('data:') else f['url'])
            for f in info.get('requested_formats', []) or [info])
        self.write_debug(f'Invoking {fd.FD_NAME} downloader on "{urls}"')

    # Note: Ideally info should be a deep-copied so that hooks cannot modify it.
    # But it may contain objects that are not deep-copyable
    new_info = self._copy_infodict(info)
    if new_info.get('http_headers') is None:
        new_info['http_headers'] = self._calc_headers(new_info)
    return fd.download(name, new_info, subtitle)
def existing_file(self, filepaths, *, default_overwrite=True):
    """Return the first of `filepaths` that exists, unless overwriting is enabled.

    When the 'overwrites' param (defaulting to `default_overwrite`) is truthy,
    every existing candidate is reported and deleted, and None is returned.
    None is also returned when none of the files exist.
    """
    found = [path for path in orderedSet(filepaths) if os.path.exists(path)]
    if found and not self.params.get('overwrites', default_overwrite):
        return found[0]

    # Overwriting: clear every existing candidate out of the way
    for stale in found:
        self.report_file_delete(stale)
        os.remove(stale)
    return None
2894 def process_info(self
, info_dict
):
2895 """Process a single resolved IE result. (Modifies it in-place)"""
2897 assert info_dict
.get('_type', 'video') == 'video'
2898 original_infodict
= info_dict
2900 if 'format' not in info_dict
and 'ext' in info_dict
:
2901 info_dict
['format'] = info_dict
['ext']
2903 # This is mostly just for backward compatibility of process_info
2904 # As a side-effect, this allows for format-specific filters
2905 if self
._match
_entry
(info_dict
) is not None:
2906 info_dict
['__write_download_archive'] = 'ignore'
2909 # Does nothing under normal operation - for backward compatibility of process_info
2910 self
.post_extract(info_dict
)
2911 self
._num
_downloads
+= 1
2913 # info_dict['_filename'] needs to be set for backward compatibility
2914 info_dict
['_filename'] = full_filename
= self
.prepare_filename(info_dict
, warn
=True)
2915 temp_filename
= self
.prepare_filename(info_dict
, 'temp')
2919 self
.__forced
_printings
(info_dict
, full_filename
, incomplete
=('format' not in info_dict
))
2921 def check_max_downloads():
2922 if self
._num
_downloads
>= float(self
.params
.get('max_downloads') or 'inf'):
2923 raise MaxDownloadsReached()
2925 if self
.params
.get('simulate'):
2926 info_dict
['__write_download_archive'] = self
.params
.get('force_write_download_archive')
2927 check_max_downloads()
2930 if full_filename
is None:
2932 if not self
._ensure
_dir
_exists
(encodeFilename(full_filename
)):
2934 if not self
._ensure
_dir
_exists
(encodeFilename(temp_filename
)):
2937 if self
._write
_description
('video', info_dict
,
2938 self
.prepare_filename(info_dict
, 'description')) is None:
2941 sub_files
= self
._write
_subtitles
(info_dict
, temp_filename
)
2942 if sub_files
is None:
2944 files_to_move
.update(dict(sub_files
))
2946 thumb_files
= self
._write
_thumbnails
(
2947 'video', info_dict
, temp_filename
, self
.prepare_filename(info_dict
, 'thumbnail'))
2948 if thumb_files
is None:
2950 files_to_move
.update(dict(thumb_files
))
2952 infofn
= self
.prepare_filename(info_dict
, 'infojson')
2953 _infojson_written
= self
._write
_info
_json
('video', info_dict
, infofn
)
2954 if _infojson_written
:
2955 info_dict
['infojson_filename'] = infofn
2956 # For backward compatibility, even though it was a private field
2957 info_dict
['__infojson_filename'] = infofn
2958 elif _infojson_written
is None:
2961 # Note: Annotations are deprecated
2963 if self
.params
.get('writeannotations', False):
2964 annofn
= self
.prepare_filename(info_dict
, 'annotation')
2966 if not self
._ensure
_dir
_exists
(encodeFilename(annofn
)):
2968 if not self
.params
.get('overwrites', True) and os
.path
.exists(encodeFilename(annofn
)):
2969 self
.to_screen('[info] Video annotations are already present')
2970 elif not info_dict
.get('annotations'):
2971 self
.report_warning('There are no annotations to write.')
2974 self
.to_screen('[info] Writing video annotations to: ' + annofn
)
2975 with open(encodeFilename(annofn
), 'w', encoding
='utf-8') as annofile
:
2976 annofile
.write(info_dict
['annotations'])
2977 except (KeyError, TypeError):
2978 self
.report_warning('There are no annotations to write.')
2980 self
.report_error('Cannot write annotations file: ' + annofn
)
2983 # Write internet shortcut files
2984 def _write_link_file(link_type
):
2985 url
= try_get(info_dict
['webpage_url'], iri_to_uri
)
2987 self
.report_warning(
2988 f
'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
2990 linkfn
= replace_extension(self
.prepare_filename(info_dict
, 'link'), link_type
, info_dict
.get('ext'))
2991 if not self
._ensure
_dir
_exists
(encodeFilename(linkfn
)):
2993 if self
.params
.get('overwrites', True) and os
.path
.exists(encodeFilename(linkfn
)):
2994 self
.to_screen(f
'[info] Internet shortcut (.{link_type}) is already present')
2997 self
.to_screen(f
'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
2998 with open(encodeFilename(to_high_limit_path(linkfn
)), 'w', encoding
='utf-8',
2999 newline
='\r\n' if link_type
== 'url' else '\n') as linkfile
:
3000 template_vars
= {'url': url}
3001 if link_type
== 'desktop':
3002 template_vars
['filename'] = linkfn
[:-(len(link_type
) + 1)]
3003 linkfile
.write(LINK_TEMPLATES
[link_type
] % template_vars
)
3005 self
.report_error(f
'Cannot write internet shortcut {linkfn}')
3010 'url': self
.params
.get('writeurllink'),
3011 'webloc': self
.params
.get('writewebloclink'),
3012 'desktop': self
.params
.get('writedesktoplink'),
3014 if self
.params
.get('writelink'):
3015 link_type
= ('webloc' if sys
.platform
== 'darwin'
3016 else 'desktop' if sys
.platform
.startswith('linux')
3018 write_links
[link_type
] = True
3020 if any(should_write
and not _write_link_file(link_type
)
3021 for link_type
, should_write
in write_links
.items()):
3024 def replace_info_dict(new_info
):
3026 if new_info
== info_dict
:
3029 info_dict
.update(new_info
)
3031 new_info
, files_to_move
= self
.pre_process(info_dict
, 'before_dl', files_to_move
)
3032 replace_info_dict(new_info
)
3034 if self
.params
.get('skip_download'):
3035 info_dict
['filepath'] = temp_filename
3036 info_dict
['__finaldir'] = os
.path
.dirname(os
.path
.abspath(encodeFilename(full_filename
)))
3037 info_dict
['__files_to_move'] = files_to_move
3038 replace_info_dict(self
.run_pp(MoveFilesAfterDownloadPP(self
, False), info_dict
))
3039 info_dict
['__write_download_archive'] = self
.params
.get('force_write_download_archive')
3042 info_dict
.setdefault('__postprocessors', [])
3045 def existing_video_file(*filepaths
):
3046 ext
= info_dict
.get('ext')
3047 converted
= lambda file: replace_extension(file, self
.params
.get('final_ext') or ext
, ext
)
3048 file = self
.existing_file(itertools
.chain(*zip(map(converted
, filepaths
), filepaths
)),
3049 default_overwrite
=False)
3051 info_dict
['ext'] = os
.path
.splitext(file)[1][1:]
3054 fd
, success
= None, True
3055 if info_dict
.get('protocol') or info_dict
.get('url'):
3056 fd
= get_suitable_downloader(info_dict
, self
.params
, to_stdout
=temp_filename
== '-')
3057 if fd
is not FFmpegFD
and (
3058 info_dict
.get('section_start') or info_dict
.get('section_end')):
3059 msg
= ('This format cannot be partially downloaded' if FFmpegFD
.available()
3060 else 'You have requested downloading the video partially, but ffmpeg is not installed')
3061 self
.report_error(f
'{msg}. Aborting')
3064 if info_dict
.get('requested_formats') is not None:
3066 def compatible_formats(formats
):
3067 # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
3068 video_formats
= [format
for format
in formats
if format
.get('vcodec') != 'none']
3069 audio_formats
= [format
for format
in formats
if format
.get('acodec') != 'none']
3070 if len(video_formats
) > 2 or len(audio_formats
) > 2:
3074 exts
= {format.get('ext') for format in formats}
3076 {'mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'}
,
3079 for ext_sets
in COMPATIBLE_EXTS
:
3080 if ext_sets
.issuperset(exts
):
3082 # TODO: Check acodec/vcodec
3085 requested_formats
= info_dict
['requested_formats']
3086 old_ext
= info_dict
['ext']
3087 if self
.params
.get('merge_output_format') is None:
3088 if not compatible_formats(requested_formats
):
3089 info_dict
['ext'] = 'mkv'
3090 self
.report_warning(
3091 'Requested formats are incompatible for merge and will be merged into mkv')
3092 if (info_dict
['ext'] == 'webm'
3093 and info_dict
.get('thumbnails')
3094 # check with type instead of pp_key, __name__, or isinstance
3095 # since we dont want any custom PPs to trigger this
3096 and any(type(pp
) == EmbedThumbnailPP
for pp
in self
._pps
['post_process'])): # noqa: E721
3097 info_dict
['ext'] = 'mkv'
3098 self
.report_warning(
3099 'webm doesn\'t support embedding a thumbnail, mkv will be used')
3100 new_ext
= info_dict
['ext']
3102 def correct_ext(filename
, ext
=new_ext
):
3105 filename_real_ext
= os
.path
.splitext(filename
)[1][1:]
3107 os
.path
.splitext(filename
)[0]
3108 if filename_real_ext
in (old_ext
, new_ext
)
3110 return f
'{filename_wo_ext}.{ext}'
3112 # Ensure filename always has a correct extension for successful merge
3113 full_filename
= correct_ext(full_filename
)
3114 temp_filename
= correct_ext(temp_filename
)
3115 dl_filename
= existing_video_file(full_filename
, temp_filename
)
3116 info_dict
['__real_download'] = False
3118 merger
= FFmpegMergerPP(self
)
3120 if dl_filename
is not None:
3121 self
.report_file_already_downloaded(dl_filename
)
3123 for f
in requested_formats
if fd
!= FFmpegFD
else []:
3124 f
['filepath'] = fname
= prepend_extension(
3125 correct_ext(temp_filename
, info_dict
['ext']),
3126 'f%s' % f
['format_id'], info_dict
['ext'])
3127 downloaded
.append(fname
)
3128 info_dict
['url'] = '\n'.join(f
['url'] for f
in requested_formats
)
3129 success
, real_download
= self
.dl(temp_filename
, info_dict
)
3130 info_dict
['__real_download'] = real_download
3132 if self
.params
.get('allow_unplayable_formats'):
3133 self
.report_warning(
3134 'You have requested merging of multiple formats '
3135 'while also allowing unplayable formats to be downloaded. '
3136 'The formats won\'t be merged to prevent data corruption.')
3137 elif not merger
.available
:
3138 msg
= 'You have requested merging of multiple formats but ffmpeg is not installed'
3139 if not self
.params
.get('ignoreerrors'):
3140 self
.report_error(f
'{msg}. Aborting due to --abort-on-error')
3142 self
.report_warning(f
'{msg}. The formats won\'t be merged')
3144 if temp_filename
== '-':
3145 reason
= ('using a downloader other than ffmpeg' if FFmpegFD
.can_merge_formats(info_dict
, self
.params
)
3146 else 'but the formats are incompatible for simultaneous download' if merger
.available
3147 else 'but ffmpeg is not installed')
3148 self
.report_warning(
3149 f
'You have requested downloading multiple formats to stdout {reason}. '
3150 'The formats will be streamed one after the other')
3151 fname
= temp_filename
3152 for f
in requested_formats
:
3153 new_info
= dict(info_dict
)
3154 del new_info
['requested_formats']
3156 if temp_filename
!= '-':
3157 fname
= prepend_extension(
3158 correct_ext(temp_filename
, new_info
['ext']),
3159 'f%s' % f
['format_id'], new_info
['ext'])
3160 if not self
._ensure
_dir
_exists
(fname
):
3162 f
['filepath'] = fname
3163 downloaded
.append(fname
)
3164 partial_success
, real_download
= self
.dl(fname
, new_info
)
3165 info_dict
['__real_download'] = info_dict
['__real_download'] or real_download
3166 success
= success
and partial_success
3168 if downloaded
and merger
.available
and not self
.params
.get('allow_unplayable_formats'):
3169 info_dict
['__postprocessors'].append(merger
)
3170 info_dict
['__files_to_merge'] = downloaded
3171 # Even if there were no downloads, it is being merged only now
3172 info_dict
['__real_download'] = True
3174 for file in downloaded
:
3175 files_to_move
[file] = None
3177 # Just a single file
3178 dl_filename
= existing_video_file(full_filename
, temp_filename
)
3179 if dl_filename
is None or dl_filename
== temp_filename
:
3180 # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
3181 # So we should try to resume the download
3182 success
, real_download
= self
.dl(temp_filename
, info_dict
)
3183 info_dict
['__real_download'] = real_download
3185 self
.report_file_already_downloaded(dl_filename
)
3187 dl_filename
= dl_filename
or temp_filename
3188 info_dict
['__finaldir'] = os
.path
.dirname(os
.path
.abspath(encodeFilename(full_filename
)))
3190 except network_exceptions
as err
:
3191 self
.report_error('unable to download video data: %s' % error_to_compat_str(err
))
3193 except OSError as err
:
3194 raise UnavailableVideoError(err
)
3195 except (ContentTooShortError
, ) as err
:
3196 self
.report_error(f
'content too short (expected {err.expected} bytes and served {err.downloaded})')
3199 self
._raise
_pending
_errors
(info_dict
)
3200 if success
and full_filename
!= '-':
3204 fixup_policy
= self
.params
.get('fixup')
3205 vid
= info_dict
['id']
3207 if fixup_policy
in ('ignore', 'never'):
3209 elif fixup_policy
== 'warn':
3211 elif fixup_policy
!= 'force':
3212 assert fixup_policy
in ('detect_or_warn', None)
3213 if not info_dict
.get('__real_download'):
3216 def ffmpeg_fixup(cndn
, msg
, cls
):
3217 if not (do_fixup
and cndn
):
3219 elif do_fixup
== 'warn':
3220 self
.report_warning(f
'{vid}: {msg}')
3224 info_dict
['__postprocessors'].append(pp
)
3226 self
.report_warning(f
'{vid}: {msg}. Install ffmpeg to fix this automatically')
3228 stretched_ratio
= info_dict
.get('stretched_ratio')
3229 ffmpeg_fixup(stretched_ratio
not in (1, None),
3230 f
'Non-uniform pixel ratio {stretched_ratio}',
3231 FFmpegFixupStretchedPP
)
3233 downloader
= get_suitable_downloader(info_dict
, self
.params
) if 'protocol' in info_dict
else None
3234 downloader
= downloader
.FD_NAME
if downloader
else None
3236 ext
= info_dict
.get('ext')
3237 postprocessed_by_ffmpeg
= info_dict
.get('requested_formats') or any((
3238 isinstance(pp
, FFmpegVideoConvertorPP
)
3239 and resolve_recode_mapping(ext
, pp
.mapping
)[0] not in (ext
, None)
3240 ) for pp
in self
._pps
['post_process'])
3242 if not postprocessed_by_ffmpeg
:
3243 ffmpeg_fixup(ext
== 'm4a' and info_dict
.get('container') == 'm4a_dash',
3244 'writing DASH m4a. Only some players support this container',
3246 ffmpeg_fixup(downloader
== 'hlsnative' and not self
.params
.get('hls_use_mpegts')
3247 or info_dict
.get('is_live') and self
.params
.get('hls_use_mpegts') is None,
3248 'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
3250 ffmpeg_fixup(info_dict
.get('is_live') and downloader
== 'DashSegmentsFD',
3251 'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP
)
3253 ffmpeg_fixup(downloader
== 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP
)
3254 ffmpeg_fixup(downloader
== 'web_socket_fragment', 'Malformed duration detected', FFmpegFixupDurationPP
)
3258 replace_info_dict(self
.post_process(dl_filename
, info_dict
, files_to_move
))
3259 except PostProcessingError
as err
:
3260 self
.report_error('Postprocessing: %s' % str(err
))
3263 for ph
in self
._post
_hooks
:
3264 ph(info_dict
['filepath'])
3265 except Exception as err
:
3266 self
.report_error('post hooks: %s' % str(err
))
3268 info_dict
['__write_download_archive'] = True
3270 assert info_dict
is original_infodict
# Make sure the info_dict was modified in-place
3271 if self
.params
.get('force_write_download_archive'):
3272 info_dict
['__write_download_archive'] = True
3273 check_max_downloads()
def __download_wrapper(self, func):
    """Wrap `func` with the error handling shared by the download entry points.

    The returned wrapper reports UnavailableVideoError, prints
    DownloadCancelled (re-raising it unless 'break_per_url' is set) and, on
    success with 'dump_single_json' set, writes the sanitized result to stdout
    as JSON. The wrapper itself returns nothing.
    """
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        try:
            result = func(*args, **kwargs)
        except UnavailableVideoError as err:
            self.report_error(err)
            return
        except DownloadCancelled as err:
            self.to_screen(f'[info] {err}')
            if self.params.get('break_per_url'):
                return
            raise

        if not self.params.get('dump_single_json', False):
            return
        self.post_extract(result)
        self.to_stdout(json.dumps(self.sanitize_info(result)))
    return wrapper
def download(self, url_list):
    """Download a given list of URLs."""
    urls = variadic(url_list)  # Passing a single URL is a common mistake
    template = self.params['outtmpl']['default']

    # Several URLs going to one fixed, placeholder-free file name would collide,
    # unless the output is '-' or at most one download is allowed
    shares_one_file = (
        len(urls) > 1
        and template != '-'
        and '%' not in template
        and self.params.get('max_downloads') != 1)
    if shares_one_file:
        raise SameFileError(template)

    for url in urls:
        extract = self.__download_wrapper(self.extract_info)
        extract(url, force_generic_extractor=self.params.get('force_generic_extractor', False))

    return self._download_retcode
def download_with_info_file(self, info_filename):
    """Process a previously written info JSON file as if it were freshly extracted.

    If processing fails with DownloadError, EntryNotInPlaylist or ReExtractInfo
    and the info has a 'webpage_url', the download is retried from that URL;
    without one the error is re-raised. Returns the download return code.
    """
    reader = fileinput.FileInput(
        [info_filename], mode='r',
        openhook=fileinput.hook_encoded('utf-8'))
    with contextlib.closing(reader) as lines:
        # FileInput doesn't have a read method, we can't call json.load
        loaded = json.loads('\n'.join(lines))
        info = self.sanitize_info(loaded, self.params.get('clean_infojson', True))

    try:
        self.__download_wrapper(self.process_ie_result)(info, download=True)
    except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as err:
        if not isinstance(err, EntryNotInPlaylist):
            self.to_stderr('\r')
        webpage_url = info.get('webpage_url')
        if webpage_url is None:
            raise
        self.report_warning(f'The info failed to download: {err}; trying with URL {webpage_url}')
        return self.download([webpage_url])
    return self._download_retcode
def sanitize_info(info_dict, remove_private_keys=False):
    ''' Sanitize the infodict for converting to json

    Fills in 'epoch' and '_type' on info_dict itself when missing, then returns
    a filtered copy: containers are rebuilt recursively (sequences become
    lists) and values that are not None/str/int/float/bool are replaced by
    their repr(). With remove_private_keys, entries whose value is None, whose
    key starts with '__' or whose key is a known internal field are dropped.
    None is returned unchanged.
    '''
    if info_dict is None:
        return info_dict
    info_dict.setdefault('epoch', int(time.time()))
    info_dict.setdefault('_type', 'video')

    internal_keys = {
        'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
        'entries', 'filepath', '_filename', 'infojson_filename', 'original_url', 'playlist_autonumber',
    }

    def reject(key, value):
        if not remove_private_keys:
            return False
        return value is None or key.startswith('__') or key in internal_keys

    def filter_fn(obj):
        if isinstance(obj, dict):
            return {key: filter_fn(value) for key, value in obj.items() if not reject(key, value)}
        if isinstance(obj, (list, tuple, set, LazyList)):
            return [filter_fn(item) for item in obj]
        if obj is None or isinstance(obj, (str, int, float, bool)):
            return obj
        # Anything else is not JSON-serializable as-is
        return repr(obj)

    return filter_fn(info_dict)
def filter_requested_info(info_dict, actually_filter=True):
    ''' Alias of sanitize_info for backward compatibility

    `actually_filter` is forwarded as sanitize_info's `remove_private_keys`.
    '''
    return YoutubeDL.sanitize_info(info_dict, remove_private_keys=actually_filter)
def _delete_downloaded_files(self, *files_to_delete, info=None, msg=None):
    """Delete the given files and drop them from info['__files_to_move'].

    files_to_delete: file names; falsy entries and duplicates are ignored
    info:            optional info dict whose '__files_to_move' mapping should
                     no longer mention the deleted files
    msg:             optional %-template, shown with each file name before
                     the file is deleted

    A file that cannot be removed only produces a warning.
    """
    # A fresh dict per call instead of a shared mutable default argument
    if info is None:
        info = {}
    # Tolerate a missing or None '__files_to_move'
    files_to_move = info.get('__files_to_move') or {}

    for filename in set(filter(None, files_to_delete)):
        if msg:
            self.to_screen(msg % filename)
        try:
            os.remove(filename)
        except OSError:
            self.report_warning(f'Unable to delete file {filename}')
        # NB: Delete even if None
        files_to_move.pop(filename, None)
def post_extract(info_dict):
    """Resolve deferred '__post_extractor' callbacks in place.

    For a 'playlist' or 'multi_video' result each entry is processed
    recursively. For any other result the '__post_extractor' callable, if
    present, is removed and its returned dict is merged into the result.
    """
    def resolve(entry):
        if entry.get('_type') not in ('playlist', 'multi_video'):
            deferred = entry.pop('__post_extractor', None)
            entry.update(deferred() if deferred else {})
            return
        for child in entry.get('entries', {}):
            # Empty entries are replaced by a throwaway dict
            resolve(child or {})

    resolve(info_dict or {})
def run_pp(self, pp, infodict):
    """Run a single postprocessor and deal with the files it wants removed.

    pp.run() returns (files to delete, updated info dict). With 'keepvideo'
    set those files are registered in '__files_to_move' instead of being
    deleted. A PostProcessingError is reported and swallowed only when
    'ignoreerrors' is exactly True; otherwise it propagates.

    Returns the (possibly replaced) info dict.
    """
    infodict.setdefault('__files_to_move', {})
    try:
        leftovers, infodict = pp.run(infodict)
    except PostProcessingError as err:
        # Must be True and not 'only_download'
        if self.params.get('ignoreerrors') is not True:
            raise
        self.report_error(err)
        return infodict

    if leftovers:
        if self.params.get('keepvideo', False):
            for path in leftovers:
                infodict['__files_to_move'].setdefault(path, '')
        else:
            self._delete_downloaded_files(
                *leftovers, info=infodict, msg='Deleting original file %s (pass -k to keep)')
    return infodict
def run_all_pps(self, key, info, *, additional_pps=None):
    """Run every postprocessor registered for stage `key`.

    The forced printings for the stage are emitted first. `additional_pps`,
    when given, run before the registered postprocessors. Each postprocessor
    receives the info returned by the previous one; the final info is returned.
    """
    self._forceprint(key, info)
    pipeline = (additional_pps or []) + self._pps[key]
    for postprocessor in pipeline:
        info = self.run_pp(postprocessor, info)
    return info
def pre_process(self, ie_info, key='pre_process', files_to_move=None):
    """Run the postprocessors of stage `key` on a copy of `ie_info`.

    A PostProcessingError is not raised: it is stored under '__pending_error'
    (unless one is already pending) and reported as a non-error message.

    Returns (info, files_to_move), with the '__files_to_move' entry taken out
    of the returned info.
    """
    info = dict(ie_info)
    info.update({'__files_to_move': files_to_move or {}})
    try:
        info = self.run_all_pps(key, info)
    except PostProcessingError as err:
        msg = f'Preprocessing: {err}'
        info.setdefault('__pending_error', msg)
        self.report_error(msg, is_error=False)
    pending_moves = info.pop('__files_to_move', None)
    return info, pending_moves
def post_process(self, filename, info, files_to_move=None):
    """Run all the postprocessors on the given file.

    Records `filename` and the pending file moves in `info`, runs the
    'post_process' stage (preceded by any postprocessors listed under
    '__postprocessors'), moves the files, and finally runs the 'after_move'
    stage, whose result is returned.
    """
    info.update({
        'filepath': filename,
        '__files_to_move': files_to_move or {},
    })
    extra_pps = info.get('__postprocessors')
    info = self.run_all_pps('post_process', info, additional_pps=extra_pps)
    mover = MoveFilesAfterDownloadPP(self)
    info = self.run_pp(mover, info)
    # The moves are done; the bookkeeping entry is no longer needed
    del info['__files_to_move']
    return self.run_all_pps('after_move', info)
def _make_archive_id(self, info_dict):
    """Build the download-archive identifier '<extractor key, lowercased> <video id>'.

    Returns None when the video id is missing, or when no extractor key is
    given and none of the registered extractors is suitable for the URL.
    """
    video_id = info_dict.get('id')
    if not video_id:
        return
    # Future-proof against any change in case
    # and backwards compatibility with prior versions
    extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
    if extractor is None:
        url = str_or_none(info_dict.get('url'))
        if not url:
            return
        # Try to find matching extractor for the URL and take its ie_key
        not_found = object()
        extractor = next(
            (ie_key for ie_key, ie in self._ies.items() if ie.suitable(url)),
            not_found)
        if extractor is not_found:
            return
    return f'{extractor.lower()} {video_id}'
def in_download_archive(self, info_dict):
    """Return True if the video described by info_dict is in the download archive.

    Returns False when no 'download_archive' param is set or when an archive
    id cannot be built from info_dict.
    """
    fn = self.params.get('download_archive')
    if fn is None:
        return False

    vid_id = self._make_archive_id(info_dict)
    if not vid_id:
        return False  # Incomplete video information

    return vid_id in self.archive
def record_download_archive(self, info_dict):
    """Append the archive id of info_dict to the archive file and to self.archive.

    Does nothing when no 'download_archive' param is set.
    """
    fn = self.params.get('download_archive')
    if fn is None:
        return
    vid_id = self._make_archive_id(info_dict)
    # NOTE(review): presumably callers only record videos with complete info;
    # a missing id would otherwise fail later on the string concatenation
    assert vid_id
    self.write_debug(f'Adding to archive: {vid_id}')
    with locked_file(fn, 'a', encoding='utf-8') as archive_file:
        archive_file.write(vid_id + '\n')
    self.archive.add(vid_id)
def format_resolution(format, default='unknown'):
    """Return a short resolution string for a format dict.

    Checked in order: audio-only formats, an explicit 'resolution' field,
    width and height, height alone, width alone; otherwise default.
    """
    if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
        return 'audio only'
    if format.get('resolution') is not None:
        return format['resolution']
    if format.get('width') and format.get('height'):
        return '%dx%d' % (format['width'], format['height'])
    elif format.get('height'):
        return '%sp' % format['height']
    elif format.get('width'):
        return '%dx?' % format['width']
    return default
3489 def _list_format_headers(self
, *headers
):
3490 if self
.params
.get('listformats_table', True) is not False:
3491 return [self
._format
_out
(header
, self
.Styles
.HEADERS
) for header
in headers
]
3494 def _format_note(self
, fdict
):
3496 if fdict
.get('ext') in ['f4f', 'f4m']:
3497 res
+= '(unsupported)'
3498 if fdict
.get('language'):
3501 res
+= '[%s]' % fdict
['language']
3502 if fdict
.get('format_note') is not None:
3505 res
+= fdict
['format_note']
3506 if fdict
.get('tbr') is not None:
3509 res
+= '%4dk' % fdict
['tbr']
3510 if fdict
.get('container') is not None:
3513 res
+= '%s container' % fdict
['container']
3514 if (fdict
.get('vcodec') is not None
3515 and fdict
.get('vcodec') != 'none'):
3518 res
+= fdict
['vcodec']
3519 if fdict
.get('vbr') is not None:
3521 elif fdict
.get('vbr') is not None and fdict
.get('abr') is not None:
3523 if fdict
.get('vbr') is not None:
3524 res
+= '%4dk' % fdict
['vbr']
3525 if fdict
.get('fps') is not None:
3528 res
+= '%sfps' % fdict
['fps']
3529 if fdict
.get('acodec') is not None:
3532 if fdict
['acodec'] == 'none':
3535 res
+= '%-5s' % fdict
['acodec']
3536 elif fdict
.get('abr') is not None:
3540 if fdict
.get('abr') is not None:
3541 res
+= '@%3dk' % fdict
['abr']
3542 if fdict
.get('asr') is not None:
3543 res
+= ' (%5dHz)' % fdict
['asr']
3544 if fdict
.get('filesize') is not None:
3547 res
+= format_bytes(fdict
['filesize'])
3548 elif fdict
.get('filesize_approx') is not None:
3551 res
+= '~' + format_bytes(fdict
['filesize_approx'])
def render_formats_table(self, info_dict):
    """Render the list of available formats as a text table.

    @returns  The rendered table, or None when info_dict has neither
              'formats' nor 'url'

    When the 'listformats_table' param is False a 4-column table
    (format code, extension, resolution, note) is produced; otherwise the
    detailed, styled table is used. Formats whose 'preference' is below
    -1000 are left out of either table.
    """
    if not info_dict.get('formats') and not info_dict.get('url'):
        return None

    formats = info_dict.get('formats', [info_dict])
    # The same visibility filter applies to both table layouts
    visible = [f for f in formats if f.get('preference') is None or f['preference'] >= -1000]

    if self.params.get('listformats_table', True) is False:
        table = [
            [
                format_field(f, 'format_id'),
                format_field(f, 'ext'),
                self.format_resolution(f),
                self._format_note(f)
            ] for f in visible]
        return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)

    def simplified_codec(f, field):
        """Shorten a codec string, or describe why the stream has none."""
        assert field in ('acodec', 'vcodec')
        codec = f.get(field, 'unknown')
        if not codec:
            return 'unknown'
        elif codec != 'none':
            return '.'.join(codec.split('.')[:4])

        if field == 'vcodec' and f.get('acodec') == 'none':
            return 'images'
        elif field == 'acodec' and f.get('vcodec') == 'none':
            return ''
        return self._format_out('audio only' if field == 'vcodec' else 'video only',
                                self.Styles.SUPPRESS)

    delim = self._format_out('\u2502', self.Styles.DELIM, '|', test_encoding=True)
    table = [
        [
            self._format_out(format_field(f, 'format_id'), self.Styles.ID),
            format_field(f, 'ext'),
            format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
            format_field(f, 'fps', '\t%d', func=round),
            format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
            delim,
            format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
            format_field(f, 'tbr', '\t%dk', func=round),
            shorten_protocol_name(f.get('protocol', '')),
            delim,
            simplified_codec(f, 'vcodec'),
            format_field(f, 'vbr', '\t%dk', func=round),
            simplified_codec(f, 'acodec'),
            format_field(f, 'abr', '\t%dk', func=round),
            format_field(f, 'asr', '\t%s', func=format_decimal_suffix),
            join_nonempty(
                self._format_out('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
                format_field(f, 'language', '[%s]'),
                join_nonempty(format_field(f, 'format_note'),
                              format_field(f, 'container', ignore=(None, f.get('ext'))),
                              delim=', '),
                delim=' '),
        ] for f in visible]
    header_line = self._list_format_headers(
        'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', delim, '\tFILESIZE', '\tTBR', 'PROTO',
        delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')

    return render_table(
        header_line, table, hide_empty=True,
        delim=self._format_out('\u2500', self.Styles.DELIM, '-', test_encoding=True))
def render_thumbnails_table(self, info_dict):
    """Render the thumbnails of info_dict as a table (ID, Width, Height, URL).

    @returns  The rendered table, or None when there are no thumbnails
    """
    thumbnails = list(info_dict.get('thumbnails') or [])
    if not thumbnails:
        return None
    return render_table(
        self._list_format_headers('ID', 'Width', 'Height', 'URL'),
        [[t.get('id'), t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails])
def render_subtitles_table(self, video_id, subtitles):
    """Render a {language: [format dicts]} mapping as a table.

    @returns  The rendered table, or None when subtitles is empty
    """
    def _row(lang, formats):
        """Build one [language, names, extensions] row; formats are listed in reverse."""
        if not formats:
            # zip(*()) yields nothing to unpack, so an empty list needs its own row
            return [lang, '', '']
        exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
        if len(set(names)) == 1:
            # A single shared name is shown once, or not at all if it is the placeholder
            names = [] if names[0] == 'unknown' else names[:1]
        return [lang, ', '.join(names), ', '.join(exts)]

    if not subtitles:
        return None
    return render_table(
        self._list_format_headers('Language', 'Name', 'Formats'),
        [_row(lang, formats) for lang, formats in subtitles.items()],
        hide_empty=True)
def __list_table(self, video_id, name, func, *args):
    """Render a table with func(*args) and print it, or a notice if it is empty.

    @param video_id  Identifier used in the printed messages
    @param name      Human-readable name of what is listed (e.g. 'formats')
    @param func      Callable returning the rendered table, or a falsy value
    """
    table = func(*args)
    if not table:
        self.to_screen(f'{video_id} has no {name}')
        return
    self.to_screen(f'[info] Available {name} for {video_id}:')
    self.to_stdout(table)
def list_formats(self, info_dict):
    """Print the formats table for info_dict, keyed by its 'id'."""
    video_id = info_dict['id']
    renderer = self.render_formats_table
    self.__list_table(video_id, 'formats', renderer, info_dict)
def list_thumbnails(self, info_dict):
    """Print the thumbnails table for info_dict, keyed by its 'id'."""
    video_id = info_dict['id']
    renderer = self.render_thumbnails_table
    self.__list_table(video_id, 'thumbnails', renderer, info_dict)
def list_subtitles(self, video_id, subtitles, name='subtitles'):
    """Print the subtitles table for video_id; name labels what is being listed."""
    renderer = self.render_subtitles_table
    self.__list_table(video_id, name, renderer, video_id, subtitles)
def urlopen(self, req):
    """ Start an HTTP download """
    # A plain URL string is wrapped in a request object first
    request = sanitized_Request(req) if isinstance(req, str) else req
    return self._opener.open(request, timeout=self._socket_timeout)
def print_debug_header(self):
    """Write the '[debug]' diagnostic header; a no-op unless 'verbose' is set.

    Reports encodings, program version and variant, lazy-loading and plugin
    state, compatibility options, git HEAD (source checkouts only), system
    identifier, external program versions, optional libraries and the proxy
    map. Output goes to the 'logger' param when one is set.
    """
    if not self.params.get('verbose'):
        return

    # These imports can be slow. So import them only as needed
    from .extractor.extractors import _LAZY_LOADER
    from .extractor.extractors import _PLUGIN_CLASSES as plugin_extractors

    def get_encoding(stream):
        ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
        if not supports_terminal_sequences(stream):
            from .utils import WINDOWS_VT_MODE  # Must be imported locally
            ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
        return ret

    encoding_str = 'Encodings: locale %s, fs %s, pref %s, %s' % (
        locale.getpreferredencoding(),
        sys.getfilesystemencoding(),
        self.get_encoding(),
        ', '.join(
            f'{key} {get_encoding(stream)}' for key, stream in self._out_files.items_
            if stream is not None and key != 'console')
    )

    logger = self.params.get('logger')
    if logger:
        def write_debug(msg):
            logger.debug(f'[debug] {msg}')

        write_debug(encoding_str)
    else:
        # The encoding line itself is written without forcing an encoding
        write_string(f'[debug] {encoding_str}\n', encoding=None)

        def write_debug(msg):
            self._write_string(f'[debug] {msg}\n')

    source = detect_variant()
    write_debug(join_nonempty(
        'yt-dlp version', __version__,
        f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
        '' if source == 'unknown' else f'({source})',
        delim=' '))
    if not _LAZY_LOADER:
        if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
            write_debug('Lazy loading extractors is forcibly disabled')
        else:
            write_debug('Lazy loading extractors is disabled')
    if plugin_extractors or plugin_postprocessors:
        write_debug('Plugins: %s' % [
            '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
            for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
    if self.params['compat_opts']:
        write_debug('Compatibility options: %s' % ', '.join(self.params['compat_opts']))

    if source == 'source':
        # Best effort only: any failure while querying git is ignored
        with contextlib.suppress(Exception):
            stdout, _, _ = Popen.run(
                ['git', 'rev-parse', '--short', 'HEAD'],
                text=True, cwd=os.path.dirname(os.path.abspath(__file__)),
                stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            if re.fullmatch('[0-9a-f]+', stdout.strip()):
                write_debug(f'Git HEAD: {stdout.strip()}')

    write_debug(system_identifier())

    exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
    ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
    if ffmpeg_features:
        exe_versions['ffmpeg'] += ' (%s)' % ','.join(sorted(ffmpeg_features))

    exe_versions['rtmpdump'] = rtmpdump_version()
    exe_versions['phantomjs'] = PhantomJSwrapper._version()
    exe_str = ', '.join(
        f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
    ) or 'none'
    write_debug('exe versions: %s' % exe_str)

    from .compat.compat_utils import get_package_info
    from .dependencies import available_dependencies

    write_debug('Optional libraries: %s' % (', '.join(sorted({
        join_nonempty(*get_package_info(m)) for m in available_dependencies.values()
    })) or 'none'))

    self._setup_opener()
    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    write_debug(f'Proxy map: {proxy_map}')

    # The 'call_home' check (public IP / latest version lookup) sat behind
    # `if False and ...` and could never run, so it is not kept here
3765 def _setup_opener(self
):
3766 if hasattr(self
, '_opener'):
3768 timeout_val
= self
.params
.get('socket_timeout')
3769 self
._socket
_timeout
= 20 if timeout_val
is None else float(timeout_val
)
3771 opts_cookiesfrombrowser
= self
.params
.get('cookiesfrombrowser')
3772 opts_cookiefile
= self
.params
.get('cookiefile')
3773 opts_proxy
= self
.params
.get('proxy')
3775 self
.cookiejar
= load_cookies(opts_cookiefile
, opts_cookiesfrombrowser
, self
)
3777 cookie_processor
= YoutubeDLCookieProcessor(self
.cookiejar
)
3778 if opts_proxy
is not None:
3779 if opts_proxy
== '':
3782 proxies
= {'http': opts_proxy, 'https': opts_proxy}
3784 proxies
= urllib
.request
.getproxies()
3785 # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
3786 if 'http' in proxies
and 'https' not in proxies
:
3787 proxies
['https'] = proxies
['http']
3788 proxy_handler
= PerRequestProxyHandler(proxies
)
3790 debuglevel
= 1 if self
.params
.get('debug_printtraffic') else 0
3791 https_handler
= make_HTTPS_handler(self
.params
, debuglevel
=debuglevel
)
3792 ydlh
= YoutubeDLHandler(self
.params
, debuglevel
=debuglevel
)
3793 redirect_handler
= YoutubeDLRedirectHandler()
3794 data_handler
= urllib
.request
.DataHandler()
3796 # When passing our own FileHandler instance, build_opener won't add the
3797 # default FileHandler and allows us to disable the file protocol, which
3798 # can be used for malicious purposes (see
3799 # https://github.com/ytdl-org/youtube-dl/issues/8227)
3800 file_handler
= urllib
.request
.FileHandler()
3802 def file_open(*args
, **kwargs
):
3803 raise urllib
.error
.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
3804 file_handler
.file_open
= file_open
3806 opener
= urllib
.request
.build_opener(
3807 proxy_handler
, https_handler
, cookie_processor
, ydlh
, redirect_handler
, data_handler
, file_handler
)
3809 # Delete the default user-agent header, which would otherwise apply in
3810 # cases where our custom HTTP handler doesn't come into play
3811 # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
3812 opener
.addheaders
= []
3813 self
._opener
= opener
def encode(self, s):
    """Encode s with self.get_encoding(); bytes are returned unchanged.

    @raises UnicodeEncodeError  with a hint about the --encoding option
                                appended to its reason
    """
    if isinstance(s, bytes):
        return s  # Already encoded

    try:
        return s.encode(self.get_encoding())
    except UnicodeEncodeError as err:
        err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
        raise
def get_encoding(self):
    """Return the 'encoding' param, or preferredencoding() when it is unset."""
    encoding = self.params.get('encoding')
    if encoding is None:
        encoding = preferredencoding()
    return encoding
3831 def _write_info_json(self
, label
, ie_result
, infofn
, overwrite
=None):
3832 ''' Write infojson and returns True = written, 'exists' = Already exists, False = skip, None = error '''
3833 if overwrite
is None:
3834 overwrite
= self
.params
.get('overwrites', True)
3835 if not self
.params
.get('writeinfojson'):
3838 self
.write_debug(f
'Skipping writing {label} infojson')
3840 elif not self
._ensure
_dir
_exists
(infofn
):
3842 elif not overwrite
and os
.path
.exists(infofn
):
3843 self
.to_screen(f
'[info] {label.title()} metadata is already present')
3846 self
.to_screen(f
'[info] Writing {label} metadata as JSON to: {infofn}')
3848 write_json_file(self
.sanitize_info(ie_result
, self
.params
.get('clean_infojson', True)), infofn
)
3851 self
.report_error(f
'Cannot write {label} metadata to JSON file {infofn}')
3854 def _write_description(self
, label
, ie_result
, descfn
):
3855 ''' Write description and returns True = written, False = skip, None = error '''
3856 if not self
.params
.get('writedescription'):
3859 self
.write_debug(f
'Skipping writing {label} description')
3861 elif not self
._ensure
_dir
_exists
(descfn
):
3863 elif not self
.params
.get('overwrites', True) and os
.path
.exists(descfn
):
3864 self
.to_screen(f
'[info] {label.title()} description is already present')
3865 elif ie_result
.get('description') is None:
3866 self
.report_warning(f
'There\'s no {label} description to write')
3870 self
.to_screen(f
'[info] Writing {label} description to: {descfn}')
3871 with open(encodeFilename(descfn
), 'w', encoding
='utf-8') as descfile
:
3872 descfile
.write(ie_result
['description'])
3874 self
.report_error(f
'Cannot write {label} description file {descfn}')
3878 def _write_subtitles(self
, info_dict
, filename
):
3879 ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
3881 subtitles
= info_dict
.get('requested_subtitles')
3882 if not subtitles
or not (self
.params
.get('writesubtitles') or self
.params
.get('writeautomaticsub')):
3883 # subtitles download errors are already managed as troubles in relevant IE
3884 # that way it will silently go on when used with unsupporting IE
3887 sub_filename_base
= self
.prepare_filename(info_dict
, 'subtitle')
3888 if not sub_filename_base
:
3889 self
.to_screen('[info] Skipping writing video subtitles')
3891 for sub_lang
, sub_info
in subtitles
.items():
3892 sub_format
= sub_info
['ext']
3893 sub_filename
= subtitles_filename(filename
, sub_lang
, sub_format
, info_dict
.get('ext'))
3894 sub_filename_final
= subtitles_filename(sub_filename_base
, sub_lang
, sub_format
, info_dict
.get('ext'))
3895 existing_sub
= self
.existing_file((sub_filename_final
, sub_filename
))
3897 self
.to_screen(f
'[info] Video subtitle {sub_lang}.{sub_format} is already present')
3898 sub_info
['filepath'] = existing_sub
3899 ret
.append((existing_sub
, sub_filename_final
))
3902 self
.to_screen(f
'[info] Writing video subtitles to: {sub_filename}')
3903 if sub_info
.get('data') is not None:
3905 # Use newline='' to prevent conversion of newline characters
3906 # See https://github.com/ytdl-org/youtube-dl/issues/10268
3907 with open(sub_filename
, 'w', encoding
='utf-8', newline
='') as subfile
:
3908 subfile
.write(sub_info
['data'])
3909 sub_info
['filepath'] = sub_filename
3910 ret
.append((sub_filename
, sub_filename_final
))
3913 self
.report_error(f
'Cannot write video subtitles file {sub_filename}')
3917 sub_copy
= sub_info
.copy()
3918 sub_copy
.setdefault('http_headers', info_dict
.get('http_headers'))
3919 self
.dl(sub_filename
, sub_copy
, subtitle
=True)
3920 sub_info
['filepath'] = sub_filename
3921 ret
.append((sub_filename
, sub_filename_final
))
3922 except (DownloadError
, ExtractorError
, IOError, OSError, ValueError) + network_exceptions
as err
:
3923 msg
= f
'Unable to download video subtitles for {sub_lang!r}: {err}'
3924 if self
.params
.get('ignoreerrors') is not True: # False or 'only_download'
3925 if not self
.params
.get('ignoreerrors'):
3926 self
.report_error(msg
)
3927 raise DownloadError(msg
)
3928 self
.report_warning(msg
)
3931 def _write_thumbnails(self
, label
, info_dict
, filename
, thumb_filename_base
=None):
3932 ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
3933 write_all
= self
.params
.get('write_all_thumbnails', False)
3934 thumbnails
, ret
= [], []
3935 if write_all
or self
.params
.get('writethumbnail', False):
3936 thumbnails
= info_dict
.get('thumbnails') or []
3937 multiple
= write_all
and len(thumbnails
) > 1
3939 if thumb_filename_base
is None:
3940 thumb_filename_base
= filename
3941 if thumbnails
and not thumb_filename_base
:
3942 self
.write_debug(f
'Skipping writing {label} thumbnail')
3945 for idx
, t
in list(enumerate(thumbnails
))[::-1]:
3946 thumb_ext
= (f
'{t["id"]}.' if multiple
else '') + determine_ext(t
['url'], 'jpg')
3947 thumb_display_id
= f
'{label} thumbnail {t["id"]}'
3948 thumb_filename
= replace_extension(filename
, thumb_ext
, info_dict
.get('ext'))
3949 thumb_filename_final
= replace_extension(thumb_filename_base
, thumb_ext
, info_dict
.get('ext'))
3951 existing_thumb
= self
.existing_file((thumb_filename_final
, thumb_filename
))
3953 self
.to_screen('[info] %s is already present' % (
3954 thumb_display_id
if multiple
else f
'{label} thumbnail').capitalize())
3955 t
['filepath'] = existing_thumb
3956 ret
.append((existing_thumb
, thumb_filename_final
))
3958 self
.to_screen(f
'[info] Downloading {thumb_display_id} ...')
3960 uf
= self
.urlopen(sanitized_Request(t
['url'], headers
=t
.get('http_headers', {})))
3961 self
.to_screen(f
'[info] Writing {thumb_display_id} to: {thumb_filename}')
3962 with open(encodeFilename(thumb_filename
), 'wb') as thumbf
:
3963 shutil
.copyfileobj(uf
, thumbf
)
3964 ret
.append((thumb_filename
, thumb_filename_final
))
3965 t
['filepath'] = thumb_filename
3966 except network_exceptions
as err
:
3968 self
.report_warning(f
'Unable to download {thumb_display_id}: {err}')
3969 if ret
and not write_all
: