26 from string
import ascii_letters
28 from .cache
import Cache
30 HAS_LEGACY
as compat_has_legacy
,
31 compat_get_terminal_size
,
36 compat_urllib_request
,
38 from .cookies
import load_cookies
39 from .downloader
import FFmpegFD
, get_suitable_downloader
, shorten_protocol_name
40 from .downloader
.rtmp
import rtmpdump_version
41 from .extractor
import _LAZY_LOADER
42 from .extractor
import _PLUGIN_CLASSES
as plugin_extractors
43 from .extractor
import gen_extractor_classes
, get_info_extractor
44 from .extractor
.openload
import PhantomJSwrapper
45 from .minicurses
import format_text
46 from .postprocessor
import _PLUGIN_CLASSES
as plugin_postprocessors
47 from .postprocessor
import (
49 FFmpegFixupDuplicateMoovPP
,
50 FFmpegFixupDurationPP
,
53 FFmpegFixupStretchedPP
,
54 FFmpegFixupTimestampPP
,
57 MoveFilesAfterDownloadPP
,
60 from .update
import detect_variant
85 PerRequestProxyHandler
,
91 UnavailableVideoError
,
92 YoutubeDLCookieProcessor
,
94 YoutubeDLRedirectHandler
,
107 format_decimal_suffix
,
125 register_socks_protocols
,
126 remove_terminal_sequences
,
137 supports_terminal_sequences
,
145 windows_enable_vt_mode
,
149 from .version
import RELEASE_GIT_HEAD
, __version__
151 if compat_os_name
== 'nt':
158 YoutubeDL objects are the ones responsible for downloading the
159 actual video file and writing it to disk if the user has requested
160 it, among some other tasks. In most cases there should be one per
161 program. As, given a video URL, the downloader doesn't know how to
162 extract all the needed information, task that InfoExtractors do, it
163 has to pass the URL to one of them.
165 For this, YoutubeDL objects have a method that allows
166 InfoExtractors to be registered in a given order. When it is passed
167 a URL, the YoutubeDL object hands it to the first InfoExtractor it
168 finds that reports being able to handle it. The InfoExtractor extracts
169 all the information about the video or videos the URL refers to, and
170 YoutubeDL processes the extracted information, possibly using a File
171 Downloader to download the video.
173 YoutubeDL objects accept a lot of parameters. In order not to saturate
174 the object constructor with arguments, it receives a dictionary of
175 options instead. These options are available through the params
176 attribute for the InfoExtractors to use. The YoutubeDL also
177 registers itself as the downloader in charge for the InfoExtractors
178 that are added to it, so this is a "mutual registration".
182 username: Username for authentication purposes.
183 password: Password for authentication purposes.
184 videopassword: Password for accessing a video.
185 ap_mso: Adobe Pass multiple-system operator identifier.
186 ap_username: Multiple-system operator account username.
187 ap_password: Multiple-system operator account password.
188 usenetrc: Use netrc for authentication instead.
189 verbose: Print additional info to stdout.
190 quiet: Do not print messages to stdout.
191 no_warnings: Do not print out anything for warnings.
192 forceprint: A dict with keys WHEN mapped to a list of templates to
193 print to stdout. The allowed keys are video or any of the
194 items in utils.POSTPROCESS_WHEN.
195 For compatibility, a single list is also accepted
196 print_to_file: A dict with keys WHEN (same as forceprint) mapped to
197 a list of tuples with (template, filename)
198 forcejson: Force printing info_dict as JSON.
199 dump_single_json: Force printing the info_dict of the whole playlist
200 (or video) as a single JSON line.
201 force_write_download_archive: Force writing download archive regardless
202 of 'skip_download' or 'simulate'.
203 simulate: Do not download the video files. If unset (or None),
204 simulate only if listsubtitles, listformats or list_thumbnails is used
205 format: Video format code. see "FORMAT SELECTION" for more details.
206 You can also pass a function. The function takes 'ctx' as
207 argument and returns the formats to download.
208 See "build_format_selector" for an implementation
209 allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
210 ignore_no_formats_error: Ignore "No video formats" error. Useful for
211 extracting metadata even if the video is not actually
212 available for download (experimental)
213 format_sort: A list of fields by which to sort the video formats.
214 See "Sorting Formats" for more details.
215 format_sort_force: Force the given format_sort. see "Sorting Formats"
217 prefer_free_formats: Whether to prefer video formats with free containers
218 over non-free ones of same quality.
219 allow_multiple_video_streams: Allow multiple video streams to be merged
221 allow_multiple_audio_streams: Allow multiple audio streams to be merged
223 check_formats: Whether to test if the formats are downloadable.
224 Can be True (check all), False (check none),
225 'selected' (check selected formats),
226 or None (check only if requested by extractor)
227 paths: Dictionary of output paths. The allowed keys are 'home'
228 'temp' and the keys of OUTTMPL_TYPES (in utils.py)
229 outtmpl: Dictionary of templates for output names. Allowed keys
230 are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
231 For compatibility with youtube-dl, a single string can also be used
232 outtmpl_na_placeholder: Placeholder for unavailable meta fields.
233 restrictfilenames: Do not allow "&" and spaces in file names
234 trim_file_name: Limit length of filename (extension excluded)
235 windowsfilenames: Force the filenames to be windows compatible
236 ignoreerrors: Do not stop on download/postprocessing errors.
237 Can be 'only_download' to ignore only download errors.
238 Default is 'only_download' for CLI, but False for API
239 skip_playlist_after_errors: Number of allowed failures until the rest of
240 the playlist is skipped
241 force_generic_extractor: Force downloader to use the generic extractor
242 overwrites: Overwrite all video and metadata files if True,
243 overwrite only non-video files if None
244 and don't overwrite any file if False
245 For compatibility with youtube-dl,
246 "nooverwrites" may also be used instead
247 playliststart: Playlist item to start at.
248 playlistend: Playlist item to end at.
249 playlist_items: Specific indices of playlist to download.
250 playlistreverse: Download playlist items in reverse order.
251 playlistrandom: Download playlist items in random order.
252 matchtitle: Download only matching titles.
253 rejecttitle: Reject downloads for matching titles.
254 logger: Log messages to a logging.Logger instance.
255 logtostderr: Log messages to stderr instead of stdout.
256 consoletitle: Display progress in console window's titlebar.
257 writedescription: Write the video description to a .description file
258 writeinfojson: Write the video metadata to a .info.json file
259 clean_infojson: Remove private fields from the infojson
260 getcomments: Extract video comments. This will not be written to disk
261 unless writeinfojson is also given
262 writeannotations: Write the video annotations to a .annotations.xml file
263 writethumbnail: Write the thumbnail image to a file
264 allow_playlist_files: Whether to write playlists' description, infojson etc
265 also to disk when using the 'write*' options
266 write_all_thumbnails: Write all thumbnail formats to files
267 writelink: Write an internet shortcut file, depending on the
268 current platform (.url/.webloc/.desktop)
269 writeurllink: Write a Windows internet shortcut file (.url)
270 writewebloclink: Write a macOS internet shortcut file (.webloc)
271 writedesktoplink: Write a Linux internet shortcut file (.desktop)
272 writesubtitles: Write the video subtitles to a file
273 writeautomaticsub: Write the automatically generated subtitles to a file
274 listsubtitles: Lists all available subtitles for the video
275 subtitlesformat: The format code for subtitles
276 subtitleslangs: List of languages of the subtitles to download (can be regex).
277 The list may contain "all" to refer to all the available
278 subtitles. The language can be prefixed with a "-" to
279 exclude it from the requested languages. Eg: ['all', '-live_chat']
280 keepvideo: Keep the video file after post-processing
281 daterange: A DateRange object, download only if the upload_date is in the range.
282 skip_download: Skip the actual download of the video file
283 cachedir: Location of the cache files in the filesystem.
284 False to disable filesystem cache.
285 noplaylist: Download single video instead of a playlist if in doubt.
286 age_limit: An integer representing the user's age in years.
287 Unsuitable videos for the given age are skipped.
288 min_views: An integer representing the minimum view count the video
289 must have in order to not be skipped.
290 Videos without view count information are always
291 downloaded. None for no limit.
292 max_views: An integer representing the maximum view count.
293 Videos that are more popular than that are not
295 Videos without view count information are always
296 downloaded. None for no limit.
297 download_archive: File name of a file where all downloads are recorded.
298 Videos already present in the file are not downloaded
300 break_on_existing: Stop the download process after attempting to download a
301 file that is in the archive.
302 break_on_reject: Stop the download process when encountering a video that
303 has been filtered out.
304 break_per_url: Whether break_on_reject and break_on_existing
305 should act on each input URL as opposed to for the entire queue
306 cookiefile: File name or text stream from where cookies should be read and dumped to
307 cookiesfrombrowser: A tuple containing the name of the browser, the profile
308 name/path from where cookies are loaded, and the name of the
309 keyring. Eg: ('chrome', ) or ('vivaldi', 'default', 'BASICTEXT')
310 legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
311 support RFC 5746 secure renegotiation
312 nocheckcertificate: Do not verify SSL certificates
313 client_certificate: Path to client certificate file in PEM format. May include the private key
314 client_certificate_key: Path to private key file for client certificate
315 client_certificate_password: Password for client certificate private key, if encrypted.
316 If not provided and the key is encrypted, yt-dlp will ask interactively
317 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
318 At the moment, this is only supported by YouTube.
319 http_headers: A dictionary of custom headers to be used for all requests
320 proxy: URL of the proxy server to use
321 geo_verification_proxy: URL of the proxy to use for IP address verification
322 on geo-restricted sites.
323 socket_timeout: Time to wait for unresponsive hosts, in seconds
324 bidi_workaround: Work around buggy terminals without bidirectional text
325 support, using fribidi
326 debug_printtraffic:Print out sent and received HTTP traffic
327 default_search: Prepend this string if an input url is not valid.
328 'auto' for elaborate guessing
329 encoding: Use this encoding instead of the system-specified.
330 extract_flat: Do not resolve URLs, return the immediate result.
331 Pass in 'in_playlist' to only show this behavior for
333 wait_for_video: If given, wait for scheduled streams to become available.
334 The value should be a tuple containing the range
335 (min_secs, max_secs) to wait between retries
336 postprocessors: A list of dictionaries, each with an entry
337 * key: The name of the postprocessor. See
338 yt_dlp/postprocessor/__init__.py for a list.
339 * when: When to run the postprocessor. Allowed values are
340 the entries of utils.POSTPROCESS_WHEN
341 Assumed to be 'post_process' if not given
342 progress_hooks: A list of functions that get called on download
343 progress, with a dictionary with the entries
344 * status: One of "downloading", "error", or "finished".
345 Check this first and ignore unknown values.
346 * info_dict: The extracted info_dict
348 If status is one of "downloading", or "finished", the
349 following properties may also be present:
350 * filename: The final filename (always present)
351 * tmpfilename: The filename we're currently writing to
352 * downloaded_bytes: Bytes on disk
353 * total_bytes: Size of the whole file, None if unknown
354 * total_bytes_estimate: Guess of the eventual file size,
356 * elapsed: The number of seconds since download started.
357 * eta: The estimated time in seconds, None if unknown
358 * speed: The download speed in bytes/second, None if
360 * fragment_index: The counter of the currently
361 downloaded video fragment.
362 * fragment_count: The number of fragments (= individual
363 files that will be merged)
365 Progress hooks are guaranteed to be called at least once
366 (with status "finished") if the download is successful.
367 postprocessor_hooks: A list of functions that get called on postprocessing
368 progress, with a dictionary with the entries
369 * status: One of "started", "processing", or "finished".
370 Check this first and ignore unknown values.
371 * postprocessor: Name of the postprocessor
372 * info_dict: The extracted info_dict
374 Postprocessor hooks are guaranteed to be called at least twice
375 (with status "started" and "finished") if the processing is successful.
376 merge_output_format: Extension to use when merging formats.
377 final_ext: Expected final extension; used to detect when the file was
378 already downloaded and converted
379 fixup: Automatically correct known faults of the file.
381 - "never": do nothing
382 - "warn": only emit a warning
383 - "detect_or_warn": check whether we can do anything
384 about it, warn otherwise (default)
385 source_address: Client-side IP address to bind to.
386 sleep_interval_requests: Number of seconds to sleep between requests
388 sleep_interval: Number of seconds to sleep before each download when
389 used alone or a lower bound of a range for randomized
390 sleep before each download (minimum possible number
391 of seconds to sleep) when used along with
393 max_sleep_interval:Upper bound of a range for randomized sleep before each
394 download (maximum possible number of seconds to sleep).
395 Must only be used along with sleep_interval.
396 Actual sleep time will be a random float from range
397 [sleep_interval; max_sleep_interval].
398 sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
399 listformats: Print an overview of available video formats and exit.
400 list_thumbnails: Print a table of all thumbnails and exit.
401 match_filter: A function that gets called for every video with the signature
402 (info_dict, *, incomplete: bool) -> Optional[str]
403 For backward compatibility with youtube-dl, the signature
404 (info_dict) -> Optional[str] is also allowed.
405 - If it returns a message, the video is ignored.
406 - If it returns None, the video is downloaded.
407 - If it returns utils.NO_DEFAULT, the user is interactively
408 asked whether to download the video.
409 match_filter_func in utils.py is one example for this.
410 no_color: Do not emit color codes in output.
411 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
414 Two-letter ISO 3166-2 country code that will be used for
415 explicit geographic restriction bypassing via faking
416 X-Forwarded-For HTTP header
418 IP range in CIDR notation that will be used similarly to
420 external_downloader: A dictionary of protocol keys and the executable of the
421 external downloader to use for it. The allowed protocols
422 are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
423 Set the value to 'native' to use the native downloader
424 compat_opts: Compatibility options. See "Differences in default behavior".
425 The following options do not work when used through the API:
426 filename, abort-on-error, multistreams, no-live-chat, format-sort
427 no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
428 Refer to __init__.py for their implementation
429 progress_template: Dictionary of templates for progress outputs.
430 Allowed keys are 'download', 'postprocess',
431 'download-title' (console title) and 'postprocess-title'.
432 The template is mapped on a dictionary with keys 'progress' and 'info'
433 retry_sleep_functions: Dictionary of functions that takes the number of attempts
434 as argument and returns the time to sleep in seconds.
435 Allowed keys are 'http', 'fragment', 'file_access'
436 download_ranges: A function that gets called for every video with the signature
437 (info_dict, *, ydl) -> Iterable[Section].
438 Only the returned sections will be downloaded. Each Section contains:
439 * start_time: Start time of the section in seconds
440 * end_time: End time of the section in seconds
441 * title: Section title (Optional)
442 * index: Section number (Optional)
444 The following parameters are not used by YoutubeDL itself, they are used by
445 the downloader (see yt_dlp/downloader/common.py):
446 nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
447 max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
448 continuedl, noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
449 external_downloader_args, concurrent_fragment_downloads.
451 The following options are used by the post processors:
452 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
453 to the binary or its containing directory.
454 postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
455 and a list of additional command-line arguments for the
456 postprocessor/executable. The dict can also have "PP+EXE" keys
457 which are used when the given exe is used by the given PP.
458 Use 'default' as the name for arguments to be passed to all PP
459 For compatibility with youtube-dl, a single list of args
462 The following options are used by the extractors:
463 extractor_retries: Number of times to retry for known errors
464 dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
465 hls_split_discontinuity: Split HLS playlists to different formats at
466 discontinuities such as ad breaks (default: False)
467 extractor_args: A dictionary of arguments to be passed to the extractors.
468 See "EXTRACTOR ARGUMENTS" for details.
469 Eg: {'youtube': {'skip': ['dash', 'hls']}}
470 mark_watched: Mark videos watched (even with --simulate). Only for YouTube
472 The following options are deprecated and may be removed in the future:
474 forceurl: - Use forceprint
475 Force printing final URL.
476 forcetitle: - Use forceprint
477 Force printing title.
478 forceid: - Use forceprint
480 forcethumbnail: - Use forceprint
481 Force printing thumbnail URL.
482 forcedescription: - Use forceprint
483 Force printing description.
484 forcefilename: - Use forceprint
485 Force printing final filename.
486 forceduration: - Use forceprint
487 Force printing duration.
488 allsubtitles: - Use subtitleslangs = ['all']
489 Downloads all the subtitles of the video
490 (requires writesubtitles or writeautomaticsub)
491 include_ads: - Doesn't work
493 call_home: - Not implemented
494 Boolean, true iff we are allowed to contact the
495 yt-dlp servers for debugging.
496 post_hooks: - Register a custom postprocessor
497 A list of functions that get called as the final step
498 for each video file, after all postprocessors have been
499 called. The filename will be passed as the only argument.
500 hls_prefer_native: - Use external_downloader = {'m3u8': 'native'} or {'m3u8': 'ffmpeg'}.
501 Use the native HLS downloader instead of ffmpeg/avconv
502 if True, otherwise use ffmpeg/avconv if False, otherwise
503 use downloader suggested by extractor if None.
504 prefer_ffmpeg: - avconv support is deprecated
505 If False, use avconv instead of ffmpeg if both are available,
506 otherwise prefer ffmpeg.
507 youtube_include_dash_manifest: - Use extractor_args
508 If True (default), DASH manifests and related
509 data will be downloaded and processed by extractor.
510 You can reduce network I/O by disabling it if you don't
511 care about DASH. (only for youtube)
512 youtube_include_hls_manifest: - Use extractor_args
513 If True (default), HLS manifests and related
514 data will be downloaded and processed by extractor.
515 You can reduce network I/O by disabling it if you don't
516 care about HLS. (only for youtube)
520 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
521 'timestamp', 'release_timestamp',
522 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
523 'average_rating', 'comment_count', 'age_limit',
524 'start_time', 'end_time',
525 'chapter_number', 'season_number', 'episode_number',
526 'track_number', 'disc_number', 'release_year',
530 # NB: Keep in sync with the docstring of extractor/common.py
531 'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note',
532 'width', 'height', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr',
533 'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx',
534 'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start',
535 'preference', 'language', 'language_preference', 'quality', 'source_preference',
536 'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'downloader_options',
537 'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'
539 _format_selection_exts
= {
540 'audio': {'m4a', 'mp3', 'ogg', 'aac'}
,
541 'video': {'mp4', 'flv', 'webm', '3gp'}
,
542 'storyboards': {'mhtml'}
,
545 def __init__(self
, params
=None, auto_init
=True):
546 """Create a FileDownloader object with the given options.
547 @param auto_init Whether to load the default extractors and print header (if verbose).
548 Set to 'no_verbose_header' to not print the header
554 self
._ies
_instances
= {}
555 self
._pps
= {k: [] for k in POSTPROCESS_WHEN}
556 self
._printed
_messages
= set()
557 self
._first
_webpage
_request
= True
558 self
._post
_hooks
= []
559 self
._progress
_hooks
= []
560 self
._postprocessor
_hooks
= []
561 self
._download
_retcode
= 0
562 self
._num
_downloads
= 0
564 self
._playlist
_level
= 0
565 self
._playlist
_urls
= set()
566 self
.cache
= Cache(self
)
568 windows_enable_vt_mode()
569 stdout
= sys
.stderr
if self
.params
.get('logtostderr') else sys
.stdout
570 self
._out
_files
= Namespace(
573 screen
=sys
.stderr
if self
.params
.get('quiet') else stdout
,
574 console
=None if compat_os_name
== 'nt' else next(
575 filter(supports_terminal_sequences
, (sys
.stderr
, sys
.stdout
)), None)
577 self
._allow
_colors
= Namespace(**{
578 type_
: not self
.params
.get('no_color') and supports_terminal_sequences(stream
)
579 for type_
, stream
in self
._out
_files
if type_
!= 'console'
582 if sys
.version_info
< (3, 6):
584 'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys
.version_info
[:2])
586 if self
.params
.get('allow_unplayable_formats'):
588 f
'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
589 'This is a developer option intended for debugging. \n'
590 ' If you experience any issues while using this option, '
591 f
'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')
593 def check_deprecated(param
, option
, suggestion
):
594 if self
.params
.get(param
) is not None:
595 self
.report_warning(f
'{option} is deprecated. Use {suggestion} instead')
599 if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
600 if self
.params
.get('geo_verification_proxy') is None:
601 self
.params
['geo_verification_proxy'] = self
.params
['cn_verification_proxy']
603 check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
604 check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
605 check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
607 for msg
in self
.params
.get('_warnings', []):
608 self
.report_warning(msg
)
609 for msg
in self
.params
.get('_deprecation_warnings', []):
610 self
.deprecation_warning(msg
)
612 self
.params
['compat_opts'] = set(self
.params
.get('compat_opts', ()))
613 if not compat_has_legacy
:
614 self
.params
['compat_opts'].add('no-compat-legacy')
615 if 'list-formats' in self
.params
['compat_opts']:
616 self
.params
['listformats_table'] = False
618 if 'overwrites' not in self
.params
and self
.params
.get('nooverwrites') is not None:
619 # nooverwrites was unnecessarily changed to overwrites
620 # in 0c3d0f51778b153f65c21906031c2e091fcfb641
621 # This ensures compatibility with both keys
622 self
.params
['overwrites'] = not self
.params
['nooverwrites']
623 elif self
.params
.get('overwrites') is None:
624 self
.params
.pop('overwrites', None)
626 self
.params
['nooverwrites'] = not self
.params
['overwrites']
628 self
.params
.setdefault('forceprint', {})
629 self
.params
.setdefault('print_to_file', {})
631 # Compatibility with older syntax
632 if not isinstance(params
['forceprint'], dict):
633 self
.params
['forceprint'] = {'video': params['forceprint']}
635 if self
.params
.get('bidi_workaround', False):
638 master
, slave
= pty
.openpty()
639 width
= compat_get_terminal_size().columns
640 width_args
= [] if width
is None else ['-w', str(width
)]
641 sp_kwargs
= {'stdin': subprocess.PIPE, 'stdout': slave, 'stderr': self._out_files.error}
643 self
._output
_process
= Popen(['bidiv'] + width_args
, **sp_kwargs
)
645 self
._output
_process
= Popen(['fribidi', '-c', 'UTF-8'] + width_args
, **sp_kwargs
)
646 self
._output
_channel
= os
.fdopen(master
, 'rb')
647 except OSError as ose
:
648 if ose
.errno
== errno
.ENOENT
:
650 'Could not find fribidi executable, ignoring --bidi-workaround. '
651 'Make sure that fribidi is an executable file in one of the directories in your $PATH.')
656 if auto_init
!= 'no_verbose_header':
657 self
.print_debug_header()
658 self
.add_default_info_extractors()
660 if (sys
.platform
!= 'win32'
661 and sys
.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
662 and not self
.params
.get('restrictfilenames', False)):
663 # Unicode filesystem API will throw errors (#1474, #13027)
665 'Assuming --restrict-filenames since file system encoding '
666 'cannot encode all characters. '
667 'Set the LC_ALL environment variable to fix this.')
668 self
.params
['restrictfilenames'] = True
670 self
.outtmpl_dict
= self
.parse_outtmpl()
672 # Creating format selector here allows us to catch syntax errors before the extraction
673 self
.format_selector
= (
674 self
.params
.get('format') if self
.params
.get('format') in (None, '-')
675 else self
.params
['format'] if callable(self
.params
['format'])
676 else self
.build_format_selector(self
.params
['format']))
678 # Set http_headers defaults according to std_headers
679 self
.params
['http_headers'] = merge_headers(std_headers
, self
.params
.get('http_headers', {}))
682 'post_hooks': self
.add_post_hook
,
683 'progress_hooks': self
.add_progress_hook
,
684 'postprocessor_hooks': self
.add_postprocessor_hook
,
686 for opt
, fn
in hooks
.items():
687 for ph
in self
.params
.get(opt
, []):
690 for pp_def_raw
in self
.params
.get('postprocessors', []):
691 pp_def
= dict(pp_def_raw
)
692 when
= pp_def
.pop('when', 'post_process')
693 self
.add_post_processor(
694 get_postprocessor(pp_def
.pop('key'))(self
, **pp_def
),
698 register_socks_protocols()
700 def preload_download_archive(fn
):
701 """Preload the archive, if any is specified"""
704 self
.write_debug(f
'Loading archive file {fn!r}')
706 with locked_file(fn
, 'r', encoding
='utf-8') as archive_file
:
707 for line
in archive_file
:
708 self
.archive
.add(line
.strip())
709 except OSError as ioe
:
710 if ioe
.errno
!= errno
.ENOENT
:
716 preload_download_archive(self
.params
.get('download_archive'))
718 def warn_if_short_id(self
, argv
):
719 # short YouTube ID starting with dash?
721 i
for i
, a
in enumerate(argv
)
722 if re
.match(r
'^-[0-9A-Za-z_-]{10}$', a
)]
726 + [a
for i
, a
in enumerate(argv
) if i
not in idxs
]
727 + ['--'] + [argv
[i
] for i
in idxs
]
730 'Long argument string detected. '
731 'Use -- to separate parameters and URLs, like this:\n%s' %
732 args_to_str(correct_argv
))
734 def add_info_extractor(self
, ie
):
735 """Add an InfoExtractor object to the end of the list."""
737 self
._ies
[ie_key
] = ie
738 if not isinstance(ie
, type):
739 self
._ies
_instances
[ie_key
] = ie
740 ie
.set_downloader(self
)
742 def _get_info_extractor_class(self
, ie_key
):
743 ie
= self
._ies
.get(ie_key
)
745 ie
= get_info_extractor(ie_key
)
746 self
.add_info_extractor(ie
)
749 def get_info_extractor(self
, ie_key
):
751 Get an instance of an IE with name ie_key, it will try to get one from
752 the _ies list, if there's no instance it will create a new one and add
753 it to the extractor list.
755 ie
= self
._ies
_instances
.get(ie_key
)
757 ie
= get_info_extractor(ie_key
)()
758 self
.add_info_extractor(ie
)
def add_default_info_extractors(self):
    """
    Add the InfoExtractors yielded by gen_extractor_classes to the end of the list.

    Each item is passed, in iteration order, to self.add_info_extractor.
    """
    for ie in gen_extractor_classes():
        self.add_info_extractor(ie)
def add_post_processor(self, pp, when='post_process'):
    """Add a PostProcessor object to the end of the chain.

    @param pp    The postprocessor to register; its set_downloader() is
                 called with this object after it has been queued
    @param when  Key of self._pps selecting the chain to append to.
                 A key that is not present in self._pps raises KeyError
    """
    self._pps[when].append(pp)
    pp.set_downloader(self)
def add_post_hook(self, ph):
    """Add the post hook

    @param ph  The hook to append to self._post_hooks; hooks are kept
               in registration order
    """
    self._post_hooks.append(ph)
def add_progress_hook(self, ph):
    """Add the download progress hook

    @param ph  The hook to append to self._progress_hooks; hooks are
               kept in registration order
    """
    self._progress_hooks.append(ph)
781 def add_postprocessor_hook(self
, ph
):
782 """Add the postprocessing progress hook"""
783 self
._postprocessor
_hooks
.append(ph
)
784 for pps
in self
._pps
.values():
786 pp
.add_progress_hook(ph
)
788 def _bidi_workaround(self
, message
):
789 if not hasattr(self
, '_output_channel'):
792 assert hasattr(self
, '_output_process')
793 assert isinstance(message
, compat_str
)
794 line_count
= message
.count('\n') + 1
795 self
._output
_process
.stdin
.write((message
+ '\n').encode())
796 self
._output
_process
.stdin
.flush()
797 res
= ''.join(self
._output
_channel
.readline().decode()
798 for _
in range(line_count
))
799 return res
[:-len('\n')]
801 def _write_string(self
, message
, out
=None, only_once
=False):
803 if message
in self
._printed
_messages
:
805 self
._printed
_messages
.add(message
)
806 write_string(message
, out
=out
, encoding
=self
.params
.get('encoding'))
def to_stdout(self, message, skip_eol=False, quiet=None):
    """Print message to stdout

    @param message   Text to print; it is passed through
                     self._bidi_workaround and a newline is always appended
    @param skip_eol  Deprecated and ignored. Any value other than the
                     default False only triggers a deprecation warning
    @param quiet     Deprecated and ignored. Any value other than the
                     default None only triggers a deprecation warning
    """
    # Both legacy arguments are kept in the signature for old callers,
    # but neither changes what is written any more.
    for name, was_given in (('quiet', quiet is not None), ('skip_eol', skip_eol is not False)):
        if was_given:
            self.deprecation_warning(
                f'"YoutubeDL.to_stdout" no longer accepts the argument {name}. '
                'Use "YoutubeDL.to_screen" instead')
    self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.out)
816 def to_screen(self
, message
, skip_eol
=False, quiet
=None):
817 """Print message to screen if not in quiet mode"""
818 if self
.params
.get('logger'):
819 self
.params
['logger'].debug(message
)
821 if (self
.params
.get('quiet') if quiet
is None else quiet
) and not self
.params
.get('verbose'):
824 '%s%s' % (self
._bidi
_workaround
(message
), ('' if skip_eol
else '\n')),
825 self
._out
_files
.screen
)
827 def to_stderr(self
, message
, only_once
=False):
828 """Print message to stderr"""
829 assert isinstance(message
, compat_str
)
830 if self
.params
.get('logger'):
831 self
.params
['logger'].error(message
)
833 self
._write
_string
(f
'{self._bidi_workaround(message)}\n', self
._out
_files
.error
, only_once
=only_once
)
835 def _send_console_code(self
, code
):
836 if compat_os_name
== 'nt' or not self
._out
_files
.console
:
838 self
._write
_string
(code
, self
._out
_files
.console
)
840 def to_console_title(self
, message
):
841 if not self
.params
.get('consoletitle', False):
843 message
= remove_terminal_sequences(message
)
844 if compat_os_name
== 'nt':
845 if ctypes
.windll
.kernel32
.GetConsoleWindow():
846 # c_wchar_p() might not be necessary if `message` is
847 # already of type unicode()
848 ctypes
.windll
.kernel32
.SetConsoleTitleW(ctypes
.c_wchar_p(message
))
850 self
._send
_console
_code
(f
'\033]0;{message}\007')
852 def save_console_title(self
):
853 if not self
.params
.get('consoletitle') or self
.params
.get('simulate'):
855 self
._send
_console
_code
('\033[22;0t') # Save the title on stack
857 def restore_console_title(self
):
858 if not self
.params
.get('consoletitle') or self
.params
.get('simulate'):
860 self
._send
_console
_code
('\033[23;0t') # Restore the title from stack
863 self
.save_console_title()
def __exit__(self, *args):
    """Restore the console title and, if configured, save the cookie jar.

    The cookie jar is saved only when the 'cookiefile' param is not None;
    discardable and expired cookies are included in the save.
    `args` is not inspected and None is returned, so an exception raised
    inside the `with` body is not suppressed.
    """
    self.restore_console_title()

    cookiefile = self.params.get('cookiefile')
    if cookiefile is None:
        return
    self.cookiejar.save(ignore_discard=True, ignore_expires=True)
872 def trouble(self
, message
=None, tb
=None, is_error
=True):
873 """Determine action to take when a download problem appears.
875 Depending on if the downloader has been configured to ignore
876 download errors or not, this method may throw an exception or
877 not when errors are found, after printing the message.
879 @param tb If given, is additional traceback information
880 @param is_error Whether to raise error according to ignorerrors
882 if message
is not None:
883 self
.to_stderr(message
)
884 if self
.params
.get('verbose'):
886 if sys
.exc_info()[0]: # if .trouble has been called from an except block
888 if hasattr(sys
.exc_info()[1], 'exc_info') and sys
.exc_info()[1].exc_info
[0]:
889 tb
+= ''.join(traceback
.format_exception(*sys
.exc_info()[1].exc_info
))
890 tb
+= encode_compat_str(traceback
.format_exc())
892 tb_data
= traceback
.format_list(traceback
.extract_stack())
893 tb
= ''.join(tb_data
)
898 if not self
.params
.get('ignoreerrors'):
899 if sys
.exc_info()[0] and hasattr(sys
.exc_info()[1], 'exc_info') and sys
.exc_info()[1].exc_info
[0]:
900 exc_info
= sys
.exc_info()[1].exc_info
902 exc_info
= sys
.exc_info()
903 raise DownloadError(message
, exc_info
)
904 self
._download
_retcode
= 1
908 EMPHASIS
='light blue',
914 SUPPRESS
='light black',
917 def _format_text(self
, handle
, allow_colors
, text
, f
, fallback
=None, *, test_encoding
=False):
921 # handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711
922 encoding
= self
.params
.get('encoding') or getattr(handle
, 'encoding', None) or 'ascii'
923 text
= text
.encode(encoding
, 'ignore').decode(encoding
)
924 if fallback
is not None and text
!= original_text
:
926 return format_text(text
, f
) if allow_colors
else text
if fallback
is None else fallback
def _format_out(self, *args, **kwargs):
    """Call `_format_text` for the stdout stream.

    The stdout handle and the stdout colour flag are supplied as the first
    two positional arguments; everything given by the caller is forwarded
    unchanged after them.
    """
    handle = self._out_files.out
    allow_colors = self._allow_colors.out
    return self._format_text(handle, allow_colors, *args, **kwargs)
def _format_screen(self, *args, **kwargs):
    """Call `_format_text` for the screen stream.

    The screen handle and the screen colour flag are supplied as the first
    two positional arguments; everything given by the caller is forwarded
    unchanged after them.
    """
    handle = self._out_files.screen
    allow_colors = self._allow_colors.screen
    return self._format_text(handle, allow_colors, *args, **kwargs)
def _format_err(self, *args, **kwargs):
    """Call `_format_text` for the error stream.

    The error handle and the error colour flag are supplied as the first
    two positional arguments; everything given by the caller is forwarded
    unchanged after them.
    """
    handle = self._out_files.error
    allow_colors = self._allow_colors.error
    return self._format_text(handle, allow_colors, *args, **kwargs)
937 def report_warning(self
, message
, only_once
=False):
939 Print the message to stderr, it will be prefixed with 'WARNING:'
940 If stderr is a tty file the 'WARNING:' will be colored
942 if self
.params
.get('logger') is not None:
943 self
.params
['logger'].warning(message
)
945 if self
.params
.get('no_warnings'):
947 self
.to_stderr(f
'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once
)
949 def deprecation_warning(self
, message
):
950 if self
.params
.get('logger') is not None:
951 self
.params
['logger'].warning(f
'DeprecationWarning: {message}')
953 self
.to_stderr(f
'{self._format_err("DeprecationWarning:", self.Styles.ERROR)} {message}', True)
def report_error(self, message, *args, **kwargs):
    '''
    Hand the message over to trouble() with an 'ERROR:' prefix in front of it.
    The prefix is styled through _format_err using Styles.ERROR; any extra
    positional and keyword arguments are forwarded to trouble() untouched.
    '''
    prefix = self._format_err("ERROR:", self.Styles.ERROR)
    self.trouble(f'{prefix} {message}', *args, **kwargs)
962 def write_debug(self
, message
, only_once
=False):
963 '''Log debug message or Print message to stderr'''
964 if not self
.params
.get('verbose', False):
966 message
= f
'[debug] {message}'
967 if self
.params
.get('logger'):
968 self
.params
['logger'].debug(message
)
970 self
.to_stderr(message
, only_once
)
972 def report_file_already_downloaded(self
, file_name
):
973 """Report file has already been fully downloaded."""
975 self
.to_screen('[download] %s has already been downloaded' % file_name
)
976 except UnicodeEncodeError:
977 self
.to_screen('[download] The file has already been downloaded')
979 def report_file_delete(self
, file_name
):
980 """Report that existing file will be deleted."""
982 self
.to_screen('Deleting existing file %s' % file_name
)
983 except UnicodeEncodeError:
984 self
.to_screen('Deleting existing file')
986 def raise_no_formats(self
, info
, forced
=False, *, msg
=None):
987 has_drm
= info
.get('_has_drm')
988 ignored
, expected
= self
.params
.get('ignore_no_formats_error'), bool(msg
)
989 msg
= msg
or has_drm
and 'This video is DRM protected' or 'No video formats found!'
990 if forced
or not ignored
:
991 raise ExtractorError(msg
, video_id
=info
['id'], ie
=info
['extractor'],
992 expected
=has_drm
or ignored
or expected
)
994 self
.report_warning(msg
)
996 def parse_outtmpl(self
):
997 outtmpl_dict
= self
.params
.get('outtmpl', {})
998 if not isinstance(outtmpl_dict
, dict):
999 outtmpl_dict
= {'default': outtmpl_dict}
1000 # Remove spaces in the default template
1001 if self
.params
.get('restrictfilenames'):
1002 sanitize
= lambda x
: x
.replace(' - ', ' ').replace(' ', '-')
1004 sanitize
= lambda x
: x
1005 outtmpl_dict
.update({
1006 k
: sanitize(v
) for k
, v
in DEFAULT_OUTTMPL
.items()
1007 if outtmpl_dict
.get(k
) is None})
1008 for _
, val
in outtmpl_dict
.items():
1009 if isinstance(val
, bytes):
1010 self
.report_warning('Parameter outtmpl is bytes, but should be a unicode string')
1013 def get_output_path(self
, dir_type
='', filename
=None):
1014 paths
= self
.params
.get('paths', {})
1015 assert isinstance(paths
, dict)
1016 path
= os
.path
.join(
1017 expand_path(paths
.get('home', '').strip()),
1018 expand_path(paths
.get(dir_type
, '').strip()) if dir_type
else '',
1020 return sanitize_path(path
, force
=self
.params
.get('windowsfilenames'))
def _outtmpl_expandpath(outtmpl):
    """Run expand_path over an output template without losing '%%' or '$$'."""
    # expand_path would collapse '%%' into '%' and '$$' into '$', yet '%%'
    # has to survive until the template dict substitution step. A random
    # 32-letter marker is wedged between the doubled characters to break
    # them up, and is stripped again once expansion is done.
    marker = ''.join(random.choice(ascii_letters) for _ in range(32))
    protected = outtmpl.replace('%%', f'%{marker}%')
    protected = protected.replace('$$', f'${marker}$')

    # The template is expanded here, before the template dict substitution,
    # because meta fields may contain env variables that must not be
    # expanded. E.g. with outtmpl "%(title)s.%(ext)s" and the title
    # "Hello $PATH", `$PATH` has to stay as it is.
    expanded = expand_path(protected)
    return expanded.replace(marker, '')
1038 def escape_outtmpl(outtmpl
):
1039 ''' Escape any remaining strings like %s, %abc% etc. '''
1041 STR_FORMAT_RE_TMPL
.format('', '(?![%(\0])'),
1042 lambda mobj
: ('' if mobj
.group('has_key') else '%') + mobj
.group(0),
1046 def validate_outtmpl(cls
, outtmpl
):
1047 ''' @return None or Exception object '''
1049 STR_FORMAT_RE_TMPL
.format('[^)]*', '[ljqBUDS]'),
1050 lambda mobj
: f
'{mobj.group(0)[:-1]}s',
1051 cls
._outtmpl
_expandpath
(outtmpl
))
1053 cls
.escape_outtmpl(outtmpl
) % collections
.defaultdict(int)
1055 except ValueError as err
:
1059 def _copy_infodict(info_dict
):
1060 info_dict
= dict(info_dict
)
1061 info_dict
.pop('__postprocessors', None)
1062 info_dict
.pop('__pending_error', None)
1065 def prepare_outtmpl(self
, outtmpl
, info_dict
, sanitize
=False):
1066 """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
1067 @param sanitize Whether to sanitize the output as a filename.
1068 For backward compatibility, a function can also be passed
1071 info_dict
.setdefault('epoch', int(time
.time())) # keep epoch consistent once set
1073 info_dict
= self
._copy
_infodict
(info_dict
)
1074 info_dict
['duration_string'] = ( # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
1075 formatSeconds(info_dict
['duration'], '-' if sanitize
else ':')
1076 if info_dict
.get('duration', None) is not None
1078 info_dict
['autonumber'] = int(self
.params
.get('autonumber_start', 1) - 1 + self
._num
_downloads
)
1079 info_dict
['video_autonumber'] = self
._num
_videos
1080 if info_dict
.get('resolution') is None:
1081 info_dict
['resolution'] = self
.format_resolution(info_dict
, default
=None)
1083 # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
1084 # of %(field)s to %(field)0Nd for backward compatibility
1085 field_size_compat_map
= {
1086 'playlist_index': number_of_digits(info_dict
.get('__last_playlist_index') or 0),
1087 'playlist_autonumber': number_of_digits(info_dict
.get('n_entries') or 0),
1088 'autonumber': self
.params
.get('autonumber_size') or 5,
1092 EXTERNAL_FORMAT_RE
= re
.compile(STR_FORMAT_RE_TMPL
.format('[^)]*', f
'[{STR_FORMAT_TYPES}ljqBUDS]'))
1097 # Field is of the form key1.key2...
1098 # where keys (except first) can be string, int or slice
1099 FIELD_RE
= r
'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num
=r
'(?:-?\d+)')
1100 MATH_FIELD_RE
= rf
'(?:{FIELD_RE}|-?{NUMBER_RE})'
1101 MATH_OPERATORS_RE
= r
'(?:%s)' % '|'.join(map(re
.escape
, MATH_FUNCTIONS
.keys()))
1102 INTERNAL_FORMAT_RE
= re
.compile(rf
'''(?x)
1104 (?P<fields>{FIELD_RE})
1105 (?P<maths>(?:{MATH_OPERATORS_RE}{MATH_FIELD_RE})*)
1106 (?:>(?P<strf_format>.+?))?
1108 (?P<alternate>(?<!\\),[^|&)]+)?
1109 (?:&(?P<replacement>.*?))?
1110 (?:\|(?P<default>.*?))?
1113 def _traverse_infodict(k
):
1117 return traverse_obj(info_dict
, k
, is_user_input
=True, traverse_string
=True)
1119 def get_value(mdict
):
1121 value
= _traverse_infodict(mdict
['fields'])
1124 value
= float_or_none(value
)
1125 if value
is not None:
1128 offset_key
= mdict
['maths']
1130 value
= float_or_none(value
)
1134 MATH_FIELD_RE
if operator
else MATH_OPERATORS_RE
,
1135 offset_key
).group(0)
1136 offset_key
= offset_key
[len(item
):]
1137 if operator
is None:
1138 operator
= MATH_FUNCTIONS
[item
]
1140 item
, multiplier
= (item
[1:], -1) if item
[0] == '-' else (item
, 1)
1141 offset
= float_or_none(item
)
1143 offset
= float_or_none(_traverse_infodict(item
))
1145 value
= operator(value
, multiplier
* offset
)
1146 except (TypeError, ZeroDivisionError):
1149 # Datetime formatting
1150 if mdict
['strf_format']:
1151 value
= strftime_or_none(value
, mdict
['strf_format'].replace('\\,', ','))
1155 na
= self
.params
.get('outtmpl_na_placeholder', 'NA')
1157 def filename_sanitizer(key
, value
, restricted
=self
.params
.get('restrictfilenames')):
1158 return sanitize_filename(str(value
), restricted
=restricted
, is_id
=(
1159 bool(re
.search(r
'(^|[_.])id(\.|$)', key
))
1160 if 'filename-sanitization' in self
.params
['compat_opts']
1163 sanitizer
= sanitize
if callable(sanitize
) else filename_sanitizer
1164 sanitize
= bool(sanitize
)
1166 def _dumpjson_default(obj
):
1167 if isinstance(obj
, (set, LazyList
)):
1171 def create_key(outer_mobj
):
1172 if not outer_mobj
.group('has_key'):
1173 return outer_mobj
.group(0)
1174 key
= outer_mobj
.group('key')
1175 mobj
= re
.match(INTERNAL_FORMAT_RE
, key
)
1176 initial_field
= mobj
.group('fields') if mobj
else ''
1177 value
, replacement
, default
= None, None, na
1179 mobj
= mobj
.groupdict()
1180 default
= mobj
['default'] if mobj
['default'] is not None else default
1181 value
= get_value(mobj
)
1182 replacement
= mobj
['replacement']
1183 if value
is None and mobj
['alternate']:
1184 mobj
= re
.match(INTERNAL_FORMAT_RE
, mobj
['remaining'][1:])
1188 fmt
= outer_mobj
.group('format')
1189 if fmt
== 's' and value
is not None and key
in field_size_compat_map
.keys():
1190 fmt
= f
'0{field_size_compat_map[key]:d}d'
1192 value
= default
if value
is None else value
if replacement
is None else replacement
1194 flags
= outer_mobj
.group('conversion') or ''
1195 str_fmt
= f
'{fmt[:-1]}s'
1196 if fmt
[-1] == 'l': # list
1197 delim
= '\n' if '#' in flags
else ', '
1198 value
, fmt
= delim
.join(map(str, variadic(value
, allowed_types
=(str, bytes)))), str_fmt
1199 elif fmt
[-1] == 'j': # json
1200 value
, fmt
= json
.dumps(value
, default
=_dumpjson_default
, indent
=4 if '#' in flags
else None), str_fmt
1201 elif fmt
[-1] == 'q': # quoted
1202 value
= map(str, variadic(value
) if '#' in flags
else [value
])
1203 value
, fmt
= ' '.join(map(compat_shlex_quote
, value
)), str_fmt
1204 elif fmt
[-1] == 'B': # bytes
1205 value
= f
'%{str_fmt}'.encode() % str(value
).encode()
1206 value
, fmt
= value
.decode('utf-8', 'ignore'), 's'
1207 elif fmt
[-1] == 'U': # unicode normalized
1208 value
, fmt
= unicodedata
.normalize(
1209 # "+" = compatibility equivalence, "#" = NFD
1210 'NF%s%s' % ('K' if '+' in flags
else '', 'D' if '#' in flags
else 'C'),
1212 elif fmt
[-1] == 'D': # decimal suffix
1213 num_fmt
, fmt
= fmt
[:-1].replace('#', ''), 's'
1214 value
= format_decimal_suffix(value
, f
'%{num_fmt}f%s' if num_fmt
else '%d%s',
1215 factor
=1024 if '#' in flags
else 1000)
1216 elif fmt
[-1] == 'S': # filename sanitization
1217 value
, fmt
= filename_sanitizer(initial_field
, value
, restricted
='#' in flags
), str_fmt
1218 elif fmt
[-1] == 'c':
1220 value
= str(value
)[0]
1223 elif fmt
[-1] not in 'rs': # numeric
1224 value
= float_or_none(value
)
1226 value
, fmt
= default
, 's'
1230 # If value is an object, sanitize might convert it to a string
1231 # So we convert it to repr first
1232 value
, fmt
= repr(value
), str_fmt
1233 if fmt
[-1] in 'csr':
1234 value
= sanitizer(initial_field
, value
)
1236 key
= '%s\0%s' % (key
.replace('%', '%\0'), outer_mobj
.group('format'))
1237 TMPL_DICT
[key
] = value
1238 return '{prefix}%({key}){fmt}'.format(key
=key
, fmt
=fmt
, prefix
=outer_mobj
.group('prefix'))
1240 return EXTERNAL_FORMAT_RE
.sub(create_key
, outtmpl
), TMPL_DICT
def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
    """Evaluate an output template against `info_dict` and return the result.

    The template and info dict are first run through prepare_outtmpl (extra
    arguments are forwarded to it); the prepared template is then escaped
    with escape_outtmpl and %-formatted with the prepared dict.
    """
    prepared_tmpl, tmpl_values = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
    escaped_tmpl = self.escape_outtmpl(prepared_tmpl)
    return escaped_tmpl % tmpl_values
1246 def _prepare_filename(self
, info_dict
, *, outtmpl
=None, tmpl_type
=None):
1247 assert None in (outtmpl
, tmpl_type
), 'outtmpl and tmpl_type are mutually exclusive'
1249 outtmpl
= self
.outtmpl_dict
.get(tmpl_type
or 'default', self
.outtmpl_dict
['default'])
1251 outtmpl
= self
._outtmpl
_expandpath
(outtmpl
)
1252 filename
= self
.evaluate_outtmpl(outtmpl
, info_dict
, True)
1256 if tmpl_type
in ('', 'temp'):
1257 final_ext
, ext
= self
.params
.get('final_ext'), info_dict
.get('ext')
1258 if final_ext
and ext
and final_ext
!= ext
and filename
.endswith(f
'.{final_ext}'):
1259 filename
= replace_extension(filename
, ext
, final_ext
)
1261 force_ext
= OUTTMPL_TYPES
[tmpl_type
]
1263 filename
= replace_extension(filename
, force_ext
, info_dict
.get('ext'))
1265 # https://github.com/blackjack4494/youtube-dlc/issues/85
1266 trim_file_name
= self
.params
.get('trim_file_name', False)
1268 no_ext
, *ext
= filename
.rsplit('.', 2)
1269 filename
= join_nonempty(no_ext
[:trim_file_name
], *ext
, delim
='.')
1272 except ValueError as err
:
1273 self
.report_error('Error in output template: ' + str(err
) + ' (encoding: ' + repr(preferredencoding()) + ')')
1276 def prepare_filename(self
, info_dict
, dir_type
='', *, outtmpl
=None, warn
=False):
1277 """Generate the output filename"""
1279 assert not dir_type
, 'outtmpl and dir_type are mutually exclusive'
1281 filename
= self
._prepare
_filename
(info_dict
, tmpl_type
=dir_type
, outtmpl
=outtmpl
)
1282 if not filename
and dir_type
not in ('', 'temp'):
1286 if not self
.params
.get('paths'):
1288 elif filename
== '-':
1289 self
.report_warning('--paths is ignored when an outputting to stdout', only_once
=True)
1290 elif os
.path
.isabs(filename
):
1291 self
.report_warning('--paths is ignored since an absolute path is given in output template', only_once
=True)
1292 if filename
== '-' or not filename
:
1295 return self
.get_output_path(dir_type
, filename
)
1297 def _match_entry(self
, info_dict
, incomplete
=False, silent
=False):
1298 """ Returns None if the file should be downloaded """
1300 video_title
= info_dict
.get('title', info_dict
.get('id', 'video'))
1303 if 'title' in info_dict
:
1304 # This can happen when we're just evaluating the playlist
1305 title
= info_dict
['title']
1306 matchtitle
= self
.params
.get('matchtitle', False)
1308 if not re
.search(matchtitle
, title
, re
.IGNORECASE
):
1309 return '"' + title
+ '" title did not match pattern "' + matchtitle
+ '"'
1310 rejecttitle
= self
.params
.get('rejecttitle', False)
1312 if re
.search(rejecttitle
, title
, re
.IGNORECASE
):
1313 return '"' + title
+ '" title matched reject pattern "' + rejecttitle
+ '"'
1314 date
= info_dict
.get('upload_date')
1315 if date
is not None:
1316 dateRange
= self
.params
.get('daterange', DateRange())
1317 if date
not in dateRange
:
1318 return f
'{date_from_str(date).isoformat()} upload date is not in range {dateRange}'
1319 view_count
= info_dict
.get('view_count')
1320 if view_count
is not None:
1321 min_views
= self
.params
.get('min_views')
1322 if min_views
is not None and view_count
< min_views
:
1323 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title
, view_count
, min_views
)
1324 max_views
= self
.params
.get('max_views')
1325 if max_views
is not None and view_count
> max_views
:
1326 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title
, view_count
, max_views
)
1327 if age_restricted(info_dict
.get('age_limit'), self
.params
.get('age_limit')):
1328 return 'Skipping "%s" because it is age restricted' % video_title
1330 match_filter
= self
.params
.get('match_filter')
1331 if match_filter
is not None:
1333 ret
= match_filter(info_dict
, incomplete
=incomplete
)
1335 # For backward compatibility
1336 ret
= None if incomplete
else match_filter(info_dict
)
1337 if ret
is NO_DEFAULT
:
1339 filename
= self
._format
_screen
(self
.prepare_filename(info_dict
), self
.Styles
.FILENAME
)
1340 reply
= input(self
._format
_screen
(
1341 f
'Download "{filename}"? (Y/n): ', self
.Styles
.EMPHASIS
)).lower().strip()
1342 if reply
in {'y', ''}
:
1345 return f
'Skipping {video_title}'
1346 elif ret
is not None:
1350 if self
.in_download_archive(info_dict
):
1351 reason
= '%s has already been recorded in the archive' % video_title
1352 break_opt
, break_err
= 'break_on_existing', ExistingVideoReached
1354 reason
= check_filter()
1355 break_opt
, break_err
= 'break_on_reject', RejectedVideoReached
1356 if reason
is not None:
1358 self
.to_screen('[download] ' + reason
)
1359 if self
.params
.get(break_opt
, False):
def add_extra_info(info_dict, extra_info):
    '''Copy into info_dict every key of extra_info that it does not have yet.

    Keys already present in info_dict keep their current value, even when
    that value is None. info_dict is modified in place; nothing is returned.
    '''
    for field, extra_value in extra_info.items():
        if field not in info_dict:
            info_dict[field] = extra_value
1369 def extract_info(self
, url
, download
=True, ie_key
=None, extra_info
=None,
1370 process
=True, force_generic_extractor
=False):
1372 Return a list with a dictionary for each video extracted.
1375 url -- URL to extract
1378 download -- whether to download videos during extraction
1379 ie_key -- extractor key hint
1380 extra_info -- dictionary containing the extra values to add to each result
1381 process -- whether to resolve all unresolved references (URLs, playlist items),
1382 must be True for download to work.
1383 force_generic_extractor -- force using the generic extractor
1386 if extra_info
is None:
1389 if not ie_key
and force_generic_extractor
:
1393 ies
= {ie_key: self._get_info_extractor_class(ie_key)}
1397 for ie_key
, ie
in ies
.items():
1398 if not ie
.suitable(url
):
1401 if not ie
.working():
1402 self
.report_warning('The program functionality for this site has been marked as broken, '
1403 'and will probably not work.')
1405 temp_id
= ie
.get_temp_id(url
)
1406 if temp_id
is not None and self
.in_download_archive({'id': temp_id, 'ie_key': ie_key}
):
1407 self
.to_screen(f
'[{ie_key}] {temp_id}: has already been recorded in the archive')
1408 if self
.params
.get('break_on_existing', False):
1409 raise ExistingVideoReached()
1411 return self
.__extract
_info
(url
, self
.get_info_extractor(ie_key
), download
, extra_info
, process
)
1413 self
.report_error('no suitable InfoExtractor for URL %s' % url
)
1415 def __handle_extraction_exceptions(func
):
1416 @functools.wraps(func
)
1417 def wrapper(self
, *args
, **kwargs
):
1420 return func(self
, *args
, **kwargs
)
1421 except (DownloadCancelled
, LazyList
.IndexError, PagedList
.IndexError):
1423 except ReExtractInfo
as e
:
1425 self
.to_screen(f
'{e}; Re-extracting data')
1427 self
.to_stderr('\r')
1428 self
.report_warning(f
'{e}; Re-extracting data')
1430 except GeoRestrictedError
as e
:
1433 msg
+= '\nThis video is available in %s.' % ', '.join(
1434 map(ISO3166Utils
.short2full
, e
.countries
))
1435 msg
+= '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1436 self
.report_error(msg
)
1437 except ExtractorError
as e
: # An error we somewhat expected
1438 self
.report_error(str(e
), e
.format_traceback())
1439 except Exception as e
:
1440 if self
.params
.get('ignoreerrors'):
1441 self
.report_error(str(e
), tb
=encode_compat_str(traceback
.format_exc()))
1447 def _wait_for_video(self
, ie_result
):
1448 if (not self
.params
.get('wait_for_video')
1449 or ie_result
.get('_type', 'video') != 'video'
1450 or ie_result
.get('formats') or ie_result
.get('url')):
1453 format_dur
= lambda dur
: '%02d:%02d:%02d' % timetuple_from_msec(dur
* 1000)[:-1]
1458 self
.to_screen(msg
+ ' ' * (len(last_msg
) - len(msg
)) + '\r', skip_eol
=True)
1461 min_wait
, max_wait
= self
.params
.get('wait_for_video')
1462 diff
= try_get(ie_result
, lambda x
: x
['release_timestamp'] - time
.time())
1463 if diff
is None and ie_result
.get('live_status') == 'is_upcoming':
1464 diff
= round(random
.uniform(min_wait
, max_wait
) if (max_wait
and min_wait
) else (max_wait
or min_wait
), 0)
1465 self
.report_warning('Release time of video is not known')
1466 elif (diff
or 0) <= 0:
1467 self
.report_warning('Video should already be available according to extracted info')
1468 diff
= min(max(diff
or 0, min_wait
or 0), max_wait
or float('inf'))
1469 self
.to_screen(f
'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1471 wait_till
= time
.time() + diff
1474 diff
= wait_till
- time
.time()
1477 raise ReExtractInfo('[wait] Wait period ended', expected
=True)
1478 progress(f
'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1480 except KeyboardInterrupt:
1482 raise ReExtractInfo('[wait] Interrupted by user', expected
=True)
1483 except BaseException
as e
:
1484 if not isinstance(e
, ReExtractInfo
):
1488 @__handle_extraction_exceptions
1489 def __extract_info(self
, url
, ie
, download
, extra_info
, process
):
1490 ie_result
= ie
.extract(url
)
1491 if ie_result
is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
1493 if isinstance(ie_result
, list):
1494 # Backwards compatibility: old IE result format
1496 '_type': 'compat_list',
1497 'entries': ie_result
,
1499 if extra_info
.get('original_url'):
1500 ie_result
.setdefault('original_url', extra_info
['original_url'])
1501 self
.add_default_extra_info(ie_result
, ie
, url
)
1503 self
._wait
_for
_video
(ie_result
)
1504 return self
.process_ie_result(ie_result
, download
, extra_info
)
1508 def add_default_extra_info(self
, ie_result
, ie
, url
):
1510 self
.add_extra_info(ie_result
, {
1512 'original_url': url
,
1514 webpage_url
= ie_result
.get('webpage_url')
1516 self
.add_extra_info(ie_result
, {
1517 'webpage_url_basename': url_basename(webpage_url
),
1518 'webpage_url_domain': get_domain(webpage_url
),
1521 self
.add_extra_info(ie_result
, {
1522 'extractor': ie
.IE_NAME
,
1523 'extractor_key': ie
.ie_key(),
1526 def process_ie_result(self
, ie_result
, download
=True, extra_info
=None):
1528 Take the result of the ie(may be modified) and resolve all unresolved
1529 references (URLs, playlist items).
1531 It will also download the videos if 'download'.
1532 Returns the resolved ie_result.
1534 if extra_info
is None:
1536 result_type
= ie_result
.get('_type', 'video')
1538 if result_type
in ('url', 'url_transparent'):
1539 ie_result
['url'] = sanitize_url(ie_result
['url'])
1540 if ie_result
.get('original_url'):
1541 extra_info
.setdefault('original_url', ie_result
['original_url'])
1543 extract_flat
= self
.params
.get('extract_flat', False)
1544 if ((extract_flat
== 'in_playlist' and 'playlist' in extra_info
)
1545 or extract_flat
is True):
1546 info_copy
= ie_result
.copy()
1547 ie
= try_get(ie_result
.get('ie_key'), self
.get_info_extractor
)
1548 if ie
and not ie_result
.get('id'):
1549 info_copy
['id'] = ie
.get_temp_id(ie_result
['url'])
1550 self
.add_default_extra_info(info_copy
, ie
, ie_result
['url'])
1551 self
.add_extra_info(info_copy
, extra_info
)
1552 info_copy
, _
= self
.pre_process(info_copy
)
1553 self
.__forced
_printings
(info_copy
, self
.prepare_filename(info_copy
), incomplete
=True)
1554 self
._raise
_pending
_errors
(info_copy
)
1555 if self
.params
.get('force_write_download_archive', False):
1556 self
.record_download_archive(info_copy
)
1559 if result_type
== 'video':
1560 self
.add_extra_info(ie_result
, extra_info
)
1561 ie_result
= self
.process_video_result(ie_result
, download
=download
)
1562 self
._raise
_pending
_errors
(ie_result
)
1563 additional_urls
= (ie_result
or {}).get('additional_urls')
1565 # TODO: Improve MetadataParserPP to allow setting a list
1566 if isinstance(additional_urls
, compat_str
):
1567 additional_urls
= [additional_urls
]
1569 '[info] %s: %d additional URL(s) requested' % (ie_result
['id'], len(additional_urls
)))
1570 self
.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls
))
1571 ie_result
['additional_entries'] = [
1573 url
, download
, extra_info
=extra_info
,
1574 force_generic_extractor
=self
.params
.get('force_generic_extractor'))
1575 for url
in additional_urls
1578 elif result_type
== 'url':
1579 # We have to add extra_info to the results because it may be
1580 # contained in a playlist
1581 return self
.extract_info(
1582 ie_result
['url'], download
,
1583 ie_key
=ie_result
.get('ie_key'),
1584 extra_info
=extra_info
)
1585 elif result_type
== 'url_transparent':
1586 # Use the information from the embedding page
1587 info
= self
.extract_info(
1588 ie_result
['url'], ie_key
=ie_result
.get('ie_key'),
1589 extra_info
=extra_info
, download
=False, process
=False)
1591 # extract_info may return None when ignoreerrors is enabled and
1592 # extraction failed with an error, don't crash and return early
1597 new_result
= info
.copy()
1598 new_result
.update(filter_dict(ie_result
, lambda k
, v
: (
1599 v
is not None and k
not in {'_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'}
)))
1601 # Extracted info may not be a video result (i.e.
1602 # info.get('_type', 'video') != video) but rather an url or
1603 # url_transparent. In such cases outer metadata (from ie_result)
1604 # should be propagated to inner one (info). For this to happen
1605 # _type of info should be overridden with url_transparent. This
1606 # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1607 if new_result
.get('_type') == 'url':
1608 new_result
['_type'] = 'url_transparent'
1610 return self
.process_ie_result(
1611 new_result
, download
=download
, extra_info
=extra_info
)
1612 elif result_type
in ('playlist', 'multi_video'):
1613 # Protect from infinite recursion due to recursively nested playlists
1614 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1615 webpage_url
= ie_result
['webpage_url']
1616 if webpage_url
in self
._playlist
_urls
:
1618 '[download] Skipping already downloaded playlist: %s'
1619 % ie_result
.get('title') or ie_result
.get('id'))
1622 self
._playlist
_level
+= 1
1623 self
._playlist
_urls
.add(webpage_url
)
1624 self
._fill
_common
_fields
(ie_result
, False)
1625 self
._sanitize
_thumbnails
(ie_result
)
1627 return self
.__process
_playlist
(ie_result
, download
)
1629 self
._playlist
_level
-= 1
1630 if not self
._playlist
_level
:
1631 self
._playlist
_urls
.clear()
1632 elif result_type
== 'compat_list':
1633 self
.report_warning(
1634 'Extractor %s returned a compat_list result. '
1635 'It needs to be updated.' % ie_result
.get('extractor'))
1638 self
.add_extra_info(r
, {
1639 'extractor': ie_result
['extractor'],
1640 'webpage_url': ie_result
['webpage_url'],
1641 'webpage_url_basename': url_basename(ie_result
['webpage_url']),
1642 'webpage_url_domain': get_domain(ie_result
['webpage_url']),
1643 'extractor_key': ie_result
['extractor_key'],
1646 ie_result
['entries'] = [
1647 self
.process_ie_result(_fixup(r
), download
, extra_info
)
1648 for r
in ie_result
['entries']
1652 raise Exception('Invalid result type: %s' % result_type
)
def _ensure_dir_exists(self, path):
    """Call make_dir for `path` and return whatever it returns.

    self.report_error is passed as make_dir's second argument
    (presumably the callback used to report a failure - confirm against
    make_dir).
    """
    reporter = self.report_error
    return make_dir(path, reporter)
1658 def _playlist_infodict(ie_result
, **kwargs
):
1661 'playlist': ie_result
.get('title') or ie_result
.get('id'),
1662 'playlist_id': ie_result
.get('id'),
1663 'playlist_title': ie_result
.get('title'),
1664 'playlist_uploader': ie_result
.get('uploader'),
1665 'playlist_uploader_id': ie_result
.get('uploader_id'),
1666 'playlist_index': 0,
# Process a playlist result: pick the requested entries, optionally write the
# playlist-level files, process every selected entry and finally store the
# per-entry results back into ie_result['entries'].
# NOTE(review): this view omits some original lines (see the gaps in the
# embedded line numbers), so only the visible statements are described.
1670 def __process_playlist(self
, ie_result
, download
):
1671 # We process each entry in the playlist
1672 playlist
= ie_result
.get('title') or ie_result
.get('id')
1673 self
.to_screen('[download] Downloading playlist: %s' % playlist
)
1675 if 'entries' not in ie_result
:
1676 raise EntryNotInPlaylist('There are no entries')
# Sentinel for positions that hold no entry when the result only carries the
# entries listed in 'requested_entries'.
1678 MissingEntry
= object()
1679 incomplete_entries
= bool(ie_result
.get('requested_entries'))
1680 if incomplete_entries
:
1681 def fill_missing_entries(entries
, indices
):
1682 ret
= [MissingEntry
] * max(indices
)
1683 for i
, entry
in zip(indices
, entries
):
1686 ie_result
['entries'] = fill_missing_entries(ie_result
['entries'], ie_result
['requested_entries'])
1688 playlist_results
= []
# Selection options: first index (default 1), optional last index and an
# optional explicit item list.
1690 playliststart
= self
.params
.get('playliststart', 1)
1691 playlistend
= self
.params
.get('playlistend')
1692 # For backwards compatibility, interpret -1 as whole list
1693 if playlistend
== -1:
1696 playlistitems_str
= self
.params
.get('playlist_items')
1697 playlistitems
= None
1698 if playlistitems_str
is not None:
# Walks a comma separated spec; "a-b" segments are iterated as the inclusive
# range a..b, any other segment is yielded as a single integer.
1699 def iter_playlistitems(format
):
1700 for string_segment
in format
.split(','):
1701 if '-' in string_segment
:
1702 start
, end
= string_segment
.split('-')
1703 for item
in range(int(start
), int(end
) + 1):
1706 yield int(string_segment
)
1707 playlistitems
= orderedSet(iter_playlistitems(playlistitems_str
))
1709 ie_entries
= ie_result
['entries']
# A plain list has a known length and is indexed directly (1-based index i).
1710 if isinstance(ie_entries
, list):
1711 playlist_count
= len(ie_entries
)
1712 msg
= f
'Collected {playlist_count} videos; downloading %d of them'
1713 ie_result
['playlist_count'] = ie_result
.get('playlist_count') or playlist_count
1716 return ie_entries
[i
- 1]
1718 msg
= 'Downloading %d videos'
# Anything that is not already a PagedList/LazyList is wrapped in LazyList.
1719 if not isinstance(ie_entries
, (PagedList
, LazyList
)):
1720 ie_entries
= LazyList(ie_entries
)
1721 elif isinstance(ie_entries
, InAdvancePagedList
):
1722 if ie_entries
._pagesize
== 1:
1723 playlist_count
= ie_entries
._pagecount
1726 return YoutubeDL
.__handle
_extraction
_exceptions
(
1727 lambda self
, i
: ie_entries
[i
- 1]
# Without an explicit item list the indices simply count up from playliststart.
1730 entries
, broken
= [], False
1731 items
= playlistitems
if playlistitems
is not None else itertools
.count(playliststart
)
1735 if playlistitems
is None and playlistend
is not None and playlistend
< i
:
1739 entry
= get_entry(i
)
1740 if entry
is MissingEntry
:
1741 raise EntryNotInPlaylist()
1742 except (IndexError, EntryNotInPlaylist
):
1743 if incomplete_entries
:
1744 raise EntryNotInPlaylist(f
'Entry {i} cannot be found')
1745 elif not playlistitems
:
1747 entries
.append(entry
)
1749 if entry
is not None:
1750 # TODO: Add auto-generated fields
1751 self
._match
_entry
(entry
, incomplete
=True, silent
=True)
1752 except (ExistingVideoReached
, RejectedVideoReached
):
1755 ie_result
['entries'] = entries
1757 # Save playlist_index before re-ordering
1759 ((playlistitems
[i
- 1] if playlistitems
else i
+ playliststart
- 1), entry
)
1760 for i
, entry
in enumerate(entries
, 1)
1761 if entry
is not None]
1762 n_entries
= len(entries
)
# Only record n_entries as the playlist count when no count is known yet and
# neither breakage nor an item/end selection limited the list.
1764 if not (ie_result
.get('playlist_count') or broken
or playlistitems
or playlistend
):
1765 ie_result
['playlist_count'] = n_entries
1767 if not playlistitems
and (playliststart
!= 1 or playlistend
):
1768 playlistitems
= list(range(playliststart
, playliststart
+ n_entries
))
1769 ie_result
['requested_entries'] = playlistitems
# Playlist-level files (info json, description, thumbnails) are written only
# when 'allow_playlist_files' is set and this is not a simulation.
1771 _infojson_written
= False
1772 write_playlist_files
= self
.params
.get('allow_playlist_files', True)
1773 if write_playlist_files
and self
.params
.get('list_thumbnails'):
1774 self
.list_thumbnails(ie_result
)
1775 if write_playlist_files
and not self
.params
.get('simulate'):
1776 ie_copy
= self
._playlist
_infodict
(ie_result
, n_entries
=n_entries
)
1777 _infojson_written
= self
._write
_info
_json
(
1778 'playlist', ie_result
, self
.prepare_filename(ie_copy
, 'pl_infojson'))
1779 if _infojson_written
is None:
1781 if self
._write
_description
('playlist', ie_result
,
1782 self
.prepare_filename(ie_copy
, 'pl_description')) is None:
1784 # TODO: This should be passed to ThumbnailsConvertor if necessary
1785 self
._write
_thumbnails
('playlist', ie_copy
, self
.prepare_filename(ie_copy
, 'pl_thumbnail'))
1787 if self
.params
.get('playlistreverse', False):
1788 entries
= entries
[::-1]
1789 if self
.params
.get('playlistrandom', False):
1790 random
.shuffle(entries
)
1792 x_forwarded_for
= ie_result
.get('__x_forwarded_for_ip')
1794 self
.to_screen(f
'[{ie_result["extractor"]}] playlist {playlist}: {msg % n_entries}')
# A falsy 'skip_playlist_after_errors' means there is no limit on failures.
1796 max_failures
= self
.params
.get('skip_playlist_after_errors') or float('inf')
1797 for i
, entry_tuple
in enumerate(entries
, 1):
1798 playlist_index
, entry
= entry_tuple
# The 'playlist-index' compat option recomputes the index from the position in
# the list as it is ordered now, replacing the index saved before re-ordering.
1799 if 'playlist-index' in self
.params
['compat_opts']:
1800 playlist_index
= playlistitems
[i
- 1] if playlistitems
else i
+ playliststart
- 1
1801 self
.to_screen('[download] Downloading video %s of %s' % (
1802 self
._format
_screen
(i
, self
.Styles
.ID
), self
._format
_screen
(n_entries
, self
.Styles
.EMPHASIS
)))
1803 # This __x_forwarded_for_ip thing is a bit ugly but requires
1806 entry
['__x_forwarded_for_ip'] = x_forwarded_for
1808 'n_entries': n_entries
,
1809 '__last_playlist_index': max(playlistitems
) if playlistitems
else (playlistend
or n_entries
),
1810 'playlist_count': ie_result
.get('playlist_count'),
1811 'playlist_index': playlist_index
,
1812 'playlist_autonumber': i
,
1813 'playlist': playlist
,
1814 'playlist_id': ie_result
.get('id'),
1815 'playlist_title': ie_result
.get('title'),
1816 'playlist_uploader': ie_result
.get('uploader'),
1817 'playlist_uploader_id': ie_result
.get('uploader_id'),
1818 'extractor': ie_result
['extractor'],
1819 'webpage_url': ie_result
['webpage_url'],
1820 'webpage_url_basename': url_basename(ie_result
['webpage_url']),
1821 'webpage_url_domain': get_domain(ie_result
['webpage_url']),
1822 'extractor_key': ie_result
['extractor_key'],
1825 if self
._match
_entry
(entry
, incomplete
=True) is not None:
1828 entry_result
= self
.__process
_iterable
_entry
(entry
, download
, extra
)
1829 if not entry_result
:
1831 if failures
>= max_failures
:
1833 'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist
, failures
))
1835 playlist_results
.append(entry_result
)
1836 ie_result
['entries'] = playlist_results
1838 # Write the updated info to json
1839 if _infojson_written
is True and self
._write
_info
_json
(
1840 'updated playlist', ie_result
,
1841 self
.prepare_filename(ie_copy
, 'pl_infojson'), overwrite
=True) is None:
1844 ie_result
= self
.run_all_pps('playlist', ie_result
)
1845 self
.to_screen(f
'[download] Finished downloading playlist: {playlist}')
@__handle_extraction_exceptions
def __process_iterable_entry(self, entry, download, extra_info):
    """Run one playlist entry through process_ie_result and return its result.

    `download` and `extra_info` are forwarded unchanged as keyword arguments.
    The decorator wraps the call; what it does with exceptions is defined
    where the decorator itself is declared.
    """
    processed = self.process_ie_result(entry, extra_info=extra_info, download=download)
    return processed
# Compile a textual filter_spec into a function that tests a format dict.
# NOTE(review): several original lines (operator tables, try/except and the
# inner filter function header) are not visible in this view.
1853 def _build_format_filter(self
, filter_spec
):
1854 " Returns a function to filter the formats according to the filter_spec "
# Numeric comparison: a known numeric key, an operator, an optional "?" and a
# number that may carry a size suffix.
1864 operator_rex
= re
.compile(r
'''(?x)\s*
1865 (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1866 (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1867 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
1868 ''' % '|'.join(map(re
.escape
, OPERATORS
.keys())))
1869 m
= operator_rex
.fullmatch(filter_spec
)
# The value is read as an int, or through parse_filesize, which is retried
# with a "B" suffix appended when it returns None. The control flow linking
# these attempts is not shown here.
1872 comparison_value
= int(m
.group('value'))
1874 comparison_value
= parse_filesize(m
.group('value'))
1875 if comparison_value
is None:
1876 comparison_value
= parse_filesize(m
.group('value') + 'B')
1877 if comparison_value
is None:
1879 'Invalid value %r in format specification %r' % (
1880 m
.group('value'), filter_spec
))
1881 op
= OPERATORS
[m
.group('op')]
1886 '^=': lambda attr
, value
: attr
.startswith(value
),
1887 '$=': lambda attr
, value
: attr
.endswith(value
),
1888 '*=': lambda attr
, value
: value
in attr
,
1889 '~=': lambda attr
, value
: value
.search(attr
) is not None
# String comparison: key, optional "!" negation, operator, optional "?" and a
# value that may be quoted.
1891 str_operator_rex
= re
.compile(r
'''(?x)\s*
1892 (?P<key>[a-zA-Z0-9._-]+)\s*
1893 (?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?
1895 (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
1896 (?(quote)(?P=quote))\s*
1897 ''' % '|'.join(map(re
.escape
, STR_OPERATORS
.keys())))
1898 m
= str_operator_rex
.fullmatch(filter_spec
)
# For "~=" the value is compiled as a regular expression. Otherwise a
# backslash in front of a backslash or quote character is stripped.
1900 if m
.group('op') == '~=':
1901 comparison_value
= re
.compile(m
.group('value'))
1903 comparison_value
= re
.sub(r
'''\\([\\"'])''', r
'\1', m
.group('value'))
1904 str_op
= STR_OPERATORS
[m
.group('op')]
1905 if m
.group('negation'):
1906 op
= lambda attr
, value
: not str_op(attr
, value
)
1911 raise SyntaxError('Invalid filter specification %r' % filter_spec
)
# A format lacking the key yields the "?" (none_inclusive) match group, which
# is truthy only when the "?" was given.
1914 actual_value
= f
.get(m
.group('key'))
1915 if actual_value
is None:
1916 return m
.group('none_inclusive')
1917 return op(actual_value
, comparison_value
)
# Test formats by attempting a test download (test=True) into a temporary file
# created under the 'temp' output path.
# NOTE(review): the loop header over `formats` and the branch bodies are not
# visible in this view.
1920 def _check_formats(self
, formats
):
1922 self
.to_screen('[info] Testing format %s' % f
['format_id'])
1923 path
= self
.get_output_path('temp')
1924 if not self
._ensure
_dir
_exists
(f
'{path}/'):
# delete=False keeps the file on disk after it is closed. It is removed
# explicitly further down.
1926 temp_file
= tempfile
.NamedTemporaryFile(suffix
='.tmp', delete
=False, dir=path
or None)
1929 success
, _
= self
.dl(temp_file
.name
, f
, test
=True)
# Download, OS, value and network errors are caught here (handler body not shown).
1930 except (DownloadError
, OSError, ValueError) + network_exceptions
:
# Remove the temporary file if it still exists; a failed removal only warns.
1933 if os
.path
.exists(temp_file
.name
):
1935 os
.remove(temp_file
.name
)
1937 self
.report_warning('Unable to delete temporary file "%s"' % temp_file
.name
)
1941 self
.to_screen('[info] Unable to download format %s. Skipping...' % f
['format_id'])
# Return the format specification string to use by default; one of the three
# literal specs at the end is chosen.
# NOTE(review): the assignments that define `prefer_best` and `compat` are only
# partly visible in this view, so their exact conditions are not restated.
1943 def _default_format_spec(self
, info_dict
, download
=True):
# Merging needs an FFmpegMergerPP that is available and reports can_merge().
1946 merger
= FFmpegMergerPP(self
)
1947 return merger
.available
and merger
.can_merge()
1950 not self
.params
.get('simulate')
1954 or info_dict
.get('is_live') and not self
.params
.get('live_from_start')
1955 or self
.outtmpl_dict
['default'] == '-'))
1958 or self
.params
.get('allow_multiple_audio_streams', False)
1959 or 'format-spec' in self
.params
['compat_opts'])
1962 'best/bestvideo+bestaudio' if prefer_best
1963 else 'bestvideo*+bestaudio/best' if not compat
1964 else 'bestvideo+bestaudio/best')
# Parse a format specification string and return the selector function built
# from the parsed result (see the last two statements of this block).
# NOTE(review): many original lines are missing from this view (see the gaps
# in the embedded line numbers); comments cover the visible statements only.
1966 def build_format_selector(self
, format_spec
):
# Builds a SyntaxError whose message shows the spec and puts a caret under the
# column taken from start[1].
1967 def syntax_error(note
, start
):
1969 'Invalid format specification: '
1970 '{}\n\t{}\n\t{}^'.format(note
, format_spec
, ' ' * start
[1]))
1971 return SyntaxError(message
)
1973 PICKFIRST
= 'PICKFIRST'
1977 FormatSelector
= collections
.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1979 allow_multiple_streams
= {'audio': self
.params
.get('allow_multiple_audio_streams', False),
1980 'video': self
.params
.get('allow_multiple_video_streams', False)}
1982 check_formats
= self
.params
.get('check_formats') == 'selected'
# Collects token strings until the closing "]" and returns them joined.
1984 def _parse_filter(tokens
):
1986 for type, string
, start
, _
, _
in tokens
:
1987 if type == tokenize
.OP
and string
== ']':
1988 return ''.join(filter_parts
)
1990 filter_parts
.append(string
)
1992 def _remove_unused_ops(tokens
):
1993 # Remove operators that we don't use and join them with the surrounding strings
1994 # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1995 ALLOWED_OPS
= ('/', '+', ',', '(', ')')
1996 last_string
, last_start
, last_end
, last_line
= None, None, None, None
1997 for type, string
, start
, end
, line
in tokens
:
1998 if type == tokenize
.OP
and string
== '[':
2000 yield tokenize
.NAME
, last_string
, last_start
, last_end
, last_line
2002 yield type, string
, start
, end
, line
2003 # everything inside brackets will be handled by _parse_filter
2004 for type, string
, start
, end
, line
in tokens
:
2005 yield type, string
, start
, end
, line
2006 if type == tokenize
.OP
and string
== ']':
2008 elif type == tokenize
.OP
and string
in ALLOWED_OPS
:
2010 yield tokenize
.NAME
, last_string
, last_start
, last_end
, last_line
2012 yield type, string
, start
, end
, line
2013 elif type in [tokenize
.NAME
, tokenize
.NUMBER
, tokenize
.OP
]:
2015 last_string
= string
2019 last_string
+= string
2021 yield tokenize
.NAME
, last_string
, last_start
, last_end
, last_line
# Recursive parser producing FormatSelector tuples. The inside_* flags record
# which construct (merge "+", choice "/", group "()") the call is nested in.
2023 def _parse_format_selection(tokens
, inside_merge
=False, inside_choice
=False, inside_group
=False):
2025 current_selector
= None
2026 for type, string
, start
, _
, _
in tokens
:
2027 # ENCODING is only defined in python 3.x
2028 if type == getattr(tokenize
, 'ENCODING', None):
2030 elif type in [tokenize
.NAME
, tokenize
.NUMBER
]:
2031 current_selector
= FormatSelector(SINGLE
, string
, [])
2032 elif type == tokenize
.OP
:
2034 if not inside_group
:
2035 # ')' will be handled by the parentheses group
2036 tokens
.restore_last_token()
2038 elif inside_merge
and string
in ['/', ',']:
2039 tokens
.restore_last_token()
2041 elif inside_choice
and string
== ',':
2042 tokens
.restore_last_token()
2045 if not current_selector
:
2046 raise syntax_error('"," must follow a format selector', start
)
2047 selectors
.append(current_selector
)
2048 current_selector
= None
2050 if not current_selector
:
2051 raise syntax_error('"/" must follow a format selector', start
)
2052 first_choice
= current_selector
2053 second_choice
= _parse_format_selection(tokens
, inside_choice
=True)
2054 current_selector
= FormatSelector(PICKFIRST
, (first_choice
, second_choice
), [])
2056 if not current_selector
:
2057 current_selector
= FormatSelector(SINGLE
, 'best', [])
2058 format_filter
= _parse_filter(tokens
)
2059 current_selector
.filters
.append(format_filter
)
2061 if current_selector
:
2062 raise syntax_error('Unexpected "("', start
)
2063 group
= _parse_format_selection(tokens
, inside_group
=True)
2064 current_selector
= FormatSelector(GROUP
, group
, [])
2066 if not current_selector
:
2067 raise syntax_error('Unexpected "+"', start
)
2068 selector_1
= current_selector
2069 selector_2
= _parse_format_selection(tokens
, inside_merge
=True)
2071 raise syntax_error('Expected a selector', start
)
2072 current_selector
= FormatSelector(MERGE
, (selector_1
, selector_2
), [])
2074 raise syntax_error(f
'Operator not recognized: "{string}"', start
)
2075 elif type == tokenize
.ENDMARKER
:
2077 if current_selector
:
2078 selectors
.append(current_selector
)
# Combine a pair of formats into one merged format description. Formats that
# already carry 'requested_formats' contribute those instead of themselves.
2081 def _merge(formats_pair
):
2082 format_1
, format_2
= formats_pair
2085 formats_info
.extend(format_1
.get('requested_formats', (format_1
,)))
2086 formats_info
.extend(format_2
.get('requested_formats', (format_2
,)))
2088 if not allow_multiple_streams
['video'] or not allow_multiple_streams
['audio']:
2089 get_no_more
= {'video': False, 'audio': False}
2090 for (i
, fmt_info
) in enumerate(formats_info
):
2091 if fmt_info
.get('acodec') == fmt_info
.get('vcodec') == 'none':
2094 for aud_vid
in ['audio', 'video']:
2095 if not allow_multiple_streams
[aud_vid
] and fmt_info
.get(aud_vid
[0] + 'codec') != 'none':
2096 if get_no_more
[aud_vid
]:
2099 get_no_more
[aud_vid
] = True
2101 if len(formats_info
) == 1:
2102 return formats_info
[0]
2104 video_fmts
= [fmt_info
for fmt_info
in formats_info
if fmt_info
.get('vcodec') != 'none']
2105 audio_fmts
= [fmt_info
for fmt_info
in formats_info
if fmt_info
.get('acodec') != 'none']
2107 the_only_video
= video_fmts
[0] if len(video_fmts
) == 1 else None
2108 the_only_audio
= audio_fmts
[0] if len(audio_fmts
) == 1 else None
2110 output_ext
= self
.params
.get('merge_output_format')
2113 output_ext
= the_only_video
['ext']
2114 elif the_only_audio
and not video_fmts
:
2115 output_ext
= the_only_audio
['ext']
# Helper: the truthy results of traverse_obj(fmt, *keys) over all merged formats.
2119 filtered
= lambda *keys
: filter(None, (traverse_obj(fmt
, *keys
) for fmt
in formats_info
))
2122 'requested_formats': formats_info
,
2123 'format': '+'.join(filtered('format')),
2124 'format_id': '+'.join(filtered('format_id')),
2126 'protocol': '+'.join(map(determine_protocol
, formats_info
)),
2127 'language': '+'.join(orderedSet(filtered('language'))) or None,
2128 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
2129 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
2130 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
2135 'width': the_only_video
.get('width'),
2136 'height': the_only_video
.get('height'),
2137 'resolution': the_only_video
.get('resolution') or self
.format_resolution(the_only_video
),
2138 'fps': the_only_video
.get('fps'),
2139 'dynamic_range': the_only_video
.get('dynamic_range'),
2140 'vcodec': the_only_video
.get('vcodec'),
2141 'vbr': the_only_video
.get('vbr'),
2142 'stretched_ratio': the_only_video
.get('stretched_ratio'),
2147 'acodec': the_only_audio
.get('acodec'),
2148 'abr': the_only_audio
.get('abr'),
2149 'asr': the_only_audio
.get('asr'),
# Local wrapper around self._check_formats, gated on the check_formats flag
# computed above (the body of the `if not check_formats` branch is not shown).
2154 def _check_formats(formats
):
2155 if not check_formats
:
2158 yield from self
._check
_formats
(formats
)
# Turn a parsed selector (a list, or a GROUP / PICKFIRST / MERGE / SINGLE
# FormatSelector) into a function taking the selection context dict.
2160 def _build_selector_function(selector
):
2161 if isinstance(selector
, list): # ,
2162 fs
= [_build_selector_function(s
) for s
in selector
]
2164 def selector_function(ctx
):
2167 return selector_function
2169 elif selector
.type == GROUP
: # ()
2170 selector_function
= _build_selector_function(selector
.selector
)
2172 elif selector
.type == PICKFIRST
: # /
2173 fs
= [_build_selector_function(s
) for s
in selector
.selector
]
2175 def selector_function(ctx
):
2177 picked_formats
= list(f(ctx
))
2179 return picked_formats
2182 elif selector
.type == MERGE
: # +
2183 selector_1
, selector_2
= map(_build_selector_function
, selector
.selector
)
2185 def selector_function(ctx
):
2186 for pair
in itertools
.product(selector_1(ctx
), selector_2(ctx
)):
2189 elif selector
.type == SINGLE
: # atom
2190 format_spec
= selector
.selector
or 'best'
2192 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
2193 if format_spec
== 'all':
2194 def selector_function(ctx
):
2195 yield from _check_formats(ctx
['formats'][::-1])
2196 elif format_spec
== 'mergeall':
2197 def selector_function(ctx
):
2198 formats
= list(_check_formats(
2199 f
for f
in ctx
['formats'] if f
.get('vcodec') != 'none' or f
.get('acodec') != 'none'))
2202 merged_format
= formats
[-1]
2203 for f
in formats
[-2::-1]:
2204 merged_format
= _merge((merged_format
, f
))
2208 format_fallback
, seperate_fallback
, format_reverse
, format_idx
= False, None, True, 1
2210 r
'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
2212 if mobj
is not None:
2213 format_idx
= int_or_none(mobj
.group('n'), default
=1)
2214 format_reverse
= mobj
.group('bw')[0] == 'b'
2215 format_type
= (mobj
.group('type') or [None])[0]
2216 not_format_type
= {'v': 'a', 'a': 'v'}
.get(format_type
)
2217 format_modified
= mobj
.group('mod') is not None
2219 format_fallback
= not format_type
and not format_modified
# for b, w
2221 (lambda f
: f
.get('%scodec' % format_type
) != 'none')
2222 if format_type
and format_modified
# bv*, ba*, wv*, wa*
2223 else (lambda f
: f
.get('%scodec' % not_format_type
) == 'none')
2224 if format_type
# bv, ba, wv, wa
2225 else (lambda f
: f
.get('vcodec') != 'none' and f
.get('acodec') != 'none')
2226 if not format_modified
# b, w
2227 else lambda f
: True) # b*, w*
2228 filter_f
= lambda f
: _filter_f(f
) and (
2229 f
.get('vcodec') != 'none' or f
.get('acodec') != 'none')
2231 if format_spec
in self
._format
_selection
_exts
['audio']:
2232 filter_f
= lambda f
: f
.get('ext') == format_spec
and f
.get('acodec') != 'none'
2233 elif format_spec
in self
._format
_selection
_exts
['video']:
2234 filter_f
= lambda f
: f
.get('ext') == format_spec
and f
.get('acodec') != 'none' and f
.get('vcodec') != 'none'
2235 seperate_fallback
= lambda f
: f
.get('ext') == format_spec
and f
.get('vcodec') != 'none'
2236 elif format_spec
in self
._format
_selection
_exts
['storyboards']:
2237 filter_f
= lambda f
: f
.get('ext') == format_spec
and f
.get('acodec') == 'none' and f
.get('vcodec') == 'none'
2239 filter_f
= lambda f
: f
.get('format_id') == format_spec
# id
2241 def selector_function(ctx
):
2242 formats
= list(ctx
['formats'])
2243 matches
= list(filter(filter_f
, formats
)) if filter_f
is not None else formats
2245 if format_fallback
and ctx
['incomplete_formats']:
2246 # for extractors with incomplete formats (audio only (soundcloud)
2247 # or video only (imgur)) best/worst will fallback to
2248 # best/worst {video,audio}-only format
2250 elif seperate_fallback
and not ctx
['has_merged_format']:
2251 # for compatibility with youtube-dl when there is no pre-merged format
2252 matches
= list(filter(seperate_fallback
, formats
))
2253 matches
= LazyList(_check_formats(matches
[::-1 if format_reverse
else 1]))
2255 yield matches
[format_idx
- 1]
2256 except LazyList
.IndexError:
# Filters attached to the selector are applied to a copy of the context, so
# the caller's ctx['formats'] is left untouched.
2259 filters
= [self
._build
_format
_filter
(f
) for f
in selector
.filters
]
2261 def final_selector(ctx
):
2262 ctx_copy
= dict(ctx
)
2263 for _filter
in filters
:
2264 ctx_copy
['formats'] = list(filter(_filter
, ctx_copy
['formats']))
2265 return selector_function(ctx_copy
)
2266 return final_selector
# The spec is tokenized with the tokenize module, fed from an in-memory bytes
# stream holding the encoded spec.
2268 stream
= io
.BytesIO(format_spec
.encode())
2270 tokens
= list(_remove_unused_ops(tokenize
.tokenize(stream
.readline
)))
2271 except tokenize
.TokenError
:
2272 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec
)))
# Iterator over the token list with a restore_last_token() method.
# NOTE(review): presumably that method steps the counter back by one; its body
# is not visible here.
2274 class TokenIterator
:
2275 def __init__(self
, tokens
):
2276 self
.tokens
= tokens
2283 if self
.counter
>= len(self
.tokens
):
2284 raise StopIteration()
2285 value
= self
.tokens
[self
.counter
]
2291 def restore_last_token(self
):
2294 parsed_selector
= _parse_format_selection(iter(TokenIterator(tokens
)))
2295 return _build_selector_function(parsed_selector
)
# Compute the HTTP headers for info_dict: the configured 'http_headers' merged
# with the ones in info_dict, plus a Cookie header for info_dict['url'] and an
# X-Forwarded-For header when one is stored in info_dict.
# NOTE(review): the guard around the Cookie assignment and the return
# statement are not visible in this view.
2297 def _calc_headers(self
, info_dict
):
2298 res
= merge_headers(self
.params
['http_headers'], info_dict
.get('http_headers') or {})
2300 cookies
= self
._calc
_cookies
(info_dict
['url'])
2302 res
['Cookie'] = cookies
# An X-Forwarded-For header that is already present is not overridden.
2304 if 'X-Forwarded-For' not in res
:
2305 x_forwarded_for_ip
= info_dict
.get('__x_forwarded_for_ip')
2306 if x_forwarded_for_ip
:
2307 res
['X-Forwarded-For'] = x_forwarded_for_ip
def _calc_cookies(self, url):
    """Return the 'Cookie' header that self.cookiejar adds to a request for url.

    A request object is built for `url`, handed to the cookie jar's
    add_cookie_header(), and the resulting header is read back from it.
    """
    request = sanitized_Request(url)
    self.cookiejar.add_cookie_header(request)
    cookie_header = request.get_header('Cookie')
    return cookie_header
# Sort `thumbnails` in place by a key tuple of preference, width, height and
# id. A missing numeric value counts as -1 and a missing id as ''.
# NOTE(review): the end of the key tuple is not visible in this view.
2316 def _sort_thumbnails(self
, thumbnails
):
2317 thumbnails
.sort(key
=lambda t
: (
2318 t
.get('preference') if t
.get('preference') is not None else -1,
2319 t
.get('width') if t
.get('width') is not None else -1,
2320 t
.get('height') if t
.get('height') is not None else -1,
2321 t
.get('id') if t
.get('id') is not None else '',
# Normalise info_dict['thumbnails']: build the list from the single
# 'thumbnail' value when it is absent, sort it, fill in 'resolution' and
# sanitize every URL.
# NOTE(review): some guards and loop bodies are not visible in this view.
2324 def _sanitize_thumbnails(self
, info_dict
):
2325 thumbnails
= info_dict
.get('thumbnails')
2326 if thumbnails
is None:
2327 thumbnail
= info_dict
.get('thumbnail')
2329 info_dict
['thumbnails'] = thumbnails
= [{'url': thumbnail}
]
# Probes each thumbnail URL with a HEAD request; a network error is reported
# on screen with a "Skipping..." message.
2333 def check_thumbnails(thumbnails
):
2334 for t
in thumbnails
:
2335 self
.to_screen(f
'[info] Testing thumbnail {t["id"]}')
2337 self
.urlopen(HEADRequest(t
['url']))
2338 except network_exceptions
as err
:
2339 self
.to_screen(f
'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2343 self
._sort
_thumbnails
(thumbnails
)
2344 for i
, t
in enumerate(thumbnails
):
2345 if t
.get('id') is None:
2347 if t
.get('width') and t
.get('height'):
2348 t
['resolution'] = '%dx%d' % (t
['width'], t
['height'])
2349 t
['url'] = sanitize_url(t
['url'])
# Only when 'check_formats' is exactly True are the thumbnails wrapped in a
# LazyList over check_thumbnails (fed the reversed list).
2351 if self
.params
.get('check_formats') is True:
2352 info_dict
['thumbnails'] = LazyList(check_thumbnails(thumbnails
[::-1]), reverse
=True)
2354 info_dict
['thumbnails'] = thumbnails
# Fill derived fields into info_dict: fulltitle and a generic title fallback,
# duration_string, date fields computed from timestamps, live status flags
# and chapter/season/episode names generated from their *_number fields.
# NOTE(review): several guards (e.g. around the title handling and the
# live_status loop) are not visible in this view.
2356 def _fill_common_fields(self
, info_dict
, is_video
=True):
2357 # TODO: move sanitization here
2359 # playlists are allowed to lack "title"
2360 title
= info_dict
.get('title', NO_DEFAULT
)
2361 if title
is NO_DEFAULT
:
2362 raise ExtractorError('Missing "title" field in extractor result',
2363 video_id
=info_dict
['id'], ie
=info_dict
['extractor'])
2364 info_dict
['fulltitle'] = title
2367 self
.write_debug('Extractor gave empty title. Creating a generic title')
2369 self
.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
2370 info_dict
['title'] = f
'{info_dict["extractor"].replace(":", "-")} video #{info_dict["id"]}'
2372 if info_dict
.get('duration') is not None:
2373 info_dict
['duration_string'] = formatSeconds(info_dict
['duration'])
# Each date field is derived from its timestamp only when the date is missing.
2375 for ts_key
, date_key
in (
2376 ('timestamp', 'upload_date'),
2377 ('release_timestamp', 'release_date'),
2378 ('modified_timestamp', 'modified_date'),
2380 if info_dict
.get(date_key
) is None and info_dict
.get(ts_key
) is not None:
2381 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2382 # see http://bugs.python.org/issue1646728)
2383 with contextlib
.suppress(ValueError, OverflowError, OSError):
2384 upload_date
= datetime
.datetime
.utcfromtimestamp(info_dict
[ts_key
])
2385 info_dict
[date_key
] = upload_date
.strftime('%Y%m%d')
# live_status is derived from is_live/was_live when it is not given; flags
# that are still None are then set from live_status.
2387 live_keys
= ('is_live', 'was_live')
2388 live_status
= info_dict
.get('live_status')
2389 if live_status
is None:
2390 for key
in live_keys
:
2391 if info_dict
.get(key
) is False:
2393 if info_dict
.get(key
):
2396 if all(info_dict
.get(key
) is False for key
in live_keys
):
2397 live_status
= 'not_live'
2399 info_dict
['live_status'] = live_status
2400 for key
in live_keys
:
2401 if info_dict
.get(key
) is None:
2402 info_dict
[key
] = (live_status
== key
)
2404 # Auto generate title fields corresponding to the *_number fields when missing
2405 # in order to always have clean titles. This is very common for TV series.
2406 for field
in ('chapter', 'season', 'episode'):
2407 if info_dict
.get('%s_number' % field
) is not None and not info_dict
.get(field
):
2408 info_dict
[field
] = '%s %d' % (field
.capitalize(), info_dict
['%s_number' % field
])
# Remove '__pending_error' from `info` (None if absent) and pass it to
# report_error with tb=False.
# NOTE(review): the condition guarding the report_error call is not visible in
# this view.
2410 def _raise_pending_errors(self
, info
):
2411 err
= info
.pop('__pending_error', None)
2413 self
.report_error(err
, tb
=False)
2415 def process_video_result(self
, info_dict
, download
=True):
2416 assert info_dict
.get('_type', 'video') == 'video'
2417 self
._num
_videos
+= 1
2419 if 'id' not in info_dict
:
2420 raise ExtractorError('Missing "id" field in extractor result', ie
=info_dict
['extractor'])
2421 elif not info_dict
.get('id'):
2422 raise ExtractorError('Extractor failed to obtain "id"', ie
=info_dict
['extractor'])
2424 def report_force_conversion(field
, field_not
, conversion
):
2425 self
.report_warning(
2426 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2427 % (field
, field_not
, conversion
))
2429 def sanitize_string_field(info
, string_field
):
2430 field
= info
.get(string_field
)
2431 if field
is None or isinstance(field
, compat_str
):
2433 report_force_conversion(string_field
, 'a string', 'string')
2434 info
[string_field
] = compat_str(field
)
2436 def sanitize_numeric_fields(info
):
2437 for numeric_field
in self
._NUMERIC
_FIELDS
:
2438 field
= info
.get(numeric_field
)
2439 if field
is None or isinstance(field
, (int, float)):
2441 report_force_conversion(numeric_field
, 'numeric', 'int')
2442 info
[numeric_field
] = int_or_none(field
)
2444 sanitize_string_field(info_dict
, 'id')
2445 sanitize_numeric_fields(info_dict
)
2446 if (info_dict
.get('duration') or 0) <= 0 and info_dict
.pop('duration', None):
2447 self
.report_warning('"duration" field is negative, there is an error in extractor')
2449 if 'playlist' not in info_dict
:
2450 # It isn't part of a playlist
2451 info_dict
['playlist'] = None
2452 info_dict
['playlist_index'] = None
2454 self
._sanitize
_thumbnails
(info_dict
)
2456 thumbnail
= info_dict
.get('thumbnail')
2457 thumbnails
= info_dict
.get('thumbnails')
2459 info_dict
['thumbnail'] = sanitize_url(thumbnail
)
2461 info_dict
['thumbnail'] = thumbnails
[-1]['url']
2463 if info_dict
.get('display_id') is None and 'id' in info_dict
:
2464 info_dict
['display_id'] = info_dict
['id']
2466 self
._fill
_common
_fields
(info_dict
)
2468 for cc_kind
in ('subtitles', 'automatic_captions'):
2469 cc
= info_dict
.get(cc_kind
)
2471 for _
, subtitle
in cc
.items():
2472 for subtitle_format
in subtitle
:
2473 if subtitle_format
.get('url'):
2474 subtitle_format
['url'] = sanitize_url(subtitle_format
['url'])
2475 if subtitle_format
.get('ext') is None:
2476 subtitle_format
['ext'] = determine_ext(subtitle_format
['url']).lower()
2478 automatic_captions
= info_dict
.get('automatic_captions')
2479 subtitles
= info_dict
.get('subtitles')
2481 info_dict
['requested_subtitles'] = self
.process_subtitles(
2482 info_dict
['id'], subtitles
, automatic_captions
)
2484 if info_dict
.get('formats') is None:
2485 # There's only one format available
2486 formats
= [info_dict
]
2488 formats
= info_dict
['formats']
2490 # or None ensures --clean-infojson removes it
2491 info_dict
['_has_drm'] = any(f
.get('has_drm') for f
in formats
) or None
2492 if not self
.params
.get('allow_unplayable_formats'):
2493 formats
= [f
for f
in formats
if not f
.get('has_drm')]
2494 if info_dict
['_has_drm'] and all(
2495 f
.get('acodec') == f
.get('vcodec') == 'none' for f
in formats
):
2496 self
.report_warning(
2497 'This video is DRM protected and only images are available for download. '
2498 'Use --list-formats to see them')
2500 get_from_start
= not info_dict
.get('is_live') or bool(self
.params
.get('live_from_start'))
2501 if not get_from_start
:
2502 info_dict
['title'] += ' ' + datetime
.datetime
.now().strftime('%Y-%m-%d %H:%M')
2503 if info_dict
.get('is_live') and formats
:
2504 formats
= [f
for f
in formats
if bool(f
.get('is_from_start')) == get_from_start
]
2505 if get_from_start
and not formats
:
2506 self
.raise_no_formats(info_dict
, msg
=(
2507 '--live-from-start is passed, but there are no formats that can be downloaded from the start. '
2508 'If you want to download from the current time, use --no-live-from-start'))
2511 self
.raise_no_formats(info_dict
)
2513 def is_wellformed(f
):
2516 self
.report_warning(
2517 '"url" field is missing or empty - skipping format, '
2518 'there is an error in extractor')
2520 if isinstance(url
, bytes):
2521 sanitize_string_field(f
, 'url')
2524 # Filter out malformed formats for better extraction robustness
2525 formats
= list(filter(is_wellformed
, formats
))
2529 # We check that all the formats have the format and format_id fields
2530 for i
, format
in enumerate(formats
):
2531 sanitize_string_field(format
, 'format_id')
2532 sanitize_numeric_fields(format
)
2533 format
['url'] = sanitize_url(format
['url'])
2534 if not format
.get('format_id'):
2535 format
['format_id'] = compat_str(i
)
2537 # Sanitize format_id from characters used in format selector expression
2538 format
['format_id'] = re
.sub(r
'[\s,/+\[\]()]', '_', format
['format_id'])
2539 format_id
= format
['format_id']
2540 if format_id
not in formats_dict
:
2541 formats_dict
[format_id
] = []
2542 formats_dict
[format_id
].append(format
)
2544 # Make sure all formats have unique format_id
2545 common_exts
= set(itertools
.chain(*self
._format
_selection
_exts
.values()))
2546 for format_id
, ambiguous_formats
in formats_dict
.items():
2547 ambigious_id
= len(ambiguous_formats
) > 1
2548 for i
, format
in enumerate(ambiguous_formats
):
2550 format
['format_id'] = '%s-%d' % (format_id
, i
)
2551 if format
.get('ext') is None:
2552 format
['ext'] = determine_ext(format
['url']).lower()
2553 # Ensure there is no conflict between id and ext in format selection
2554 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2555 if format
['format_id'] != format
['ext'] and format
['format_id'] in common_exts
:
2556 format
['format_id'] = 'f%s' % format
['format_id']
2558 for i
, format
in enumerate(formats
):
2559 if format
.get('format') is None:
2560 format
['format'] = '{id} - {res}{note}'.format(
2561 id=format
['format_id'],
2562 res
=self
.format_resolution(format
),
2563 note
=format_field(format
, 'format_note', ' (%s)'),
2565 if format
.get('protocol') is None:
2566 format
['protocol'] = determine_protocol(format
)
2567 if format
.get('resolution') is None:
2568 format
['resolution'] = self
.format_resolution(format
, default
=None)
2569 if format
.get('dynamic_range') is None and format
.get('vcodec') != 'none':
2570 format
['dynamic_range'] = 'SDR'
2571 if (info_dict
.get('duration') and format
.get('tbr')
2572 and not format
.get('filesize') and not format
.get('filesize_approx')):
2573 format
['filesize_approx'] = int(info_dict
['duration'] * format
['tbr'] * (1024 / 8))
2575 # Add HTTP headers, so that external programs can use them from the
2577 full_format_info
= info_dict
.copy()
2578 full_format_info
.update(format
)
2579 format
['http_headers'] = self
._calc
_headers
(full_format_info
)
2580 # Remove private housekeeping stuff
2581 if '__x_forwarded_for_ip' in info_dict
:
2582 del info_dict
['__x_forwarded_for_ip']
2584 if self
.params
.get('check_formats') is True:
2585 formats
= LazyList(self
._check
_formats
(formats
[::-1]), reverse
=True)
2587 if not formats
or formats
[0] is not info_dict
:
2588 # only set the 'formats' fields if the original info_dict list them
2589 # otherwise we end up with a circular reference, the first (and unique)
2590 # element in the 'formats' field in info_dict is info_dict itself,
2591 # which can't be exported to json
2592 info_dict
['formats'] = formats
2594 info_dict
, _
= self
.pre_process(info_dict
)
2596 if self
._match
_entry
(info_dict
, incomplete
=self
._format
_fields
) is not None:
2599 self
.post_extract(info_dict
)
2600 info_dict
, _
= self
.pre_process(info_dict
, 'after_filter')
2602 # The pre-processors may have modified the formats
2603 formats
= info_dict
.get('formats', [info_dict
])
2605 list_only
= self
.params
.get('simulate') is None and (
2606 self
.params
.get('list_thumbnails') or self
.params
.get('listformats') or self
.params
.get('listsubtitles'))
2607 interactive_format_selection
= not list_only
and self
.format_selector
== '-'
2608 if self
.params
.get('list_thumbnails'):
2609 self
.list_thumbnails(info_dict
)
2610 if self
.params
.get('listsubtitles'):
2611 if 'automatic_captions' in info_dict
:
2612 self
.list_subtitles(
2613 info_dict
['id'], automatic_captions
, 'automatic captions')
2614 self
.list_subtitles(info_dict
['id'], subtitles
, 'subtitles')
2615 if self
.params
.get('listformats') or interactive_format_selection
:
2616 self
.list_formats(info_dict
)
2618 # Without this printing, -F --print-json will not work
2619 self
.__forced
_printings
(info_dict
, self
.prepare_filename(info_dict
), incomplete
=True)
2622 format_selector
= self
.format_selector
2623 if format_selector
is None:
2624 req_format
= self
._default
_format
_spec
(info_dict
, download
=download
)
2625 self
.write_debug('Default format spec: %s' % req_format
)
2626 format_selector
= self
.build_format_selector(req_format
)
2629 if interactive_format_selection
:
2631 self
._format
_screen
('\nEnter format selector: ', self
.Styles
.EMPHASIS
))
2633 format_selector
= self
.build_format_selector(req_format
)
2634 except SyntaxError as err
:
2635 self
.report_error(err
, tb
=False, is_error
=False)
2638 formats_to_download
= list(format_selector({
2640 'has_merged_format': any('none' not in (f
.get('acodec'), f
.get('vcodec')) for f
in formats
),
2641 'incomplete_formats': (
2642 # All formats are video-only or
2643 all(f
.get('vcodec') != 'none' and f
.get('acodec') == 'none' for f
in formats
)
2644 # all formats are audio-only
2645 or all(f
.get('vcodec') == 'none' and f
.get('acodec') != 'none' for f
in formats
)),
2647 if interactive_format_selection
and not formats_to_download
:
2648 self
.report_error('Requested format is not available', tb
=False, is_error
=False)
2652 if not formats_to_download
:
2653 if not self
.params
.get('ignore_no_formats_error'):
2654 raise ExtractorError(
2655 'Requested format is not available. Use --list-formats for a list of available formats',
2656 expected
=True, video_id
=info_dict
['id'], ie
=info_dict
['extractor'])
2657 self
.report_warning('Requested format is not available')
2658 # Process what we can, even without any available formats.
2659 formats_to_download
= [{}]
2661 requested_ranges
= self
.params
.get('download_ranges')
2662 if requested_ranges
:
2663 requested_ranges
= tuple(requested_ranges(info_dict
, self
))
2665 best_format
, downloaded_formats
= formats_to_download
[-1], []
2668 def to_screen(*msg
):
2669 self
.to_screen(f
'[info] {info_dict["id"]}: {" ".join(", ".join(variadic(m)) for m in msg)}')
2671 to_screen(f
'Downloading {len(formats_to_download)} format(s):',
2672 (f
['format_id'] for f
in formats_to_download
))
2673 if requested_ranges
:
2674 to_screen(f
'Downloading {len(requested_ranges)} time ranges:',
2675 (f
'{int(c["start_time"])}-{int(c["end_time"])}' for c
in requested_ranges
))
2676 max_downloads_reached
= False
2678 for fmt
, chapter
in itertools
.product(formats_to_download
, requested_ranges
or [{}]):
2679 new_info
= self
._copy
_infodict
(info_dict
)
2680 new_info
.update(fmt
)
2683 'section_start': chapter
.get('start_time'),
2684 'section_end': chapter
.get('end_time', 0),
2685 'section_title': chapter
.get('title'),
2686 'section_number': chapter
.get('index'),
2688 downloaded_formats
.append(new_info
)
2690 self
.process_info(new_info
)
2691 except MaxDownloadsReached
:
2692 max_downloads_reached
= True
2693 self
._raise
_pending
_errors
(new_info
)
2694 # Remove copied info
2695 for key
, val
in tuple(new_info
.items()):
2696 if info_dict
.get(key
) == val
:
2698 if max_downloads_reached
:
2701 write_archive
= {f.get('__write_download_archive', False) for f in downloaded_formats}
2702 assert write_archive
.issubset({True, False, 'ignore'}
)
2703 if True in write_archive
and False not in write_archive
:
2704 self
.record_download_archive(info_dict
)
2706 info_dict
['requested_downloads'] = downloaded_formats
2707 info_dict
= self
.run_all_pps('after_video', info_dict
)
2708 if max_downloads_reached
:
2709 raise MaxDownloadsReached()
2711 # We update the info dict with the selected best quality format (backwards compatibility)
2712 info_dict
.update(best_format
)
2715 def process_subtitles(self
, video_id
, normal_subtitles
, automatic_captions
):
2716 """Select the requested subtitles and their format"""
2717 available_subs
, normal_sub_langs
= {}, []
2718 if normal_subtitles
and self
.params
.get('writesubtitles'):
2719 available_subs
.update(normal_subtitles
)
2720 normal_sub_langs
= tuple(normal_subtitles
.keys())
2721 if automatic_captions
and self
.params
.get('writeautomaticsub'):
2722 for lang
, cap_info
in automatic_captions
.items():
2723 if lang
not in available_subs
:
2724 available_subs
[lang
] = cap_info
2726 if (not self
.params
.get('writesubtitles') and not
2727 self
.params
.get('writeautomaticsub') or not
2731 all_sub_langs
= tuple(available_subs
.keys())
2732 if self
.params
.get('allsubtitles', False):
2733 requested_langs
= all_sub_langs
2734 elif self
.params
.get('subtitleslangs', False):
2735 # A list is used so that the order of languages will be the same as
2736 # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
2737 requested_langs
= []
2738 for lang_re
in self
.params
.get('subtitleslangs'):
2739 discard
= lang_re
[0] == '-'
2741 lang_re
= lang_re
[1:]
2742 if lang_re
== 'all':
2744 requested_langs
= []
2746 requested_langs
.extend(all_sub_langs
)
2748 current_langs
= filter(re
.compile(lang_re
+ '$').match
, all_sub_langs
)
2750 for lang
in current_langs
:
2751 while lang
in requested_langs
:
2752 requested_langs
.remove(lang
)
2754 requested_langs
.extend(current_langs
)
2755 requested_langs
= orderedSet(requested_langs
)
2756 elif normal_sub_langs
:
2757 requested_langs
= ['en'] if 'en' in normal_sub_langs
else normal_sub_langs
[:1]
2759 requested_langs
= ['en'] if 'en' in all_sub_langs
else all_sub_langs
[:1]
2761 self
.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs
))
2763 formats_query
= self
.params
.get('subtitlesformat', 'best')
2764 formats_preference
= formats_query
.split('/') if formats_query
else []
2766 for lang
in requested_langs
:
2767 formats
= available_subs
.get(lang
)
2769 self
.report_warning(f
'{lang} subtitles not available for {video_id}')
2771 for ext
in formats_preference
:
2775 matches
= list(filter(lambda f
: f
['ext'] == ext
, formats
))
2781 self
.report_warning(
2782 'No subtitle format found matching "%s" for language %s, '
2783 'using %s' % (formats_query
, lang
, f
['ext']))
2787 def _forceprint(self
, key
, info_dict
):
2788 if info_dict
is None:
2790 info_copy
= info_dict
.copy()
2791 info_copy
['formats_table'] = self
.render_formats_table(info_dict
)
2792 info_copy
['thumbnails_table'] = self
.render_thumbnails_table(info_dict
)
2793 info_copy
['subtitles_table'] = self
.render_subtitles_table(info_dict
.get('id'), info_dict
.get('subtitles'))
2794 info_copy
['automatic_captions_table'] = self
.render_subtitles_table(info_dict
.get('id'), info_dict
.get('automatic_captions'))
2796 def format_tmpl(tmpl
):
2797 mobj
= re
.match(r
'\w+(=?)$', tmpl
)
2798 if mobj
and mobj
.group(1):
2799 return f
'{tmpl[:-1]} = %({tmpl[:-1]})r'
2801 return f
'%({tmpl})s'
2804 for tmpl
in self
.params
['forceprint'].get(key
, []):
2805 self
.to_stdout(self
.evaluate_outtmpl(format_tmpl(tmpl
), info_copy
))
2807 for tmpl
, file_tmpl
in self
.params
['print_to_file'].get(key
, []):
2808 filename
= self
.prepare_filename(info_dict
, outtmpl
=file_tmpl
)
2809 tmpl
= format_tmpl(tmpl
)
2810 self
.to_screen(f
'[info] Writing {tmpl!r} to: {filename}')
2811 if self
._ensure
_dir
_exists
(filename
):
2812 with open(filename
, 'a', encoding
='utf-8') as f
:
2813 f
.write(self
.evaluate_outtmpl(tmpl
, info_copy
) + '\n')
2815 def __forced_printings(self
, info_dict
, filename
, incomplete
):
2816 def print_mandatory(field
, actual_field
=None):
2817 if actual_field
is None:
2818 actual_field
= field
2819 if (self
.params
.get('force%s' % field
, False)
2820 and (not incomplete
or info_dict
.get(actual_field
) is not None)):
2821 self
.to_stdout(info_dict
[actual_field
])
2823 def print_optional(field
):
2824 if (self
.params
.get('force%s' % field
, False)
2825 and info_dict
.get(field
) is not None):
2826 self
.to_stdout(info_dict
[field
])
2828 info_dict
= info_dict
.copy()
2829 if filename
is not None:
2830 info_dict
['filename'] = filename
2831 if info_dict
.get('requested_formats') is not None:
2832 # For RTMP URLs, also include the playpath
2833 info_dict
['urls'] = '\n'.join(f
['url'] + f
.get('play_path', '') for f
in info_dict
['requested_formats'])
2834 elif info_dict
.get('url'):
2835 info_dict
['urls'] = info_dict
['url'] + info_dict
.get('play_path', '')
2837 if (self
.params
.get('forcejson')
2838 or self
.params
['forceprint'].get('video')
2839 or self
.params
['print_to_file'].get('video')):
2840 self
.post_extract(info_dict
)
2841 self
._forceprint
('video', info_dict
)
2843 print_mandatory('title')
2844 print_mandatory('id')
2845 print_mandatory('url', 'urls')
2846 print_optional('thumbnail')
2847 print_optional('description')
2848 print_optional('filename')
2849 if self
.params
.get('forceduration') and info_dict
.get('duration') is not None:
2850 self
.to_stdout(formatSeconds(info_dict
['duration']))
2851 print_mandatory('format')
2853 if self
.params
.get('forcejson'):
2854 self
.to_stdout(json
.dumps(self
.sanitize_info(info_dict
)))
2856 def dl(self
, name
, info
, subtitle
=False, test
=False):
2857 if not info
.get('url'):
2858 self
.raise_no_formats(info
, True)
2861 verbose
= self
.params
.get('verbose')
2864 'quiet': self
.params
.get('quiet') or not verbose
,
2866 'noprogress': not verbose
,
2868 'skip_unavailable_fragments': False,
2869 'keep_fragments': False,
2871 '_no_ytdl_file': True,
2874 params
= self
.params
2875 fd
= get_suitable_downloader(info
, params
, to_stdout
=(name
== '-'))(self
, params
)
2877 for ph
in self
._progress
_hooks
:
2878 fd
.add_progress_hook(ph
)
2880 (f
['url'].split(',')[0] + ',<data>' if f
['url'].startswith('data:') else f
['url'])
2881 for f
in info
.get('requested_formats', []) or [info
])
2882 self
.write_debug(f
'Invoking {fd.FD_NAME} downloader on "{urls}"')
2884 # Note: Ideally info should be a deep-copied so that hooks cannot modify it.
2885 # But it may contain objects that are not deep-copyable
2886 new_info
= self
._copy
_infodict
(info
)
2887 if new_info
.get('http_headers') is None:
2888 new_info
['http_headers'] = self
._calc
_headers
(new_info
)
2889 return fd
.download(name
, new_info
, subtitle
)
2891 def existing_file(self
, filepaths
, *, default_overwrite
=True):
2892 existing_files
= list(filter(os
.path
.exists
, orderedSet(filepaths
)))
2893 if existing_files
and not self
.params
.get('overwrites', default_overwrite
):
2894 return existing_files
[0]
2896 for file in existing_files
:
2897 self
.report_file_delete(file)
2901 def process_info(self
, info_dict
):
2902 """Process a single resolved IE result. (Modifies it in-place)"""
2904 assert info_dict
.get('_type', 'video') == 'video'
2905 original_infodict
= info_dict
2907 if 'format' not in info_dict
and 'ext' in info_dict
:
2908 info_dict
['format'] = info_dict
['ext']
2910 # This is mostly just for backward compatibility of process_info
2911 # As a side-effect, this allows for format-specific filters
2912 if self
._match
_entry
(info_dict
) is not None:
2913 info_dict
['__write_download_archive'] = 'ignore'
2916 # Does nothing under normal operation - for backward compatibility of process_info
2917 self
.post_extract(info_dict
)
2918 self
._num
_downloads
+= 1
2920 # info_dict['_filename'] needs to be set for backward compatibility
2921 info_dict
['_filename'] = full_filename
= self
.prepare_filename(info_dict
, warn
=True)
2922 temp_filename
= self
.prepare_filename(info_dict
, 'temp')
2926 self
.__forced
_printings
(info_dict
, full_filename
, incomplete
=('format' not in info_dict
))
2928 def check_max_downloads():
2929 if self
._num
_downloads
>= float(self
.params
.get('max_downloads') or 'inf'):
2930 raise MaxDownloadsReached()
2932 if self
.params
.get('simulate'):
2933 info_dict
['__write_download_archive'] = self
.params
.get('force_write_download_archive')
2934 check_max_downloads()
2937 if full_filename
is None:
2939 if not self
._ensure
_dir
_exists
(encodeFilename(full_filename
)):
2941 if not self
._ensure
_dir
_exists
(encodeFilename(temp_filename
)):
2944 if self
._write
_description
('video', info_dict
,
2945 self
.prepare_filename(info_dict
, 'description')) is None:
2948 sub_files
= self
._write
_subtitles
(info_dict
, temp_filename
)
2949 if sub_files
is None:
2951 files_to_move
.update(dict(sub_files
))
2953 thumb_files
= self
._write
_thumbnails
(
2954 'video', info_dict
, temp_filename
, self
.prepare_filename(info_dict
, 'thumbnail'))
2955 if thumb_files
is None:
2957 files_to_move
.update(dict(thumb_files
))
2959 infofn
= self
.prepare_filename(info_dict
, 'infojson')
2960 _infojson_written
= self
._write
_info
_json
('video', info_dict
, infofn
)
2961 if _infojson_written
:
2962 info_dict
['infojson_filename'] = infofn
2963 # For backward compatibility, even though it was a private field
2964 info_dict
['__infojson_filename'] = infofn
2965 elif _infojson_written
is None:
2968 # Note: Annotations are deprecated
2970 if self
.params
.get('writeannotations', False):
2971 annofn
= self
.prepare_filename(info_dict
, 'annotation')
2973 if not self
._ensure
_dir
_exists
(encodeFilename(annofn
)):
2975 if not self
.params
.get('overwrites', True) and os
.path
.exists(encodeFilename(annofn
)):
2976 self
.to_screen('[info] Video annotations are already present')
2977 elif not info_dict
.get('annotations'):
2978 self
.report_warning('There are no annotations to write.')
2981 self
.to_screen('[info] Writing video annotations to: ' + annofn
)
2982 with open(encodeFilename(annofn
), 'w', encoding
='utf-8') as annofile
:
2983 annofile
.write(info_dict
['annotations'])
2984 except (KeyError, TypeError):
2985 self
.report_warning('There are no annotations to write.')
2987 self
.report_error('Cannot write annotations file: ' + annofn
)
2990 # Write internet shortcut files
2991 def _write_link_file(link_type
):
2992 url
= try_get(info_dict
['webpage_url'], iri_to_uri
)
2994 self
.report_warning(
2995 f
'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
2997 linkfn
= replace_extension(self
.prepare_filename(info_dict
, 'link'), link_type
, info_dict
.get('ext'))
2998 if not self
._ensure
_dir
_exists
(encodeFilename(linkfn
)):
3000 if self
.params
.get('overwrites', True) and os
.path
.exists(encodeFilename(linkfn
)):
3001 self
.to_screen(f
'[info] Internet shortcut (.{link_type}) is already present')
3004 self
.to_screen(f
'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
3005 with open(encodeFilename(to_high_limit_path(linkfn
)), 'w', encoding
='utf-8',
3006 newline
='\r\n' if link_type
== 'url' else '\n') as linkfile
:
3007 template_vars
= {'url': url}
3008 if link_type
== 'desktop':
3009 template_vars
['filename'] = linkfn
[:-(len(link_type
) + 1)]
3010 linkfile
.write(LINK_TEMPLATES
[link_type
] % template_vars
)
3012 self
.report_error(f
'Cannot write internet shortcut {linkfn}')
3017 'url': self
.params
.get('writeurllink'),
3018 'webloc': self
.params
.get('writewebloclink'),
3019 'desktop': self
.params
.get('writedesktoplink'),
3021 if self
.params
.get('writelink'):
3022 link_type
= ('webloc' if sys
.platform
== 'darwin'
3023 else 'desktop' if sys
.platform
.startswith('linux')
3025 write_links
[link_type
] = True
3027 if any(should_write
and not _write_link_file(link_type
)
3028 for link_type
, should_write
in write_links
.items()):
3031 def replace_info_dict(new_info
):
3033 if new_info
== info_dict
:
3036 info_dict
.update(new_info
)
3038 new_info
, files_to_move
= self
.pre_process(info_dict
, 'before_dl', files_to_move
)
3039 replace_info_dict(new_info
)
3041 if self
.params
.get('skip_download'):
3042 info_dict
['filepath'] = temp_filename
3043 info_dict
['__finaldir'] = os
.path
.dirname(os
.path
.abspath(encodeFilename(full_filename
)))
3044 info_dict
['__files_to_move'] = files_to_move
3045 replace_info_dict(self
.run_pp(MoveFilesAfterDownloadPP(self
, False), info_dict
))
3046 info_dict
['__write_download_archive'] = self
.params
.get('force_write_download_archive')
3049 info_dict
.setdefault('__postprocessors', [])
3052 def existing_video_file(*filepaths
):
3053 ext
= info_dict
.get('ext')
3054 converted
= lambda file: replace_extension(file, self
.params
.get('final_ext') or ext
, ext
)
3055 file = self
.existing_file(itertools
.chain(*zip(map(converted
, filepaths
), filepaths
)),
3056 default_overwrite
=False)
3058 info_dict
['ext'] = os
.path
.splitext(file)[1][1:]
3062 merger
, fd
= FFmpegMergerPP(self
), None
3063 if info_dict
.get('url'):
3064 fd
= get_suitable_downloader(info_dict
, self
.params
, to_stdout
=temp_filename
== '-')
3065 if fd
is not FFmpegFD
and (
3066 info_dict
.get('section_start') or info_dict
.get('section_end')):
3067 msg
= ('This format cannot be partially downloaded' if merger
.available
3068 else 'You have requested downloading the video partially, but ffmpeg is not installed')
3069 self
.report_error(f
'{msg}. Aborting')
3072 if info_dict
.get('requested_formats') is not None:
3074 def compatible_formats(formats
):
3075 # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
3076 video_formats
= [format
for format
in formats
if format
.get('vcodec') != 'none']
3077 audio_formats
= [format
for format
in formats
if format
.get('acodec') != 'none']
3078 if len(video_formats
) > 2 or len(audio_formats
) > 2:
3082 exts
= {format.get('ext') for format in formats}
3084 {'mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'}
,
3087 for ext_sets
in COMPATIBLE_EXTS
:
3088 if ext_sets
.issuperset(exts
):
3090 # TODO: Check acodec/vcodec
3093 requested_formats
= info_dict
['requested_formats']
3094 old_ext
= info_dict
['ext']
3095 if self
.params
.get('merge_output_format') is None:
3096 if not compatible_formats(requested_formats
):
3097 info_dict
['ext'] = 'mkv'
3098 self
.report_warning(
3099 'Requested formats are incompatible for merge and will be merged into mkv')
3100 if (info_dict
['ext'] == 'webm'
3101 and info_dict
.get('thumbnails')
3102 # check with type instead of pp_key, __name__, or isinstance
3103 # since we dont want any custom PPs to trigger this
3104 and any(type(pp
) == EmbedThumbnailPP
for pp
in self
._pps
['post_process'])): # noqa: E721
3105 info_dict
['ext'] = 'mkv'
3106 self
.report_warning(
3107 'webm doesn\'t support embedding a thumbnail, mkv will be used')
3108 new_ext
= info_dict
['ext']
3110 def correct_ext(filename
, ext
=new_ext
):
3113 filename_real_ext
= os
.path
.splitext(filename
)[1][1:]
3115 os
.path
.splitext(filename
)[0]
3116 if filename_real_ext
in (old_ext
, new_ext
)
3118 return f
'{filename_wo_ext}.{ext}'
3120 # Ensure filename always has a correct extension for successful merge
3121 full_filename
= correct_ext(full_filename
)
3122 temp_filename
= correct_ext(temp_filename
)
3123 dl_filename
= existing_video_file(full_filename
, temp_filename
)
3124 info_dict
['__real_download'] = False
3127 if dl_filename
is not None:
3128 self
.report_file_already_downloaded(dl_filename
)
3130 for f
in requested_formats
if fd
!= FFmpegFD
else []:
3131 f
['filepath'] = fname
= prepend_extension(
3132 correct_ext(temp_filename
, info_dict
['ext']),
3133 'f%s' % f
['format_id'], info_dict
['ext'])
3134 downloaded
.append(fname
)
3135 info_dict
['url'] = '\n'.join(f
['url'] for f
in requested_formats
)
3136 success
, real_download
= self
.dl(temp_filename
, info_dict
)
3137 info_dict
['__real_download'] = real_download
3139 if self
.params
.get('allow_unplayable_formats'):
3140 self
.report_warning(
3141 'You have requested merging of multiple formats '
3142 'while also allowing unplayable formats to be downloaded. '
3143 'The formats won\'t be merged to prevent data corruption.')
3144 elif not merger
.available
:
3145 msg
= 'You have requested merging of multiple formats but ffmpeg is not installed'
3146 if not self
.params
.get('ignoreerrors'):
3147 self
.report_error(f
'{msg}. Aborting due to --abort-on-error')
3149 self
.report_warning(f
'{msg}. The formats won\'t be merged')
3151 if temp_filename
== '-':
3152 reason
= ('using a downloader other than ffmpeg' if FFmpegFD
.can_merge_formats(info_dict
, self
.params
)
3153 else 'but the formats are incompatible for simultaneous download' if merger
.available
3154 else 'but ffmpeg is not installed')
3155 self
.report_warning(
3156 f
'You have requested downloading multiple formats to stdout {reason}. '
3157 'The formats will be streamed one after the other')
3158 fname
= temp_filename
3159 for f
in requested_formats
:
3160 new_info
= dict(info_dict
)
3161 del new_info
['requested_formats']
3163 if temp_filename
!= '-':
3164 fname
= prepend_extension(
3165 correct_ext(temp_filename
, new_info
['ext']),
3166 'f%s' % f
['format_id'], new_info
['ext'])
3167 if not self
._ensure
_dir
_exists
(fname
):
3169 f
['filepath'] = fname
3170 downloaded
.append(fname
)
3171 partial_success
, real_download
= self
.dl(fname
, new_info
)
3172 info_dict
['__real_download'] = info_dict
['__real_download'] or real_download
3173 success
= success
and partial_success
3175 if downloaded
and merger
.available
and not self
.params
.get('allow_unplayable_formats'):
3176 info_dict
['__postprocessors'].append(merger
)
3177 info_dict
['__files_to_merge'] = downloaded
3178 # Even if there were no downloads, it is being merged only now
3179 info_dict
['__real_download'] = True
3181 for file in downloaded
:
3182 files_to_move
[file] = None
3184 # Just a single file
3185 dl_filename
= existing_video_file(full_filename
, temp_filename
)
3186 if dl_filename
is None or dl_filename
== temp_filename
:
3187 # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
3188 # So we should try to resume the download
3189 success
, real_download
= self
.dl(temp_filename
, info_dict
)
3190 info_dict
['__real_download'] = real_download
3192 self
.report_file_already_downloaded(dl_filename
)
3194 dl_filename
= dl_filename
or temp_filename
3195 info_dict
['__finaldir'] = os
.path
.dirname(os
.path
.abspath(encodeFilename(full_filename
)))
3197 except network_exceptions
as err
:
3198 self
.report_error('unable to download video data: %s' % error_to_compat_str(err
))
3200 except OSError as err
:
3201 raise UnavailableVideoError(err
)
3202 except (ContentTooShortError
, ) as err
:
3203 self
.report_error(f
'content too short (expected {err.expected} bytes and served {err.downloaded})')
3206 self
._raise
_pending
_errors
(info_dict
)
3207 if success
and full_filename
!= '-':
3211 fixup_policy
= self
.params
.get('fixup')
3212 vid
= info_dict
['id']
3214 if fixup_policy
in ('ignore', 'never'):
3216 elif fixup_policy
== 'warn':
3218 elif fixup_policy
!= 'force':
3219 assert fixup_policy
in ('detect_or_warn', None)
3220 if not info_dict
.get('__real_download'):
3223 def ffmpeg_fixup(cndn
, msg
, cls
):
3224 if not (do_fixup
and cndn
):
3226 elif do_fixup
== 'warn':
3227 self
.report_warning(f
'{vid}: {msg}')
3231 info_dict
['__postprocessors'].append(pp
)
3233 self
.report_warning(f
'{vid}: {msg}. Install ffmpeg to fix this automatically')
3235 stretched_ratio
= info_dict
.get('stretched_ratio')
3237 stretched_ratio
not in (1, None),
3238 f
'Non-uniform pixel ratio {stretched_ratio}',
3239 FFmpegFixupStretchedPP
)
3242 (info_dict
.get('requested_formats') is None
3243 and info_dict
.get('container') == 'm4a_dash'
3244 and info_dict
.get('ext') == 'm4a'),
3245 'writing DASH m4a. Only some players support this container',
3248 downloader
= get_suitable_downloader(info_dict
, self
.params
) if 'protocol' in info_dict
else None
3249 downloader
= downloader
.FD_NAME
if downloader
else None
3251 if info_dict
.get('requested_formats') is None: # Not necessary if doing merger
3252 ffmpeg_fixup(downloader
== 'hlsnative' and not self
.params
.get('hls_use_mpegts')
3253 or info_dict
.get('is_live') and self
.params
.get('hls_use_mpegts') is None,
3254 'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
3256 ffmpeg_fixup(info_dict
.get('is_live') and downloader
== 'DashSegmentsFD',
3257 'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP
)
3259 ffmpeg_fixup(downloader
== 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP
)
3260 ffmpeg_fixup(downloader
== 'web_socket_fragment', 'Malformed duration detected', FFmpegFixupDurationPP
)
3264 replace_info_dict(self
.post_process(dl_filename
, info_dict
, files_to_move
))
3265 except PostProcessingError
as err
:
3266 self
.report_error('Postprocessing: %s' % str(err
))
3269 for ph
in self
._post
_hooks
:
3270 ph(info_dict
['filepath'])
3271 except Exception as err
:
3272 self
.report_error('post hooks: %s' % str(err
))
3274 info_dict
['__write_download_archive'] = True
3276 assert info_dict
is original_infodict
# Make sure the info_dict was modified in-place
3277 if self
.params
.get('force_write_download_archive'):
3278 info_dict
['__write_download_archive'] = True
3279 check_max_downloads()
3281 def __download_wrapper(self
, func
):
3282 @functools.wraps(func
)
3283 def wrapper(*args
, **kwargs
):
3285 res
= func(*args
, **kwargs
)
3286 except UnavailableVideoError
as e
:
3287 self
.report_error(e
)
3288 except DownloadCancelled
as e
:
3289 self
.to_screen(f
'[info] {e}')
3290 if not self
.params
.get('break_per_url'):
3293 if self
.params
.get('dump_single_json', False):
3294 self
.post_extract(res
)
3295 self
.to_stdout(json
.dumps(self
.sanitize_info(res
)))
3298 def download(self
, url_list
):
3299 """Download a given list of URLs."""
3300 url_list
= variadic(url_list
) # Passing a single URL is a common mistake
3301 outtmpl
= self
.outtmpl_dict
['default']
3302 if (len(url_list
) > 1
3304 and '%' not in outtmpl
3305 and self
.params
.get('max_downloads') != 1):
3306 raise SameFileError(outtmpl
)
3308 for url
in url_list
:
3309 self
.__download
_wrapper
(self
.extract_info
)(
3310 url
, force_generic_extractor
=self
.params
.get('force_generic_extractor', False))
3312 return self
._download
_retcode
3314 def download_with_info_file(self
, info_filename
):
3315 with contextlib
.closing(fileinput
.FileInput(
3316 [info_filename
], mode
='r',
3317 openhook
=fileinput
.hook_encoded('utf-8'))) as f
:
3318 # FileInput doesn't have a read method, we can't call json.load
3319 info
= self
.sanitize_info(json
.loads('\n'.join(f
)), self
.params
.get('clean_infojson', True))
3321 self
.__download
_wrapper
(self
.process_ie_result
)(info
, download
=True)
3322 except (DownloadError
, EntryNotInPlaylist
, ReExtractInfo
) as e
:
3323 if not isinstance(e
, EntryNotInPlaylist
):
3324 self
.to_stderr('\r')
3325 webpage_url
= info
.get('webpage_url')
3326 if webpage_url
is not None:
3327 self
.report_warning(f
'The info failed to download: {e}; trying with URL {webpage_url}')
3328 return self
.download([webpage_url
])
3331 return self
._download
_retcode
3334 def sanitize_info(info_dict
, remove_private_keys
=False):
3335 ''' Sanitize the infodict for converting to json '''
3336 if info_dict
is None:
3338 info_dict
.setdefault('epoch', int(time
.time()))
3339 info_dict
.setdefault('_type', 'video')
3341 if remove_private_keys
:
3342 reject
= lambda k
, v
: v
is None or k
.startswith('__') or k
in {
3343 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
3344 'entries', 'filepath', '_filename', 'infojson_filename', 'original_url', 'playlist_autonumber',
3347 reject
= lambda k
, v
: False
3350 if isinstance(obj
, dict):
3351 return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
3352 elif isinstance(obj
, (list, tuple, set, LazyList
)):
3353 return list(map(filter_fn
, obj
))
3354 elif obj
is None or isinstance(obj
, (str, int, float, bool)):
3359 return filter_fn(info_dict
)
3362 def filter_requested_info(info_dict
, actually_filter
=True):
3363 ''' Alias of sanitize_info for backward compatibility '''
3364 return YoutubeDL
.sanitize_info(info_dict
, actually_filter
)
3366 def _delete_downloaded_files(self
, *files_to_delete
, info
={}, msg
=None):
3367 for filename
in set(filter(None, files_to_delete
)):
3369 self
.to_screen(msg
% filename
)
3373 self
.report_warning(f
'Unable to delete file {filename}')
3374 if filename
in info
.get('__files_to_move', []): # NB: Delete even if None
3375 del info
['__files_to_move'][filename
]
3378 def post_extract(info_dict
):
3379 def actual_post_extract(info_dict
):
3380 if info_dict
.get('_type') in ('playlist', 'multi_video'):
3381 for video_dict
in info_dict
.get('entries', {}):
3382 actual_post_extract(video_dict
or {})
3385 post_extractor
= info_dict
.pop('__post_extractor', None) or (lambda: {})
3386 info_dict
.update(post_extractor())
3388 actual_post_extract(info_dict
or {})
3390 def run_pp(self
, pp
, infodict
):
3391 files_to_delete
= []
3392 if '__files_to_move' not in infodict
:
3393 infodict
['__files_to_move'] = {}
3395 files_to_delete
, infodict
= pp
.run(infodict
)
3396 except PostProcessingError
as e
:
3397 # Must be True and not 'only_download'
3398 if self
.params
.get('ignoreerrors') is True:
3399 self
.report_error(e
)
3403 if not files_to_delete
:
3405 if self
.params
.get('keepvideo', False):
3406 for f
in files_to_delete
:
3407 infodict
['__files_to_move'].setdefault(f
, '')
3409 self
._delete
_downloaded
_files
(
3410 *files_to_delete
, info
=infodict
, msg
='Deleting original file %s (pass -k to keep)')
3413 def run_all_pps(self
, key
, info
, *, additional_pps
=None):
3414 self
._forceprint
(key
, info
)
3415 for pp
in (additional_pps
or []) + self
._pps
[key
]:
3416 info
= self
.run_pp(pp
, info
)
3419 def pre_process(self
, ie_info
, key
='pre_process', files_to_move
=None):
3420 info
= dict(ie_info
)
3421 info
['__files_to_move'] = files_to_move
or {}
3423 info
= self
.run_all_pps(key
, info
)
3424 except PostProcessingError
as err
:
3425 msg
= f
'Preprocessing: {err}'
3426 info
.setdefault('__pending_error', msg
)
3427 self
.report_error(msg
, is_error
=False)
3428 return info
, info
.pop('__files_to_move', None)
3430 def post_process(self
, filename
, info
, files_to_move
=None):
3431 """Run all the postprocessors on the given file."""
3432 info
['filepath'] = filename
3433 info
['__files_to_move'] = files_to_move
or {}
3434 info
= self
.run_all_pps('post_process', info
, additional_pps
=info
.get('__postprocessors'))
3435 info
= self
.run_pp(MoveFilesAfterDownloadPP(self
), info
)
3436 del info
['__files_to_move']
3437 return self
.run_all_pps('after_move', info
)
3439 def _make_archive_id(self
, info_dict
):
3440 video_id
= info_dict
.get('id')
3443 # Future-proof against any change in case
3444 # and backwards compatibility with prior versions
3445 extractor
= info_dict
.get('extractor_key') or info_dict
.get('ie_key') # key in a playlist
3446 if extractor
is None:
3447 url
= str_or_none(info_dict
.get('url'))
3450 # Try to find matching extractor for the URL and take its ie_key
3451 for ie_key
, ie
in self
._ies
.items():
3452 if ie
.suitable(url
):
3457 return f
'{extractor.lower()} {video_id}'
3459 def in_download_archive(self
, info_dict
):
3460 fn
= self
.params
.get('download_archive')
3464 vid_id
= self
._make
_archive
_id
(info_dict
)
3466 return False # Incomplete video information
3468 return vid_id
in self
.archive
3470 def record_download_archive(self
, info_dict
):
3471 fn
= self
.params
.get('download_archive')
3474 vid_id
= self
._make
_archive
_id
(info_dict
)
3476 self
.write_debug(f
'Adding to archive: {vid_id}')
3477 with locked_file(fn
, 'a', encoding
='utf-8') as archive_file
:
3478 archive_file
.write(vid_id
+ '\n')
3479 self
.archive
.add(vid_id
)
@staticmethod
def format_resolution(format, default='unknown'):
    """Return a human-readable resolution string for a format dict.

    Order of precedence: 'audio only' for formats that have audio but no
    video, an explicit 'resolution' field, 'WxH', '<height>p', '<width>x?',
    and finally `default`.
    """
    if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
        return 'audio only'
    if format.get('resolution') is not None:
        return format['resolution']
    if format.get('width') and format.get('height'):
        return '%dx%d' % (format['width'], format['height'])
    elif format.get('height'):
        return '%sp' % format['height']
    elif format.get('width'):
        return '%dx?' % format['width']
    return default
def _list_format_headers(self, *headers):
    """Return the table headers, styled unless 'listformats_table' is False.

    With the table layout enabled (the default) every header is passed
    through self._format_out with the HEADERS style; otherwise the headers
    are returned unchanged.
    """
    if self.params.get('listformats_table', True) is not False:
        return [self._format_out(header, self.Styles.HEADERS) for header in headers]
    return headers
def _format_note(self, fdict):
    """Build the free-form 'note' column for the legacy formats table.

    Concatenates whichever of these fields are present in `fdict`:
    unsupported marker, language, format note, total bitrate, container,
    video codec/bitrate, fps, audio codec/bitrate/sample rate and file size.
    Returns an empty string when none of them is set.
    """
    res = ''
    if fdict.get('ext') in ['f4f', 'f4m']:
        res += '(unsupported)'
    if fdict.get('language'):
        if res:
            res += ' '
        res += '[%s]' % fdict['language']
    if fdict.get('format_note') is not None:
        if res:
            res += ' '
        res += fdict['format_note']
    if fdict.get('tbr') is not None:
        if res:
            res += ', '
        res += '%4dk' % fdict['tbr']
    if fdict.get('container') is not None:
        if res:
            res += ', '
        res += '%s container' % fdict['container']
    if (fdict.get('vcodec') is not None
            and fdict.get('vcodec') != 'none'):
        if res:
            res += ', '
        res += fdict['vcodec']
        if fdict.get('vbr') is not None:
            res += '@'
    elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
        # Both bitrates known but no codec name: label the video one explicitly
        res += 'video@'
    if fdict.get('vbr') is not None:
        res += '%4dk' % fdict['vbr']
    if fdict.get('fps') is not None:
        if res:
            res += ', '
        res += '%sfps' % fdict['fps']
    if fdict.get('acodec') is not None:
        if res:
            res += ', '
        if fdict['acodec'] == 'none':
            res += 'video only'
        else:
            res += '%-5s' % fdict['acodec']
    elif fdict.get('abr') is not None:
        if res:
            res += ', '
        res += 'audio'
    if fdict.get('abr') is not None:
        res += '@%3dk' % fdict['abr']
    if fdict.get('asr') is not None:
        res += ' (%5dHz)' % fdict['asr']
    if fdict.get('filesize') is not None:
        if res:
            res += ', '
        res += format_bytes(fdict['filesize'])
    elif fdict.get('filesize_approx') is not None:
        if res:
            res += ', '
        res += '~' + format_bytes(fdict['filesize_approx'])
    return res
def render_formats_table(self, info_dict):
    """Render the table of available formats as a string.

    Returns None when `info_dict` has neither 'formats' nor a 'url'.
    When the 'listformats_table' param is exactly False the legacy
    four-column layout is produced; otherwise the detailed layout is used.
    Formats whose 'preference' is below -1000 are left out of both.
    """
    if not info_dict.get('formats') and not info_dict.get('url'):
        return None

    formats = info_dict.get('formats', [info_dict])
    if self.params.get('listformats_table', True) is False:
        # Legacy layout
        table = [
            [
                format_field(f, 'format_id'),
                format_field(f, 'ext'),
                self.format_resolution(f),
                self._format_note(f)
            ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
        return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)

    # Column separator; '|' is the fallback passed to _format_out
    delim = self._format_out('\u2502', self.Styles.DELIM, '|', test_encoding=True)
    table = [
        [
            self._format_out(format_field(f, 'format_id'), self.Styles.ID),
            format_field(f, 'ext'),
            format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
            format_field(f, 'fps', '\t%d'),
            format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
            delim,
            format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
            format_field(f, 'tbr', '\t%dk'),
            shorten_protocol_name(f.get('protocol', '')),
            delim,
            format_field(f, 'vcodec', default='unknown').replace(
                'none', 'images' if f.get('acodec') == 'none'
                        else self._format_out('audio only', self.Styles.SUPPRESS)),
            format_field(f, 'vbr', '\t%dk'),
            format_field(f, 'acodec', default='unknown').replace(
                'none', '' if f.get('vcodec') == 'none'
                        else self._format_out('video only', self.Styles.SUPPRESS)),
            format_field(f, 'abr', '\t%dk'),
            format_field(f, 'asr', '\t%dHz'),
            join_nonempty(
                self._format_out('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
                format_field(f, 'language', '[%s]'),
                join_nonempty(format_field(f, 'format_note'),
                              format_field(f, 'container', ignore=(None, f.get('ext'))),
                              delim=', '),
                delim=' '),
        ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
    header_line = self._list_format_headers(
        'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', delim, '\tFILESIZE', '\tTBR', 'PROTO',
        delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')

    return render_table(
        header_line, table, hide_empty=True,
        delim=self._format_out('\u2500', self.Styles.DELIM, '-', test_encoding=True))
def render_thumbnails_table(self, info_dict):
    """Render the table of available thumbnails, or None if there are none.

    Each row lists the thumbnail's id, width, height ('unknown' when the
    key is absent) and URL.
    """
    thumbnails = list(info_dict.get('thumbnails') or [])
    if not thumbnails:
        return None
    return render_table(
        self._list_format_headers('ID', 'Width', 'Height', 'URL'),
        [[t.get('id'), t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails])
def render_subtitles_table(self, video_id, subtitles):
    """Render the table of available subtitles, or None if there are none.

    `subtitles` maps a language code to a list of format dicts; each row
    shows the language, the track name(s) and the available extensions.
    """
    def _row(lang, formats):
        # Formats are listed in reverse order
        exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
        if len(set(names)) == 1:
            # All formats share one name: show it once, or nothing if unknown
            names = [] if names[0] == 'unknown' else names[:1]
        return [lang, ', '.join(names), ', '.join(exts)]

    if not subtitles:
        return None
    return render_table(
        self._list_format_headers('Language', 'Name', 'Formats'),
        [_row(lang, formats) for lang, formats in subtitles.items()],
        hide_empty=True)
def __list_table(self, video_id, name, func, *args):
    """Render a table with func(*args) and print it, or report that it is empty.

    `name` is the plural noun used in the messages (e.g. 'formats').
    """
    table = func(*args)
    if not table:
        self.to_screen(f'{video_id} has no {name}')
        return
    self.to_screen(f'[info] Available {name} for {video_id}:')
    self.to_stdout(table)
def list_formats(self, info_dict):
    """Print the formats table for the video described by `info_dict`."""
    video_id = info_dict['id']
    renderer = self.render_formats_table
    self.__list_table(video_id, 'formats', renderer, info_dict)
def list_thumbnails(self, info_dict):
    """Print the thumbnails table for the video described by `info_dict`."""
    video_id = info_dict['id']
    renderer = self.render_thumbnails_table
    self.__list_table(video_id, 'thumbnails', renderer, info_dict)
def list_subtitles(self, video_id, subtitles, name='subtitles'):
    """Print the subtitles table for `video_id`; `name` labels the listing."""
    renderer = self.render_subtitles_table
    self.__list_table(video_id, name, renderer, video_id, subtitles)
def urlopen(self, req):
    """ Start an HTTP download """
    # Plain URL strings are wrapped into a request object first
    request = sanitized_Request(req) if isinstance(req, str) else req
    return self._opener.open(request, timeout=self._socket_timeout)
def print_debug_header(self):
    """Write the verbose-mode diagnostic header.

    Does nothing unless the 'verbose' param is set. Otherwise reports the
    encodings in use, program version and variant, lazy-extractor and
    plugin state, compatibility options, git HEAD (source checkouts only),
    Python and external program versions, optional libraries and the
    proxy map. Output goes to the 'logger' param when one is configured.
    """
    if not self.params.get('verbose'):
        return

    def get_encoding(stream):
        ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
        if not supports_terminal_sequences(stream):
            from .utils import WINDOWS_VT_MODE  # Must be imported locally
            ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
        return ret

    encoding_str = 'Encodings: locale %s, fs %s, pref %s, %s' % (
        locale.getpreferredencoding(),
        sys.getfilesystemencoding(),
        self.get_encoding(),
        ', '.join(
            f'{key} {get_encoding(stream)}' for key, stream in self._out_files
            if stream is not None and key != 'console')
    )

    logger = self.params.get('logger')
    if logger:
        write_debug = lambda msg: logger.debug(f'[debug] {msg}')
        write_debug(encoding_str)
    else:
        # The encodings line is written with encoding=None, unlike the rest
        write_string(f'[debug] {encoding_str}\n', encoding=None)
        write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')

    source = detect_variant()
    write_debug(join_nonempty(
        'yt-dlp version', __version__,
        f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
        '' if source == 'unknown' else f'({source})',
        delim=' '))
    if not _LAZY_LOADER:
        if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
            write_debug('Lazy loading extractors is forcibly disabled')
        else:
            write_debug('Lazy loading extractors is disabled')
    if plugin_extractors or plugin_postprocessors:
        write_debug('Plugins: %s' % [
            '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
            for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
    if self.params['compat_opts']:
        write_debug('Compatibility options: %s' % ', '.join(self.params['compat_opts']))

    if source == 'source':
        # Best effort: a missing git binary or a non-git checkout must not
        # break the debug header
        try:
            sp = Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, err = sp.communicate_or_kill()
            out = out.decode().strip()
            if re.match('[0-9a-f]+', out):
                write_debug('Git HEAD: %s' % out)
        except Exception:
            # sys.exc_clear is a Python 2 API; where it is absent the
            # resulting AttributeError is suppressed
            with contextlib.suppress(Exception):
                sys.exc_clear()

    def python_implementation():
        impl_name = platform.python_implementation()
        if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
            return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
        return impl_name

    write_debug('Python version %s (%s %s) - %s' % (
        platform.python_version(),
        python_implementation(),
        platform.architecture()[0],
        platform_name()))

    exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
    ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
    if ffmpeg_features:
        exe_versions['ffmpeg'] += ' (%s)' % ','.join(sorted(ffmpeg_features))

    exe_versions['rtmpdump'] = rtmpdump_version()
    exe_versions['phantomjs'] = PhantomJSwrapper._version()
    exe_str = ', '.join(
        f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
    ) or 'none'
    write_debug('exe versions: %s' % exe_str)

    from .compat.compat_utils import get_package_info
    from .dependencies import available_dependencies

    write_debug('Optional libraries: %s' % (', '.join(sorted({
        join_nonempty(*get_package_info(m)) for m in available_dependencies.values()
    })) or 'none'))

    self._setup_opener()
    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    write_debug(f'Proxy map: {proxy_map}')

    # Not implemented: the branch below is deliberately disabled
    if False and self.params.get('call_home'):
        ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode()
        write_debug('Public IP address: %s' % ipaddr)
        latest_version = self.urlopen(
            'https://yt-dl.org/latest/version').read().decode()
        if version_tuple(latest_version) > version_tuple(__version__):
            self.report_warning(
                'You are using an outdated version (newest version: %s)! '
                'See https://yt-dl.org/update if you need help updating.' %
                latest_version)
def _setup_opener(self):
    """Create the URL opener and cookie jar; a no-op once self._opener exists.

    Sets self._socket_timeout (20 seconds unless 'socket_timeout' is
    given), self.cookiejar and self._opener.
    """
    if hasattr(self, '_opener'):
        return
    timeout_val = self.params.get('socket_timeout')
    self._socket_timeout = 20 if timeout_val is None else float(timeout_val)

    opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
    opts_cookiefile = self.params.get('cookiefile')
    opts_proxy = self.params.get('proxy')

    self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)

    cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
    if opts_proxy is not None:
        if opts_proxy == '':
            # An explicitly empty proxy option yields an empty proxy map
            proxies = {}
        else:
            proxies = {'http': opts_proxy, 'https': opts_proxy}
    else:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    proxy_handler = PerRequestProxyHandler(proxies)

    debuglevel = 1 if self.params.get('debug_printtraffic') else 0
    https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
    redirect_handler = YoutubeDLRedirectHandler()
    data_handler = urllib.request.DataHandler()

    # When passing our own FileHandler instance, build_opener won't add the
    # default FileHandler and allows us to disable the file protocol, which
    # can be used for malicious purposes (see
    # https://github.com/ytdl-org/youtube-dl/issues/8227)
    file_handler = compat_urllib_request.FileHandler()

    def file_open(*args, **kwargs):
        raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
    file_handler.file_open = file_open

    opener = compat_urllib_request.build_opener(
        proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)

    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener
def encode(self, s):
    """Encode `s` with the configured encoding; bytes are returned unchanged.

    Raises UnicodeEncodeError, with a hint about the --encoding option
    appended to its reason, when `s` cannot be represented.
    """
    if isinstance(s, bytes):
        return s  # Already encoded

    try:
        return s.encode(self.get_encoding())
    except UnicodeEncodeError as err:
        err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
        raise
def get_encoding(self):
    """Return the 'encoding' param, or the preferred encoding when unset."""
    encoding = self.params.get('encoding')
    if encoding is None:
        encoding = preferredencoding()
    return encoding
def _write_info_json(self, label, ie_result, infofn, overwrite=None):
    ''' Write infojson and returns True = written, 'exists' = Already exists, False = skip, None = error '''
    # `overwrite` falls back to the 'overwrites' param (default True)
    if overwrite is None:
        overwrite = self.params.get('overwrites', True)
    if not self.params.get('writeinfojson'):
        return False
    elif not infofn:
        self.write_debug(f'Skipping writing {label} infojson')
        return False
    elif not self._ensure_dir_exists(infofn):
        return None
    elif not overwrite and os.path.exists(infofn):
        self.to_screen(f'[info] {label.title()} metadata is already present')
        return 'exists'

    self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
    try:
        write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
        return True
    except OSError:
        self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
        return None
def _write_description(self, label, ie_result, descfn):
    ''' Write description and returns True = written, False = skip, None = error '''
    if not self.params.get('writedescription'):
        return False
    elif not descfn:
        self.write_debug(f'Skipping writing {label} description')
        return False
    elif not self._ensure_dir_exists(descfn):
        return None
    elif not self.params.get('overwrites', True) and os.path.exists(descfn):
        # Falls through to the final `return True`
        self.to_screen(f'[info] {label.title()} description is already present')
    elif ie_result.get('description') is None:
        self.report_warning(f'There\'s no {label} description to write')
        return False
    else:
        try:
            self.to_screen(f'[info] Writing {label} description to: {descfn}')
            with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                descfile.write(ie_result['description'])
        except OSError:
            self.report_error(f'Cannot write {label} description file {descfn}')
            return None
    return True
def _write_subtitles(self, info_dict, filename):
    ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
    ret = []
    subtitles = info_dict.get('requested_subtitles')
    if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        return ret

    sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
    if not sub_filename_base:
        self.to_screen('[info] Skipping writing video subtitles')
        return ret
    for sub_lang, sub_info in subtitles.items():
        sub_format = sub_info['ext']
        sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
        sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
        existing_sub = self.existing_file((sub_filename_final, sub_filename))
        if existing_sub:
            self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
            sub_info['filepath'] = existing_sub
            ret.append((existing_sub, sub_filename_final))
            continue

        self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
        if sub_info.get('data') is not None:
            # Subtitle content is already available: write it directly
            try:
                # Use newline='' to prevent conversion of newline characters
                # See https://github.com/ytdl-org/youtube-dl/issues/10268
                with open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
                    subfile.write(sub_info['data'])
                sub_info['filepath'] = sub_filename
                ret.append((sub_filename, sub_filename_final))
                continue
            except OSError:
                self.report_error(f'Cannot write video subtitles file {sub_filename}')
                return None

        try:
            # Download via a copy so that the added headers do not leak into sub_info
            sub_copy = sub_info.copy()
            sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
            self.dl(sub_filename, sub_copy, subtitle=True)
            sub_info['filepath'] = sub_filename
            ret.append((sub_filename, sub_filename_final))
        except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
            msg = f'Unable to download video subtitles for {sub_lang!r}: {err}'
            if self.params.get('ignoreerrors') is not True:  # False or 'only_download'
                if not self.params.get('ignoreerrors'):
                    self.report_error(msg)
                raise DownloadError(msg)
            self.report_warning(msg)
    return ret
3934 def _write_thumbnails(self
, label
, info_dict
, filename
, thumb_filename_base
=None):
3935 ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
3936 write_all
= self
.params
.get('write_all_thumbnails', False)
3937 thumbnails
, ret
= [], []
3938 if write_all
or self
.params
.get('writethumbnail', False):
3939 thumbnails
= info_dict
.get('thumbnails') or []
3940 multiple
= write_all
and len(thumbnails
) > 1
3942 if thumb_filename_base
is None:
3943 thumb_filename_base
= filename
3944 if thumbnails
and not thumb_filename_base
:
3945 self
.write_debug(f
'Skipping writing {label} thumbnail')
3948 for idx
, t
in list(enumerate(thumbnails
))[::-1]:
3949 thumb_ext
= (f
'{t["id"]}.' if multiple
else '') + determine_ext(t
['url'], 'jpg')
3950 thumb_display_id
= f
'{label} thumbnail {t["id"]}'
3951 thumb_filename
= replace_extension(filename
, thumb_ext
, info_dict
.get('ext'))
3952 thumb_filename_final
= replace_extension(thumb_filename_base
, thumb_ext
, info_dict
.get('ext'))
3954 existing_thumb
= self
.existing_file((thumb_filename_final
, thumb_filename
))
3956 self
.to_screen('[info] %s is already present' % (
3957 thumb_display_id
if multiple
else f
'{label} thumbnail').capitalize())
3958 t
['filepath'] = existing_thumb
3959 ret
.append((existing_thumb
, thumb_filename_final
))
3961 self
.to_screen(f
'[info] Downloading {thumb_display_id} ...')
3963 uf
= self
.urlopen(sanitized_Request(t
['url'], headers
=t
.get('http_headers', {})))
3964 self
.to_screen(f
'[info] Writing {thumb_display_id} to: {thumb_filename}')
3965 with open(encodeFilename(thumb_filename
), 'wb') as thumbf
:
3966 shutil
.copyfileobj(uf
, thumbf
)
3967 ret
.append((thumb_filename
, thumb_filename_final
))
3968 t
['filepath'] = thumb_filename
3969 except network_exceptions
as err
:
3971 self
.report_warning(f
'Unable to download {thumb_display_id}: {err}')
3972 if ret
and not write_all
: