4 from __future__
import absolute_import
, unicode_literals
31 from string
import ascii_letters
35 compat_get_terminal_size
,
42 compat_tokenize_tokenize
,
44 compat_urllib_request
,
45 compat_urllib_request_DataHandler
,
46 windows_enable_vt_mode
,
48 from .cookies
import load_cookies
70 format_decimal_suffix
,
92 PerRequestProxyHandler
,
100 register_socks_protocols
,
101 RejectedVideoReached
,
102 remove_terminal_sequences
,
116 supports_terminal_sequences
,
121 UnavailableVideoError
,
127 YoutubeDLCookieProcessor
,
129 YoutubeDLRedirectHandler
,
131 from .cache
import Cache
132 from .minicurses
import format_text
133 from .extractor
import (
134 gen_extractor_classes
,
137 _PLUGIN_CLASSES
as plugin_extractors
139 from .extractor
.openload
import PhantomJSwrapper
140 from .downloader
import (
142 get_suitable_downloader
,
143 shorten_protocol_name
145 from .downloader
.rtmp
import rtmpdump_version
146 from .postprocessor
import (
149 FFmpegFixupDuplicateMoovPP
,
150 FFmpegFixupDurationPP
,
153 FFmpegFixupStretchedPP
,
154 FFmpegFixupTimestampPP
,
157 MoveFilesAfterDownloadPP
,
158 _PLUGIN_CLASSES
as plugin_postprocessors
160 from .update
import detect_variant
161 from .version
import __version__
, RELEASE_GIT_HEAD
163 if compat_os_name
== 'nt':
167 class YoutubeDL(object):
170 YoutubeDL objects are the ones responsible for downloading the
171 actual video file and writing it to disk if the user has requested
172 it, among some other tasks. In most cases there should be one per
173 program. As, given a video URL, the downloader doesn't know how to
174 extract all the needed information, task that InfoExtractors do, it
175 has to pass the URL to one of them.
177 For this, YoutubeDL objects have a method that allows
178 InfoExtractors to be registered in a given order. When it is passed
179 a URL, the YoutubeDL object hands it to the first InfoExtractor it
180 finds that reports being able to handle it. The InfoExtractor extracts
181 all the information about the video or videos the URL refers to, and
182 YoutubeDL processes the extracted information, possibly using a File
183 Downloader to download the video.
185 YoutubeDL objects accept a lot of parameters. In order not to saturate
186 the object constructor with arguments, it receives a dictionary of
187 options instead. These options are available through the params
188 attribute for the InfoExtractors to use. The YoutubeDL also
189 registers itself as the downloader in charge for the InfoExtractors
190 that are added to it, so this is a "mutual registration".
194 username: Username for authentication purposes.
195 password: Password for authentication purposes.
196 videopassword: Password for accessing a video.
197 ap_mso: Adobe Pass multiple-system operator identifier.
198 ap_username: Multiple-system operator account username.
199 ap_password: Multiple-system operator account password.
200 usenetrc: Use netrc for authentication instead.
201 verbose: Print additional info to stdout.
202 quiet: Do not print messages to stdout.
203 no_warnings: Do not print out anything for warnings.
204 forceprint: A dict with keys WHEN mapped to a list of templates to
205 print to stdout. The allowed keys are video or any of the
206 items in utils.POSTPROCESS_WHEN.
207 For compatibility, a single list is also accepted
208 print_to_file: A dict with keys WHEN (same as forceprint) mapped to
209 a list of tuples with (template, filename)
210 forceurl: Force printing final URL. (Deprecated)
211 forcetitle: Force printing title. (Deprecated)
212 forceid: Force printing ID. (Deprecated)
213 forcethumbnail: Force printing thumbnail URL. (Deprecated)
214 forcedescription: Force printing description. (Deprecated)
215 forcefilename: Force printing final filename. (Deprecated)
216 forceduration: Force printing duration. (Deprecated)
217 forcejson: Force printing info_dict as JSON.
218 dump_single_json: Force printing the info_dict of the whole playlist
219 (or video) as a single JSON line.
220 force_write_download_archive: Force writing download archive regardless
221 of 'skip_download' or 'simulate'.
222 simulate: Do not download the video files. If unset (or None),
223 simulate only if listsubtitles, listformats or list_thumbnails is used
224 format: Video format code. see "FORMAT SELECTION" for more details.
225 You can also pass a function. The function takes 'ctx' as
226 argument and returns the formats to download.
227 See "build_format_selector" for an implementation
228 allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
229 ignore_no_formats_error: Ignore "No video formats" error. Useful for
230 extracting metadata even if the video is not actually
231 available for download (experimental)
232 format_sort: A list of fields by which to sort the video formats.
233 See "Sorting Formats" for more details.
234 format_sort_force: Force the given format_sort. see "Sorting Formats"
236 allow_multiple_video_streams: Allow multiple video streams to be merged
238 allow_multiple_audio_streams: Allow multiple audio streams to be merged
240 check_formats Whether to test if the formats are downloadable.
241 Can be True (check all), False (check none),
242 'selected' (check selected formats),
243 or None (check only if requested by extractor)
244 paths: Dictionary of output paths. The allowed keys are 'home'
245 'temp' and the keys of OUTTMPL_TYPES (in utils.py)
246 outtmpl: Dictionary of templates for output names. Allowed keys
247 are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
248 For compatibility with youtube-dl, a single string can also be used
249 outtmpl_na_placeholder: Placeholder for unavailable meta fields.
250 restrictfilenames: Do not allow "&" and spaces in file names
251 trim_file_name: Limit length of filename (extension excluded)
252 windowsfilenames: Force the filenames to be windows compatible
253 ignoreerrors: Do not stop on download/postprocessing errors.
254 Can be 'only_download' to ignore only download errors.
255 Default is 'only_download' for CLI, but False for API
256 skip_playlist_after_errors: Number of allowed failures until the rest of
257 the playlist is skipped
258 force_generic_extractor: Force downloader to use the generic extractor
259 overwrites: Overwrite all video and metadata files if True,
260 overwrite only non-video files if None
261 and don't overwrite any file if False
262 For compatibility with youtube-dl,
263 "nooverwrites" may also be used instead
264 playliststart: Playlist item to start at.
265 playlistend: Playlist item to end at.
266 playlist_items: Specific indices of playlist to download.
267 playlistreverse: Download playlist items in reverse order.
268 playlistrandom: Download playlist items in random order.
269 matchtitle: Download only matching titles.
270 rejecttitle: Reject downloads for matching titles.
271 logger: Log messages to a logging.Logger instance.
272 logtostderr: Log messages to stderr instead of stdout.
273 consoletitle: Display progress in console window's titlebar.
274 writedescription: Write the video description to a .description file
275 writeinfojson: Write the video description to a .info.json file
276 clean_infojson: Remove private fields from the infojson
277 getcomments: Extract video comments. This will not be written to disk
278 unless writeinfojson is also given
279 writeannotations: Write the video annotations to a .annotations.xml file
280 writethumbnail: Write the thumbnail image to a file
281 allow_playlist_files: Whether to write playlists' description, infojson etc
282 also to disk when using the 'write*' options
283 write_all_thumbnails: Write all thumbnail formats to files
284 writelink: Write an internet shortcut file, depending on the
285 current platform (.url/.webloc/.desktop)
286 writeurllink: Write a Windows internet shortcut file (.url)
287 writewebloclink: Write a macOS internet shortcut file (.webloc)
288 writedesktoplink: Write a Linux internet shortcut file (.desktop)
289 writesubtitles: Write the video subtitles to a file
290 writeautomaticsub: Write the automatically generated subtitles to a file
291 allsubtitles: Deprecated - Use subtitleslangs = ['all']
292 Downloads all the subtitles of the video
293 (requires writesubtitles or writeautomaticsub)
294 listsubtitles: Lists all available subtitles for the video
295 subtitlesformat: The format code for subtitles
296 subtitleslangs: List of languages of the subtitles to download (can be regex).
297 The list may contain "all" to refer to all the available
298 subtitles. The language can be prefixed with a "-" to
299 exclude it from the requested languages. Eg: ['all', '-live_chat']
300 keepvideo: Keep the video file after post-processing
301 daterange: A DateRange object, download only if the upload_date is in the range.
302 skip_download: Skip the actual download of the video file
303 cachedir: Location of the cache files in the filesystem.
304 False to disable filesystem cache.
305 noplaylist: Download single video instead of a playlist if in doubt.
306 age_limit: An integer representing the user's age in years.
307 Unsuitable videos for the given age are skipped.
308 min_views: An integer representing the minimum view count the video
309 must have in order to not be skipped.
310 Videos without view count information are always
311 downloaded. None for no limit.
312 max_views: An integer representing the maximum view count.
313 Videos that are more popular than that are not
315 Videos without view count information are always
316 downloaded. None for no limit.
317 download_archive: File name of a file where all downloads are recorded.
318 Videos already present in the file are not downloaded
320 break_on_existing: Stop the download process after attempting to download a
321 file that is in the archive.
322 break_on_reject: Stop the download process when encountering a video that
323 has been filtered out.
324 break_per_url: Whether break_on_reject and break_on_existing
325 should act on each input URL as opposed to for the entire queue
326 cookiefile: File name where cookies should be read from and dumped to
327 cookiesfrombrowser: A tuple containing the name of the browser, the profile
328 name/path from where cookies are loaded, and the name of the
329 keyring. Eg: ('chrome', ) or ('vivaldi', 'default', 'BASICTEXT')
330 legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
331 support RFC 5746 secure renegotiation
332 nocheckcertificate: Do not verify SSL certificates
333 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
334 At the moment, this is only supported by YouTube.
335 proxy: URL of the proxy server to use
336 geo_verification_proxy: URL of the proxy to use for IP address verification
337 on geo-restricted sites.
338 socket_timeout: Time to wait for unresponsive hosts, in seconds
339 bidi_workaround: Work around buggy terminals without bidirectional text
340 support, using fribidi
341 debug_printtraffic:Print out sent and received HTTP traffic
342 include_ads: Download ads as well (deprecated)
343 default_search: Prepend this string if an input url is not valid.
344 'auto' for elaborate guessing
345 encoding: Use this encoding instead of the system-specified.
346 extract_flat: Do not resolve URLs, return the immediate result.
347 Pass in 'in_playlist' to only show this behavior for
349 wait_for_video: If given, wait for scheduled streams to become available.
350 The value should be a tuple containing the range
351 (min_secs, max_secs) to wait between retries
352 postprocessors: A list of dictionaries, each with an entry
353 * key: The name of the postprocessor. See
354 yt_dlp/postprocessor/__init__.py for a list.
355 * when: When to run the postprocessor. Allowed values are
356 the entries of utils.POSTPROCESS_WHEN
357 Assumed to be 'post_process' if not given
358 post_hooks: Deprecated - Register a custom postprocessor instead
359 A list of functions that get called as the final step
360 for each video file, after all postprocessors have been
361 called. The filename will be passed as the only argument.
362 progress_hooks: A list of functions that get called on download
363 progress, with a dictionary with the entries
364 * status: One of "downloading", "error", or "finished".
365 Check this first and ignore unknown values.
366 * info_dict: The extracted info_dict
368 If status is one of "downloading", or "finished", the
369 following properties may also be present:
370 * filename: The final filename (always present)
371 * tmpfilename: The filename we're currently writing to
372 * downloaded_bytes: Bytes on disk
373 * total_bytes: Size of the whole file, None if unknown
374 * total_bytes_estimate: Guess of the eventual file size,
376 * elapsed: The number of seconds since download started.
377 * eta: The estimated time in seconds, None if unknown
378 * speed: The download speed in bytes/second, None if
380 * fragment_index: The counter of the currently
381 downloaded video fragment.
382 * fragment_count: The number of fragments (= individual
383 files that will be merged)
385 Progress hooks are guaranteed to be called at least once
386 (with status "finished") if the download is successful.
387 postprocessor_hooks: A list of functions that get called on postprocessing
388 progress, with a dictionary with the entries
389 * status: One of "started", "processing", or "finished".
390 Check this first and ignore unknown values.
391 * postprocessor: Name of the postprocessor
392 * info_dict: The extracted info_dict
394 Progress hooks are guaranteed to be called at least twice
395 (with status "started" and "finished") if the processing is successful.
396 merge_output_format: Extension to use when merging formats.
397 final_ext: Expected final extension; used to detect when the file was
398 already downloaded and converted
399 fixup: Automatically correct known faults of the file.
401 - "never": do nothing
402 - "warn": only emit a warning
403 - "detect_or_warn": check whether we can do anything
404 about it, warn otherwise (default)
405 source_address: Client-side IP address to bind to.
406 call_home: Boolean, true iff we are allowed to contact the
407 yt-dlp servers for debugging. (BROKEN)
408 sleep_interval_requests: Number of seconds to sleep between requests
410 sleep_interval: Number of seconds to sleep before each download when
411 used alone or a lower bound of a range for randomized
412 sleep before each download (minimum possible number
413 of seconds to sleep) when used along with
415 max_sleep_interval:Upper bound of a range for randomized sleep before each
416 download (maximum possible number of seconds to sleep).
417 Must only be used along with sleep_interval.
418 Actual sleep time will be a random float from range
419 [sleep_interval; max_sleep_interval].
420 sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
421 listformats: Print an overview of available video formats and exit.
422 list_thumbnails: Print a table of all thumbnails and exit.
423 match_filter: A function that gets called with the info_dict of
425 If it returns a message, the video is ignored.
426 If it returns None, the video is downloaded.
427 match_filter_func in utils.py is one example for this.
428 no_color: Do not emit color codes in output.
429 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
432 Two-letter ISO 3166-2 country code that will be used for
433 explicit geographic restriction bypassing via faking
434 X-Forwarded-For HTTP header
436 IP range in CIDR notation that will be used similarly to
439 The following options determine which downloader is picked:
440 external_downloader: A dictionary of protocol keys and the executable of the
441 external downloader to use for it. The allowed protocols
442 are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
443 Set the value to 'native' to use the native downloader
444 hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
445 or {'m3u8': 'ffmpeg'} instead.
446 Use the native HLS downloader instead of ffmpeg/avconv
447 if True, otherwise use ffmpeg/avconv if False, otherwise
448 use downloader suggested by extractor if None.
449 compat_opts: Compatibility options. See "Differences in default behavior".
450 The following options do not work when used through the API:
451 filename, abort-on-error, multistreams, no-live-chat, format-sort
452 no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
453 Refer to __init__.py for their implementation
454 progress_template: Dictionary of templates for progress outputs.
455 Allowed keys are 'download', 'postprocess',
456 'download-title' (console title) and 'postprocess-title'.
457 The template is mapped on a dictionary with keys 'progress' and 'info'
459 The following parameters are not used by YoutubeDL itself, they are used by
460 the downloader (see yt_dlp/downloader/common.py):
461 nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
462 max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
463 continuedl, noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
464 external_downloader_args, concurrent_fragment_downloads.
466 The following options are used by the post processors:
467 prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
468 otherwise prefer ffmpeg. (avconv support is deprecated)
469 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
470 to the binary or its containing directory.
471 postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
472 and a list of additional command-line arguments for the
473 postprocessor/executable. The dict can also have "PP+EXE" keys
474 which are used when the given exe is used by the given PP.
475 Use 'default' as the name for arguments to be passed to all PP
476 For compatibility with youtube-dl, a single list of args
479 The following options are used by the extractors:
480 extractor_retries: Number of times to retry for known errors
481 dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
482 hls_split_discontinuity: Split HLS playlists to different formats at
483 discontinuities such as ad breaks (default: False)
484 extractor_args: A dictionary of arguments to be passed to the extractors.
485 See "EXTRACTOR ARGUMENTS" for details.
486 Eg: {'youtube': {'skip': ['dash', 'hls']}}
487 mark_watched: Mark videos watched (even with --simulate). Only for YouTube
488 youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
489 If True (default), DASH manifests and related
490 data will be downloaded and processed by extractor.
491 You can reduce network I/O by disabling it if you don't
492 care about DASH. (only for youtube)
493 youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
494 If True (default), HLS manifests and related
495 data will be downloaded and processed by extractor.
496 You can reduce network I/O by disabling it if you don't
497 care about HLS. (only for youtube)
500 _NUMERIC_FIELDS
= set((
501 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
502 'timestamp', 'release_timestamp',
503 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
504 'average_rating', 'comment_count', 'age_limit',
505 'start_time', 'end_time',
506 'chapter_number', 'season_number', 'episode_number',
507 'track_number', 'disc_number', 'release_year',
510 _format_selection_exts
= {
511 'audio': {'m4a', 'mp3', 'ogg', 'aac'}
,
512 'video': {'mp4', 'flv', 'webm', '3gp'}
,
513 'storyboards': {'mhtml'}
,
518 _pps
= {k: [] for k in POSTPROCESS_WHEN}
519 _printed_messages
= set()
520 _first_webpage_request
= True
521 _download_retcode
= None
522 _num_downloads
= None
524 _playlist_urls
= set()
527 def __init__(self
, params
=None, auto_init
=True):
528 """Create a FileDownloader object with the given options.
529 @param auto_init Whether to load the default extractors and print header (if verbose).
530 Set to 'no_verbose_header' to not print the header
535 self
._ies
_instances
= {}
536 self
._pps
= {k: [] for k in POSTPROCESS_WHEN}
537 self
._printed
_messages
= set()
538 self
._first
_webpage
_request
= True
539 self
._post
_hooks
= []
540 self
._progress
_hooks
= []
541 self
._postprocessor
_hooks
= []
542 self
._download
_retcode
= 0
543 self
._num
_downloads
= 0
545 self
._screen
_file
= [sys
.stdout
, sys
.stderr
][params
.get('logtostderr', False)]
546 self
._err
_file
= sys
.stderr
548 self
.cache
= Cache(self
)
550 windows_enable_vt_mode()
551 self
._allow
_colors
= {
552 'screen': not self
.params
.get('no_color') and supports_terminal_sequences(self
._screen
_file
),
553 'err': not self
.params
.get('no_color') and supports_terminal_sequences(self
._err
_file
),
556 if sys
.version_info
< (3, 6):
558 'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys
.version_info
[:2])
560 if self
.params
.get('allow_unplayable_formats'):
562 f
'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
563 'This is a developer option intended for debugging. \n'
564 ' If you experience any issues while using this option, '
565 f
'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')
567 def check_deprecated(param
, option
, suggestion
):
568 if self
.params
.get(param
) is not None:
569 self
.report_warning('%s is deprecated. Use %s instead' % (option
, suggestion
))
573 if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
574 if self
.params
.get('geo_verification_proxy') is None:
575 self
.params
['geo_verification_proxy'] = self
.params
['cn_verification_proxy']
577 check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
578 check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
579 check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
581 for msg
in self
.params
.get('_warnings', []):
582 self
.report_warning(msg
)
583 for msg
in self
.params
.get('_deprecation_warnings', []):
584 self
.deprecation_warning(msg
)
586 if 'list-formats' in self
.params
.get('compat_opts', []):
587 self
.params
['listformats_table'] = False
589 if 'overwrites' not in self
.params
and self
.params
.get('nooverwrites') is not None:
590 # nooverwrites was unnecessarily changed to overwrites
591 # in 0c3d0f51778b153f65c21906031c2e091fcfb641
592 # This ensures compatibility with both keys
593 self
.params
['overwrites'] = not self
.params
['nooverwrites']
594 elif self
.params
.get('overwrites') is None:
595 self
.params
.pop('overwrites', None)
597 self
.params
['nooverwrites'] = not self
.params
['overwrites']
599 self
.params
.setdefault('forceprint', {})
600 self
.params
.setdefault('print_to_file', {})
602 # Compatibility with older syntax
603 if not isinstance(params
['forceprint'], dict):
604 self
.params
['forceprint'] = {'video': params['forceprint']}
606 if self
.params
.get('bidi_workaround', False):
609 master
, slave
= pty
.openpty()
610 width
= compat_get_terminal_size().columns
614 width_args
= ['-w', str(width
)]
616 stdin
=subprocess
.PIPE
,
618 stderr
=self
._err
_file
)
620 self
._output
_process
= Popen(['bidiv'] + width_args
, **sp_kwargs
)
622 self
._output
_process
= Popen(['fribidi', '-c', 'UTF-8'] + width_args
, **sp_kwargs
)
623 self
._output
_channel
= os
.fdopen(master
, 'rb')
624 except OSError as ose
:
625 if ose
.errno
== errno
.ENOENT
:
627 'Could not find fribidi executable, ignoring --bidi-workaround. '
628 'Make sure that fribidi is an executable file in one of the directories in your $PATH.')
632 if (sys
.platform
!= 'win32'
633 and sys
.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
634 and not self
.params
.get('restrictfilenames', False)):
635 # Unicode filesystem API will throw errors (#1474, #13027)
637 'Assuming --restrict-filenames since file system encoding '
638 'cannot encode all characters. '
639 'Set the LC_ALL environment variable to fix this.')
640 self
.params
['restrictfilenames'] = True
642 self
.outtmpl_dict
= self
.parse_outtmpl()
644 # Creating format selector here allows us to catch syntax errors before the extraction
645 self
.format_selector
= (
646 self
.params
.get('format') if self
.params
.get('format') in (None, '-')
647 else self
.params
['format'] if callable(self
.params
['format'])
648 else self
.build_format_selector(self
.params
['format']))
653 if auto_init
!= 'no_verbose_header':
654 self
.print_debug_header()
655 self
.add_default_info_extractors()
658 'post_hooks': self
.add_post_hook
,
659 'progress_hooks': self
.add_progress_hook
,
660 'postprocessor_hooks': self
.add_postprocessor_hook
,
662 for opt
, fn
in hooks
.items():
663 for ph
in self
.params
.get(opt
, []):
666 for pp_def_raw
in self
.params
.get('postprocessors', []):
667 pp_def
= dict(pp_def_raw
)
668 when
= pp_def
.pop('when', 'post_process')
669 self
.add_post_processor(
670 get_postprocessor(pp_def
.pop('key'))(self
, **compat_kwargs(pp_def
)),
673 register_socks_protocols()
675 def preload_download_archive(fn
):
676 """Preload the archive, if any is specified"""
679 self
.write_debug(f
'Loading archive file {fn!r}')
681 with locked_file(fn
, 'r', encoding
='utf-8') as archive_file
:
682 for line
in archive_file
:
683 self
.archive
.add(line
.strip())
684 except IOError as ioe
:
685 if ioe
.errno
!= errno
.ENOENT
:
691 preload_download_archive(self
.params
.get('download_archive'))
693 def warn_if_short_id(self
, argv
):
694 # short YouTube ID starting with dash?
696 i
for i
, a
in enumerate(argv
)
697 if re
.match(r
'^-[0-9A-Za-z_-]{10}$', a
)]
701 + [a
for i
, a
in enumerate(argv
) if i
not in idxs
]
702 + ['--'] + [argv
[i
] for i
in idxs
]
705 'Long argument string detected. '
706 'Use -- to separate parameters and URLs, like this:\n%s' %
707 args_to_str(correct_argv
))
709 def add_info_extractor(self
, ie
):
710 """Add an InfoExtractor object to the end of the list."""
712 self
._ies
[ie_key
] = ie
713 if not isinstance(ie
, type):
714 self
._ies
_instances
[ie_key
] = ie
715 ie
.set_downloader(self
)
717 def _get_info_extractor_class(self
, ie_key
):
718 ie
= self
._ies
.get(ie_key
)
720 ie
= get_info_extractor(ie_key
)
721 self
.add_info_extractor(ie
)
724 def get_info_extractor(self
, ie_key
):
726 Get an instance of an IE with name ie_key, it will try to get one from
727 the _ies list, if there's no instance it will create a new one and add
728 it to the extractor list.
730 ie
= self
._ies
_instances
.get(ie_key
)
732 ie
= get_info_extractor(ie_key
)()
733 self
.add_info_extractor(ie
)
def add_default_info_extractors(self):
    """
    Register every extractor class produced by gen_extractor_classes().

    Each class is passed to add_info_extractor() in iteration order, so the
    defaults end up after anything that was registered earlier.
    """
    for extractor_cls in gen_extractor_classes():
        self.add_info_extractor(extractor_cls)
def add_post_processor(self, pp, when='post_process'):
    """Append the PostProcessor `pp` to the chain that runs at stage `when`.

    After being queued, the postprocessor is told which downloader owns it
    via pp.set_downloader(self). An unknown `when` raises KeyError because
    the stage must already be a key of self._pps.
    """
    stage_chain = self._pps[when]
    stage_chain.append(pp)
    pp.set_downloader(self)
def add_post_hook(self, ph):
    """Register `ph` at the end of the list of post hooks."""
    registered = self._post_hooks
    registered.append(ph)
def add_progress_hook(self, ph):
    """Register `ph` at the end of the list of download progress hooks."""
    registered = self._progress_hooks
    registered.append(ph)
756 def add_postprocessor_hook(self
, ph
):
757 """Add the postprocessing progress hook"""
758 self
._postprocessor
_hooks
.append(ph
)
759 for pps
in self
._pps
.values():
761 pp
.add_progress_hook(ph
)
763 def _bidi_workaround(self
, message
):
764 if not hasattr(self
, '_output_channel'):
767 assert hasattr(self
, '_output_process')
768 assert isinstance(message
, compat_str
)
769 line_count
= message
.count('\n') + 1
770 self
._output
_process
.stdin
.write((message
+ '\n').encode('utf-8'))
771 self
._output
_process
.stdin
.flush()
772 res
= ''.join(self
._output
_channel
.readline().decode('utf-8')
773 for _
in range(line_count
))
774 return res
[:-len('\n')]
776 def _write_string(self
, message
, out
=None, only_once
=False):
778 if message
in self
._printed
_messages
:
780 self
._printed
_messages
.add(message
)
781 write_string(message
, out
=out
, encoding
=self
.params
.get('encoding'))
783 def to_stdout(self
, message
, skip_eol
=False, quiet
=False):
784 """Print message to stdout"""
785 if self
.params
.get('logger'):
786 self
.params
['logger'].debug(message
)
787 elif not quiet
or self
.params
.get('verbose'):
789 '%s%s' % (self
._bidi
_workaround
(message
), ('' if skip_eol
else '\n')),
790 self
._err
_file
if quiet
else self
._screen
_file
)
792 def to_stderr(self
, message
, only_once
=False):
793 """Print message to stderr"""
794 assert isinstance(message
, compat_str
)
795 if self
.params
.get('logger'):
796 self
.params
['logger'].error(message
)
798 self
._write
_string
('%s\n' % self
._bidi
_workaround
(message
), self
._err
_file
, only_once
=only_once
)
800 def to_console_title(self
, message
):
801 if not self
.params
.get('consoletitle', False):
803 message
= remove_terminal_sequences(message
)
804 if compat_os_name
== 'nt':
805 if ctypes
.windll
.kernel32
.GetConsoleWindow():
806 # c_wchar_p() might not be necessary if `message` is
807 # already of type unicode()
808 ctypes
.windll
.kernel32
.SetConsoleTitleW(ctypes
.c_wchar_p(message
))
809 elif 'TERM' in os
.environ
:
810 self
._write
_string
('\033]0;%s\007' % message
, self
._screen
_file
)
812 def save_console_title(self
):
813 if not self
.params
.get('consoletitle', False):
815 if self
.params
.get('simulate'):
817 if compat_os_name
!= 'nt' and 'TERM' in os
.environ
:
818 # Save the title on stack
819 self
._write
_string
('\033[22;0t', self
._screen
_file
)
821 def restore_console_title(self
):
822 if not self
.params
.get('consoletitle', False):
824 if self
.params
.get('simulate'):
826 if compat_os_name
!= 'nt' and 'TERM' in os
.environ
:
827 # Restore the title from stack
828 self
._write
_string
('\033[23;0t', self
._screen
_file
)
831 self
.save_console_title()
834 def __exit__(self
, *args
):
835 self
.restore_console_title()
837 if self
.params
.get('cookiefile') is not None:
838 self
.cookiejar
.save(ignore_discard
=True, ignore_expires
=True)
840 def trouble(self
, message
=None, tb
=None, is_error
=True):
841 """Determine action to take when a download problem appears.
843 Depending on if the downloader has been configured to ignore
844 download errors or not, this method may throw an exception or
845 not when errors are found, after printing the message.
847 @param tb If given, is additional traceback information
848 @param is_error Whether to raise error according to ignoreerrors
850 if message
is not None:
851 self
.to_stderr(message
)
852 if self
.params
.get('verbose'):
854 if sys
.exc_info()[0]: # if .trouble has been called from an except block
856 if hasattr(sys
.exc_info()[1], 'exc_info') and sys
.exc_info()[1].exc_info
[0]:
857 tb
+= ''.join(traceback
.format_exception(*sys
.exc_info()[1].exc_info
))
858 tb
+= encode_compat_str(traceback
.format_exc())
860 tb_data
= traceback
.format_list(traceback
.extract_stack())
861 tb
= ''.join(tb_data
)
866 if not self
.params
.get('ignoreerrors'):
867 if sys
.exc_info()[0] and hasattr(sys
.exc_info()[1], 'exc_info') and sys
.exc_info()[1].exc_info
[0]:
868 exc_info
= sys
.exc_info()[1].exc_info
870 exc_info
= sys
.exc_info()
871 raise DownloadError(message
, exc_info
)
872 self
._download
_retcode
= 1
874 def to_screen(self
, message
, skip_eol
=False):
875 """Print message to stdout if not in quiet mode"""
877 message
, skip_eol
, quiet
=self
.params
.get('quiet', False))
881 EMPHASIS
= 'light blue'
886 SUPPRESS
= 'light black'
888 def _format_text(self
, handle
, allow_colors
, text
, f
, fallback
=None, *, test_encoding
=False):
891 encoding
= self
.params
.get('encoding') or getattr(handle
, 'encoding', 'ascii')
892 text
= text
.encode(encoding
, 'ignore').decode(encoding
)
893 if fallback
is not None and text
!= original_text
:
895 if isinstance(f
, self
.Styles
):
897 return format_text(text
, f
) if allow_colors
else text
if fallback
is None else fallback
899 def _format_screen(self
, *args
, **kwargs
):
900 return self
._format
_text
(
901 self
._screen
_file
, self
._allow
_colors
['screen'], *args
, **kwargs
)
903 def _format_err(self
, *args
, **kwargs
):
904 return self
._format
_text
(
905 self
._err
_file
, self
._allow
_colors
['err'], *args
, **kwargs
)
def report_warning(self, message, only_once=False):
    '''
    Emit a warning. With a 'logger' param set, the message goes to
    logger.warning() unchanged. Otherwise it is written to stderr
    prefixed with 'WARNING:' (styled via _format_err), unless the
    'no_warnings' param is set.
    '''
    logger = self.params.get('logger')
    if logger is not None:
        logger.warning(message)
        return
    if self.params.get('no_warnings'):
        return
    prefix = self._format_err("WARNING:", self.Styles.WARNING)
    self.to_stderr(f'{prefix} {message}', only_once)
def deprecation_warning(self, message):
    """Report use of a deprecated feature.

    With a 'logger' param set, the message is passed to logger.warning()
    prefixed with 'DeprecationWarning: '. Otherwise it is written to
    stderr with a styled prefix, and only once per distinct message.
    """
    logger = self.params.get('logger')
    if logger is not None:
        # f-string: the message must actually be interpolated into the log line
        logger.warning(f'DeprecationWarning: {message}')
    else:
        self.to_stderr(f'{self._format_err("DeprecationWarning:", self.Styles.ERROR)} {message}', True)
def report_error(self, message, *args, **kwargs):
    '''
    Do the same as trouble, but prefixes the message with 'ERROR:'
    (styled through _format_err). Extra arguments are forwarded to trouble.
    '''
    prefix = self._format_err("ERROR:", self.Styles.ERROR)
    self.trouble(f'{prefix} {message}', *args, **kwargs)
def write_debug(self, message, only_once=False):
    '''Log debug message or Print message to stderr (verbose mode only)'''
    if not self.params.get('verbose', False):
        return
    line = '[debug] %s' % message
    logger = self.params.get('logger')
    if logger:
        logger.debug(line)
    else:
        self.to_stderr(line, only_once)
def report_file_already_downloaded(self, file_name):
    """Report file has already been fully downloaded."""
    try:
        text = '[download] %s has already been downloaded' % file_name
        self.to_screen(text)
    except UnicodeEncodeError:
        # The name could not be encoded for output; report without it
        self.to_screen('[download] The file has already been downloaded')
def report_file_delete(self, file_name):
    """Report that existing file will be deleted."""
    try:
        text = 'Deleting existing file %s' % file_name
        self.to_screen(text)
    except UnicodeEncodeError:
        # The name could not be encoded for output; report without it
        self.to_screen('Deleting existing file')
def raise_no_formats(self, info, forced=False):
    """Handle an info dict for which no downloadable format was found.

    Raises ExtractorError unless the 'ignore_no_formats_error' param is
    set and `forced` is false; in that case only a warning is reported.
    """
    has_drm = info.get('__has_drm')
    expected = self.params.get('ignore_no_formats_error')
    if has_drm:
        msg = 'This video is DRM protected'
    else:
        msg = 'No video formats found!'

    if expected and not forced:
        self.report_warning(msg)
        return
    raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
                         expected=has_drm or expected)
def parse_outtmpl(self):
    """Return the output templates as a dict keyed by template type.

    A non-dict 'outtmpl' param is treated as the 'default' template.
    Types without a template are filled in from DEFAULT_OUTTMPL. A
    warning is reported for every template that is a bytes object.
    """
    outtmpl_dict = self.params.get('outtmpl', {})
    if not isinstance(outtmpl_dict, dict):
        outtmpl_dict = {'default': outtmpl_dict}

    restrict = self.params.get('restrictfilenames')

    def sanitize(tmpl):
        # Remove spaces in the default template
        if restrict:
            return tmpl.replace(' - ', ' ').replace(' ', '-')
        return tmpl

    for tmpl_type, default_tmpl in DEFAULT_OUTTMPL.items():
        if outtmpl_dict.get(tmpl_type) is None:
            outtmpl_dict[tmpl_type] = sanitize(default_tmpl)

    for val in outtmpl_dict.values():
        if isinstance(val, bytes):
            self.report_warning(
                'Parameter outtmpl is bytes, but should be a unicode string. '
                'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
    return outtmpl_dict
def get_output_path(self, dir_type='', filename=None):
    """Join the configured 'home' path, the path for `dir_type` and `filename`.

    Both directories come from the 'paths' param and are passed through
    expand_path; the result goes through sanitize_path.
    """
    paths = self.params.get('paths', {})
    assert isinstance(paths, dict)

    home_dir = expand_path(paths.get('home', '').strip())
    type_dir = expand_path(paths.get(dir_type, '').strip()) if dir_type else ''
    path = os.path.join(home_dir, type_dir, filename or '')

    # Temporary fix for #4787
    # 'Treat' all problem characters by passing filename through preferredencoding
    # to workaround encoding issues with subprocess on python2 @ Windows
    on_py2_windows = sys.version_info < (3, 0) and sys.platform == 'win32'
    if on_py2_windows:
        path = encodeFilename(path, True).decode(preferredencoding())
    return sanitize_path(path, force=self.params.get('windowsfilenames'))
def _outtmpl_expandpath(outtmpl):
    """Apply expand_path to a template while keeping '%%' and '$$' intact."""
    # expand_path translates '%%' into '%' and '$$' into '$'
    # correspondingly that is not what we want since we need to keep
    # '%%' intact for template dict substitution step. Working around
    # with boundary-alike separator hack: a random marker is put between
    # the doubled characters and stripped again after expansion.
    sep = ''.join(random.choice(ascii_letters) for _ in range(32))
    guarded = outtmpl.replace('%%', f'%{sep}%').replace('$$', f'${sep}$')

    # outtmpl should be expand_path'ed before template dict substitution
    # because meta fields may contain env variables we don't want to
    # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
    # title "Hello $PATH", we don't want `$PATH` to be expanded.
    expanded = expand_path(guarded)
    return expanded.replace(sep, '')
def escape_outtmpl(outtmpl):
    ''' Escape any remaining strings like %s, %abc% etc. '''
    def add_percent(mobj):
        # Matches that have a key are kept as they are; others get an extra '%'
        matched = mobj.group(0)
        return matched if mobj.group('has_key') else '%' + matched

    pattern = STR_FORMAT_RE_TMPL.format('', '(?![%(\0])')
    return re.sub(pattern, add_percent, outtmpl)
def validate_outtmpl(cls, outtmpl):
    ''' @return None or Exception object '''
    # Turn the additional conversion types into plain %s so that the
    # trial %-formatting below accepts them
    extra_types = STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBUDS]')
    expanded = cls._outtmpl_expandpath(outtmpl)
    outtmpl = re.sub(extra_types, lambda mobj: f'{mobj.group(0)[:-1]}s', expanded)
    try:
        cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
    except ValueError as err:
        return err
    return None
1037 def _copy_infodict(info_dict
):
1038 info_dict
= dict(info_dict
)
1039 for key
in ('__original_infodict', '__postprocessors'):
1040 info_dict
.pop(key
, None)
def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
    """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
    @param sanitize    Whether to sanitize the output as a filename.
                       For backward compatibility, a function can also be passed
    @return            (rewritten template, dict holding the value for each rewritten key)
    """

    info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

    info_dict = self._copy_infodict(info_dict)
    info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
        formatSeconds(info_dict['duration'], '-' if sanitize else ':')
        if info_dict.get('duration', None) is not None
        else None)
    info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
    info_dict['video_autonumber'] = self._num_videos
    if info_dict.get('resolution') is None:
        info_dict['resolution'] = self.format_resolution(info_dict, default=None)

    # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
    # of %(field)s to %(field)0Nd for backward compatibility
    field_size_compat_map = {
        'playlist_index': number_of_digits(info_dict.get('_last_playlist_index') or 0),
        'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
        'autonumber': self.params.get('autonumber_size') or 5,
    }

    TMPL_DICT = {}
    EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBUDS]'))
    MATH_FUNCTIONS = {
        '+': float.__add__,
        '-': float.__sub__,
    }
    # Field is of the form key1.key2...
    # where keys (except first) can be string, int or slice
    FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
    # The decimal point is escaped so that only real numbers such as "1.5"
    # are accepted as numeric operands
    MATH_FIELD_RE = r'''(?:{field}|{num})'''.format(field=FIELD_RE, num=r'-?\d+(?:\.\d+)?')
    MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
    INTERNAL_FORMAT_RE = re.compile(r'''(?x)
        (?P<negate>-)?
        (?P<fields>{field})
        (?P<maths>(?:{math_op}{math_field})*)
        (?:>(?P<strf_format>.+?))?
        (?P<alternate>(?<!\\),[^|&)]+)?
        (?:&(?P<replacement>.*?))?
        (?:\|(?P<default>.*?))?
        $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))

    def _traverse_infodict(k):
        k = k.split('.')
        if k[0] == '':
            k.pop(0)
        return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)

    def get_value(mdict):
        # Object traversal
        value = _traverse_infodict(mdict['fields'])
        # Negative
        if mdict['negate']:
            value = float_or_none(value)
            if value is not None:
                value *= -1
        # Do maths
        offset_key = mdict['maths']
        if offset_key:
            value = float_or_none(value)
            operator = None
            # Alternately consume an operator and an operand from the expression
            while offset_key:
                item = re.match(
                    MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                    offset_key).group(0)
                offset_key = offset_key[len(item):]
                if operator is None:
                    operator = MATH_FUNCTIONS[item]
                    continue
                item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                offset = float_or_none(item)
                if offset is None:
                    # Not a literal number: treat the operand as a field name
                    offset = float_or_none(_traverse_infodict(item))
                try:
                    value = operator(value, multiplier * offset)
                except (TypeError, ZeroDivisionError):
                    return None
                operator = None
        # Datetime formatting
        if mdict['strf_format']:
            value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

        return value

    na = self.params.get('outtmpl_na_placeholder', 'NA')

    def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
        return sanitize_filename(str(value), restricted=restricted,
                                 is_id=re.search(r'(^|[_.])id(\.|$)', key))

    sanitizer = sanitize if callable(sanitize) else filename_sanitizer
    sanitize = bool(sanitize)

    def _dumpjson_default(obj):
        if isinstance(obj, (set, LazyList)):
            return list(obj)
        return repr(obj)

    def create_key(outer_mobj):
        if not outer_mobj.group('has_key'):
            return outer_mobj.group(0)
        key = outer_mobj.group('key')
        mobj = re.match(INTERNAL_FORMAT_RE, key)
        initial_field = mobj.group('fields') if mobj else ''
        value, replacement, default = None, None, na
        # Walk through the comma separated alternates until one yields a value
        while mobj:
            mobj = mobj.groupdict()
            default = mobj['default'] if mobj['default'] is not None else default
            value = get_value(mobj)
            replacement = mobj['replacement']
            if value is None and mobj['alternate']:
                mobj = re.match(INTERNAL_FORMAT_RE, mobj['alternate'][1:])
            else:
                break

        fmt = outer_mobj.group('format')
        if fmt == 's' and value is not None and key in field_size_compat_map.keys():
            fmt = '0{:d}d'.format(field_size_compat_map[key])

        value = default if value is None else value if replacement is None else replacement

        flags = outer_mobj.group('conversion') or ''
        str_fmt = f'{fmt[:-1]}s'
        if fmt[-1] == 'l':  # list
            delim = '\n' if '#' in flags else ', '
            value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
        elif fmt[-1] == 'j':  # json
            value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt
        elif fmt[-1] == 'q':  # quoted
            value = map(str, variadic(value) if '#' in flags else [value])
            value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
        elif fmt[-1] == 'B':  # bytes
            value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
            value, fmt = value.decode('utf-8', 'ignore'), 's'
        elif fmt[-1] == 'U':  # unicode normalized
            value, fmt = unicodedata.normalize(
                # "+" = compatibility equivalence, "#" = NFD
                'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
                value), str_fmt
        elif fmt[-1] == 'D':  # decimal suffix
            num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
            value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
                                          factor=1024 if '#' in flags else 1000)
        elif fmt[-1] == 'S':  # filename sanitization
            value, fmt = filename_sanitizer(initial_field, value, restricted='#' in flags), str_fmt
        elif fmt[-1] == 'c':
            if value:
                value = str(value)[0]
            else:
                fmt = str_fmt
        elif fmt[-1] not in 'rs':  # numeric
            value = float_or_none(value)
            if value is None:
                value, fmt = default, 's'

        if sanitize:
            if fmt[-1] == 'r':
                # If value is an object, sanitize might convert it to a string
                # So we convert it to repr first
                value, fmt = repr(value), str_fmt
            if fmt[-1] in 'csr':
                value = sanitizer(initial_field, value)

        # The rewritten key embeds the original format so that the same field
        # used with different formats gets separate entries in TMPL_DICT
        key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
        TMPL_DICT[key] = value
        return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

    return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
    """Fill in `outtmpl` from `info_dict`; extra arguments go to prepare_outtmpl."""
    prepared_tmpl, tmpl_values = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
    escaped = self.escape_outtmpl(prepared_tmpl)
    return escaped % tmpl_values
1221 def _prepare_filename(self
, info_dict
, tmpl_type
='default'):
1223 outtmpl
= self
._outtmpl
_expandpath
(self
.outtmpl_dict
.get(tmpl_type
, self
.outtmpl_dict
['default']))
1224 filename
= self
.evaluate_outtmpl(outtmpl
, info_dict
, True)
1228 if tmpl_type
in ('default', 'temp'):
1229 final_ext
, ext
= self
.params
.get('final_ext'), info_dict
.get('ext')
1230 if final_ext
and ext
and final_ext
!= ext
and filename
.endswith(f
'.{final_ext}'):
1231 filename
= replace_extension(filename
, ext
, final_ext
)
1233 force_ext
= OUTTMPL_TYPES
[tmpl_type
]
1235 filename
= replace_extension(filename
, force_ext
, info_dict
.get('ext'))
1237 # https://github.com/blackjack4494/youtube-dlc/issues/85
1238 trim_file_name
= self
.params
.get('trim_file_name', False)
1240 no_ext
, *ext
= filename
.rsplit('.', 2)
1241 filename
= join_nonempty(no_ext
[:trim_file_name
], *ext
, delim
='.')
1244 except ValueError as err
:
1245 self
.report_error('Error in output template: ' + str(err
) + ' (encoding: ' + repr(preferredencoding()) + ')')
def prepare_filename(self, info_dict, dir_type='', warn=False):
    """Generate the output filename.

    With `warn` set, a warning is reported when the 'paths' param is set
    but will have no effect on the result.
    """
    filename = self._prepare_filename(info_dict, dir_type or 'default')
    if not filename and dir_type not in ('', 'temp'):
        return ''

    if warn and self.params.get('paths'):
        if filename == '-':
            self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
        elif os.path.isabs(filename):
            self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)

    if not filename or filename == '-':
        return filename
    return self.get_output_path(dir_type, filename)
def _match_entry(self, info_dict, incomplete=False, silent=False):
    """ Returns None if the file should be downloaded

    Otherwise the reason for skipping is returned, and printed unless
    `silent`. If the matching break option ('break_on_existing' or
    'break_on_reject') is set, the corresponding exception is raised
    instead of returning.
    """

    video_title = info_dict.get('title', info_dict.get('id', 'video'))

    def check_filter():
        # The title can be missing when we're just evaluating the playlist
        if 'title' in info_dict:
            title = info_dict['title']
            matchtitle = self.params.get('matchtitle', False)
            if matchtitle and not re.search(matchtitle, title, re.IGNORECASE):
                return '"' + title + '" title did not match pattern "' + matchtitle + '"'
            rejecttitle = self.params.get('rejecttitle', False)
            if rejecttitle and re.search(rejecttitle, title, re.IGNORECASE):
                return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'

        date = info_dict.get('upload_date')
        if date is not None:
            dateRange = self.params.get('daterange', DateRange())
            if date not in dateRange:
                return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)

        view_count = info_dict.get('view_count')
        if view_count is not None:
            min_views = self.params.get('min_views')
            if min_views is not None and view_count < min_views:
                return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
            max_views = self.params.get('max_views')
            if max_views is not None and view_count > max_views:
                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)

        if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
            return 'Skipping "%s" because it is age restricted' % video_title

        match_filter = self.params.get('match_filter')
        if match_filter is None:
            return None
        try:
            return match_filter(info_dict, incomplete=incomplete)
        except TypeError:
            # For backward compatibility
            return None if incomplete else match_filter(info_dict)

    if self.in_download_archive(info_dict):
        reason = '%s has already been recorded in the archive' % video_title
        break_opt, break_err = 'break_on_existing', ExistingVideoReached
    else:
        reason = check_filter()
        break_opt, break_err = 'break_on_reject', RejectedVideoReached

    if reason is None:
        return None
    if not silent:
        self.to_screen('[download] ' + reason)
    if self.params.get(break_opt, False):
        raise break_err()
    return reason
def add_extra_info(info_dict, extra_info):
    '''Set the keys from extra_info in info dict if they are missing'''
    for key in extra_info:
        if key not in info_dict:
            info_dict[key] = extra_info[key]
def extract_info(self, url, download=True, ie_key=None, extra_info=None,
                 process=True, force_generic_extractor=False):
    """
    Extract the information for a URL with the first suitable extractor.

    Arguments:
    url -- URL to extract

    Keyword arguments:
    download -- whether to download videos during extraction
    ie_key -- extractor key hint
    extra_info -- dictionary containing the extra values to add to each result
    process -- whether to resolve all unresolved references (URLs, playlist items),
               must be True for download to work.
    force_generic_extractor -- force using the generic extractor

    Returns None when no extractor is suitable (an error is reported) or
    when the URL is already recorded in the download archive.
    """
    if extra_info is None:
        extra_info = {}

    if force_generic_extractor and not ie_key:
        ie_key = 'Generic'

    ies = {ie_key: self._get_info_extractor_class(ie_key)} if ie_key else self._ies

    for ie_key, ie in ies.items():
        if not ie.suitable(url):
            continue

        if not ie.working():
            self.report_warning('The program functionality for this site has been marked as broken, '
                                'and will probably not work.')

        temp_id = ie.get_temp_id(url)
        archived = temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key})
        if archived:
            self.to_screen(f'[{ie_key}] {temp_id}: has already been recorded in the archive')
            if self.params.get('break_on_existing', False):
                raise ExistingVideoReached()
            break
        return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
    else:
        # Only reached when the loop ran out of extractors without a match
        self.report_error('no suitable InfoExtractor for URL %s' % url)
def __handle_extraction_exceptions(func):
    """Decorator handling the exceptions raised while extracting.

    Cancellation and index errors from lazy lists pass through untouched.
    ReExtractInfo makes the wrapped call run again. Other errors are
    reported and the wrapper returns None, except that unexpected
    exceptions are re-raised unless the 'ignoreerrors' param is set.
    """
    @functools.wraps(func)
    def wrapper(self, *args, **kwargs):
        while True:
            try:
                return func(self, *args, **kwargs)
            except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
                raise
            except ReExtractInfo as err:
                notice = f'{err}; Re-extracting data'
                if err.expected:
                    self.to_screen(notice)
                else:
                    self.to_stderr('\r')
                    self.report_warning(notice)
                continue
            except GeoRestrictedError as err:
                msg = err.msg
                if err.countries:
                    msg += '\nThis video is available in %s.' % ', '.join(
                        map(ISO3166Utils.short2full, err.countries))
                msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
                self.report_error(msg)
            except ExtractorError as err:  # An error we somewhat expected
                self.report_error(str(err), err.format_traceback())
            except Exception as err:
                if not self.params.get('ignoreerrors'):
                    raise
                self.report_error(str(err), tb=encode_compat_str(traceback.format_exc()))
            # An error was reported and swallowed
            return None
    return wrapper
def _wait_for_video(self, ie_result):
    """Wait for a video that is not available yet, then ask for re-extraction.

    Does nothing unless the 'wait_for_video' param is set and `ie_result`
    is a video result without 'formats' or 'url'. Otherwise it waits,
    printing a countdown, and finally raises ReExtractInfo; this also
    happens when the wait is interrupted with Ctrl+C.
    """
    if (not self.params.get('wait_for_video')
            or ie_result.get('_type', 'video') != 'video'
            or ie_result.get('formats') or ie_result.get('url')):
        return

    def format_dur(dur):
        return '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]

    last_msg = ''

    def progress(msg):
        nonlocal last_msg
        # Pad with spaces so that a shorter message overwrites the previous one
        self.to_screen(msg + ' ' * (len(last_msg) - len(msg)) + '\r', skip_eol=True)
        last_msg = msg

    min_wait, max_wait = self.params.get('wait_for_video')
    diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
    if diff is None and ie_result.get('live_status') == 'is_upcoming':
        if max_wait and min_wait:
            # uniform() accepts equal and non-integer bounds, which randrange() rejects
            diff = round(random.uniform(min_wait, max_wait), 0)
        else:
            diff = max_wait or min_wait
        self.report_warning('Release time of video is not known')
    elif (diff or 0) <= 0:
        self.report_warning('Video should already be available according to extracted info')
    # Clamp the wait to the configured range
    diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
    self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')

    wait_till = time.time() + diff
    try:
        while True:
            diff = wait_till - time.time()
            if diff <= 0:
                progress('')
                raise ReExtractInfo('[wait] Wait period ended', expected=True)
            progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
            time.sleep(1)
    except KeyboardInterrupt:
        progress('')
        raise ReExtractInfo('[wait] Interrupted by user', expected=True)
    except BaseException as e:
        if not isinstance(e, ReExtractInfo):
            # End the countdown line before the error propagates
            self.to_screen('')
        raise
@__handle_extraction_exceptions
def __extract_info(self, url, ie, download, extra_info, process):
    """Run extractor `ie` on `url` and optionally process its result."""
    ie_result = ie.extract(url)
    if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
        return None
    if isinstance(ie_result, list):
        # Backwards compatibility: old IE result format
        ie_result = {'_type': 'compat_list', 'entries': ie_result}

    original_url = extra_info.get('original_url')
    if original_url:
        ie_result.setdefault('original_url', extra_info['original_url'])
    self.add_default_extra_info(ie_result, ie, url)

    if not process:
        return ie_result
    self._wait_for_video(ie_result)
    return self.process_ie_result(ie_result, download, extra_info)
def add_default_extra_info(self, ie_result, ie, url):
    """Fill in URL and extractor related keys of `ie_result` that are missing.

    `url` and `ie` may be None, in which case the keys derived from them
    are left alone.
    """
    if url is not None:
        url_fields = {'webpage_url': url, 'original_url': url}
        self.add_extra_info(ie_result, url_fields)

    webpage_url = ie_result.get('webpage_url')
    if webpage_url:
        self.add_extra_info(ie_result, {
            'webpage_url_basename': url_basename(webpage_url),
            'webpage_url_domain': get_domain(webpage_url),
        })

    if ie is not None:
        extractor_fields = {'extractor': ie.IE_NAME, 'extractor_key': ie.ie_key()}
        self.add_extra_info(ie_result, extractor_fields)
def process_ie_result(self, ie_result, download=True, extra_info=None):
    """
    Take the result of the ie(may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    Returns the resolved ie_result. None is returned for a playlist that
    is already being processed (guard against recursive playlists).

    Raises Exception for an unknown '_type'.
    """
    if extra_info is None:
        extra_info = {}
    result_type = ie_result.get('_type', 'video')

    if result_type in ('url', 'url_transparent'):
        ie_result['url'] = sanitize_url(ie_result['url'])
        if ie_result.get('original_url'):
            extra_info.setdefault('original_url', ie_result['original_url'])

        extract_flat = self.params.get('extract_flat', False)
        if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
                or extract_flat is True):
            # Flat extraction: report the entry without resolving the URL
            info_copy = ie_result.copy()
            ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
            if ie and not ie_result.get('id'):
                info_copy['id'] = ie.get_temp_id(ie_result['url'])
            self.add_default_extra_info(info_copy, ie, ie_result['url'])
            self.add_extra_info(info_copy, extra_info)
            info_copy, _ = self.pre_process(info_copy)
            self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
            if self.params.get('force_write_download_archive', False):
                self.record_download_archive(info_copy)
            return ie_result

    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        ie_result = self.process_video_result(ie_result, download=download)
        additional_urls = (ie_result or {}).get('additional_urls')
        if additional_urls:
            # TODO: Improve MetadataParserPP to allow setting a list
            if isinstance(additional_urls, compat_str):
                additional_urls = [additional_urls]
            self.to_screen(
                '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
            self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
            ie_result['additional_entries'] = [
                self.extract_info(
                    url, download, extra_info=extra_info,
                    force_generic_extractor=self.params.get('force_generic_extractor'))
                for url in additional_urls
            ]
        return ie_result
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(
            ie_result['url'], download,
            ie_key=ie_result.get('ie_key'),
            extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        # extract_info may return None when ignoreerrors is enabled and
        # extraction failed with an error, don't crash and return early
        # in this case
        if not info:
            return info

        # Everything the outer result sets, except what identifies the result itself
        skipped = ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key')
        force_properties = {
            k: v for k, v in ie_result.items()
            if v is not None and k not in skipped}
        new_result = info.copy()
        new_result.update(force_properties)

        # Extracted info may not be a video result (i.e.
        # info.get('_type', 'video') != video) but rather an url or
        # url_transparent. In such cases outer metadata (from ie_result)
        # should be propagated to inner one (info). For this to happen
        # _type of info should be overridden with url_transparent. This
        # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
        if new_result.get('_type') == 'url':
            new_result['_type'] = 'url_transparent'

        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    elif result_type in ('playlist', 'multi_video'):
        # Protect from infinite recursion due to recursively nested playlists
        # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
        webpage_url = ie_result['webpage_url']
        if webpage_url in self._playlist_urls:
            # Parenthesised: '%' binds tighter than 'or', so without the
            # parentheses the id fallback would never be used
            self.to_screen(
                '[download] Skipping already downloaded playlist: %s'
                % (ie_result.get('title') or ie_result.get('id')))
            return

        self._playlist_level += 1
        self._playlist_urls.add(webpage_url)
        self._sanitize_thumbnails(ie_result)
        try:
            return self.__process_playlist(ie_result, download)
        finally:
            self._playlist_level -= 1
            if not self._playlist_level:
                # Back at the outermost playlist: forget the visited URLs
                self._playlist_urls.clear()
    elif result_type == 'compat_list':
        self.report_warning(
            'Extractor %s returned a compat_list result. '
            'It needs to be updated.' % ie_result.get('extractor'))

        def _fixup(r):
            self.add_extra_info(r, {
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'webpage_url_domain': get_domain(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            })
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)
def _ensure_dir_exists(self, path):
    """Call make_dir for `path`, passing report_error as its error callback."""
    on_error = self.report_error
    return make_dir(path, on_error)
1620 def _playlist_infodict(ie_result
, **kwargs
):
1623 'playlist': ie_result
.get('title') or ie_result
.get('id'),
1624 'playlist_id': ie_result
.get('id'),
1625 'playlist_title': ie_result
.get('title'),
1626 'playlist_uploader': ie_result
.get('uploader'),
1627 'playlist_uploader_id': ie_result
.get('uploader_id'),
1628 'playlist_index': 0,
def __process_playlist(self, ie_result, download):
    """Process every selected entry of a playlist result.

    Entries are selected with the 'playlist_items' param, or else with
    'playliststart'/'playlistend'. Playlist level files are written
    unless simulating. Returns the updated ie_result, or None when
    writing one of the playlist files fails.
    """
    # We process each entry in the playlist
    playlist = ie_result.get('title') or ie_result.get('id')
    self.to_screen('[download] Downloading playlist: %s' % playlist)

    if 'entries' not in ie_result:
        raise EntryNotInPlaylist('There are no entries')

    MissingEntry = object()
    incomplete_entries = bool(ie_result.get('requested_entries'))
    if incomplete_entries:
        # Put the given entries back at their requested (1-based) positions
        requested = ie_result['requested_entries']
        filled = [MissingEntry] * max(requested)
        for position, entry in zip(requested, ie_result['entries']):
            filled[position - 1] = entry
        ie_result['entries'] = filled

    playlist_results = []

    playliststart = self.params.get('playliststart', 1)
    playlistend = self.params.get('playlistend')
    # For backwards compatibility, interpret -1 as whole list
    if playlistend == -1:
        playlistend = None

    playlistitems_str = self.params.get('playlist_items')
    playlistitems = None
    if playlistitems_str is not None:
        def iter_playlistitems(spec):
            for string_segment in spec.split(','):
                if '-' in string_segment:
                    start, end = string_segment.split('-')
                    yield from range(int(start), int(end) + 1)
                else:
                    yield int(string_segment)
        playlistitems = orderedSet(iter_playlistitems(playlistitems_str))

    ie_entries = ie_result['entries']
    if isinstance(ie_entries, list):
        playlist_count = len(ie_entries)
        msg = f'Collected {playlist_count} videos; downloading %d of them'
        ie_result['playlist_count'] = ie_result.get('playlist_count') or playlist_count

        def get_entry(i):
            return ie_entries[i - 1]
    else:
        msg = 'Downloading %d videos'
        if not isinstance(ie_entries, (PagedList, LazyList)):
            ie_entries = LazyList(ie_entries)
        elif isinstance(ie_entries, InAdvancePagedList):
            if ie_entries._pagesize == 1:
                playlist_count = ie_entries._pagecount

        def get_entry(i):
            # Fetching a lazy entry can run extractor code, so it gets the
            # same exception handling as an extraction
            return YoutubeDL.__handle_extraction_exceptions(
                lambda self, i: ie_entries[i - 1]
            )(self, i)

    entries, broken = [], False
    if playlistitems is not None:
        items = playlistitems
    else:
        items = itertools.count(playliststart)
    for i in items:
        if i == 0:
            continue
        if playlistitems is None and playlistend is not None and playlistend < i:
            break
        entry = None
        try:
            entry = get_entry(i)
            if entry is MissingEntry:
                raise EntryNotInPlaylist()
        except (IndexError, EntryNotInPlaylist):
            if incomplete_entries:
                raise EntryNotInPlaylist(f'Entry {i} cannot be found')
            elif not playlistitems:
                break
        entries.append(entry)
        try:
            if entry is not None:
                self._match_entry(entry, incomplete=True, silent=True)
        except (ExistingVideoReached, RejectedVideoReached):
            broken = True
            break
    ie_result['entries'] = entries

    # Save playlist_index before re-ordering
    indexed_entries = []
    for i, entry in enumerate(entries, 1):
        if entry is None:
            continue
        index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
        indexed_entries.append((index, entry))
    entries = indexed_entries
    n_entries = len(entries)

    if not (ie_result.get('playlist_count') or broken or playlistitems or playlistend):
        ie_result['playlist_count'] = n_entries

    if not playlistitems and (playliststart != 1 or playlistend):
        playlistitems = list(range(playliststart, playliststart + n_entries))
    ie_result['requested_entries'] = playlistitems

    _infojson_written = False
    write_playlist_files = self.params.get('allow_playlist_files', True)
    if write_playlist_files and self.params.get('list_thumbnails'):
        self.list_thumbnails(ie_result)
    if write_playlist_files and not self.params.get('simulate'):
        ie_copy = self._playlist_infodict(ie_result, n_entries=n_entries)
        _infojson_written = self._write_info_json(
            'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
        if _infojson_written is None:
            return
        description_written = self._write_description(
            'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_description'))
        if description_written is None:
            return
        # TODO: This should be passed to ThumbnailsConvertor if necessary
        self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))

    if self.params.get('playlistreverse', False):
        entries = entries[::-1]
    if self.params.get('playlistrandom', False):
        random.shuffle(entries)

    x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

    self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
    failures = 0
    max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
    for i, (playlist_index, entry) in enumerate(entries, 1):
        if 'playlist-index' in self.params.get('compat_opts', []):
            playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
        self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
        # This __x_forwarded_for_ip thing is a bit ugly but requires
        # minimal changes
        if x_forwarded_for:
            entry['__x_forwarded_for_ip'] = x_forwarded_for
        extra = {
            'n_entries': n_entries,
            '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
            'playlist_count': ie_result.get('playlist_count'),
            'playlist_index': playlist_index,
            'playlist_autonumber': i,
            'playlist': playlist,
            'playlist_id': ie_result.get('id'),
            'playlist_title': ie_result.get('title'),
            'playlist_uploader': ie_result.get('uploader'),
            'playlist_uploader_id': ie_result.get('uploader_id'),
            'extractor': ie_result['extractor'],
            'webpage_url': ie_result['webpage_url'],
            'webpage_url_basename': url_basename(ie_result['webpage_url']),
            'webpage_url_domain': get_domain(ie_result['webpage_url']),
            'extractor_key': ie_result['extractor_key'],
        }

        if self._match_entry(entry, incomplete=True) is not None:
            continue

        entry_result = self.__process_iterable_entry(entry, download, extra)
        if not entry_result:
            failures += 1
        if failures >= max_failures:
            self.report_error(
                'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
            break
        playlist_results.append(entry_result)
    ie_result['entries'] = playlist_results

    # Write the updated info to json
    if _infojson_written and self._write_info_json(
            'updated playlist', ie_result,
            self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
        return

    ie_result = self.run_all_pps('playlist', ie_result)
    self.to_screen(f'[download] Finished downloading playlist: {playlist}')
    return ie_result
@__handle_extraction_exceptions
def __process_iterable_entry(self, entry, download, extra_info):
    """Process one playlist entry by delegating to ``process_ie_result``.

    ``entry`` is forwarded positionally; ``download`` and ``extra_info`` are
    forwarded as keyword arguments. The call is wrapped by the
    ``__handle_extraction_exceptions`` decorator, which is defined elsewhere
    in this file.
    """
    result = self.process_ie_result(entry, download=download, extra_info=extra_info)
    return result
# Build a predicate for a single filter expression (filter_spec) of a format
# specification. Two syntaxes are tried: a numeric comparison on a fixed set of
# keys, then a string comparison on an arbitrary key. A spec matching neither
# raises SyntaxError.
# NOTE(review): the embedded line numbers skip values, so some statements of
# this function (e.g. the OPERATORS table) are not visible in this listing.
1813 def _build_format_filter(self
, filter_spec
):
1814 " Returns a function to filter the formats according to the filter_spec "
# Numeric form: <key><op>[?]<value>, where the value may carry a size suffix.
1824 operator_rex
= re
.compile(r
'''(?x)\s*
1825 (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1826 (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1827 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
1828 ''' % '|'.join(map(re
.escape
, OPERATORS
.keys())))
1829 m
= operator_rex
.fullmatch(filter_spec
)
1832 comparison_value
= int(m
.group('value'))
# Values are also parsed with parse_filesize, retried with a trailing 'B'
# when the first attempt yields None.
1834 comparison_value
= parse_filesize(m
.group('value'))
1835 if comparison_value
is None:
1836 comparison_value
= parse_filesize(m
.group('value') + 'B')
1837 if comparison_value
is None:
1839 'Invalid value %r in format specification %r' % (
1840 m
.group('value'), filter_spec
))
1841 op
= OPERATORS
[m
.group('op')]
# String operators: prefix, suffix, substring and regex search.
1846 '^=': lambda attr
, value
: attr
.startswith(value
),
1847 '$=': lambda attr
, value
: attr
.endswith(value
),
1848 '*=': lambda attr
, value
: value
in attr
,
1849 '~=': lambda attr
, value
: value
.search(attr
) is not None
1851 str_operator_rex
= re
.compile(r
'''(?x)\s*
1852 (?P<key>[a-zA-Z0-9._-]+)\s*
1853 (?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?
1855 (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
1856 (?(quote)(?P=quote))\s*
1857 ''' % '|'.join(map(re
.escape
, STR_OPERATORS
.keys())))
1858 m
= str_operator_rex
.fullmatch(filter_spec
)
# For '~=' the value is compiled as a regex; otherwise backslash escapes of
# quotes and backslashes are removed from the value.
1860 if m
.group('op') == '~=':
1861 comparison_value
= re
.compile(m
.group('value'))
1863 comparison_value
= re
.sub(r
'''\\([\\"'])''', r
'\1', m
.group('value'))
1864 str_op
= STR_OPERATORS
[m
.group('op')]
# A leading '!' inverts the string operator.
1865 if m
.group('negation'):
1866 op
= lambda attr
, value
: not str_op(attr
, value
)
1871 raise SyntaxError('Invalid filter specification %r' % filter_spec
)
# Predicate body: a format lacking the key yields the 'none_inclusive' match
# group, which is None unless '?' was written in the spec.
1874 actual_value
= f
.get(m
.group('key'))
1875 if actual_value
is None:
1876 return m
.group('none_inclusive')
1877 return op(actual_value
, comparison_value
)
# Test each format in `formats` by running a test download (self.dl with
# test=True) into a temporary '.tmp' file created under the 'temp' output path.
# Download errors listed in the except clause are caught, the temporary file is
# removed afterwards, and formats that cannot be downloaded are reported as
# skipped.
# NOTE(review): the embedded line numbers skip values, so the loop header and
# the try/finally scaffolding are not visible in this listing.
1880 def _check_formats(self
, formats
):
1882 self
.to_screen('[info] Testing format %s' % f
['format_id'])
1883 path
= self
.get_output_path('temp')
1884 if not self
._ensure
_dir
_exists
(f
'{path}/'):
# delete=False: the file is removed explicitly further down.
1886 temp_file
= tempfile
.NamedTemporaryFile(suffix
='.tmp', delete
=False, dir=path
or None)
1889 success
, _
= self
.dl(temp_file
.name
, f
, test
=True)
1890 except (DownloadError
, IOError, OSError, ValueError) + network_exceptions
:
# Clean up the temporary file; failure to delete only produces a warning.
1893 if os
.path
.exists(temp_file
.name
):
1895 os
.remove(temp_file
.name
)
1897 self
.report_warning('Unable to delete temporary file "%s"' % temp_file
.name
)
1901 self
.to_screen('[info] Unable to download format %s. Skipping...' % f
['format_id'])
# Choose the default format specification string used when the user gave none.
# The result is one of 'best/bestvideo+bestaudio', 'bestvideo*+bestaudio/best'
# or 'bestvideo+bestaudio/best', depending on `prefer_best` and `compat`.
# NOTE(review): the embedded line numbers skip values, so the assignments that
# define `prefer_best` and `compat` are only partly visible in this listing.
1903 def _default_format_spec(self
, info_dict
, download
=True):
# Merging is considered possible when FFmpegMergerPP is available and can merge.
1906 merger
= FFmpegMergerPP(self
)
1907 return merger
.available
and merger
.can_merge()
# Conditions visible here: not simulating, live stream, or output to '-'.
1910 not self
.params
.get('simulate')
1914 or info_dict
.get('is_live', False)
1915 or self
.outtmpl_dict
['default'] == '-'))
1918 or self
.params
.get('allow_multiple_audio_streams', False)
1919 or 'format-spec' in self
.params
.get('compat_opts', []))
1922 'best/bestvideo+bestaudio' if prefer_best
1923 else 'bestvideo*+bestaudio/best' if not compat
1924 else 'bestvideo+bestaudio/best')
# Compile a format specification string into a selector function.
# The spec is tokenized with the Python tokenizer, parsed into FormatSelector
# namedtuples (type, selector, filters) and then translated into a function
# that takes a context dict (reads ctx['formats'] and
# ctx['incomplete_formats']) and yields the chosen format dicts.
# Malformed specs are reported through syntax_error(), which builds a
# SyntaxError pointing at the offending column.
# NOTE(review): the embedded line numbers skip values, so a number of
# statements of this function are not visible in this listing.
1926 def build_format_selector(self
, format_spec
):
1927 def syntax_error(note
, start
):
1929 'Invalid format specification: '
1930 '{0}\n\t{1}\n\t{2}^'.format(note
, format_spec
, ' ' * start
[1]))
1931 return SyntaxError(message
)
1933 PICKFIRST
= 'PICKFIRST'
1937 FormatSelector
= collections
.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1939 allow_multiple_streams
= {'audio': self
.params
.get('allow_multiple_audio_streams', False),
1940 'video': self
.params
.get('allow_multiple_video_streams', False)}
# Formats are only tested during selection when check_formats == 'selected'.
1942 check_formats
= self
.params
.get('check_formats') == 'selected'
# Collect token strings up to the closing ']' and return them joined as the
# filter text.
1944 def _parse_filter(tokens
):
1946 for type, string
, start
, _
, _
in tokens
:
1947 if type == tokenize
.OP
and string
== ']':
1948 return ''.join(filter_parts
)
1950 filter_parts
.append(string
)
1952 def _remove_unused_ops(tokens
):
1953 # Remove operators that we don't use and join them with the surrounding strings
1954 # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1955 ALLOWED_OPS
= ('/', '+', ',', '(', ')')
1956 last_string
, last_start
, last_end
, last_line
= None, None, None, None
1957 for type, string
, start
, end
, line
in tokens
:
1958 if type == tokenize
.OP
and string
== '[':
1960 yield tokenize
.NAME
, last_string
, last_start
, last_end
, last_line
1962 yield type, string
, start
, end
, line
1963 # everything inside brackets will be handled by _parse_filter
1964 for type, string
, start
, end
, line
in tokens
:
1965 yield type, string
, start
, end
, line
1966 if type == tokenize
.OP
and string
== ']':
1968 elif type == tokenize
.OP
and string
in ALLOWED_OPS
:
1970 yield tokenize
.NAME
, last_string
, last_start
, last_end
, last_line
1972 yield type, string
, start
, end
, line
1973 elif type in [tokenize
.NAME
, tokenize
.NUMBER
, tokenize
.OP
]:
1975 last_string
= string
1979 last_string
+= string
1981 yield tokenize
.NAME
, last_string
, last_start
, last_end
, last_line
# Recursive parser: consumes tokens and builds FormatSelector tuples. The
# inside_* flags tell a nested call which operators belong to its caller; those
# tokens are pushed back with tokens.restore_last_token().
1983 def _parse_format_selection(tokens
, inside_merge
=False, inside_choice
=False, inside_group
=False):
1985 current_selector
= None
1986 for type, string
, start
, _
, _
in tokens
:
1987 # ENCODING is only defined in python 3.x
1988 if type == getattr(tokenize
, 'ENCODING', None):
1990 elif type in [tokenize
.NAME
, tokenize
.NUMBER
]:
1991 current_selector
= FormatSelector(SINGLE
, string
, [])
1992 elif type == tokenize
.OP
:
1994 if not inside_group
:
1995 # ')' will be handled by the parentheses group
1996 tokens
.restore_last_token()
1998 elif inside_merge
and string
in ['/', ',']:
1999 tokens
.restore_last_token()
2001 elif inside_choice
and string
== ',':
2002 tokens
.restore_last_token()
2005 if not current_selector
:
2006 raise syntax_error('"," must follow a format selector', start
)
2007 selectors
.append(current_selector
)
2008 current_selector
= None
2010 if not current_selector
:
2011 raise syntax_error('"/" must follow a format selector', start
)
2012 first_choice
= current_selector
2013 second_choice
= _parse_format_selection(tokens
, inside_choice
=True)
2014 current_selector
= FormatSelector(PICKFIRST
, (first_choice
, second_choice
), [])
# A filter with no preceding selector applies to an implicit 'best'.
2016 if not current_selector
:
2017 current_selector
= FormatSelector(SINGLE
, 'best', [])
2018 format_filter
= _parse_filter(tokens
)
2019 current_selector
.filters
.append(format_filter
)
2021 if current_selector
:
2022 raise syntax_error('Unexpected "("', start
)
2023 group
= _parse_format_selection(tokens
, inside_group
=True)
2024 current_selector
= FormatSelector(GROUP
, group
, [])
2026 if not current_selector
:
2027 raise syntax_error('Unexpected "+"', start
)
2028 selector_1
= current_selector
2029 selector_2
= _parse_format_selection(tokens
, inside_merge
=True)
2031 raise syntax_error('Expected a selector', start
)
2032 current_selector
= FormatSelector(MERGE
, (selector_1
, selector_2
), [])
2034 raise syntax_error('Operator not recognized: "{0}"'.format(string
), start
)
2035 elif type == tokenize
.ENDMARKER
:
2037 if current_selector
:
2038 selectors
.append(current_selector
)
# Combine a pair of formats into one dict that lists the parts under
# 'requested_formats' and carries joined/summed summary fields.
2041 def _merge(formats_pair
):
2042 format_1
, format_2
= formats_pair
2045 formats_info
.extend(format_1
.get('requested_formats', (format_1
,)))
2046 formats_info
.extend(format_2
.get('requested_formats', (format_2
,)))
# Unless multiple streams of a kind are allowed, track which kinds (audio /
# video) have already been taken while walking the collected formats.
2048 if not allow_multiple_streams
['video'] or not allow_multiple_streams
['audio']:
2049 get_no_more
= {'video': False, 'audio': False}
2050 for (i
, fmt_info
) in enumerate(formats_info
):
2051 if fmt_info
.get('acodec') == fmt_info
.get('vcodec') == 'none':
2054 for aud_vid
in ['audio', 'video']:
2055 if not allow_multiple_streams
[aud_vid
] and fmt_info
.get(aud_vid
[0] + 'codec') != 'none':
2056 if get_no_more
[aud_vid
]:
2059 get_no_more
[aud_vid
] = True
2061 if len(formats_info
) == 1:
2062 return formats_info
[0]
2064 video_fmts
= [fmt_info
for fmt_info
in formats_info
if fmt_info
.get('vcodec') != 'none']
2065 audio_fmts
= [fmt_info
for fmt_info
in formats_info
if fmt_info
.get('acodec') != 'none']
2067 the_only_video
= video_fmts
[0] if len(video_fmts
) == 1 else None
2068 the_only_audio
= audio_fmts
[0] if len(audio_fmts
) == 1 else None
2070 output_ext
= self
.params
.get('merge_output_format')
2073 output_ext
= the_only_video
['ext']
2074 elif the_only_audio
and not video_fmts
:
2075 output_ext
= the_only_audio
['ext']
# filtered(*keys): truthy traverse_obj() results across all collected formats.
2079 filtered
= lambda *keys
: filter(None, (traverse_obj(fmt
, *keys
) for fmt
in formats_info
))
2082 'requested_formats': formats_info
,
2083 'format': '+'.join(filtered('format')),
2084 'format_id': '+'.join(filtered('format_id')),
2086 'protocol': '+'.join(map(determine_protocol
, formats_info
)),
2087 'language': '+'.join(orderedSet(filtered('language'))) or None,
2088 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
2089 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
2090 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
2095 'width': the_only_video
.get('width'),
2096 'height': the_only_video
.get('height'),
2097 'resolution': the_only_video
.get('resolution') or self
.format_resolution(the_only_video
),
2098 'fps': the_only_video
.get('fps'),
2099 'dynamic_range': the_only_video
.get('dynamic_range'),
2100 'vcodec': the_only_video
.get('vcodec'),
2101 'vbr': the_only_video
.get('vbr'),
2102 'stretched_ratio': the_only_video
.get('stretched_ratio'),
2107 'acodec': the_only_audio
.get('acodec'),
2108 'abr': the_only_audio
.get('abr'),
2109 'asr': the_only_audio
.get('asr'),
# Local wrapper (shadows the method name inside this function): delegates to
# self._check_formats; the `not check_formats` branch is not visible here.
2114 def _check_formats(formats
):
2115 if not check_formats
:
2118 yield from self
._check
_formats
(formats
)
# Translate a parsed selector (a list, or a FormatSelector of type GROUP,
# PICKFIRST, MERGE or SINGLE) into a function of ctx; the selector's filters
# are applied to a copy of ctx in final_selector before selection runs.
2120 def _build_selector_function(selector
):
2121 if isinstance(selector
, list): # ,
2122 fs
= [_build_selector_function(s
) for s
in selector
]
2124 def selector_function(ctx
):
2127 return selector_function
2129 elif selector
.type == GROUP
: # ()
2130 selector_function
= _build_selector_function(selector
.selector
)
2132 elif selector
.type == PICKFIRST
: # /
2133 fs
= [_build_selector_function(s
) for s
in selector
.selector
]
2135 def selector_function(ctx
):
2137 picked_formats
= list(f(ctx
))
2139 return picked_formats
2142 elif selector
.type == MERGE
: # +
2143 selector_1
, selector_2
= map(_build_selector_function
, selector
.selector
)
2145 def selector_function(ctx
):
2146 for pair
in itertools
.product(selector_1(ctx
), selector_2(ctx
)):
2149 elif selector
.type == SINGLE
: # atom
2150 format_spec
= selector
.selector
or 'best'
2152 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
2153 if format_spec
== 'all':
2154 def selector_function(ctx
):
2155 yield from _check_formats(ctx
['formats'][::-1])
2156 elif format_spec
== 'mergeall':
2157 def selector_function(ctx
):
2158 formats
= list(_check_formats(ctx
['formats']))
2161 merged_format
= formats
[-1]
2162 for f
in formats
[-2::-1]:
2163 merged_format
= _merge((merged_format
, f
))
2167 format_fallback
, format_reverse
, format_idx
= False, True, 1
2169 r
'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
2171 if mobj
is not None:
2172 format_idx
= int_or_none(mobj
.group('n'), default
=1)
2173 format_reverse
= mobj
.group('bw')[0] == 'b'
2174 format_type
= (mobj
.group('type') or [None])[0]
2175 not_format_type
= {'v': 'a', 'a': 'v'}
.get(format_type
)
2176 format_modified
= mobj
.group('mod') is not None
2178 format_fallback
= not format_type
and not format_modified
# for b, w
2180 (lambda f
: f
.get('%scodec' % format_type
) != 'none')
2181 if format_type
and format_modified
# bv*, ba*, wv*, wa*
2182 else (lambda f
: f
.get('%scodec' % not_format_type
) == 'none')
2183 if format_type
# bv, ba, wv, wa
2184 else (lambda f
: f
.get('vcodec') != 'none' and f
.get('acodec') != 'none')
2185 if not format_modified
# b, w
2186 else lambda f
: True) # b*, w*
2187 filter_f
= lambda f
: _filter_f(f
) and (
2188 f
.get('vcodec') != 'none' or f
.get('acodec') != 'none')
2190 if format_spec
in self
._format
_selection
_exts
['audio']:
2191 filter_f
= lambda f
: f
.get('ext') == format_spec
and f
.get('acodec') != 'none'
2192 elif format_spec
in self
._format
_selection
_exts
['video']:
2193 filter_f
= lambda f
: f
.get('ext') == format_spec
and f
.get('acodec') != 'none' and f
.get('vcodec') != 'none'
2194 elif format_spec
in self
._format
_selection
_exts
['storyboards']:
2195 filter_f
= lambda f
: f
.get('ext') == format_spec
and f
.get('acodec') == 'none' and f
.get('vcodec') == 'none'
2197 filter_f
= lambda f
: f
.get('format_id') == format_spec
# id
2199 def selector_function(ctx
):
2200 formats
= list(ctx
['formats'])
2201 matches
= list(filter(filter_f
, formats
)) if filter_f
is not None else formats
2202 if format_fallback
and ctx
['incomplete_formats'] and not matches
:
2203 # for extractors with incomplete formats (audio only (soundcloud)
2204 # or video only (imgur)) best/worst will fallback to
2205 # best/worst {video,audio}-only format
2207 matches
= LazyList(_check_formats(matches
[::-1 if format_reverse
else 1]))
2209 yield matches
[format_idx
- 1]
2213 filters
= [self
._build
_format
_filter
(f
) for f
in selector
.filters
]
2215 def final_selector(ctx
):
2216 ctx_copy
= dict(ctx
)
2217 for _filter
in filters
:
2218 ctx_copy
['formats'] = list(filter(_filter
, ctx_copy
['formats']))
2219 return selector_function(ctx_copy
)
2220 return final_selector
# Tokenize the spec with the Python tokenizer; a TokenError is reported as
# unbalanced brackets/parentheses.
2222 stream
= io
.BytesIO(format_spec
.encode('utf-8'))
2224 tokens
= list(_remove_unused_ops(compat_tokenize_tokenize(stream
.readline
)))
2225 except tokenize
.TokenError
:
2226 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec
)))
# Iterator over the token list offering restore_last_token(), which the parser
# uses to push a token back (its body is not visible in this listing).
2228 class TokenIterator(object):
2229 def __init__(self
, tokens
):
2230 self
.tokens
= tokens
2237 if self
.counter
>= len(self
.tokens
):
2238 raise StopIteration()
2239 value
= self
.tokens
[self
.counter
]
2245 def restore_last_token(self
):
2248 parsed_selector
= _parse_format_selection(iter(TokenIterator(tokens
)))
2249 return _build_selector_function(parsed_selector
)
# Compute the HTTP headers for info_dict: start from a copy of std_headers,
# overlay info_dict['http_headers'], add the 'Cookie' value obtained from
# _calc_cookies(), and add 'X-Forwarded-For' from the private
# '__x_forwarded_for_ip' field when that header is not already present.
# NOTE(review): the embedded line numbers skip values (2256, 2263-2265), so
# the guard around the Cookie assignment and the return statement are not
# visible in this listing.
2251 def _calc_headers(self
, info_dict
):
2252 res
= std_headers
.copy()
# `or {}` tolerates a missing or None 'http_headers' entry.
2253 res
.update(info_dict
.get('http_headers') or {})
2255 cookies
= self
._calc
_cookies
(info_dict
)
2257 res
['Cookie'] = cookies
# An explicitly supplied X-Forwarded-For header takes precedence.
2259 if 'X-Forwarded-For' not in res
:
2260 x_forwarded_for_ip
= info_dict
.get('__x_forwarded_for_ip')
2261 if x_forwarded_for_ip
:
2262 res
['X-Forwarded-For'] = x_forwarded_for_ip
def _calc_cookies(self, info_dict):
    """Return the ``Cookie`` header the cookiejar produces for ``info_dict['url']``.

    A throwaway request for the URL is built with ``sanitized_Request`` and
    handed to ``self.cookiejar.add_cookie_header``; the value of its
    ``Cookie`` header is then read back with ``get_header``.
    """
    request = sanitized_Request(info_dict['url'])
    self.cookiejar.add_cookie_header(request)
    cookie_header = request.get_header('Cookie')
    return cookie_header
# Sort the `thumbnails` list in place. The visible part of the sort key is the
# tuple (preference, width, height, id), where a missing/None value is replaced
# by -1 for the numeric fields and by '' for the id so the tuple stays
# comparable.
# NOTE(review): the embedded line numbers skip 2277, so the end of the key
# expression is not visible in this listing.
2271 def _sort_thumbnails(self
, thumbnails
):
2272 thumbnails
.sort(key
=lambda t
: (
2273 t
.get('preference') if t
.get('preference') is not None else -1,
2274 t
.get('width') if t
.get('width') is not None else -1,
2275 t
.get('height') if t
.get('height') is not None else -1,
2276 t
.get('id') if t
.get('id') is not None else '',
# Normalise the thumbnail information stored in info_dict.
# When 'thumbnails' is absent, a one-element list is built from the single
# 'thumbnail' URL. The list is then sorted via _sort_thumbnails, each entry
# gets a 'resolution' string when both width and height are set, and its URL is
# passed through sanitize_url. With params['check_formats'] set to True the
# list is replaced by a LazyList that tests each URL with a HEAD request.
# NOTE(review): the embedded line numbers skip values, so several statements
# of this function are not visible in this listing.
2279 def _sanitize_thumbnails(self
, info_dict
):
2280 thumbnails
= info_dict
.get('thumbnails')
2281 if thumbnails
is None:
2282 thumbnail
= info_dict
.get('thumbnail')
2284 info_dict
['thumbnails'] = thumbnails
= [{'url': thumbnail}
]
# Generator helper: issues a HEAD request per thumbnail and reports the ones
# whose URL cannot be reached.
2288 def check_thumbnails(thumbnails
):
2289 for t
in thumbnails
:
2290 self
.to_screen(f
'[info] Testing thumbnail {t["id"]}')
2292 self
.urlopen(HEADRequest(t
['url']))
2293 except network_exceptions
as err
:
2294 self
.to_screen(f
'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2298 self
._sort
_thumbnails
(thumbnails
)
2299 for i
, t
in enumerate(thumbnails
):
2300 if t
.get('id') is None:
2302 if t
.get('width') and t
.get('height'):
2303 t
['resolution'] = '%dx%d' % (t
['width'], t
['height'])
2304 t
['url'] = sanitize_url(t
['url'])
# The list is fed to the checker reversed and the LazyList is built with
# reverse=True.
2306 if self
.params
.get('check_formats') is True:
2307 info_dict
['thumbnails'] = LazyList(check_thumbnails(thumbnails
[::-1]), reverse
=True)
2309 info_dict
['thumbnails'] = thumbnails
# Post-process the extractor result for a single video (info_dict['_type'] must
# be 'video' or absent). Visible steps, in order:
#   * validate 'id' / 'title' and coerce string and numeric fields,
#   * normalise thumbnails, dates, live status, chapter/season/episode names
#     and subtitle entries,
#   * sanitise the formats list and fill in per-format defaults,
#   * run the pre-processors and the optional listing output,
#   * select the formats to download and hand each one to process_info(),
#   * record the download archive and run the 'after_video' post-processors.
# Raises ExtractorError for a missing/empty id, a missing title, or when no
# requested format is available and 'ignore_no_formats_error' is not set.
# MaxDownloadsReached raised by process_info() is re-raised after the
# remaining bookkeeping has been done.
# NOTE(review): the embedded line numbers skip values, so a number of
# statements of this function are not visible in this listing.
2311 def process_video_result(self
, info_dict
, download
=True):
2312 assert info_dict
.get('_type', 'video') == 'video'
2313 self
._num
_videos
+= 1
2315 if 'id' not in info_dict
:
2316 raise ExtractorError('Missing "id" field in extractor result', ie
=info_dict
['extractor'])
2317 elif not info_dict
.get('id'):
2318 raise ExtractorError('Extractor failed to obtain "id"', ie
=info_dict
['extractor'])
2320 info_dict
['fulltitle'] = info_dict
.get('title')
2321 if 'title' not in info_dict
:
2322 raise ExtractorError('Missing "title" field in extractor result',
2323 video_id
=info_dict
['id'], ie
=info_dict
['extractor'])
2324 elif not info_dict
.get('title'):
2325 self
.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
2326 info_dict
['title'] = f
'{info_dict["extractor"]} video #{info_dict["id"]}'
# Local helpers that warn about, and then coerce, fields of the wrong type.
2328 def report_force_conversion(field
, field_not
, conversion
):
2329 self
.report_warning(
2330 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2331 % (field
, field_not
, conversion
))
2333 def sanitize_string_field(info
, string_field
):
2334 field
= info
.get(string_field
)
2335 if field
is None or isinstance(field
, compat_str
):
2337 report_force_conversion(string_field
, 'a string', 'string')
2338 info
[string_field
] = compat_str(field
)
2340 def sanitize_numeric_fields(info
):
2341 for numeric_field
in self
._NUMERIC
_FIELDS
:
2342 field
= info
.get(numeric_field
)
2343 if field
is None or isinstance(field
, compat_numeric_types
):
2345 report_force_conversion(numeric_field
, 'numeric', 'int')
2346 info
[numeric_field
] = int_or_none(field
)
2348 sanitize_string_field(info_dict
, 'id')
2349 sanitize_numeric_fields(info_dict
)
2351 if 'playlist' not in info_dict
:
2352 # It isn't part of a playlist
2353 info_dict
['playlist'] = None
2354 info_dict
['playlist_index'] = None
2356 self
._sanitize
_thumbnails
(info_dict
)
2358 thumbnail
= info_dict
.get('thumbnail')
2359 thumbnails
= info_dict
.get('thumbnails')
2361 info_dict
['thumbnail'] = sanitize_url(thumbnail
)
2363 info_dict
['thumbnail'] = thumbnails
[-1]['url']
2365 if info_dict
.get('display_id') is None and 'id' in info_dict
:
2366 info_dict
['display_id'] = info_dict
['id']
2368 if info_dict
.get('duration') is not None:
2369 info_dict
['duration_string'] = formatSeconds(info_dict
['duration'])
# Derive each *_date field from its timestamp when only the timestamp is set.
2371 for ts_key
, date_key
in (
2372 ('timestamp', 'upload_date'),
2373 ('release_timestamp', 'release_date'),
2374 ('modified_timestamp', 'modified_date'),
2376 if info_dict
.get(date_key
) is None and info_dict
.get(ts_key
) is not None:
2377 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2378 # see http://bugs.python.org/issue1646728)
2380 upload_date
= datetime
.datetime
.utcfromtimestamp(info_dict
[ts_key
])
2381 info_dict
[date_key
] = upload_date
.strftime('%Y%m%d')
2382 except (ValueError, OverflowError, OSError):
# Reconcile 'live_status' with the 'is_live' / 'was_live' booleans: unset
# booleans are filled in from live_status further down.
2385 live_keys
= ('is_live', 'was_live')
2386 live_status
= info_dict
.get('live_status')
2387 if live_status
is None:
2388 for key
in live_keys
:
2389 if info_dict
.get(key
) is False:
2391 if info_dict
.get(key
):
2394 if all(info_dict
.get(key
) is False for key
in live_keys
):
2395 live_status
= 'not_live'
2397 info_dict
['live_status'] = live_status
2398 for key
in live_keys
:
2399 if info_dict
.get(key
) is None:
2400 info_dict
[key
] = (live_status
== key
)
2402 # Auto generate title fields corresponding to the *_number fields when missing
2403 # in order to always have clean titles. This is very common for TV series.
2404 for field
in ('chapter', 'season', 'episode'):
2405 if info_dict
.get('%s_number' % field
) is not None and not info_dict
.get(field
):
2406 info_dict
[field
] = '%s %d' % (field
.capitalize(), info_dict
['%s_number' % field
])
# Sanitise subtitle URLs and derive a missing 'ext' from the URL.
2408 for cc_kind
in ('subtitles', 'automatic_captions'):
2409 cc
= info_dict
.get(cc_kind
)
2411 for _
, subtitle
in cc
.items():
2412 for subtitle_format
in subtitle
:
2413 if subtitle_format
.get('url'):
2414 subtitle_format
['url'] = sanitize_url(subtitle_format
['url'])
2415 if subtitle_format
.get('ext') is None:
2416 subtitle_format
['ext'] = determine_ext(subtitle_format
['url']).lower()
2418 automatic_captions
= info_dict
.get('automatic_captions')
2419 subtitles
= info_dict
.get('subtitles')
2421 info_dict
['requested_subtitles'] = self
.process_subtitles(
2422 info_dict
['id'], subtitles
, automatic_captions
)
# Formats handling starts here: without a 'formats' list the info_dict itself
# is treated as the only format.
2424 if info_dict
.get('formats') is None:
2425 # There's only one format available
2426 formats
= [info_dict
]
2428 formats
= info_dict
['formats']
2430 info_dict
['__has_drm'] = any(f
.get('has_drm') for f
in formats
)
2431 if not self
.params
.get('allow_unplayable_formats'):
2432 formats
= [f
for f
in formats
if not f
.get('has_drm')]
2434 if info_dict
.get('is_live'):
2435 get_from_start
= bool(self
.params
.get('live_from_start'))
2436 formats
= [f
for f
in formats
if bool(f
.get('is_from_start')) == get_from_start
]
2437 if not get_from_start
:
2438 info_dict
['title'] += ' ' + datetime
.datetime
.now().strftime('%Y-%m-%d %H:%M')
2441 self
.raise_no_formats(info_dict
)
2443 def is_wellformed(f
):
2446 self
.report_warning(
2447 '"url" field is missing or empty - skipping format, '
2448 'there is an error in extractor')
2450 if isinstance(url
, bytes):
2451 sanitize_string_field(f
, 'url')
2454 # Filter out malformed formats for better extraction robustness
2455 formats
= list(filter(is_wellformed
, formats
))
2459 # We check that all the formats have the format and format_id fields
2460 for i
, format
in enumerate(formats
):
2461 sanitize_string_field(format
, 'format_id')
2462 sanitize_numeric_fields(format
)
2463 format
['url'] = sanitize_url(format
['url'])
2464 if not format
.get('format_id'):
2465 format
['format_id'] = compat_str(i
)
2467 # Sanitize format_id from characters used in format selector expression
2468 format
['format_id'] = re
.sub(r
'[\s,/+\[\]()]', '_', format
['format_id'])
2469 format_id
= format
['format_id']
2470 if format_id
not in formats_dict
:
2471 formats_dict
[format_id
] = []
2472 formats_dict
[format_id
].append(format
)
2474 # Make sure all formats have unique format_id
2475 common_exts
= set(itertools
.chain(*self
._format
_selection
_exts
.values()))
2476 for format_id
, ambiguous_formats
in formats_dict
.items():
2477 ambigious_id
= len(ambiguous_formats
) > 1
2478 for i
, format
in enumerate(ambiguous_formats
):
2480 format
['format_id'] = '%s-%d' % (format_id
, i
)
2481 if format
.get('ext') is None:
2482 format
['ext'] = determine_ext(format
['url']).lower()
2483 # Ensure there is no conflict between id and ext in format selection
2484 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2485 if format
['format_id'] != format
['ext'] and format
['format_id'] in common_exts
:
2486 format
['format_id'] = 'f%s' % format
['format_id']
# Fill in per-format defaults (format label, protocol, resolution, dynamic
# range, approximate filesize, HTTP headers).
2488 for i
, format
in enumerate(formats
):
2489 if format
.get('format') is None:
2490 format
['format'] = '{id} - {res}{note}'.format(
2491 id=format
['format_id'],
2492 res
=self
.format_resolution(format
),
2493 note
=format_field(format
, 'format_note', ' (%s)'),
2495 if format
.get('protocol') is None:
2496 format
['protocol'] = determine_protocol(format
)
2497 if format
.get('resolution') is None:
2498 format
['resolution'] = self
.format_resolution(format
, default
=None)
2499 if format
.get('dynamic_range') is None and format
.get('vcodec') != 'none':
2500 format
['dynamic_range'] = 'SDR'
2501 if (info_dict
.get('duration') and format
.get('tbr')
2502 and not format
.get('filesize') and not format
.get('filesize_approx')):
2503 format
['filesize_approx'] = info_dict
['duration'] * format
['tbr'] * (1024 / 8)
2505 # Add HTTP headers, so that external programs can use them from the
2507 full_format_info
= info_dict
.copy()
2508 full_format_info
.update(format
)
2509 format
['http_headers'] = self
._calc
_headers
(full_format_info
)
2510 # Remove private housekeeping stuff
2511 if '__x_forwarded_for_ip' in info_dict
:
2512 del info_dict
['__x_forwarded_for_ip']
2514 # TODO Central sorting goes here
2516 if self
.params
.get('check_formats') is True:
2517 formats
= LazyList(self
._check
_formats
(formats
[::-1]), reverse
=True)
2519 if not formats
or formats
[0] is not info_dict
:
2520 # only set the 'formats' fields if the original info_dict list them
2521 # otherwise we end up with a circular reference, the first (and unique)
2522 # element in the 'formats' field in info_dict is info_dict itself,
2523 # which can't be exported to json
2524 info_dict
['formats'] = formats
2526 info_dict
, _
= self
.pre_process(info_dict
)
2528 # The pre-processors may have modified the formats
2529 formats
= info_dict
.get('formats', [info_dict
])
# list_only: a listing option was given and 'simulate' was left unset (None).
2531 list_only
= self
.params
.get('simulate') is None and (
2532 self
.params
.get('list_thumbnails') or self
.params
.get('listformats') or self
.params
.get('listsubtitles'))
2533 interactive_format_selection
= not list_only
and self
.format_selector
== '-'
2534 if self
.params
.get('list_thumbnails'):
2535 self
.list_thumbnails(info_dict
)
2536 if self
.params
.get('listsubtitles'):
2537 if 'automatic_captions' in info_dict
:
2538 self
.list_subtitles(
2539 info_dict
['id'], automatic_captions
, 'automatic captions')
2540 self
.list_subtitles(info_dict
['id'], subtitles
, 'subtitles')
2541 if self
.params
.get('listformats') or interactive_format_selection
:
2542 self
.list_formats(info_dict
)
2544 # Without this printing, -F --print-json will not work
2545 self
.__forced
_printings
(info_dict
, self
.prepare_filename(info_dict
), incomplete
=True)
# Format selection: fall back to the default spec when no selector was
# configured; in interactive mode the selector is built from user input.
2548 format_selector
= self
.format_selector
2549 if format_selector
is None:
2550 req_format
= self
._default
_format
_spec
(info_dict
, download
=download
)
2551 self
.write_debug('Default format spec: %s' % req_format
)
2552 format_selector
= self
.build_format_selector(req_format
)
2555 if interactive_format_selection
:
2557 self
._format
_screen
('\nEnter format selector: ', self
.Styles
.EMPHASIS
))
2559 format_selector
= self
.build_format_selector(req_format
)
2560 except SyntaxError as err
:
2561 self
.report_error(err
, tb
=False, is_error
=False)
2564 # While in format selection we may need to have an access to the original
2565 # format set in order to calculate some metrics or do some processing.
2566 # For now we need to be able to guess whether original formats provided
2567 # by extractor are incomplete or not (i.e. whether extractor provides only
2568 # video-only or audio-only formats) for proper formats selection for
2569 # extractors with such incomplete formats (see
2570 # https://github.com/ytdl-org/youtube-dl/pull/5556).
2571 # Since formats may be filtered during format selection and may not match
2572 # the original formats the results may be incorrect. Thus original formats
2573 # or pre-calculated metrics should be passed to format selection routines
2575 # We will pass a context object containing all necessary additional data
2576 # instead of just formats.
2577 # This fixes incorrect format selection issue (see
2578 # https://github.com/ytdl-org/youtube-dl/issues/10083).
2579 incomplete_formats
= (
2580 # All formats are video-only or
2581 all(f
.get('vcodec') != 'none' and f
.get('acodec') == 'none' for f
in formats
)
2582 # all formats are audio-only
2583 or all(f
.get('vcodec') == 'none' and f
.get('acodec') != 'none' for f
in formats
))
2587 'incomplete_formats': incomplete_formats
,
2590 formats_to_download
= list(format_selector(ctx
))
2591 if interactive_format_selection
and not formats_to_download
:
2592 self
.report_error('Requested format is not available', tb
=False, is_error
=False)
2596 if not formats_to_download
:
2597 if not self
.params
.get('ignore_no_formats_error'):
2598 raise ExtractorError('Requested format is not available', expected
=True,
2599 video_id
=info_dict
['id'], ie
=info_dict
['extractor'])
2600 self
.report_warning('Requested format is not available')
2601 # Process what we can, even without any available formats.
2602 formats_to_download
= [{}]
2604 best_format
= formats_to_download
[-1]
2608 f
'[info] {info_dict["id"]}: Downloading {len(formats_to_download)} format(s): '
2609 + ', '.join([f
['format_id'] for f
in formats_to_download
]))
# Each selected format is processed through its own copy of info_dict overlaid
# with the format's fields; afterwards keys equal to the original are examined
# again (the statement acting on them is not visible here).
2610 max_downloads_reached
= False
2611 for i
, fmt
in enumerate(formats_to_download
):
2612 formats_to_download
[i
] = new_info
= dict(info_dict
)
2613 # Save a reference to the original info_dict so that it can be modified in process_info if needed
2614 new_info
.update(fmt
)
2615 new_info
['__original_infodict'] = info_dict
2617 self
.process_info(new_info
)
2618 except MaxDownloadsReached
:
2619 max_downloads_reached
= True
2620 new_info
.pop('__original_infodict')
2621 # Remove copied info
2622 for key
, val
in tuple(new_info
.items()):
2623 if info_dict
.get(key
) == val
:
2625 if max_downloads_reached
:
# The archive entry is written only if at least one format asked for it (True)
# and none reported False; 'ignore' is neutral.
2628 write_archive
= set(f
.get('__write_download_archive', False) for f
in formats_to_download
)
2629 assert write_archive
.issubset({True, False, 'ignore'}
)
2630 if True in write_archive
and False not in write_archive
:
2631 self
.record_download_archive(info_dict
)
2633 info_dict
['requested_downloads'] = formats_to_download
2634 info_dict
= self
.run_all_pps('after_video', info_dict
)
2635 if max_downloads_reached
:
2636 raise MaxDownloadsReached()
2638 # We update the info dict with the selected best quality format (backwards compatibility)
2639 info_dict
.update(best_format
)
# Decide which subtitle languages and which format per language to fetch for
# video_id. Normal subtitles are considered when params['writesubtitles'] is
# set; automatic captions are added for languages not already covered when
# params['writeautomaticsub'] is set.
# NOTE(review): the embedded line numbers skip values, so several statements
# (including the early exit and the final return) are not visible in this
# listing.
2642 def process_subtitles(self
, video_id
, normal_subtitles
, automatic_captions
):
2643 """Select the requested subtitles and their format"""
2645 if normal_subtitles
and self
.params
.get('writesubtitles'):
2646 available_subs
.update(normal_subtitles
)
2647 if automatic_captions
and self
.params
.get('writeautomaticsub'):
2648 for lang
, cap_info
in automatic_captions
.items():
2649 if lang
not in available_subs
:
2650 available_subs
[lang
] = cap_info
2652 if (not self
.params
.get('writesubtitles') and not
2653 self
.params
.get('writeautomaticsub') or not
# Language selection, in priority order: 'allsubtitles', then the
# 'subtitleslangs' patterns, then 'en' if available, else the first language.
2657 all_sub_langs
= available_subs
.keys()
2658 if self
.params
.get('allsubtitles', False):
2659 requested_langs
= all_sub_langs
2660 elif self
.params
.get('subtitleslangs', False):
2661 # A list is used so that the order of languages will be the same as
2662 # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
2663 requested_langs
= []
2664 for lang_re
in self
.params
.get('subtitleslangs'):
2665 if lang_re
== 'all':
2666 requested_langs
.extend(all_sub_langs
)
# A pattern starting with '-' removes the matching languages instead of adding
# them; each pattern is matched as a regex anchored at the end with '$'.
2668 discard
= lang_re
[0] == '-'
2670 lang_re
= lang_re
[1:]
2671 current_langs
= filter(re
.compile(lang_re
+ '$').match
, all_sub_langs
)
2673 for lang
in current_langs
:
2674 while lang
in requested_langs
:
2675 requested_langs
.remove(lang
)
2677 requested_langs
.extend(current_langs
)
2678 requested_langs
= orderedSet(requested_langs
)
2679 elif 'en' in available_subs
:
2680 requested_langs
= ['en']
2682 requested_langs
= [list(all_sub_langs
)[0]]
2684 self
.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs
))
# 'subtitlesformat' is a '/'-separated preference list of extensions.
2686 formats_query
= self
.params
.get('subtitlesformat', 'best')
2687 formats_preference
= formats_query
.split('/') if formats_query
else []
2689 for lang
in requested_langs
:
2690 formats
= available_subs
.get(lang
)
2692 self
.report_warning('%s subtitles not available for %s' % (lang
, video_id
))
2694 for ext
in formats_preference
:
2698 matches
= list(filter(lambda f
: f
['ext'] == ext
, formats
))
2704 self
.report_warning(
2705 'No subtitle format found matching "%s" for language %s, '
2706 'using %s' % (formats_query
, lang
, f
['ext']))
2710 def _forceprint(self
, key
, info_dict
):
2711 if info_dict
is None:
2713 info_copy
= info_dict
.copy()
2714 info_copy
['formats_table'] = self
.render_formats_table(info_dict
)
2715 info_copy
['thumbnails_table'] = self
.render_thumbnails_table(info_dict
)
2716 info_copy
['subtitles_table'] = self
.render_subtitles_table(info_dict
.get('id'), info_dict
.get('subtitles'))
2717 info_copy
['automatic_captions_table'] = self
.render_subtitles_table(info_dict
.get('id'), info_dict
.get('automatic_captions'))
2719 def format_tmpl(tmpl
):
2720 mobj
= re
.match(r
'\w+(=?)$', tmpl
)
2721 if mobj
and mobj
.group(1):
2722 return f
'{tmpl[:-1]} = %({tmpl[:-1]})r'
2724 return f
'%({tmpl})s'
2727 for tmpl
in self
.params
['forceprint'].get(key
, []):
2728 self
.to_stdout(self
.evaluate_outtmpl(format_tmpl(tmpl
), info_copy
))
2730 for tmpl
, file_tmpl
in self
.params
['print_to_file'].get(key
, []):
2731 filename
= self
.evaluate_outtmpl(file_tmpl
, info_dict
)
2732 tmpl
= format_tmpl(tmpl
)
2733 self
.to_screen(f
'[info] Writing {tmpl!r} to: {filename}')
2734 with io
.open(filename
, 'a', encoding
='utf-8') as f
:
2735 f
.write(self
.evaluate_outtmpl(tmpl
, info_copy
) + '\n')
def __forced_printings(self, info_dict, filename, incomplete):
    """Print the fields requested through the 'force*' params for one video.

    All work is done on a shallow copy of info_dict, so the 'filename' and
    'urls' keys added below never reach the caller's dict.

    info_dict  -- info dict of the video
    filename   -- value to expose as the 'filename' field; ignored if None
    incomplete -- when true, a mandatory field whose value is missing/None
                  is skipped; when false it is looked up unconditionally
                  (a missing key then raises KeyError)
    """
    def print_mandatory(field, actual_field=None):
        # `field` selects the 'force<field>' param; `actual_field` is the
        # info_dict key that is printed (defaults to `field`)
        if actual_field is None:
            actual_field = field
        if (self.params.get('force%s' % field, False)
                and (not incomplete or info_dict.get(actual_field) is not None)):
            self.to_stdout(info_dict[actual_field])

    def print_optional(field):
        # Optional fields are printed only when they have a non-None value
        if (self.params.get('force%s' % field, False)
                and info_dict.get(field) is not None):
            self.to_stdout(info_dict[field])

    info_dict = info_dict.copy()
    if filename is not None:
        info_dict['filename'] = filename
    if info_dict.get('requested_formats') is not None:
        # For RTMP URLs, also include the playpath
        info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
    elif 'url' in info_dict:
        info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')

    # post_extract is only run when some output below will consume the whole dict
    if (self.params.get('forcejson')
            or self.params['forceprint'].get('video')
            or self.params['print_to_file'].get('video')):
        self.post_extract(info_dict)
    self._forceprint('video', info_dict)

    print_mandatory('title')
    print_mandatory('id')
    print_mandatory('url', 'urls')
    print_optional('thumbnail')
    print_optional('description')
    print_optional('filename')
    if self.params.get('forceduration') and info_dict.get('duration') is not None:
        self.to_stdout(formatSeconds(info_dict['duration']))
    print_mandatory('format')

    if self.params.get('forcejson'):
        self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
2778 def dl(self
, name
, info
, subtitle
=False, test
=False):
2779 if not info
.get('url'):
2780 self
.raise_no_formats(info
, True)
2783 verbose
= self
.params
.get('verbose')
2786 'quiet': self
.params
.get('quiet') or not verbose
,
2788 'noprogress': not verbose
,
2790 'skip_unavailable_fragments': False,
2791 'keep_fragments': False,
2793 '_no_ytdl_file': True,
2796 params
= self
.params
2797 fd
= get_suitable_downloader(info
, params
, to_stdout
=(name
== '-'))(self
, params
)
2799 for ph
in self
._progress
_hooks
:
2800 fd
.add_progress_hook(ph
)
2802 (f
['url'].split(',')[0] + ',<data>' if f
['url'].startswith('data:') else f
['url'])
2803 for f
in info
.get('requested_formats', []) or [info
])
2804 self
.write_debug('Invoking downloader on "%s"' % urls
)
2806 # Note: Ideally info should be deep-copied so that hooks cannot modify it.
2807 # But it may contain objects that are not deep-copyable
2808 new_info
= self
._copy
_infodict
(info
)
2809 if new_info
.get('http_headers') is None:
2810 new_info
['http_headers'] = self
._calc
_headers
(new_info
)
2811 return fd
.download(name
, new_info
, subtitle
)
2813 def existing_file(self
, filepaths
, *, default_overwrite
=True):
2814 existing_files
= list(filter(os
.path
.exists
, orderedSet(filepaths
)))
2815 if existing_files
and not self
.params
.get('overwrites', default_overwrite
):
2816 return existing_files
[0]
2818 for file in existing_files
:
2819 self
.report_file_delete(file)
2823 def process_info(self
, info_dict
):
2824 """Process a single resolved IE result. (Modified it in-place)"""
2826 assert info_dict
.get('_type', 'video') == 'video'
2827 original_infodict
= info_dict
2829 if 'format' not in info_dict
and 'ext' in info_dict
:
2830 info_dict
['format'] = info_dict
['ext']
2832 if self
._match
_entry
(info_dict
) is not None:
2833 info_dict
['__write_download_archive'] = 'ignore'
2836 self
.post_extract(info_dict
)
2837 self
._num
_downloads
+= 1
2839 # info_dict['_filename'] needs to be set for backward compatibility
2840 info_dict
['_filename'] = full_filename
= self
.prepare_filename(info_dict
, warn
=True)
2841 temp_filename
= self
.prepare_filename(info_dict
, 'temp')
2845 self
.__forced
_printings
(info_dict
, full_filename
, incomplete
=('format' not in info_dict
))
2847 if self
.params
.get('simulate'):
2848 info_dict
['__write_download_archive'] = self
.params
.get('force_write_download_archive')
2851 if full_filename
is None:
2853 if not self
._ensure
_dir
_exists
(encodeFilename(full_filename
)):
2855 if not self
._ensure
_dir
_exists
(encodeFilename(temp_filename
)):
2858 if self
._write
_description
('video', info_dict
,
2859 self
.prepare_filename(info_dict
, 'description')) is None:
2862 sub_files
= self
._write
_subtitles
(info_dict
, temp_filename
)
2863 if sub_files
is None:
2865 files_to_move
.update(dict(sub_files
))
2867 thumb_files
= self
._write
_thumbnails
(
2868 'video', info_dict
, temp_filename
, self
.prepare_filename(info_dict
, 'thumbnail'))
2869 if thumb_files
is None:
2871 files_to_move
.update(dict(thumb_files
))
2873 infofn
= self
.prepare_filename(info_dict
, 'infojson')
2874 _infojson_written
= self
._write
_info
_json
('video', info_dict
, infofn
)
2875 if _infojson_written
:
2876 info_dict
['infojson_filename'] = infofn
2877 # For backward compatibility, even though it was a private field
2878 info_dict
['__infojson_filename'] = infofn
2879 elif _infojson_written
is None:
2882 # Note: Annotations are deprecated
2884 if self
.params
.get('writeannotations', False):
2885 annofn
= self
.prepare_filename(info_dict
, 'annotation')
2887 if not self
._ensure
_dir
_exists
(encodeFilename(annofn
)):
2889 if not self
.params
.get('overwrites', True) and os
.path
.exists(encodeFilename(annofn
)):
2890 self
.to_screen('[info] Video annotations are already present')
2891 elif not info_dict
.get('annotations'):
2892 self
.report_warning('There are no annotations to write.')
2895 self
.to_screen('[info] Writing video annotations to: ' + annofn
)
2896 with io
.open(encodeFilename(annofn
), 'w', encoding
='utf-8') as annofile
:
2897 annofile
.write(info_dict
['annotations'])
2898 except (KeyError, TypeError):
2899 self
.report_warning('There are no annotations to write.')
2900 except (OSError, IOError):
2901 self
.report_error('Cannot write annotations file: ' + annofn
)
2904 # Write internet shortcut files
2905 def _write_link_file(link_type
):
2906 if 'webpage_url' not in info_dict
:
2907 self
.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2909 linkfn
= replace_extension(self
.prepare_filename(info_dict
, 'link'), link_type
, info_dict
.get('ext'))
2910 if not self
._ensure
_dir
_exists
(encodeFilename(linkfn
)):
2912 if self
.params
.get('overwrites', True) and os
.path
.exists(encodeFilename(linkfn
)):
2913 self
.to_screen(f
'[info] Internet shortcut (.{link_type}) is already present')
2916 self
.to_screen(f
'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
2917 with io
.open(encodeFilename(to_high_limit_path(linkfn
)), 'w', encoding
='utf-8',
2918 newline
='\r\n' if link_type
== 'url' else '\n') as linkfile
:
2919 template_vars
= {'url': iri_to_uri(info_dict['webpage_url'])}
2920 if link_type
== 'desktop':
2921 template_vars
['filename'] = linkfn
[:-(len(link_type
) + 1)]
2922 linkfile
.write(LINK_TEMPLATES
[link_type
] % template_vars
)
2923 except (OSError, IOError):
2924 self
.report_error(f
'Cannot write internet shortcut {linkfn}')
2929 'url': self
.params
.get('writeurllink'),
2930 'webloc': self
.params
.get('writewebloclink'),
2931 'desktop': self
.params
.get('writedesktoplink'),
2933 if self
.params
.get('writelink'):
2934 link_type
= ('webloc' if sys
.platform
== 'darwin'
2935 else 'desktop' if sys
.platform
.startswith('linux')
2937 write_links
[link_type
] = True
2939 if any(should_write
and not _write_link_file(link_type
)
2940 for link_type
, should_write
in write_links
.items()):
2943 def replace_info_dict(new_info
):
2945 if new_info
== info_dict
:
2948 info_dict
.update(new_info
)
2951 new_info
, files_to_move
= self
.pre_process(info_dict
, 'before_dl', files_to_move
)
2952 replace_info_dict(new_info
)
2953 except PostProcessingError
as err
:
2954 self
.report_error('Preprocessing: %s' % str(err
))
2957 if self
.params
.get('skip_download'):
2958 info_dict
['filepath'] = temp_filename
2959 info_dict
['__finaldir'] = os
.path
.dirname(os
.path
.abspath(encodeFilename(full_filename
)))
2960 info_dict
['__files_to_move'] = files_to_move
2961 replace_info_dict(self
.run_pp(MoveFilesAfterDownloadPP(self
, False), info_dict
))
2962 info_dict
['__write_download_archive'] = self
.params
.get('force_write_download_archive')
2965 info_dict
.setdefault('__postprocessors', [])
2968 def existing_video_file(*filepaths
):
2969 ext
= info_dict
.get('ext')
2970 converted
= lambda file: replace_extension(file, self
.params
.get('final_ext') or ext
, ext
)
2971 file = self
.existing_file(itertools
.chain(*zip(map(converted
, filepaths
), filepaths
)),
2972 default_overwrite
=False)
2974 info_dict
['ext'] = os
.path
.splitext(file)[1][1:]
2978 if info_dict
.get('requested_formats') is not None:
2980 def compatible_formats(formats
):
2981 # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2982 video_formats
= [format
for format
in formats
if format
.get('vcodec') != 'none']
2983 audio_formats
= [format
for format
in formats
if format
.get('acodec') != 'none']
2984 if len(video_formats
) > 2 or len(audio_formats
) > 2:
2988 exts
= set(format
.get('ext') for format
in formats
)
2990 set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2993 for ext_sets
in COMPATIBLE_EXTS
:
2994 if ext_sets
.issuperset(exts
):
2996 # TODO: Check acodec/vcodec
2999 requested_formats
= info_dict
['requested_formats']
3000 old_ext
= info_dict
['ext']
3001 if self
.params
.get('merge_output_format') is None:
3002 if not compatible_formats(requested_formats
):
3003 info_dict
['ext'] = 'mkv'
3004 self
.report_warning(
3005 'Requested formats are incompatible for merge and will be merged into mkv')
3006 if (info_dict
['ext'] == 'webm'
3007 and info_dict
.get('thumbnails')
3008 # check with type instead of pp_key, __name__, or isinstance
3009 # since we don't want any custom PPs to trigger this
3010 and any(type(pp
) == EmbedThumbnailPP
for pp
in self
._pps
['post_process'])):
3011 info_dict
['ext'] = 'mkv'
3012 self
.report_warning(
3013 'webm doesn\'t support embedding a thumbnail, mkv will be used')
3014 new_ext
= info_dict
['ext']
3016 def correct_ext(filename
, ext
=new_ext
):
3019 filename_real_ext
= os
.path
.splitext(filename
)[1][1:]
3021 os
.path
.splitext(filename
)[0]
3022 if filename_real_ext
in (old_ext
, new_ext
)
3024 return '%s.%s' % (filename_wo_ext
, ext
)
3026 # Ensure filename always has a correct extension for successful merge
3027 full_filename
= correct_ext(full_filename
)
3028 temp_filename
= correct_ext(temp_filename
)
3029 dl_filename
= existing_video_file(full_filename
, temp_filename
)
3030 info_dict
['__real_download'] = False
3033 merger
= FFmpegMergerPP(self
)
3035 fd
= get_suitable_downloader(info_dict
, self
.params
, to_stdout
=temp_filename
== '-')
3036 if dl_filename
is not None:
3037 self
.report_file_already_downloaded(dl_filename
)
3039 for f
in requested_formats
if fd
!= FFmpegFD
else []:
3040 f
['filepath'] = fname
= prepend_extension(
3041 correct_ext(temp_filename
, info_dict
['ext']),
3042 'f%s' % f
['format_id'], info_dict
['ext'])
3043 downloaded
.append(fname
)
3044 info_dict
['url'] = '\n'.join(f
['url'] for f
in requested_formats
)
3045 success
, real_download
= self
.dl(temp_filename
, info_dict
)
3046 info_dict
['__real_download'] = real_download
3048 if self
.params
.get('allow_unplayable_formats'):
3049 self
.report_warning(
3050 'You have requested merging of multiple formats '
3051 'while also allowing unplayable formats to be downloaded. '
3052 'The formats won\'t be merged to prevent data corruption.')
3053 elif not merger
.available
:
3054 msg
= 'You have requested merging of multiple formats but ffmpeg is not installed'
3055 if not self
.params
.get('ignoreerrors'):
3056 self
.report_error(f
'{msg}. Aborting due to --abort-on-error')
3058 self
.report_warning(f
'{msg}. The formats won\'t be merged')
3060 if temp_filename
== '-':
3061 reason
= ('using a downloader other than ffmpeg' if FFmpegFD
.can_merge_formats(info_dict
, self
.params
)
3062 else 'but the formats are incompatible for simultaneous download' if merger
.available
3063 else 'but ffmpeg is not installed')
3064 self
.report_warning(
3065 f
'You have requested downloading multiple formats to stdout {reason}. '
3066 'The formats will be streamed one after the other')
3067 fname
= temp_filename
3068 for f
in requested_formats
:
3069 new_info
= dict(info_dict
)
3070 del new_info
['requested_formats']
3072 if temp_filename
!= '-':
3073 fname
= prepend_extension(
3074 correct_ext(temp_filename
, new_info
['ext']),
3075 'f%s' % f
['format_id'], new_info
['ext'])
3076 if not self
._ensure
_dir
_exists
(fname
):
3078 f
['filepath'] = fname
3079 downloaded
.append(fname
)
3080 partial_success
, real_download
= self
.dl(fname
, new_info
)
3081 info_dict
['__real_download'] = info_dict
['__real_download'] or real_download
3082 success
= success
and partial_success
3084 if downloaded
and merger
.available
and not self
.params
.get('allow_unplayable_formats'):
3085 info_dict
['__postprocessors'].append(merger
)
3086 info_dict
['__files_to_merge'] = downloaded
3087 # Even if there were no downloads, it is being merged only now
3088 info_dict
['__real_download'] = True
3090 for file in downloaded
:
3091 files_to_move
[file] = None
3093 # Just a single file
3094 dl_filename
= existing_video_file(full_filename
, temp_filename
)
3095 if dl_filename
is None or dl_filename
== temp_filename
:
3096 # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
3097 # So we should try to resume the download
3098 success
, real_download
= self
.dl(temp_filename
, info_dict
)
3099 info_dict
['__real_download'] = real_download
3101 self
.report_file_already_downloaded(dl_filename
)
3103 dl_filename
= dl_filename
or temp_filename
3104 info_dict
['__finaldir'] = os
.path
.dirname(os
.path
.abspath(encodeFilename(full_filename
)))
3106 except network_exceptions
as err
:
3107 self
.report_error('unable to download video data: %s' % error_to_compat_str(err
))
3109 except (OSError, IOError) as err
:
3110 raise UnavailableVideoError(err
)
3111 except (ContentTooShortError
, ) as err
:
3112 self
.report_error('content too short (expected %s bytes and served %s)' % (err
.expected
, err
.downloaded
))
3115 if success
and full_filename
!= '-':
3119 fixup_policy
= self
.params
.get('fixup')
3120 vid
= info_dict
['id']
3122 if fixup_policy
in ('ignore', 'never'):
3124 elif fixup_policy
== 'warn':
3126 elif fixup_policy
!= 'force':
3127 assert fixup_policy
in ('detect_or_warn', None)
3128 if not info_dict
.get('__real_download'):
3131 def ffmpeg_fixup(cndn
, msg
, cls
):
3135 self
.report_warning(f
'{vid}: {msg}')
3139 info_dict
['__postprocessors'].append(pp
)
3141 self
.report_warning(f
'{vid}: {msg}. Install ffmpeg to fix this automatically')
3143 stretched_ratio
= info_dict
.get('stretched_ratio')
3145 stretched_ratio
not in (1, None),
3146 f
'Non-uniform pixel ratio {stretched_ratio}',
3147 FFmpegFixupStretchedPP
)
3150 (info_dict
.get('requested_formats') is None
3151 and info_dict
.get('container') == 'm4a_dash'
3152 and info_dict
.get('ext') == 'm4a'),
3153 'writing DASH m4a. Only some players support this container',
3156 downloader
= get_suitable_downloader(info_dict
, self
.params
) if 'protocol' in info_dict
else None
3157 downloader
= downloader
.__name
__ if downloader
else None
3159 if info_dict
.get('requested_formats') is None: # Not necessary if doing merger
3160 ffmpeg_fixup(downloader
== 'HlsFD',
3161 'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
3163 ffmpeg_fixup(info_dict
.get('is_live') and downloader
== 'DashSegmentsFD',
3164 'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP
)
3166 ffmpeg_fixup(downloader
== 'WebSocketFragmentFD', 'Malformed timestamps detected', FFmpegFixupTimestampPP
)
3167 ffmpeg_fixup(downloader
== 'WebSocketFragmentFD', 'Malformed duration detected', FFmpegFixupDurationPP
)
3171 replace_info_dict(self
.post_process(dl_filename
, info_dict
, files_to_move
))
3172 except PostProcessingError
as err
:
3173 self
.report_error('Postprocessing: %s' % str(err
))
3176 for ph
in self
._post
_hooks
:
3177 ph(info_dict
['filepath'])
3178 except Exception as err
:
3179 self
.report_error('post hooks: %s' % str(err
))
3181 info_dict
['__write_download_archive'] = True
3183 if self
.params
.get('force_write_download_archive'):
3184 info_dict
['__write_download_archive'] = True
3186 # Make sure the info_dict was modified in-place
3187 assert info_dict
is original_infodict
3189 max_downloads
= self
.params
.get('max_downloads')
3190 if max_downloads
is not None and self
._num
_downloads
>= int(max_downloads
):
3191 raise MaxDownloadsReached()
3193 def __download_wrapper(self
, func
):
3194 @functools.wraps(func
)
3195 def wrapper(*args
, **kwargs
):
3197 res
= func(*args
, **kwargs
)
3198 except UnavailableVideoError
as e
:
3199 self
.report_error(e
)
3200 except MaxDownloadsReached
as e
:
3201 self
.to_screen(f
'[info] {e}')
3203 except DownloadCancelled
as e
:
3204 self
.to_screen(f
'[info] {e}')
3205 if not self
.params
.get('break_per_url'):
3208 if self
.params
.get('dump_single_json', False):
3209 self
.post_extract(res
)
3210 self
.to_stdout(json
.dumps(self
.sanitize_info(res
)))
3213 def download(self
, url_list
):
3214 """Download a given list of URLs."""
3215 url_list
= variadic(url_list
) # Passing a single URL is a common mistake
3216 outtmpl
= self
.outtmpl_dict
['default']
3217 if (len(url_list
) > 1
3219 and '%' not in outtmpl
3220 and self
.params
.get('max_downloads') != 1):
3221 raise SameFileError(outtmpl
)
3223 for url
in url_list
:
3224 self
.__download
_wrapper
(self
.extract_info
)(
3225 url
, force_generic_extractor
=self
.params
.get('force_generic_extractor', False))
3227 return self
._download
_retcode
3229 def download_with_info_file(self
, info_filename
):
3230 with contextlib
.closing(fileinput
.FileInput(
3231 [info_filename
], mode
='r',
3232 openhook
=fileinput
.hook_encoded('utf-8'))) as f
:
3233 # FileInput doesn't have a read method, we can't call json.load
3234 info
= self
.sanitize_info(json
.loads('\n'.join(f
)), self
.params
.get('clean_infojson', True))
3236 self
.__download
_wrapper
(self
.process_ie_result
)(info
, download
=True)
3237 except (DownloadError
, EntryNotInPlaylist
, ReExtractInfo
) as e
:
3238 if not isinstance(e
, EntryNotInPlaylist
):
3239 self
.to_stderr('\r')
3240 webpage_url
= info
.get('webpage_url')
3241 if webpage_url
is not None:
3242 self
.report_warning(f
'The info failed to download: {e}; trying with URL {webpage_url}')
3243 return self
.download([webpage_url
])
3246 return self
._download
_retcode
3249 def sanitize_info(info_dict
, remove_private_keys
=False):
3250 ''' Sanitize the infodict for converting to json '''
3251 if info_dict
is None:
3253 info_dict
.setdefault('epoch', int(time
.time()))
3254 info_dict
.setdefault('_type', 'video')
3255 remove_keys
= {'__original_infodict'}
# Always remove this since this may contain a copy of the entire dict
3256 keep_keys
= ['_type'] # Always keep this to facilitate load-info-json
3257 if remove_private_keys
:
3259 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
3260 'entries', 'filepath', 'infojson_filename', 'original_url', 'playlist_autonumber',
3262 reject
= lambda k
, v
: k
not in keep_keys
and (
3263 k
.startswith('_') or k
in remove_keys
or v
is None)
3265 reject
= lambda k
, v
: k
in remove_keys
3268 if isinstance(obj
, dict):
3269 return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
3270 elif isinstance(obj
, (list, tuple, set, LazyList
)):
3271 return list(map(filter_fn
, obj
))
3272 elif obj
is None or isinstance(obj
, (str, int, float, bool)):
3277 return filter_fn(info_dict
)
def filter_requested_info(info_dict, actually_filter=True):
    ''' Alias of sanitize_info for backward compatibility

    actually_filter is forwarded as sanitize_info's second positional
    argument (remove_private_keys).
    '''
    return YoutubeDL.sanitize_info(info_dict, actually_filter)
3285 def post_extract(info_dict
):
3286 def actual_post_extract(info_dict
):
3287 if info_dict
.get('_type') in ('playlist', 'multi_video'):
3288 for video_dict
in info_dict
.get('entries', {}):
3289 actual_post_extract(video_dict
or {})
3292 post_extractor
= info_dict
.get('__post_extractor') or (lambda: {})
3293 extra
= post_extractor().items()
3294 info_dict
.update(extra
)
3295 info_dict
.pop('__post_extractor', None)
3297 original_infodict
= info_dict
.get('__original_infodict') or {}
3298 original_infodict
.update(extra
)
3299 original_infodict
.pop('__post_extractor', None)
3301 actual_post_extract(info_dict
or {})
3303 def run_pp(self
, pp
, infodict
):
3304 files_to_delete
= []
3305 if '__files_to_move' not in infodict
:
3306 infodict
['__files_to_move'] = {}
3308 files_to_delete
, infodict
= pp
.run(infodict
)
3309 except PostProcessingError
as e
:
3310 # Must be True and not 'only_download'
3311 if self
.params
.get('ignoreerrors') is True:
3312 self
.report_error(e
)
3316 if not files_to_delete
:
3318 if self
.params
.get('keepvideo', False):
3319 for f
in files_to_delete
:
3320 infodict
['__files_to_move'].setdefault(f
, '')
3322 for old_filename
in set(files_to_delete
):
3323 self
.to_screen('Deleting original file %s (pass -k to keep)' % old_filename
)
3325 os
.remove(encodeFilename(old_filename
))
3326 except (IOError, OSError):
3327 self
.report_warning('Unable to remove downloaded original file')
3328 if old_filename
in infodict
['__files_to_move']:
3329 del infodict
['__files_to_move'][old_filename
]
def run_all_pps(self, key, info, *, additional_pps=None):
    """Run the postprocessors registered under `key` on `info`.

    self._forceprint(key, info) is called first. Then every postprocessor in
    `additional_pps` (if any), followed by those in self._pps[key], is run
    through self.run_pp; each one receives the dict returned by the previous.

    Returns the info dict returned by the last postprocessor (or `info`
    itself when there is nothing to run).
    """
    self._forceprint(key, info)
    for pp in (additional_pps or []) + self._pps[key]:
        info = self.run_pp(pp, info)
    # Callers rebind their info dict to this result
    return info
def pre_process(self, ie_info, key='pre_process', files_to_move=None):
    """Run the postprocessors of stage `key` on a copy of `ie_info`.

    `files_to_move` (or an empty dict) is stored under '__files_to_move' for
    the duration of the run and removed from the result afterwards.

    Returns a tuple (info, files_to_move): the processed info dict and the
    '__files_to_move' value popped from it (None if the key is gone).
    """
    # Shallow copy, so top-level keys of the caller's dict are not touched here
    info = dict(ie_info)
    info['__files_to_move'] = files_to_move or {}
    info = self.run_all_pps(key, info)
    return info, info.pop('__files_to_move', None)
def post_process(self, filename, info, files_to_move=None):
    """Run all the postprocessors on the given file.

    Sets info['filepath'] to `filename`, runs the 'post_process' stage
    (preceded by any postprocessors listed in info['__postprocessors']),
    then MoveFilesAfterDownloadPP, and finally the 'after_move' stage.

    Returns the info dict returned by the 'after_move' stage.
    """
    info['filepath'] = filename
    info['__files_to_move'] = files_to_move or {}
    info = self.run_all_pps('post_process', info, additional_pps=info.get('__postprocessors'))
    info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
    # The move has run; drop the bookkeeping key before the 'after_move' stage
    del info['__files_to_move']
    return self.run_all_pps('after_move', info)
3353 def _make_archive_id(self
, info_dict
):
3354 video_id
= info_dict
.get('id')
3357 # Future-proof against any change in case
3358 # and backwards compatibility with prior versions
3359 extractor
= info_dict
.get('extractor_key') or info_dict
.get('ie_key') # key in a playlist
3360 if extractor
is None:
3361 url
= str_or_none(info_dict
.get('url'))
3364 # Try to find matching extractor for the URL and take its ie_key
3365 for ie_key
, ie
in self
._ies
.items():
3366 if ie
.suitable(url
):
3371 return '%s %s' % (extractor
.lower(), video_id
)
3373 def in_download_archive(self
, info_dict
):
3374 fn
= self
.params
.get('download_archive')
3378 vid_id
= self
._make
_archive
_id
(info_dict
)
3380 return False # Incomplete video information
3382 return vid_id
in self
.archive
3384 def record_download_archive(self
, info_dict
):
3385 fn
= self
.params
.get('download_archive')
3388 vid_id
= self
._make
_archive
_id
(info_dict
)
3390 self
.write_debug(f
'Adding to archive: {vid_id}')
3391 with locked_file(fn
, 'a', encoding
='utf-8') as archive_file
:
3392 archive_file
.write(vid_id
+ '\n')
3393 self
.archive
.add(vid_id
)
3396 def format_resolution(format
, default
='unknown'):
3397 if format
.get('vcodec') == 'none' and format
.get('acodec') != 'none':
3399 if format
.get('resolution') is not None:
3400 return format
['resolution']
3401 if format
.get('width') and format
.get('height'):
3402 return '%dx%d' % (format
['width'], format
['height'])
3403 elif format
.get('height'):
3404 return '%sp' % format
['height']
3405 elif format
.get('width'):
3406 return '%dx?' % format
['width']
3409 def _list_format_headers(self
, *headers
):
3410 if self
.params
.get('listformats_table', True) is not False:
3411 return [self
._format
_screen
(header
, self
.Styles
.HEADERS
) for header
in headers
]
3414 def _format_note(self
, fdict
):
3416 if fdict
.get('ext') in ['f4f', 'f4m']:
3417 res
+= '(unsupported)'
3418 if fdict
.get('language'):
3421 res
+= '[%s]' % fdict
['language']
3422 if fdict
.get('format_note') is not None:
3425 res
+= fdict
['format_note']
3426 if fdict
.get('tbr') is not None:
3429 res
+= '%4dk' % fdict
['tbr']
3430 if fdict
.get('container') is not None:
3433 res
+= '%s container' % fdict
['container']
3434 if (fdict
.get('vcodec') is not None
3435 and fdict
.get('vcodec') != 'none'):
3438 res
+= fdict
['vcodec']
3439 if fdict
.get('vbr') is not None:
3441 elif fdict
.get('vbr') is not None and fdict
.get('abr') is not None:
3443 if fdict
.get('vbr') is not None:
3444 res
+= '%4dk' % fdict
['vbr']
3445 if fdict
.get('fps') is not None:
3448 res
+= '%sfps' % fdict
['fps']
3449 if fdict
.get('acodec') is not None:
3452 if fdict
['acodec'] == 'none':
3455 res
+= '%-5s' % fdict
['acodec']
3456 elif fdict
.get('abr') is not None:
3460 if fdict
.get('abr') is not None:
3461 res
+= '@%3dk' % fdict
['abr']
3462 if fdict
.get('asr') is not None:
3463 res
+= ' (%5dHz)' % fdict
['asr']
3464 if fdict
.get('filesize') is not None:
3467 res
+= format_bytes(fdict
['filesize'])
3468 elif fdict
.get('filesize_approx') is not None:
3471 res
+= '~' + format_bytes(fdict
['filesize_approx'])
3474 def render_formats_table(self
, info_dict
):
3475 if not info_dict
.get('formats') and not info_dict
.get('url'):
3478 formats
= info_dict
.get('formats', [info_dict
])
3479 if not self
.params
.get('listformats_table', True) is not False:
3482 format_field(f
, 'format_id'),
3483 format_field(f
, 'ext'),
3484 self
.format_resolution(f
),
3485 self
._format
_note
(f
)
3486 ] for f
in formats
if f
.get('preference') is None or f
['preference'] >= -1000]
3487 return render_table(['format code', 'extension', 'resolution', 'note'], table
, extra_gap
=1)
3489 delim
= self
._format
_screen
('\u2502', self
.Styles
.DELIM
, '|', test_encoding
=True)
3492 self
._format
_screen
(format_field(f
, 'format_id'), self
.Styles
.ID
),
3493 format_field(f
, 'ext'),
3494 format_field(f
, func
=self
.format_resolution
, ignore
=('audio only', 'images')),
3495 format_field(f
, 'fps', '\t%d'),
3496 format_field(f
, 'dynamic_range', '%s', ignore
=(None, 'SDR')).replace('HDR', ''),
3498 format_field(f
, 'filesize', ' \t%s', func
=format_bytes
) + format_field(f
, 'filesize_approx', '~\t%s', func
=format_bytes
),
3499 format_field(f
, 'tbr', '\t%dk'),
3500 shorten_protocol_name(f
.get('protocol', '')),
3502 format_field(f
, 'vcodec', default
='unknown').replace(
3503 'none', 'images' if f
.get('acodec') == 'none'
3504 else self
._format
_screen
('audio only', self
.Styles
.SUPPRESS
)),
3505 format_field(f
, 'vbr', '\t%dk'),
3506 format_field(f
, 'acodec', default
='unknown').replace(
3507 'none', '' if f
.get('vcodec') == 'none'
3508 else self
._format
_screen
('video only', self
.Styles
.SUPPRESS
)),
3509 format_field(f
, 'abr', '\t%dk'),
3510 format_field(f
, 'asr', '\t%dHz'),
3512 self
._format
_screen
('UNSUPPORTED', 'light red') if f
.get('ext') in ('f4f', 'f4m') else None,
3513 format_field(f
, 'language', '[%s]'),
3514 join_nonempty(format_field(f
, 'format_note'),
3515 format_field(f
, 'container', ignore
=(None, f
.get('ext'))),
3518 ] for f
in formats
if f
.get('preference') is None or f
['preference'] >= -1000]
3519 header_line
= self
._list
_format
_headers
(
3520 'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', delim
, '\tFILESIZE', '\tTBR', 'PROTO',
3521 delim
, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
3523 return render_table(
3524 header_line
, table
, hide_empty
=True,
3525 delim
=self
._format
_screen
('\u2500', self
.Styles
.DELIM
, '-', test_encoding
=True))
3527 def render_thumbnails_table(self
, info_dict
):
3528 thumbnails
= list(info_dict
.get('thumbnails') or [])
3531 return render_table(
3532 self
._list
_format
_headers
('ID', 'Width', 'Height', 'URL'),
3533 [[t
.get('id'), t
.get('width', 'unknown'), t
.get('height', 'unknown'), t
['url']] for t
in thumbnails
])
3535 def render_subtitles_table(self
, video_id
, subtitles
):
3536 def _row(lang
, formats
):
3537 exts
, names
= zip(*((f
['ext'], f
.get('name') or 'unknown') for f
in reversed(formats
)))
3538 if len(set(names
)) == 1:
3539 names
= [] if names
[0] == 'unknown' else names
[:1]
3540 return [lang
, ', '.join(names
), ', '.join(exts
)]
3544 return render_table(
3545 self
._list
_format
_headers
('Language', 'Name', 'Formats'),
3546 [_row(lang
, formats
) for lang
, formats
in subtitles
.items()],
3549 def __list_table(self
, video_id
, name
, func
, *args
):
3552 self
.to_screen(f
'{video_id} has no {name}')
3554 self
.to_screen(f
'[info] Available {name} for {video_id}:')
3555 self
.to_stdout(table
)
def list_formats(self, info_dict):
    """List the formats of info_dict via __list_table, rendered by render_formats_table"""
    self.__list_table(info_dict['id'], 'formats', self.render_formats_table, info_dict)
def list_thumbnails(self, info_dict):
    """List the thumbnails of info_dict via __list_table, rendered by render_thumbnails_table"""
    self.__list_table(info_dict['id'], 'thumbnails', self.render_thumbnails_table, info_dict)
def list_subtitles(self, video_id, subtitles, name='subtitles'):
    """List `subtitles` of `video_id` via __list_table, rendered by render_subtitles_table

    `name` is the label passed on to __list_table for this table.
    """
    self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles)
def urlopen(self, req):
    """ Start an HTTP download

    `req` may be a URL string (compat_basestring), which is first wrapped
    with sanitized_Request, or an object that is passed to the opener as is.
    Returns the result of self._opener.open, called with
    timeout=self._socket_timeout.
    """
    if isinstance(req, compat_basestring):
        req = sanitized_Request(req)
    return self._opener.open(req, timeout=self._socket_timeout)
def print_debug_header(self):
    """Write the verbose-mode debug header.

    Does nothing unless the 'verbose' param is set. Otherwise emits, as
    '[debug]' lines: stream encodings, program version, lazy-loader and
    plugin state, compatibility options, the git HEAD (source checkouts
    only), Python version, external executable versions, optional
    libraries and the proxy map. Lines go to the 'logger' param when one
    is configured, else through ``write_string`` / ``_write_string``.
    """
    if not self.params.get('verbose'):
        return

    def get_encoding(stream):
        # str(): a stream's ``encoding`` attribute may be None, which would
        # break the concatenation below
        ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
        if not supports_terminal_sequences(stream):
            from .compat import WINDOWS_VT_MODE
            ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
        return ret

    encoding_str = 'Encodings: locale %s, fs %s, out %s, err %s, pref %s' % (
        locale.getpreferredencoding(),
        sys.getfilesystemencoding(),
        get_encoding(self._screen_file), get_encoding(self._err_file),
        self.get_encoding())

    logger = self.params.get('logger')
    if logger:
        def write_debug(msg):
            logger.debug(f'[debug] {msg}')

        write_debug(encoding_str)
    else:
        # The encodings line is written with encoding=None, unlike later lines
        write_string(f'[debug] {encoding_str}\n', encoding=None)

        def write_debug(msg):
            self._write_string(f'[debug] {msg}\n')

    source = detect_variant()
    write_debug(join_nonempty(
        'yt-dlp version', __version__,
        f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
        '' if source == 'unknown' else f'({source})',
        delim=' '))
    if not _LAZY_LOADER:
        if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
            write_debug('Lazy loading extractors is forcibly disabled')
        else:
            write_debug('Lazy loading extractors is disabled')
    if plugin_extractors or plugin_postprocessors:
        write_debug('Plugins: %s' % [
            '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
            for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
    if self.params.get('compat_opts'):
        write_debug('Compatibility options: %s' % ', '.join(self.params.get('compat_opts')))

    if source == 'source':
        # Best effort only: a missing git binary or a non-git directory
        # must not abort the debug header
        try:
            sp = Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, err = sp.communicate_or_kill()
            out = out.decode().strip()
            if re.match('[0-9a-f]+', out):
                write_debug('Git HEAD: %s' % out)
        except Exception:
            pass

    def python_implementation():
        impl_name = platform.python_implementation()
        if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
            return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
        return impl_name

    write_debug('Python version %s (%s %s) - %s' % (
        platform.python_version(),
        python_implementation(),
        platform.architecture()[0],
        platform_name()))

    exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
    ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
    if ffmpeg_features:
        # sorted() keeps the output stable; set iteration order is arbitrary
        exe_versions['ffmpeg'] += ' (%s)' % ','.join(sorted(ffmpeg_features))

    exe_versions['rtmpdump'] = rtmpdump_version()
    exe_versions['phantomjs'] = PhantomJSwrapper._version()
    exe_str = ', '.join(
        f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
    ) or 'none'
    write_debug('exe versions: %s' % exe_str)

    from .downloader.websocket import has_websockets
    from .postprocessor.embedthumbnail import has_mutagen
    from .cookies import SQLITE_AVAILABLE, SECRETSTORAGE_AVAILABLE

    lib_str = join_nonempty(
        compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
        SECRETSTORAGE_AVAILABLE and 'secretstorage',
        has_mutagen and 'mutagen',
        SQLITE_AVAILABLE and 'sqlite',
        has_websockets and 'websockets',
        delim=', ') or 'none'
    write_debug('Optional libraries: %s' % lib_str)

    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    write_debug(f'Proxy map: {proxy_map}')

    # Deliberately disabled by the leading ``False``; kept for reference
    if False and self.params.get('call_home'):
        ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
        write_debug('Public IP address: %s' % ipaddr)
        latest_version = self.urlopen(
            'https://yt-dl.org/latest/version').read().decode('utf-8')
        if version_tuple(latest_version) > version_tuple(__version__):
            self.report_warning(
                'You are using an outdated version (newest version: %s)! '
                'See https://yt-dl.org/update if you need help updating.' %
                latest_version)
def _setup_opener(self):
    """Build the URL opener and store it on ``self._opener``.

    Reads the 'socket_timeout', 'cookiesfrombrowser', 'cookiefile',
    'proxy' and 'debug_printtraffic' params. Also sets
    ``self._socket_timeout`` (20 when no timeout is given) and
    ``self.cookiejar``.

    Proxy selection: an explicit non-empty 'proxy' param is used for both
    http and https; an empty string selects no proxies; when the param is
    None the proxies reported by ``getproxies()`` are used.
    """
    timeout_val = self.params.get('socket_timeout')
    self._socket_timeout = 20 if timeout_val is None else float(timeout_val)

    opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
    opts_cookiefile = self.params.get('cookiefile')
    opts_proxy = self.params.get('proxy')

    self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)

    cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
    if opts_proxy is not None:
        if opts_proxy == '':
            proxies = {}
        else:
            proxies = {'http': opts_proxy, 'https': opts_proxy}
    else:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    proxy_handler = PerRequestProxyHandler(proxies)

    debuglevel = 1 if self.params.get('debug_printtraffic') else 0
    https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
    redirect_handler = YoutubeDLRedirectHandler()
    data_handler = compat_urllib_request_DataHandler()

    # When passing our own FileHandler instance, build_opener won't add the
    # default FileHandler and allows us to disable the file protocol, which
    # can be used for malicious purposes (see
    # https://github.com/ytdl-org/youtube-dl/issues/8227)
    file_handler = compat_urllib_request.FileHandler()

    def file_open(*args, **kwargs):
        raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
    file_handler.file_open = file_open

    opener = compat_urllib_request.build_opener(
        proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)

    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener
def encode(self, s):
    """Return ``s`` as bytes, encoded with ``self.get_encoding()``.

    ``bytes`` input is returned unchanged.

    Raises UnicodeEncodeError when ``s`` cannot be represented in the
    target encoding; the error's ``reason`` is extended with a hint about
    the --encoding option before it is re-raised.
    """
    if isinstance(s, bytes):
        return s  # Already encoded

    try:
        return s.encode(self.get_encoding())
    except UnicodeEncodeError as err:
        err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
        raise
def get_encoding(self):
    """Return the 'encoding' param, or ``preferredencoding()`` when it is None."""
    encoding = self.params.get('encoding')
    if encoding is None:
        encoding = preferredencoding()
    return encoding
3750 def _write_info_json(self
, label
, ie_result
, infofn
, overwrite
=None):
3751 ''' Write infojson and returns True = written, False = skip, None = error '''
3752 if overwrite
is None:
3753 overwrite
= self
.params
.get('overwrites', True)
3754 if not self
.params
.get('writeinfojson'):
3757 self
.write_debug(f
'Skipping writing {label} infojson')
3759 elif not self
._ensure
_dir
_exists
(infofn
):
3761 elif not overwrite
and os
.path
.exists(infofn
):
3762 self
.to_screen(f
'[info] {label.title()} metadata is already present')
3764 self
.to_screen(f
'[info] Writing {label} metadata as JSON to: {infofn}')
3766 write_json_file(self
.sanitize_info(ie_result
, self
.params
.get('clean_infojson', True)), infofn
)
3767 except (OSError, IOError):
3768 self
.report_error(f
'Cannot write {label} metadata to JSON file {infofn}')
3772 def _write_description(self
, label
, ie_result
, descfn
):
3773 ''' Write description and returns True = written, False = skip, None = error '''
3774 if not self
.params
.get('writedescription'):
3777 self
.write_debug(f
'Skipping writing {label} description')
3779 elif not self
._ensure
_dir
_exists
(descfn
):
3781 elif not self
.params
.get('overwrites', True) and os
.path
.exists(descfn
):
3782 self
.to_screen(f
'[info] {label.title()} description is already present')
3783 elif ie_result
.get('description') is None:
3784 self
.report_warning(f
'There\'s no {label} description to write')
3788 self
.to_screen(f
'[info] Writing {label} description to: {descfn}')
3789 with io
.open(encodeFilename(descfn
), 'w', encoding
='utf-8') as descfile
:
3790 descfile
.write(ie_result
['description'])
3791 except (OSError, IOError):
3792 self
.report_error(f
'Cannot write {label} description file {descfn}')
3796 def _write_subtitles(self
, info_dict
, filename
):
3797 ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
3799 subtitles
= info_dict
.get('requested_subtitles')
3800 if not subtitles
or not (self
.params
.get('writesubtitles') or self
.params
.get('writeautomaticsub')):
3801 # subtitles download errors are already managed as troubles in relevant IE
3802 # that way it will silently go on when used with unsupporting IE
3805 sub_filename_base
= self
.prepare_filename(info_dict
, 'subtitle')
3806 if not sub_filename_base
:
3807 self
.to_screen('[info] Skipping writing video subtitles')
3809 for sub_lang
, sub_info
in subtitles
.items():
3810 sub_format
= sub_info
['ext']
3811 sub_filename
= subtitles_filename(filename
, sub_lang
, sub_format
, info_dict
.get('ext'))
3812 sub_filename_final
= subtitles_filename(sub_filename_base
, sub_lang
, sub_format
, info_dict
.get('ext'))
3813 existing_sub
= self
.existing_file((sub_filename_final
, sub_filename
))
3815 self
.to_screen(f
'[info] Video subtitle {sub_lang}.{sub_format} is already present')
3816 sub_info
['filepath'] = existing_sub
3817 ret
.append((existing_sub
, sub_filename_final
))
3820 self
.to_screen(f
'[info] Writing video subtitles to: {sub_filename}')
3821 if sub_info
.get('data') is not None:
3823 # Use newline='' to prevent conversion of newline characters
3824 # See https://github.com/ytdl-org/youtube-dl/issues/10268
3825 with io
.open(sub_filename
, 'w', encoding
='utf-8', newline
='') as subfile
:
3826 subfile
.write(sub_info
['data'])
3827 sub_info
['filepath'] = sub_filename
3828 ret
.append((sub_filename
, sub_filename_final
))
3830 except (OSError, IOError):
3831 self
.report_error(f
'Cannot write video subtitles file {sub_filename}')
3835 sub_copy
= sub_info
.copy()
3836 sub_copy
.setdefault('http_headers', info_dict
.get('http_headers'))
3837 self
.dl(sub_filename
, sub_copy
, subtitle
=True)
3838 sub_info
['filepath'] = sub_filename
3839 ret
.append((sub_filename
, sub_filename_final
))
3840 except (DownloadError
, ExtractorError
, IOError, OSError, ValueError) + network_exceptions
as err
:
3841 if self
.params
.get('ignoreerrors') is not True: # False or 'only_download'
3842 raise DownloadError(f
'Unable to download video subtitles for {sub_lang!r}: {err}', err
)
3843 self
.report_warning(f
'Unable to download video subtitles for {sub_lang!r}: {err}')
3846 def _write_thumbnails(self
, label
, info_dict
, filename
, thumb_filename_base
=None):
3847 ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
3848 write_all
= self
.params
.get('write_all_thumbnails', False)
3849 thumbnails
, ret
= [], []
3850 if write_all
or self
.params
.get('writethumbnail', False):
3851 thumbnails
= info_dict
.get('thumbnails') or []
3852 multiple
= write_all
and len(thumbnails
) > 1
3854 if thumb_filename_base
is None:
3855 thumb_filename_base
= filename
3856 if thumbnails
and not thumb_filename_base
:
3857 self
.write_debug(f
'Skipping writing {label} thumbnail')
3860 for idx
, t
in list(enumerate(thumbnails
))[::-1]:
3861 thumb_ext
= (f
'{t["id"]}.' if multiple
else '') + determine_ext(t
['url'], 'jpg')
3862 thumb_display_id
= f
'{label} thumbnail {t["id"]}'
3863 thumb_filename
= replace_extension(filename
, thumb_ext
, info_dict
.get('ext'))
3864 thumb_filename_final
= replace_extension(thumb_filename_base
, thumb_ext
, info_dict
.get('ext'))
3866 existing_thumb
= self
.existing_file((thumb_filename_final
, thumb_filename
))
3868 self
.to_screen('[info] %s is already present' % (
3869 thumb_display_id
if multiple
else f
'{label} thumbnail').capitalize())
3870 t
['filepath'] = existing_thumb
3871 ret
.append((existing_thumb
, thumb_filename_final
))
3873 self
.to_screen(f
'[info] Downloading {thumb_display_id} ...')
3875 uf
= self
.urlopen(sanitized_Request(t
['url'], headers
=t
.get('http_headers', {})))
3876 self
.to_screen(f
'[info] Writing {thumb_display_id} to: {thumb_filename}')
3877 with open(encodeFilename(thumb_filename
), 'wb') as thumbf
:
3878 shutil
.copyfileobj(uf
, thumbf
)
3879 ret
.append((thumb_filename
, thumb_filename_final
))
3880 t
['filepath'] = thumb_filename
3881 except network_exceptions
as err
:
3883 self
.report_warning(f
'Unable to download {thumb_display_id}: {err}')
3884 if ret
and not write_all
: