4 from __future__
import absolute_import
, unicode_literals
31 from string
import ascii_letters
35 compat_get_terminal_size
,
42 compat_tokenize_tokenize
,
44 compat_urllib_request
,
45 compat_urllib_request_DataHandler
,
46 windows_enable_vt_mode
,
48 from .cookies
import load_cookies
58 DOT_DESKTOP_LINK_TEMPLATE
,
59 DOT_URL_LINK_TEMPLATE
,
60 DOT_WEBLOC_LINK_TEMPLATE
,
88 PerRequestProxyHandler
,
94 register_socks_protocols
,
109 supports_terminal_sequences
,
115 UnavailableVideoError
,
121 YoutubeDLCookieProcessor
,
123 YoutubeDLRedirectHandler
,
125 from .cache
import Cache
126 from .extractor
import (
127 gen_extractor_classes
,
130 _PLUGIN_CLASSES
as plugin_extractors
132 from .extractor
.openload
import PhantomJSwrapper
133 from .downloader
import (
135 get_suitable_downloader
,
136 shorten_protocol_name
138 from .downloader
.rtmp
import rtmpdump_version
139 from .postprocessor
import (
142 FFmpegFixupDurationPP
,
145 FFmpegFixupStretchedPP
,
146 FFmpegFixupTimestampPP
,
149 MoveFilesAfterDownloadPP
,
150 _PLUGIN_CLASSES
as plugin_postprocessors
152 from .update
import detect_variant
153 from .version
import __version__
155 if compat_os_name
== 'nt':
159 class YoutubeDL(object):
162 YoutubeDL objects are the ones responsible of downloading the
163 actual video file and writing it to disk if the user has requested
164 it, among some other tasks. In most cases there should be one per
165 program. As, given a video URL, the downloader doesn't know how to
166 extract all the needed information, task that InfoExtractors do, it
167 has to pass the URL to one of them.
169 For this, YoutubeDL objects have a method that allows
170 InfoExtractors to be registered in a given order. When it is passed
171 a URL, the YoutubeDL object hands it to the first InfoExtractor it
172 finds that reports being able to handle it. The InfoExtractor extracts
173 all the information about the video or videos the URL refers to, and
174 YoutubeDL process the extracted information, possibly using a File
175 Downloader to download the video.
177 YoutubeDL objects accept a lot of parameters. In order not to saturate
178 the object constructor with arguments, it receives a dictionary of
179 options instead. These options are available through the params
180 attribute for the InfoExtractors to use. The YoutubeDL also
181 registers itself as the downloader in charge for the InfoExtractors
182 that are added to it, so this is a "mutual registration".
186 username: Username for authentication purposes.
187 password: Password for authentication purposes.
188 videopassword: Password for accessing a video.
189 ap_mso: Adobe Pass multiple-system operator identifier.
190 ap_username: Multiple-system operator account username.
191 ap_password: Multiple-system operator account password.
192 usenetrc: Use netrc for authentication instead.
193 verbose: Print additional info to stdout.
194 quiet: Do not print messages to stdout.
195 no_warnings: Do not print out anything for warnings.
196 forceprint: A list of templates to force print
197 forceurl: Force printing final URL. (Deprecated)
198 forcetitle: Force printing title. (Deprecated)
199 forceid: Force printing ID. (Deprecated)
200 forcethumbnail: Force printing thumbnail URL. (Deprecated)
201 forcedescription: Force printing description. (Deprecated)
202 forcefilename: Force printing final filename. (Deprecated)
203 forceduration: Force printing duration. (Deprecated)
204 forcejson: Force printing info_dict as JSON.
205 dump_single_json: Force printing the info_dict of the whole playlist
206 (or video) as a single JSON line.
207 force_write_download_archive: Force writing download archive regardless
208 of 'skip_download' or 'simulate'.
209 simulate: Do not download the video files. If unset (or None),
210 simulate only if listsubtitles, listformats or list_thumbnails is used
211 format: Video format code. see "FORMAT SELECTION" for more details.
212 allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
213 ignore_no_formats_error: Ignore "No video formats" error. Useful for
214 extracting metadata even if the video is not actually
215 available for download (experimental)
216 format_sort: How to sort the video formats. see "Sorting Formats"
218 format_sort_force: Force the given format_sort. see "Sorting Formats"
220 allow_multiple_video_streams: Allow multiple video streams to be merged
222 allow_multiple_audio_streams: Allow multiple audio streams to be merged
224 check_formats Whether to test if the formats are downloadable.
225 Can be True (check all), False (check none)
226 or None (check only if requested by extractor)
227 paths: Dictionary of output paths. The allowed keys are 'home'
228 'temp' and the keys of OUTTMPL_TYPES (in utils.py)
229 outtmpl: Dictionary of templates for output names. Allowed keys
230 are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
231 For compatibility with youtube-dl, a single string can also be used
232 outtmpl_na_placeholder: Placeholder for unavailable meta fields.
233 restrictfilenames: Do not allow "&" and spaces in file names
234 trim_file_name: Limit length of filename (extension excluded)
235 windowsfilenames: Force the filenames to be windows compatible
236 ignoreerrors: Do not stop on download/postprocessing errors.
237 Can be 'only_download' to ignore only download errors.
238 Default is 'only_download' for CLI, but False for API
239 skip_playlist_after_errors: Number of allowed failures until the rest of
240 the playlist is skipped
241 force_generic_extractor: Force downloader to use the generic extractor
242 overwrites: Overwrite all video and metadata files if True,
243 overwrite only non-video files if None
244 and don't overwrite any file if False
245 For compatibility with youtube-dl,
246 "nooverwrites" may also be used instead
247 playliststart: Playlist item to start at.
248 playlistend: Playlist item to end at.
249 playlist_items: Specific indices of playlist to download.
250 playlistreverse: Download playlist items in reverse order.
251 playlistrandom: Download playlist items in random order.
252 matchtitle: Download only matching titles.
253 rejecttitle: Reject downloads for matching titles.
254 logger: Log messages to a logging.Logger instance.
255 logtostderr: Log messages to stderr instead of stdout.
256 consoletitle: Display progress in console window's titlebar.
257 writedescription: Write the video description to a .description file
258 writeinfojson: Write the video description to a .info.json file
259 clean_infojson: Remove private fields from the infojson
260 getcomments: Extract video comments. This will not be written to disk
261 unless writeinfojson is also given
262 writeannotations: Write the video annotations to a .annotations.xml file
263 writethumbnail: Write the thumbnail image to a file
264 allow_playlist_files: Whether to write playlists' description, infojson etc
265 also to disk when using the 'write*' options
266 write_all_thumbnails: Write all thumbnail formats to files
267 writelink: Write an internet shortcut file, depending on the
268 current platform (.url/.webloc/.desktop)
269 writeurllink: Write a Windows internet shortcut file (.url)
270 writewebloclink: Write a macOS internet shortcut file (.webloc)
271 writedesktoplink: Write a Linux internet shortcut file (.desktop)
272 writesubtitles: Write the video subtitles to a file
273 writeautomaticsub: Write the automatically generated subtitles to a file
274 allsubtitles: Deprecated - Use subtitleslangs = ['all']
275 Downloads all the subtitles of the video
276 (requires writesubtitles or writeautomaticsub)
277 listsubtitles: Lists all available subtitles for the video
278 subtitlesformat: The format code for subtitles
279 subtitleslangs: List of languages of the subtitles to download (can be regex).
280 The list may contain "all" to refer to all the available
281 subtitles. The language can be prefixed with a "-" to
282 exclude it from the requested languages. Eg: ['all', '-live_chat']
283 keepvideo: Keep the video file after post-processing
284 daterange: A DateRange object, download only if the upload_date is in the range.
285 skip_download: Skip the actual download of the video file
286 cachedir: Location of the cache files in the filesystem.
287 False to disable filesystem cache.
288 noplaylist: Download single video instead of a playlist if in doubt.
289 age_limit: An integer representing the user's age in years.
290 Unsuitable videos for the given age are skipped.
291 min_views: An integer representing the minimum view count the video
292 must have in order to not be skipped.
293 Videos without view count information are always
294 downloaded. None for no limit.
295 max_views: An integer representing the maximum view count.
296 Videos that are more popular than that are not
298 Videos without view count information are always
299 downloaded. None for no limit.
300 download_archive: File name of a file where all downloads are recorded.
301 Videos already present in the file are not downloaded
303 break_on_existing: Stop the download process after attempting to download a
304 file that is in the archive.
305 break_on_reject: Stop the download process when encountering a video that
306 has been filtered out.
307 cookiefile: File name where cookies should be read from and dumped to
308 cookiesfrombrowser: A tuple containing the name of the browser and the profile
309 name/path from where cookies are loaded.
310 Eg: ('chrome', ) or ('vivaldi', 'default')
311 nocheckcertificate:Do not verify SSL certificates
312 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
313 At the moment, this is only supported by YouTube.
314 proxy: URL of the proxy server to use
315 geo_verification_proxy: URL of the proxy to use for IP address verification
316 on geo-restricted sites.
317 socket_timeout: Time to wait for unresponsive hosts, in seconds
318 bidi_workaround: Work around buggy terminals without bidirectional text
319 support, using fribidi
320 debug_printtraffic:Print out sent and received HTTP traffic
321 include_ads: Download ads as well
322 default_search: Prepend this string if an input url is not valid.
323 'auto' for elaborate guessing
324 encoding: Use this encoding instead of the system-specified.
325 extract_flat: Do not resolve URLs, return the immediate result.
326 Pass in 'in_playlist' to only show this behavior for
328 postprocessors: A list of dictionaries, each with an entry
329 * key: The name of the postprocessor. See
330 yt_dlp/postprocessor/__init__.py for a list.
331 * when: When to run the postprocessor. Can be one of
332 pre_process|before_dl|post_process|after_move.
333 Assumed to be 'post_process' if not given
334 post_hooks: Deprecated - Register a custom postprocessor instead
335 A list of functions that get called as the final step
336 for each video file, after all postprocessors have been
337 called. The filename will be passed as the only argument.
338 progress_hooks: A list of functions that get called on download
339 progress, with a dictionary with the entries
340 * status: One of "downloading", "error", or "finished".
341 Check this first and ignore unknown values.
342 * info_dict: The extracted info_dict
344 If status is one of "downloading", or "finished", the
345 following properties may also be present:
346 * filename: The final filename (always present)
347 * tmpfilename: The filename we're currently writing to
348 * downloaded_bytes: Bytes on disk
349 * total_bytes: Size of the whole file, None if unknown
350 * total_bytes_estimate: Guess of the eventual file size,
352 * elapsed: The number of seconds since download started.
353 * eta: The estimated time in seconds, None if unknown
354 * speed: The download speed in bytes/second, None if
356 * fragment_index: The counter of the currently
357 downloaded video fragment.
358 * fragment_count: The number of fragments (= individual
359 files that will be merged)
361 Progress hooks are guaranteed to be called at least once
362 (with status "finished") if the download is successful.
363 postprocessor_hooks: A list of functions that get called on postprocessing
364 progress, with a dictionary with the entries
365 * status: One of "started", "processing", or "finished".
366 Check this first and ignore unknown values.
367 * postprocessor: Name of the postprocessor
368 * info_dict: The extracted info_dict
370 Progress hooks are guaranteed to be called at least twice
371 (with status "started" and "finished") if the processing is successful.
372 merge_output_format: Extension to use when merging formats.
373 final_ext: Expected final extension; used to detect when the file was
374 already downloaded and converted. "merge_output_format" is
375 replaced by this extension when given
376 fixup: Automatically correct known faults of the file.
378 - "never": do nothing
379 - "warn": only emit a warning
380 - "detect_or_warn": check whether we can do anything
381 about it, warn otherwise (default)
382 source_address: Client-side IP address to bind to.
383 call_home: Boolean, true iff we are allowed to contact the
384 yt-dlp servers for debugging. (BROKEN)
385 sleep_interval_requests: Number of seconds to sleep between requests
387 sleep_interval: Number of seconds to sleep before each download when
388 used alone or a lower bound of a range for randomized
389 sleep before each download (minimum possible number
390 of seconds to sleep) when used along with
392 max_sleep_interval:Upper bound of a range for randomized sleep before each
393 download (maximum possible number of seconds to sleep).
394 Must only be used along with sleep_interval.
395 Actual sleep time will be a random float from range
396 [sleep_interval; max_sleep_interval].
397 sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
398 listformats: Print an overview of available video formats and exit.
399 list_thumbnails: Print a table of all thumbnails and exit.
400 match_filter: A function that gets called with the info_dict of
402 If it returns a message, the video is ignored.
403 If it returns None, the video is downloaded.
404 match_filter_func in utils.py is one example for this.
405 no_color: Do not emit color codes in output.
406 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
409 Two-letter ISO 3166-2 country code that will be used for
410 explicit geographic restriction bypassing via faking
411 X-Forwarded-For HTTP header
413 IP range in CIDR notation that will be used similarly to
416 The following options determine which downloader is picked:
417 external_downloader: A dictionary of protocol keys and the executable of the
418 external downloader to use for it. The allowed protocols
419 are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
420 Set the value to 'native' to use the native downloader
421 hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
422 or {'m3u8': 'ffmpeg'} instead.
423 Use the native HLS downloader instead of ffmpeg/avconv
424 if True, otherwise use ffmpeg/avconv if False, otherwise
425 use downloader suggested by extractor if None.
426 compat_opts: Compatibility options. See "Differences in default behavior".
427 The following options do not work when used through the API:
428 filename, abort-on-error, multistreams, no-live-chat, format-sort
429 no-clean-infojson, no-playlist-metafiles, no-keep-subs.
430 Refer __init__.py for their implementation
431 progress_template: Dictionary of templates for progress outputs.
432 Allowed keys are 'download', 'postprocess',
433 'download-title' (console title) and 'postprocess-title'.
434 The template is mapped on a dictionary with keys 'progress' and 'info'
436 The following parameters are not used by YoutubeDL itself, they are used by
437 the downloader (see yt_dlp/downloader/common.py):
438 nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
439 max_filesize, test, noresizebuffer, retries, fragment_retries, continuedl,
440 noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
441 external_downloader_args.
443 The following options are used by the post processors:
444 prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
445 otherwise prefer ffmpeg. (avconv support is deprecated)
446 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
447 to the binary or its containing directory.
448 postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
449 and a list of additional command-line arguments for the
450 postprocessor/executable. The dict can also have "PP+EXE" keys
451 which are used when the given exe is used by the given PP.
452 Use 'default' as the name for arguments to passed to all PP
453 For compatibility with youtube-dl, a single list of args
456 The following options are used by the extractors:
457 extractor_retries: Number of times to retry for known errors
458 dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
459 hls_split_discontinuity: Split HLS playlists to different formats at
460 discontinuities such as ad breaks (default: False)
461 extractor_args: A dictionary of arguments to be passed to the extractors.
462 See "EXTRACTOR ARGUMENTS" for details.
463 Eg: {'youtube': {'skip': ['dash', 'hls']}}
464 youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
465 If True (default), DASH manifests and related
466 data will be downloaded and processed by extractor.
467 You can reduce network I/O by disabling it if you don't
468 care about DASH. (only for youtube)
469 youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
470 If True (default), HLS manifests and related
471 data will be downloaded and processed by extractor.
472 You can reduce network I/O by disabling it if you don't
473 care about HLS. (only for youtube)
476 _NUMERIC_FIELDS
= set((
477 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
478 'timestamp', 'release_timestamp',
479 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
480 'average_rating', 'comment_count', 'age_limit',
481 'start_time', 'end_time',
482 'chapter_number', 'season_number', 'episode_number',
483 'track_number', 'disc_number', 'release_year',
486 _format_selection_exts
= {
487 'audio': {'m4a', 'mp3', 'ogg', 'aac'}
,
488 'video': {'mp4', 'flv', 'webm', '3gp'}
,
489 'storyboards': {'mhtml'}
,
494 _pps
= {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
495 _printed_messages
= set()
496 _first_webpage_request
= True
497 _download_retcode
= None
498 _num_downloads
= None
500 _playlist_urls
= set()
503 def __init__(self
, params
=None, auto_init
=True):
504 """Create a FileDownloader object with the given options.
505 @param auto_init Whether to load the default extractors and print header (if verbose).
506 Set to 'no_verbose_header' to not print the header
511 self
._ies
_instances
= {}
512 self
._pps
= {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
513 self
._printed
_messages
= set()
514 self
._first
_webpage
_request
= True
515 self
._post
_hooks
= []
516 self
._progress
_hooks
= []
517 self
._postprocessor
_hooks
= []
518 self
._download
_retcode
= 0
519 self
._num
_downloads
= 0
520 self
._screen
_file
= [sys
.stdout
, sys
.stderr
][params
.get('logtostderr', False)]
521 self
._err
_file
= sys
.stderr
523 self
.cache
= Cache(self
)
525 windows_enable_vt_mode()
526 # FIXME: This will break if we ever print color to stdout
527 self
.params
['no_color'] = self
.params
.get('no_color') or not supports_terminal_sequences(self
._err
_file
)
529 if sys
.version_info
< (3, 6):
531 'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys
.version_info
[:2])
533 if self
.params
.get('allow_unplayable_formats'):
535 f
'You have asked for {self._color_text("unplayable formats", "blue")} to be listed/downloaded. '
536 'This is a developer option intended for debugging. \n'
537 ' If you experience any issues while using this option, '
538 f
'{self._color_text("DO NOT", "red")} open a bug report')
540 def check_deprecated(param
, option
, suggestion
):
541 if self
.params
.get(param
) is not None:
542 self
.report_warning('%s is deprecated. Use %s instead' % (option
, suggestion
))
546 if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
547 if self
.params
.get('geo_verification_proxy') is None:
548 self
.params
['geo_verification_proxy'] = self
.params
['cn_verification_proxy']
550 check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
551 check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
552 check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
554 for msg
in self
.params
.get('_warnings', []):
555 self
.report_warning(msg
)
557 if 'overwrites' not in self
.params
and self
.params
.get('nooverwrites') is not None:
558 # nooverwrites was unnecessarily changed to overwrites
559 # in 0c3d0f51778b153f65c21906031c2e091fcfb641
560 # This ensures compatibility with both keys
561 self
.params
['overwrites'] = not self
.params
['nooverwrites']
562 elif self
.params
.get('overwrites') is None:
563 self
.params
.pop('overwrites', None)
565 self
.params
['nooverwrites'] = not self
.params
['overwrites']
567 if params
.get('bidi_workaround', False):
570 master
, slave
= pty
.openpty()
571 width
= compat_get_terminal_size().columns
575 width_args
= ['-w', str(width
)]
577 stdin
=subprocess
.PIPE
,
579 stderr
=self
._err
_file
)
581 self
._output
_process
= Popen(['bidiv'] + width_args
, **sp_kwargs
)
583 self
._output
_process
= Popen(['fribidi', '-c', 'UTF-8'] + width_args
, **sp_kwargs
)
584 self
._output
_channel
= os
.fdopen(master
, 'rb')
585 except OSError as ose
:
586 if ose
.errno
== errno
.ENOENT
:
588 'Could not find fribidi executable, ignoring --bidi-workaround. '
589 'Make sure that fribidi is an executable file in one of the directories in your $PATH.')
593 if (sys
.platform
!= 'win32'
594 and sys
.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
595 and not params
.get('restrictfilenames', False)):
596 # Unicode filesystem API will throw errors (#1474, #13027)
598 'Assuming --restrict-filenames since file system encoding '
599 'cannot encode all characters. '
600 'Set the LC_ALL environment variable to fix this.')
601 self
.params
['restrictfilenames'] = True
603 self
.outtmpl_dict
= self
.parse_outtmpl()
605 # Creating format selector here allows us to catch syntax errors before the extraction
606 self
.format_selector
= (
607 None if self
.params
.get('format') is None
608 else self
.build_format_selector(self
.params
['format']))
613 if auto_init
!= 'no_verbose_header':
614 self
.print_debug_header()
615 self
.add_default_info_extractors()
617 for pp_def_raw
in self
.params
.get('postprocessors', []):
618 pp_def
= dict(pp_def_raw
)
619 when
= pp_def
.pop('when', 'post_process')
620 pp_class
= get_postprocessor(pp_def
.pop('key'))
621 pp
= pp_class(self
, **compat_kwargs(pp_def
))
622 self
.add_post_processor(pp
, when
=when
)
624 for ph
in self
.params
.get('post_hooks', []):
625 self
.add_post_hook(ph
)
627 for ph
in self
.params
.get('progress_hooks', []):
628 self
.add_progress_hook(ph
)
630 register_socks_protocols()
632 def preload_download_archive(fn
):
633 """Preload the archive, if any is specified"""
636 self
.write_debug(f
'Loading archive file {fn!r}')
638 with locked_file(fn
, 'r', encoding
='utf-8') as archive_file
:
639 for line
in archive_file
:
640 self
.archive
.add(line
.strip())
641 except IOError as ioe
:
642 if ioe
.errno
!= errno
.ENOENT
:
648 preload_download_archive(self
.params
.get('download_archive'))
650 def warn_if_short_id(self
, argv
):
651 # short YouTube ID starting with dash?
653 i
for i
, a
in enumerate(argv
)
654 if re
.match(r
'^-[0-9A-Za-z_-]{10}$', a
)]
658 + [a
for i
, a
in enumerate(argv
) if i
not in idxs
]
659 + ['--'] + [argv
[i
] for i
in idxs
]
662 'Long argument string detected. '
663 'Use -- to separate parameters and URLs, like this:\n%s' %
664 args_to_str(correct_argv
))
666 def add_info_extractor(self
, ie
):
667 """Add an InfoExtractor object to the end of the list."""
669 self
._ies
[ie_key
] = ie
670 if not isinstance(ie
, type):
671 self
._ies
_instances
[ie_key
] = ie
672 ie
.set_downloader(self
)
674 def _get_info_extractor_class(self
, ie_key
):
675 ie
= self
._ies
.get(ie_key
)
677 ie
= get_info_extractor(ie_key
)
678 self
.add_info_extractor(ie
)
681 def get_info_extractor(self
, ie_key
):
683 Get an instance of an IE with name ie_key, it will try to get one from
684 the _ies list, if there's no instance it will create a new one and add
685 it to the extractor list.
687 ie
= self
._ies
_instances
.get(ie_key
)
689 ie
= get_info_extractor(ie_key
)()
690 self
.add_info_extractor(ie
)
693 def add_default_info_extractors(self
):
695 Add the InfoExtractors returned by gen_extractors to the end of the list
697 for ie
in gen_extractor_classes():
698 self
.add_info_extractor(ie
)
700 def add_post_processor(self
, pp
, when
='post_process'):
701 """Add a PostProcessor object to the end of the chain."""
702 self
._pps
[when
].append(pp
)
703 pp
.set_downloader(self
)
705 def add_post_hook(self
, ph
):
706 """Add the post hook"""
707 self
._post
_hooks
.append(ph
)
709 def add_progress_hook(self
, ph
):
710 """Add the download progress hook"""
711 self
._progress
_hooks
.append(ph
)
713 def add_postprocessor_hook(self
, ph
):
714 """Add the postprocessing progress hook"""
715 self
._postprocessor
_hooks
.append(ph
)
717 def _bidi_workaround(self
, message
):
718 if not hasattr(self
, '_output_channel'):
721 assert hasattr(self
, '_output_process')
722 assert isinstance(message
, compat_str
)
723 line_count
= message
.count('\n') + 1
724 self
._output
_process
.stdin
.write((message
+ '\n').encode('utf-8'))
725 self
._output
_process
.stdin
.flush()
726 res
= ''.join(self
._output
_channel
.readline().decode('utf-8')
727 for _
in range(line_count
))
728 return res
[:-len('\n')]
730 def _write_string(self
, message
, out
=None, only_once
=False):
732 if message
in self
._printed
_messages
:
734 self
._printed
_messages
.add(message
)
735 write_string(message
, out
=out
, encoding
=self
.params
.get('encoding'))
737 def to_stdout(self
, message
, skip_eol
=False, quiet
=False):
738 """Print message to stdout"""
739 if self
.params
.get('logger'):
740 self
.params
['logger'].debug(message
)
741 elif not quiet
or self
.params
.get('verbose'):
743 '%s%s' % (self
._bidi
_workaround
(message
), ('' if skip_eol
else '\n')),
744 self
._err
_file
if quiet
else self
._screen
_file
)
746 def to_stderr(self
, message
, only_once
=False):
747 """Print message to stderr"""
748 assert isinstance(message
, compat_str
)
749 if self
.params
.get('logger'):
750 self
.params
['logger'].error(message
)
752 self
._write
_string
('%s\n' % self
._bidi
_workaround
(message
), self
._err
_file
, only_once
=only_once
)
754 def to_console_title(self
, message
):
755 if not self
.params
.get('consoletitle', False):
757 if compat_os_name
== 'nt':
758 if ctypes
.windll
.kernel32
.GetConsoleWindow():
759 # c_wchar_p() might not be necessary if `message` is
760 # already of type unicode()
761 ctypes
.windll
.kernel32
.SetConsoleTitleW(ctypes
.c_wchar_p(message
))
762 elif 'TERM' in os
.environ
:
763 self
._write
_string
('\033]0;%s\007' % message
, self
._screen
_file
)
765 def save_console_title(self
):
766 if not self
.params
.get('consoletitle', False):
768 if self
.params
.get('simulate'):
770 if compat_os_name
!= 'nt' and 'TERM' in os
.environ
:
771 # Save the title on stack
772 self
._write
_string
('\033[22;0t', self
._screen
_file
)
774 def restore_console_title(self
):
775 if not self
.params
.get('consoletitle', False):
777 if self
.params
.get('simulate'):
779 if compat_os_name
!= 'nt' and 'TERM' in os
.environ
:
780 # Restore the title from stack
781 self
._write
_string
('\033[23;0t', self
._screen
_file
)
784 self
.save_console_title()
787 def __exit__(self
, *args
):
788 self
.restore_console_title()
790 if self
.params
.get('cookiefile') is not None:
791 self
.cookiejar
.save(ignore_discard
=True, ignore_expires
=True)
793 def trouble(self
, message
=None, tb
=None):
794 """Determine action to take when a download problem appears.
796 Depending on if the downloader has been configured to ignore
797 download errors or not, this method may throw an exception or
798 not when errors are found, after printing the message.
800 tb, if given, is additional traceback information.
802 if message
is not None:
803 self
.to_stderr(message
)
804 if self
.params
.get('verbose'):
806 if sys
.exc_info()[0]: # if .trouble has been called from an except block
808 if hasattr(sys
.exc_info()[1], 'exc_info') and sys
.exc_info()[1].exc_info
[0]:
809 tb
+= ''.join(traceback
.format_exception(*sys
.exc_info()[1].exc_info
))
810 tb
+= encode_compat_str(traceback
.format_exc())
812 tb_data
= traceback
.format_list(traceback
.extract_stack())
813 tb
= ''.join(tb_data
)
816 if not self
.params
.get('ignoreerrors'):
817 if sys
.exc_info()[0] and hasattr(sys
.exc_info()[1], 'exc_info') and sys
.exc_info()[1].exc_info
[0]:
818 exc_info
= sys
.exc_info()[1].exc_info
820 exc_info
= sys
.exc_info()
821 raise DownloadError(message
, exc_info
)
822 self
._download
_retcode
= 1
824 def to_screen(self
, message
, skip_eol
=False):
825 """Print message to stdout if not in quiet mode"""
827 message
, skip_eol
, quiet
=self
.params
.get('quiet', False))
829 def _color_text(self
, text
, color
):
830 if self
.params
.get('no_color'):
832 return f
'{TERMINAL_SEQUENCES[color.upper()]}{text}{TERMINAL_SEQUENCES["RESET_STYLE"]}'
834 def report_warning(self
, message
, only_once
=False):
836 Print the message to stderr, it will be prefixed with 'WARNING:'
837 If stderr is a tty file the 'WARNING:' will be colored
839 if self
.params
.get('logger') is not None:
840 self
.params
['logger'].warning(message
)
842 if self
.params
.get('no_warnings'):
844 self
.to_stderr(f
'{self._color_text("WARNING:", "yellow")} {message}', only_once
)
846 def report_error(self
, message
, tb
=None):
848 Do the same as trouble, but prefixes the message with 'ERROR:', colored
849 in red if stderr is a tty file.
851 self
.trouble(f
'{self._color_text("ERROR:", "red")} {message}', tb
)
853 def write_debug(self
, message
, only_once
=False):
854 '''Log debug message or Print message to stderr'''
855 if not self
.params
.get('verbose', False):
857 message
= '[debug] %s' % message
858 if self
.params
.get('logger'):
859 self
.params
['logger'].debug(message
)
861 self
.to_stderr(message
, only_once
)
863 def report_file_already_downloaded(self
, file_name
):
864 """Report file has already been fully downloaded."""
866 self
.to_screen('[download] %s has already been downloaded' % file_name
)
867 except UnicodeEncodeError:
868 self
.to_screen('[download] The file has already been downloaded')
870 def report_file_delete(self
, file_name
):
871 """Report that existing file will be deleted."""
873 self
.to_screen('Deleting existing file %s' % file_name
)
874 except UnicodeEncodeError:
875 self
.to_screen('Deleting existing file')
877 def raise_no_formats(self
, info
, forced
=False):
878 has_drm
= info
.get('__has_drm')
879 msg
= 'This video is DRM protected' if has_drm
else 'No video formats found!'
880 expected
= self
.params
.get('ignore_no_formats_error')
881 if forced
or not expected
:
882 raise ExtractorError(msg
, video_id
=info
['id'], ie
=info
['extractor'],
883 expected
=has_drm
or expected
)
885 self
.report_warning(msg
)
def parse_outtmpl(self):
    """
    Normalize the 'outtmpl' parameter into a dict of template-type -> template,
    filling unset entries from DEFAULT_OUTTMPL.  Returns the resulting dict.
    """
    outtmpl_dict = self.params.get('outtmpl', {})
    if not isinstance(outtmpl_dict, dict):
        # A bare string template is treated as the 'default' template.
        outtmpl_dict = {'default': outtmpl_dict}
    # Remove spaces in the default template
    if self.params.get('restrictfilenames'):
        sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')
    else:
        sanitize = lambda x: x
    outtmpl_dict.update({
        k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items()
        if outtmpl_dict.get(k) is None})
    for key, val in outtmpl_dict.items():
        if isinstance(val, bytes):
            # bytes templates were a Python 2 idiom; warn instead of failing.
            self.report_warning(
                'Parameter outtmpl is bytes, but should be a unicode string. '
                'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
    return outtmpl_dict
def get_output_path(self, dir_type='', filename=None):
    """
    Join the configured output paths ('paths' parameter) with filename.

    @param dir_type  Key into the 'paths' dict (e.g. 'home', 'temp'); '' uses
                     only the home path
    @param filename  Final path component, or None for just the directory
    @return the sanitized, joined path
    """
    paths = self.params.get('paths', {})
    assert isinstance(paths, dict)
    path = os.path.join(
        expand_path(paths.get('home', '').strip()),
        expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
        filename or '')

    # Temporary fix for #4787
    # 'Treat' all problem characters by passing filename through preferredencoding
    # to workaround encoding issues with subprocess on python2 @ Windows
    if sys.version_info < (3, 0) and sys.platform == 'win32':
        path = encodeFilename(path, True).decode(preferredencoding())
    return sanitize_path(path, force=self.params.get('windowsfilenames'))
@staticmethod
def _outtmpl_expandpath(outtmpl):
    """Expand '~' and env vars in outtmpl while keeping '%%'/'$$' escapes intact."""
    # expand_path translates '%%' into '%' and '$$' into '$'
    # correspondingly that is not what we want since we need to keep
    # '%%' intact for template dict substitution step. Working around
    # with boundary-alike separator hack.
    sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
    outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))

    # outtmpl should be expand_path'ed before template dict substitution
    # because meta fields may contain env variables we don't want to
    # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
    # title "Hello $PATH", we don't want `$PATH` to be expanded.
    return expand_path(outtmpl).replace(sep, '')
@staticmethod
def escape_outtmpl(outtmpl):
    ''' Escape any remaining strings like %s, %abc% etc. '''
    # A '%' not already part of a recognized %(key)... substitution (and not
    # followed by '%', '(' or the internal NUL marker) is doubled so that the
    # later `template % dict` step treats it literally.
    return re.sub(
        STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
        lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
        outtmpl)
@classmethod
def validate_outtmpl(cls, outtmpl):
    ''' @return None or Exception object '''
    # Rewrite the extended conversion types (l, j, q, B, U) to plain 's'
    # so the template can be dry-run through %-substitution below.
    outtmpl = re.sub(
        STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBU]'),
        lambda mobj: f'{mobj.group(0)[:-1]}s',
        cls._outtmpl_expandpath(outtmpl))
    try:
        # defaultdict(int) supplies a dummy value for every key, so only
        # genuinely malformed templates raise here.
        cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
        return None
    except ValueError as err:
        return err
958 def _copy_infodict(info_dict
):
959 info_dict
= dict(info_dict
)
960 for key
in ('__original_infodict', '__postprocessors'):
961 info_dict
.pop(key
, None)
964 def prepare_outtmpl(self
, outtmpl
, info_dict
, sanitize
=None):
965 """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict """
966 info_dict
.setdefault('epoch', int(time
.time())) # keep epoch consistent once set
968 info_dict
= self
._copy
_infodict
(info_dict
)
969 info_dict
['duration_string'] = ( # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
970 formatSeconds(info_dict
['duration'], '-' if sanitize
else ':')
971 if info_dict
.get('duration', None) is not None
973 info_dict
['autonumber'] = self
.params
.get('autonumber_start', 1) - 1 + self
._num
_downloads
974 if info_dict
.get('resolution') is None:
975 info_dict
['resolution'] = self
.format_resolution(info_dict
, default
=None)
977 # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
978 # of %(field)s to %(field)0Nd for backward compatibility
979 field_size_compat_map
= {
980 'playlist_index': len(str(info_dict
.get('_last_playlist_index') or '')),
981 'playlist_autonumber': len(str(info_dict
.get('n_entries') or '')),
982 'autonumber': self
.params
.get('autonumber_size') or 5,
986 EXTERNAL_FORMAT_RE
= re
.compile(STR_FORMAT_RE_TMPL
.format('[^)]*', f
'[{STR_FORMAT_TYPES}ljqBU]'))
991 # Field is of the form key1.key2...
992 # where keys (except first) can be string, int or slice
993 FIELD_RE
= r
'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num
=r
'(?:-?\d+)')
994 MATH_FIELD_RE
= r
'''{field}|{num}'''.format(field
=FIELD_RE
, num
=r
'-?\d+(?:.\d+)?')
995 MATH_OPERATORS_RE
= r
'(?:%s)' % '|'.join(map(re
.escape
, MATH_FUNCTIONS
.keys()))
996 INTERNAL_FORMAT_RE
= re
.compile(r
'''(?x)
999 (?P<maths>(?:{math_op}{math_field})*)
1000 (?:>(?P<strf_format>.+?))?
1001 (?P<alternate>(?<!\\),[^|)]+)?
1002 (?:\|(?P<default>.*?))?
1003 $'''.format(field
=FIELD_RE
, math_op
=MATH_OPERATORS_RE
, math_field
=MATH_FIELD_RE
))
1005 def _traverse_infodict(k
):
1009 return traverse_obj(info_dict
, k
, is_user_input
=True, traverse_string
=True)
1011 def get_value(mdict
):
1013 value
= _traverse_infodict(mdict
['fields'])
1016 value
= float_or_none(value
)
1017 if value
is not None:
1020 offset_key
= mdict
['maths']
1022 value
= float_or_none(value
)
1026 MATH_FIELD_RE
if operator
else MATH_OPERATORS_RE
,
1027 offset_key
).group(0)
1028 offset_key
= offset_key
[len(item
):]
1029 if operator
is None:
1030 operator
= MATH_FUNCTIONS
[item
]
1032 item
, multiplier
= (item
[1:], -1) if item
[0] == '-' else (item
, 1)
1033 offset
= float_or_none(item
)
1035 offset
= float_or_none(_traverse_infodict(item
))
1037 value
= operator(value
, multiplier
* offset
)
1038 except (TypeError, ZeroDivisionError):
1041 # Datetime formatting
1042 if mdict
['strf_format']:
1043 value
= strftime_or_none(value
, mdict
['strf_format'].replace('\\,', ','))
1047 na
= self
.params
.get('outtmpl_na_placeholder', 'NA')
1049 def _dumpjson_default(obj
):
1050 if isinstance(obj
, (set, LazyList
)):
1052 raise TypeError(f
'Object of type {type(obj).__name__} is not JSON serializable')
1054 def create_key(outer_mobj
):
1055 if not outer_mobj
.group('has_key'):
1056 return outer_mobj
.group(0)
1057 key
= outer_mobj
.group('key')
1058 mobj
= re
.match(INTERNAL_FORMAT_RE
, key
)
1059 initial_field
= mobj
.group('fields').split('.')[-1] if mobj
else ''
1060 value
, default
= None, na
1062 mobj
= mobj
.groupdict()
1063 default
= mobj
['default'] if mobj
['default'] is not None else default
1064 value
= get_value(mobj
)
1065 if value
is None and mobj
['alternate']:
1066 mobj
= re
.match(INTERNAL_FORMAT_RE
, mobj
['alternate'][1:])
1070 fmt
= outer_mobj
.group('format')
1071 if fmt
== 's' and value
is not None and key
in field_size_compat_map
.keys():
1072 fmt
= '0{:d}d'.format(field_size_compat_map
[key
])
1074 value
= default
if value
is None else value
1076 str_fmt
= f
'{fmt[:-1]}s'
1077 if fmt
[-1] == 'l': # list
1078 delim
= '\n' if '#' in (outer_mobj
.group('conversion') or '') else ', '
1079 value
, fmt
= delim
.join(variadic(value
)), str_fmt
1080 elif fmt
[-1] == 'j': # json
1081 value
, fmt
= json
.dumps(value
, default
=_dumpjson_default
), str_fmt
1082 elif fmt
[-1] == 'q': # quoted
1083 value
, fmt
= compat_shlex_quote(str(value
)), str_fmt
1084 elif fmt
[-1] == 'B': # bytes
1085 value
= f
'%{str_fmt}'.encode('utf-8') % str(value
).encode('utf-8')
1086 value
, fmt
= value
.decode('utf-8', 'ignore'), 's'
1087 elif fmt
[-1] == 'U': # unicode normalized
1088 opts
= outer_mobj
.group('conversion') or ''
1089 value
, fmt
= unicodedata
.normalize(
1090 # "+" = compatibility equivalence, "#" = NFD
1091 'NF%s%s' % ('K' if '+' in opts
else '', 'D' if '#' in opts
else 'C'),
1093 elif fmt
[-1] == 'c':
1095 value
= str(value
)[0]
1098 elif fmt
[-1] not in 'rs': # numeric
1099 value
= float_or_none(value
)
1101 value
, fmt
= default
, 's'
1105 # If value is an object, sanitize might convert it to a string
1106 # So we convert it to repr first
1107 value
, fmt
= repr(value
), str_fmt
1108 if fmt
[-1] in 'csr':
1109 value
= sanitize(initial_field
, value
)
1111 key
= '%s\0%s' % (key
.replace('%', '%\0'), outer_mobj
.group('format'))
1112 TMPL_DICT
[key
] = value
1113 return '{prefix}%({key}){fmt}'.format(key
=key
, fmt
=fmt
, prefix
=outer_mobj
.group('prefix'))
1115 return EXTERNAL_FORMAT_RE
.sub(create_key
, outtmpl
), TMPL_DICT
def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
    """Fully expand an output template against info_dict and return the string."""
    prepared_tmpl, tmpl_vars = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
    return self.escape_outtmpl(prepared_tmpl) % tmpl_vars
def _prepare_filename(self, info_dict, tmpl_type='default'):
    """
    Evaluate the output template of the given type against info_dict.
    Returns the filename, or None if the template was invalid.
    """
    try:
        # 'id'-like fields are sanitized less aggressively so IDs stay intact.
        sanitize = lambda k, v: sanitize_filename(
            compat_str(v),
            restricted=self.params.get('restrictfilenames'),
            is_id=(k == 'id' or k.endswith('_id')))
        outtmpl = self._outtmpl_expandpath(self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default']))
        filename = self.evaluate_outtmpl(outtmpl, info_dict, sanitize)

        force_ext = OUTTMPL_TYPES.get(tmpl_type)
        if filename and force_ext is not None:
            # Some template types (infojson, thumbnail, ...) mandate an extension.
            filename = replace_extension(filename, force_ext, info_dict.get('ext'))

        # https://github.com/blackjack4494/youtube-dlc/issues/85
        trim_file_name = self.params.get('trim_file_name', False)
        if trim_file_name:
            fn_groups = filename.rsplit('.')
            ext = fn_groups[-1]
            sub_ext = ''
            if len(fn_groups) > 2:
                # Preserve a secondary extension such as the language code of
                # subtitle files (e.g. '.en.vtt').
                sub_ext = fn_groups[-2]
            filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))

        return filename
    except ValueError as err:
        self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
        return None
def prepare_filename(self, info_dict, dir_type='', warn=False):
    """Generate the output filename."""
    filename = self._prepare_filename(info_dict, dir_type or 'default')
    if not filename and dir_type not in ('', 'temp'):
        # An empty non-default template means "skip writing this file type".
        return ''

    if warn:
        if not self.params.get('paths'):
            pass
        elif filename == '-':
            self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
        elif os.path.isabs(filename):
            self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
    if filename == '-' or not filename:
        # stdout output / empty filenames bypass the paths machinery entirely.
        return filename

    return self.get_output_path(dir_type, filename)
def _match_entry(self, info_dict, incomplete=False, silent=False):
    """ Returns None if the file should be downloaded """

    video_title = info_dict.get('title', info_dict.get('id', 'video'))

    def check_filter():
        # Returns a human-readable rejection reason, or None to accept.
        if 'title' in info_dict:
            # This can happen when we're just evaluating the playlist
            title = info_dict['title']
            matchtitle = self.params.get('matchtitle', False)
            if matchtitle:
                if not re.search(matchtitle, title, re.IGNORECASE):
                    return '"' + title + '" title did not match pattern "' + matchtitle + '"'
            rejecttitle = self.params.get('rejecttitle', False)
            if rejecttitle:
                if re.search(rejecttitle, title, re.IGNORECASE):
                    return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
        date = info_dict.get('upload_date')
        if date is not None:
            dateRange = self.params.get('daterange', DateRange())
            if date not in dateRange:
                return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
        view_count = info_dict.get('view_count')
        if view_count is not None:
            min_views = self.params.get('min_views')
            if min_views is not None and view_count < min_views:
                return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
            max_views = self.params.get('max_views')
            if max_views is not None and view_count > max_views:
                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
        if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
            return 'Skipping "%s" because it is age restricted' % video_title

        match_filter = self.params.get('match_filter')
        if match_filter is not None:
            try:
                ret = match_filter(info_dict, incomplete=incomplete)
            except TypeError:
                # For backward compatibility
                ret = None if incomplete else match_filter(info_dict)
            if ret is not None:
                return ret
        return None

    if self.in_download_archive(info_dict):
        reason = '%s has already been recorded in the archive' % video_title
        break_opt, break_err = 'break_on_existing', ExistingVideoReached
    else:
        reason = check_filter()
        break_opt, break_err = 'break_on_reject', RejectedVideoReached
    if reason is not None:
        if not silent:
            self.to_screen('[download] ' + reason)
        if self.params.get(break_opt, False):
            # --break-on-existing / --break-on-reject abort the whole run.
            raise break_err()
    return reason
@staticmethod
def add_extra_info(info_dict, extra_info):
    '''Set the keys from extra_info in info dict if they are missing'''
    # Existing keys always win; only absent keys are filled in.
    for extra_key, extra_value in extra_info.items():
        if extra_key not in info_dict:
            info_dict[extra_key] = extra_value
def extract_info(self, url, download=True, ie_key=None, extra_info=None,
                 process=True, force_generic_extractor=False):
    """
    Return a list with a dictionary for each video extracted.

    Arguments:
    url -- URL to extract

    Keyword arguments:
    download -- whether to download videos during extraction
    ie_key -- extractor key hint
    extra_info -- dictionary containing the extra values to add to each result
    process -- whether to resolve all unresolved references (URLs, playlist items),
               must be True for download to work.
    force_generic_extractor -- force using the generic extractor
    """

    if extra_info is None:
        extra_info = {}

    if not ie_key and force_generic_extractor:
        ie_key = 'Generic'

    if ie_key:
        # Only consider the hinted extractor.
        ies = {ie_key: self._get_info_extractor_class(ie_key)}
    else:
        ies = self._ies

    for ie_key, ie in ies.items():
        if not ie.suitable(url):
            continue

        if not ie.working():
            self.report_warning('The program functionality for this site has been marked as broken, '
                                'and will probably not work.')

        # Cheap archive check using the temp id, before any network request.
        temp_id = ie.get_temp_id(url)
        if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
            self.to_screen("[%s] %s: has already been recorded in archive" % (
                ie_key, temp_id))
            break
        return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
    else:
        self.report_error('no suitable InfoExtractor for URL %s' % url)
def __handle_extraction_exceptions(func):
    """
    Decorator converting extraction exceptions into user-facing errors,
    honoring 'ignoreerrors' and retrying on throttled downloads.
    """
    @functools.wraps(func)
    def wrapper(self, *args, **kwargs):
        try:
            return func(self, *args, **kwargs)
        except GeoRestrictedError as e:
            msg = e.msg
            if e.countries:
                msg += '\nThis video is available in %s.' % ', '.join(
                    map(ISO3166Utils.short2full, e.countries))
            msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
            self.report_error(msg)
        except ExtractorError as e:  # An error we somewhat expected
            self.report_error(compat_str(e), e.format_traceback())
        except ThrottledDownload:
            self.to_stderr('\r')
            self.report_warning('The download speed is below throttle limit. Re-extracting data')
            # Re-extract from scratch; recursion re-enters the same handling.
            return wrapper(self, *args, **kwargs)
        except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached, LazyList.IndexError):
            # Control-flow exceptions used to abort runs; always propagate.
            raise
        except Exception as e:
            if self.params.get('ignoreerrors'):
                self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
            else:
                raise
    return wrapper
@__handle_extraction_exceptions
def __extract_info(self, url, ie, download, extra_info, process):
    """Run one extractor on url and optionally process/download its result."""
    ie_result = ie.extract(url)
    if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
        return
    if isinstance(ie_result, list):
        # Backwards compatibility: old IE result format
        ie_result = {
            '_type': 'compat_list',
            'entries': ie_result,
        }
    if extra_info.get('original_url'):
        ie_result.setdefault('original_url', extra_info['original_url'])
    self.add_default_extra_info(ie_result, ie, url)
    if process:
        return self.process_ie_result(ie_result, download, extra_info)
    else:
        return ie_result
def add_default_extra_info(self, ie_result, ie, url):
    """Fill in the standard URL- and extractor-derived fields if missing."""
    if url is not None:
        self.add_extra_info(ie_result, {
            'webpage_url': url,
            'original_url': url,
            'webpage_url_basename': url_basename(url),
        })
    if ie is not None:
        self.add_extra_info(ie_result, {
            'extractor': ie.IE_NAME,
            'extractor_key': ie.ie_key(),
        })
1335 def process_ie_result(self
, ie_result
, download
=True, extra_info
=None):
1337 Take the result of the ie(may be modified) and resolve all unresolved
1338 references (URLs, playlist items).
1340 It will also download the videos if 'download'.
1341 Returns the resolved ie_result.
1343 if extra_info
is None:
1345 result_type
= ie_result
.get('_type', 'video')
1347 if result_type
in ('url', 'url_transparent'):
1348 ie_result
['url'] = sanitize_url(ie_result
['url'])
1349 if ie_result
.get('original_url'):
1350 extra_info
.setdefault('original_url', ie_result
['original_url'])
1352 extract_flat
= self
.params
.get('extract_flat', False)
1353 if ((extract_flat
== 'in_playlist' and 'playlist' in extra_info
)
1354 or extract_flat
is True):
1355 info_copy
= ie_result
.copy()
1356 ie
= try_get(ie_result
.get('ie_key'), self
.get_info_extractor
)
1357 if ie
and not ie_result
.get('id'):
1358 info_copy
['id'] = ie
.get_temp_id(ie_result
['url'])
1359 self
.add_default_extra_info(info_copy
, ie
, ie_result
['url'])
1360 self
.add_extra_info(info_copy
, extra_info
)
1361 self
.__forced
_printings
(info_copy
, self
.prepare_filename(info_copy
), incomplete
=True)
1362 if self
.params
.get('force_write_download_archive', False):
1363 self
.record_download_archive(info_copy
)
1366 if result_type
== 'video':
1367 self
.add_extra_info(ie_result
, extra_info
)
1368 ie_result
= self
.process_video_result(ie_result
, download
=download
)
1369 additional_urls
= (ie_result
or {}).get('additional_urls')
1371 # TODO: Improve MetadataParserPP to allow setting a list
1372 if isinstance(additional_urls
, compat_str
):
1373 additional_urls
= [additional_urls
]
1375 '[info] %s: %d additional URL(s) requested' % (ie_result
['id'], len(additional_urls
)))
1376 self
.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls
))
1377 ie_result
['additional_entries'] = [
1379 url
, download
, extra_info
,
1380 force_generic_extractor
=self
.params
.get('force_generic_extractor'))
1381 for url
in additional_urls
1384 elif result_type
== 'url':
1385 # We have to add extra_info to the results because it may be
1386 # contained in a playlist
1387 return self
.extract_info(
1388 ie_result
['url'], download
,
1389 ie_key
=ie_result
.get('ie_key'),
1390 extra_info
=extra_info
)
1391 elif result_type
== 'url_transparent':
1392 # Use the information from the embedding page
1393 info
= self
.extract_info(
1394 ie_result
['url'], ie_key
=ie_result
.get('ie_key'),
1395 extra_info
=extra_info
, download
=False, process
=False)
1397 # extract_info may return None when ignoreerrors is enabled and
1398 # extraction failed with an error, don't crash and return early
1403 force_properties
= dict(
1404 (k
, v
) for k
, v
in ie_result
.items() if v
is not None)
1405 for f
in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1406 if f
in force_properties
:
1407 del force_properties
[f
]
1408 new_result
= info
.copy()
1409 new_result
.update(force_properties
)
1411 # Extracted info may not be a video result (i.e.
1412 # info.get('_type', 'video') != video) but rather an url or
1413 # url_transparent. In such cases outer metadata (from ie_result)
1414 # should be propagated to inner one (info). For this to happen
1415 # _type of info should be overridden with url_transparent. This
1416 # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1417 if new_result
.get('_type') == 'url':
1418 new_result
['_type'] = 'url_transparent'
1420 return self
.process_ie_result(
1421 new_result
, download
=download
, extra_info
=extra_info
)
1422 elif result_type
in ('playlist', 'multi_video'):
1423 # Protect from infinite recursion due to recursively nested playlists
1424 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1425 webpage_url
= ie_result
['webpage_url']
1426 if webpage_url
in self
._playlist
_urls
:
1428 '[download] Skipping already downloaded playlist: %s'
1429 % ie_result
.get('title') or ie_result
.get('id'))
1432 self
._playlist
_level
+= 1
1433 self
._playlist
_urls
.add(webpage_url
)
1434 self
._sanitize
_thumbnails
(ie_result
)
1436 return self
.__process
_playlist
(ie_result
, download
)
1438 self
._playlist
_level
-= 1
1439 if not self
._playlist
_level
:
1440 self
._playlist
_urls
.clear()
1441 elif result_type
== 'compat_list':
1442 self
.report_warning(
1443 'Extractor %s returned a compat_list result. '
1444 'It needs to be updated.' % ie_result
.get('extractor'))
1447 self
.add_extra_info(r
, {
1448 'extractor': ie_result
['extractor'],
1449 'webpage_url': ie_result
['webpage_url'],
1450 'webpage_url_basename': url_basename(ie_result
['webpage_url']),
1451 'extractor_key': ie_result
['extractor_key'],
1454 ie_result
['entries'] = [
1455 self
.process_ie_result(_fixup(r
), download
, extra_info
)
1456 for r
in ie_result
['entries']
1460 raise Exception('Invalid result type: %s' % result_type
)
def _ensure_dir_exists(self, path):
    """Create path's parent directory if needed; errors go through report_error."""
    return make_dir(path, self.report_error)
1465 def __process_playlist(self
, ie_result
, download
):
1466 # We process each entry in the playlist
1467 playlist
= ie_result
.get('title') or ie_result
.get('id')
1468 self
.to_screen('[download] Downloading playlist: %s' % playlist
)
1470 if 'entries' not in ie_result
:
1471 raise EntryNotInPlaylist()
1472 incomplete_entries
= bool(ie_result
.get('requested_entries'))
1473 if incomplete_entries
:
1474 def fill_missing_entries(entries
, indexes
):
1475 ret
= [None] * max(*indexes
)
1476 for i
, entry
in zip(indexes
, entries
):
1479 ie_result
['entries'] = fill_missing_entries(ie_result
['entries'], ie_result
['requested_entries'])
1481 playlist_results
= []
1483 playliststart
= self
.params
.get('playliststart', 1)
1484 playlistend
= self
.params
.get('playlistend')
1485 # For backwards compatibility, interpret -1 as whole list
1486 if playlistend
== -1:
1489 playlistitems_str
= self
.params
.get('playlist_items')
1490 playlistitems
= None
1491 if playlistitems_str
is not None:
1492 def iter_playlistitems(format
):
1493 for string_segment
in format
.split(','):
1494 if '-' in string_segment
:
1495 start
, end
= string_segment
.split('-')
1496 for item
in range(int(start
), int(end
) + 1):
1499 yield int(string_segment
)
1500 playlistitems
= orderedSet(iter_playlistitems(playlistitems_str
))
1502 ie_entries
= ie_result
['entries']
1504 'Downloading %d videos' if not isinstance(ie_entries
, list)
1505 else 'Collected %d videos; downloading %%d of them' % len(ie_entries
))
1507 if isinstance(ie_entries
, list):
1509 return ie_entries
[i
- 1]
1511 if not isinstance(ie_entries
, PagedList
):
1512 ie_entries
= LazyList(ie_entries
)
1515 return YoutubeDL
.__handle
_extraction
_exceptions
(
1516 lambda self
, i
: ie_entries
[i
- 1]
1520 items
= playlistitems
if playlistitems
is not None else itertools
.count(playliststart
)
1524 if playlistitems
is None and playlistend
is not None and playlistend
< i
:
1528 entry
= get_entry(i
)
1530 raise EntryNotInPlaylist()
1531 except (IndexError, EntryNotInPlaylist
):
1532 if incomplete_entries
:
1533 raise EntryNotInPlaylist()
1534 elif not playlistitems
:
1536 entries
.append(entry
)
1538 if entry
is not None:
1539 self
._match
_entry
(entry
, incomplete
=True, silent
=True)
1540 except (ExistingVideoReached
, RejectedVideoReached
):
1542 ie_result
['entries'] = entries
1544 # Save playlist_index before re-ordering
1546 ((playlistitems
[i
- 1] if playlistitems
else i
+ playliststart
- 1), entry
)
1547 for i
, entry
in enumerate(entries
, 1)
1548 if entry
is not None]
1549 n_entries
= len(entries
)
1551 if not playlistitems
and (playliststart
or playlistend
):
1552 playlistitems
= list(range(playliststart
, playliststart
+ n_entries
))
1553 ie_result
['requested_entries'] = playlistitems
1555 if not self
.params
.get('simulate') and self
.params
.get('allow_playlist_files', True):
1557 'playlist': playlist
,
1558 'playlist_id': ie_result
.get('id'),
1559 'playlist_title': ie_result
.get('title'),
1560 'playlist_uploader': ie_result
.get('uploader'),
1561 'playlist_uploader_id': ie_result
.get('uploader_id'),
1562 'playlist_index': 0,
1563 'n_entries': n_entries
,
1565 ie_copy
.update(dict(ie_result
))
1567 if self
._write
_info
_json
('playlist', ie_result
,
1568 self
.prepare_filename(ie_copy
, 'pl_infojson')) is None:
1570 if self
._write
_description
('playlist', ie_result
,
1571 self
.prepare_filename(ie_copy
, 'pl_description')) is None:
1573 # TODO: This should be passed to ThumbnailsConvertor if necessary
1574 self
._write
_thumbnails
('playlist', ie_copy
, self
.prepare_filename(ie_copy
, 'pl_thumbnail'))
1576 if self
.params
.get('playlistreverse', False):
1577 entries
= entries
[::-1]
1578 if self
.params
.get('playlistrandom', False):
1579 random
.shuffle(entries
)
1581 x_forwarded_for
= ie_result
.get('__x_forwarded_for_ip')
1583 self
.to_screen('[%s] playlist %s: %s' % (ie_result
['extractor'], playlist
, msg
% n_entries
))
1585 max_failures
= self
.params
.get('skip_playlist_after_errors') or float('inf')
1586 for i
, entry_tuple
in enumerate(entries
, 1):
1587 playlist_index
, entry
= entry_tuple
1588 if 'playlist-index' in self
.params
.get('compat_opts', []):
1589 playlist_index
= playlistitems
[i
- 1] if playlistitems
else i
+ playliststart
- 1
1590 self
.to_screen('[download] Downloading video %s of %s' % (i
, n_entries
))
1591 # This __x_forwarded_for_ip thing is a bit ugly but requires
1594 entry
['__x_forwarded_for_ip'] = x_forwarded_for
1596 'n_entries': n_entries
,
1597 '_last_playlist_index': max(playlistitems
) if playlistitems
else (playlistend
or n_entries
),
1598 'playlist_index': playlist_index
,
1599 'playlist_autonumber': i
,
1600 'playlist': playlist
,
1601 'playlist_id': ie_result
.get('id'),
1602 'playlist_title': ie_result
.get('title'),
1603 'playlist_uploader': ie_result
.get('uploader'),
1604 'playlist_uploader_id': ie_result
.get('uploader_id'),
1605 'extractor': ie_result
['extractor'],
1606 'webpage_url': ie_result
['webpage_url'],
1607 'webpage_url_basename': url_basename(ie_result
['webpage_url']),
1608 'extractor_key': ie_result
['extractor_key'],
1611 if self
._match
_entry
(entry
, incomplete
=True) is not None:
1614 entry_result
= self
.__process
_iterable
_entry
(entry
, download
, extra
)
1615 if not entry_result
:
1617 if failures
>= max_failures
:
1619 'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist
, failures
))
1621 # TODO: skip failed (empty) entries?
1622 playlist_results
.append(entry_result
)
1623 ie_result
['entries'] = playlist_results
1624 self
.to_screen('[download] Finished downloading playlist: %s' % playlist
)
@__handle_extraction_exceptions
def __process_iterable_entry(self, entry, download, extra_info):
    """Process a single playlist entry with full exception handling."""
    return self.process_ie_result(
        entry, download=download, extra_info=extra_info)
1632 def _build_format_filter(self
, filter_spec
):
1633 " Returns a function to filter the formats according to the filter_spec "
1643 operator_rex
= re
.compile(r
'''(?x)\s*
1644 (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1645 (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1646 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
1647 ''' % '|'.join(map(re
.escape
, OPERATORS
.keys())))
1648 m
= operator_rex
.fullmatch(filter_spec
)
1651 comparison_value
= int(m
.group('value'))
1653 comparison_value
= parse_filesize(m
.group('value'))
1654 if comparison_value
is None:
1655 comparison_value
= parse_filesize(m
.group('value') + 'B')
1656 if comparison_value
is None:
1658 'Invalid value %r in format specification %r' % (
1659 m
.group('value'), filter_spec
))
1660 op
= OPERATORS
[m
.group('op')]
1665 '^=': lambda attr
, value
: attr
.startswith(value
),
1666 '$=': lambda attr
, value
: attr
.endswith(value
),
1667 '*=': lambda attr
, value
: value
in attr
,
1669 str_operator_rex
= re
.compile(r
'''(?x)\s*
1670 (?P<key>[a-zA-Z0-9._-]+)\s*
1671 (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1672 (?P<value>[a-zA-Z0-9._-]+)\s*
1673 ''' % '|'.join(map(re
.escape
, STR_OPERATORS
.keys())))
1674 m
= str_operator_rex
.fullmatch(filter_spec
)
1676 comparison_value
= m
.group('value')
1677 str_op
= STR_OPERATORS
[m
.group('op')]
1678 if m
.group('negation'):
1679 op
= lambda attr
, value
: not str_op(attr
, value
)
1684 raise SyntaxError('Invalid filter specification %r' % filter_spec
)
1687 actual_value
= f
.get(m
.group('key'))
1688 if actual_value
is None:
1689 return m
.group('none_inclusive')
1690 return op(actual_value
, comparison_value
)
1693 def _default_format_spec(self
, info_dict
, download
=True):
1696 merger
= FFmpegMergerPP(self
)
1697 return merger
.available
and merger
.can_merge()
1700 not self
.params
.get('simulate')
1704 or info_dict
.get('is_live', False)
1705 or self
.outtmpl_dict
['default'] == '-'))
1708 or self
.params
.get('allow_multiple_audio_streams', False)
1709 or 'format-spec' in self
.params
.get('compat_opts', []))
1712 'best/bestvideo+bestaudio' if prefer_best
1713 else 'bestvideo*+bestaudio/best' if not compat
1714 else 'bestvideo+bestaudio/best')
1716 def build_format_selector(self
, format_spec
):
1717 def syntax_error(note
, start
):
1719 'Invalid format specification: '
1720 '{0}\n\t{1}\n\t{2}^'.format(note
, format_spec
, ' ' * start
[1]))
1721 return SyntaxError(message
)
1723 PICKFIRST
= 'PICKFIRST'
1727 FormatSelector
= collections
.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1729 allow_multiple_streams
= {'audio': self
.params
.get('allow_multiple_audio_streams', False),
1730 'video': self
.params
.get('allow_multiple_video_streams', False)}
1732 check_formats
= self
.params
.get('check_formats')
1734 def _parse_filter(tokens
):
1736 for type, string
, start
, _
, _
in tokens
:
1737 if type == tokenize
.OP
and string
== ']':
1738 return ''.join(filter_parts
)
1740 filter_parts
.append(string
)
1742 def _remove_unused_ops(tokens
):
1743 # Remove operators that we don't use and join them with the surrounding strings
1744 # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1745 ALLOWED_OPS
= ('/', '+', ',', '(', ')')
1746 last_string
, last_start
, last_end
, last_line
= None, None, None, None
1747 for type, string
, start
, end
, line
in tokens
:
1748 if type == tokenize
.OP
and string
== '[':
1750 yield tokenize
.NAME
, last_string
, last_start
, last_end
, last_line
1752 yield type, string
, start
, end
, line
1753 # everything inside brackets will be handled by _parse_filter
1754 for type, string
, start
, end
, line
in tokens
:
1755 yield type, string
, start
, end
, line
1756 if type == tokenize
.OP
and string
== ']':
1758 elif type == tokenize
.OP
and string
in ALLOWED_OPS
:
1760 yield tokenize
.NAME
, last_string
, last_start
, last_end
, last_line
1762 yield type, string
, start
, end
, line
1763 elif type in [tokenize
.NAME
, tokenize
.NUMBER
, tokenize
.OP
]:
1765 last_string
= string
1769 last_string
+= string
1771 yield tokenize
.NAME
, last_string
, last_start
, last_end
, last_line
1773 def _parse_format_selection(tokens
, inside_merge
=False, inside_choice
=False, inside_group
=False):
1775 current_selector
= None
1776 for type, string
, start
, _
, _
in tokens
:
1777 # ENCODING is only defined in python 3.x
1778 if type == getattr(tokenize
, 'ENCODING', None):
1780 elif type in [tokenize
.NAME
, tokenize
.NUMBER
]:
1781 current_selector
= FormatSelector(SINGLE
, string
, [])
1782 elif type == tokenize
.OP
:
1784 if not inside_group
:
1785 # ')' will be handled by the parentheses group
1786 tokens
.restore_last_token()
1788 elif inside_merge
and string
in ['/', ',']:
1789 tokens
.restore_last_token()
1791 elif inside_choice
and string
== ',':
1792 tokens
.restore_last_token()
1795 if not current_selector
:
1796 raise syntax_error('"," must follow a format selector', start
)
1797 selectors
.append(current_selector
)
1798 current_selector
= None
1800 if not current_selector
:
1801 raise syntax_error('"/" must follow a format selector', start
)
1802 first_choice
= current_selector
1803 second_choice
= _parse_format_selection(tokens
, inside_choice
=True)
1804 current_selector
= FormatSelector(PICKFIRST
, (first_choice
, second_choice
), [])
1806 if not current_selector
:
1807 current_selector
= FormatSelector(SINGLE
, 'best', [])
1808 format_filter
= _parse_filter(tokens
)
1809 current_selector
.filters
.append(format_filter
)
1811 if current_selector
:
1812 raise syntax_error('Unexpected "("', start
)
1813 group
= _parse_format_selection(tokens
, inside_group
=True)
1814 current_selector
= FormatSelector(GROUP
, group
, [])
1816 if not current_selector
:
1817 raise syntax_error('Unexpected "+"', start
)
1818 selector_1
= current_selector
1819 selector_2
= _parse_format_selection(tokens
, inside_merge
=True)
1821 raise syntax_error('Expected a selector', start
)
1822 current_selector
= FormatSelector(MERGE
, (selector_1
, selector_2
), [])
1824 raise syntax_error('Operator not recognized: "{0}"'.format(string
), start
)
1825 elif type == tokenize
.ENDMARKER
:
1827 if current_selector
:
1828 selectors
.append(current_selector
)
1831 def _merge(formats_pair
):
1832 format_1
, format_2
= formats_pair
1835 formats_info
.extend(format_1
.get('requested_formats', (format_1
,)))
1836 formats_info
.extend(format_2
.get('requested_formats', (format_2
,)))
1838 if not allow_multiple_streams
['video'] or not allow_multiple_streams
['audio']:
1839 get_no_more
= {'video': False, 'audio': False}
1840 for (i
, fmt_info
) in enumerate(formats_info
):
1841 if fmt_info
.get('acodec') == fmt_info
.get('vcodec') == 'none':
1844 for aud_vid
in ['audio', 'video']:
1845 if not allow_multiple_streams
[aud_vid
] and fmt_info
.get(aud_vid
[0] + 'codec') != 'none':
1846 if get_no_more
[aud_vid
]:
1849 get_no_more
[aud_vid
] = True
1851 if len(formats_info
) == 1:
1852 return formats_info
[0]
1854 video_fmts
= [fmt_info
for fmt_info
in formats_info
if fmt_info
.get('vcodec') != 'none']
1855 audio_fmts
= [fmt_info
for fmt_info
in formats_info
if fmt_info
.get('acodec') != 'none']
1857 the_only_video
= video_fmts
[0] if len(video_fmts
) == 1 else None
1858 the_only_audio
= audio_fmts
[0] if len(audio_fmts
) == 1 else None
1860 output_ext
= self
.params
.get('merge_output_format')
1863 output_ext
= the_only_video
['ext']
1864 elif the_only_audio
and not video_fmts
:
1865 output_ext
= the_only_audio
['ext']
1869 filtered
= lambda *keys
: filter(None, (traverse_obj(fmt
, *keys
) for fmt
in formats_info
))
1872 'requested_formats': formats_info
,
1873 'format': '+'.join(filtered('format')),
1874 'format_id': '+'.join(filtered('format_id')),
1876 'protocol': '+'.join(map(determine_protocol
, formats_info
)),
1877 'language': '+'.join(orderedSet(filtered('language'))),
1878 'format_note': '+'.join(orderedSet(filtered('format_note'))),
1879 'filesize_approx': sum(filtered('filesize', 'filesize_approx')),
1880 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
1885 'width': the_only_video
.get('width'),
1886 'height': the_only_video
.get('height'),
1887 'resolution': the_only_video
.get('resolution') or self
.format_resolution(the_only_video
),
1888 'fps': the_only_video
.get('fps'),
1889 'dynamic_range': the_only_video
.get('dynamic_range'),
1890 'vcodec': the_only_video
.get('vcodec'),
1891 'vbr': the_only_video
.get('vbr'),
1892 'stretched_ratio': the_only_video
.get('stretched_ratio'),
1897 'acodec': the_only_audio
.get('acodec'),
1898 'abr': the_only_audio
.get('abr'),
1899 'asr': the_only_audio
.get('asr'),
1904 def _check_formats(formats
):
1905 if not check_formats
:
1909 self
.to_screen('[info] Testing format %s' % f
['format_id'])
1910 temp_file
= tempfile
.NamedTemporaryFile(
1911 suffix
='.tmp', delete
=False,
1912 dir=self
.get_output_path('temp') or None)
1915 success
, _
= self
.dl(temp_file
.name
, f
, test
=True)
1916 except (DownloadError
, IOError, OSError, ValueError) + network_exceptions
:
1919 if os
.path
.exists(temp_file
.name
):
1921 os
.remove(temp_file
.name
)
1923 self
.report_warning('Unable to delete temporary file "%s"' % temp_file
.name
)
1927 self
.to_screen('[info] Unable to download format %s. Skipping...' % f
['format_id'])
1929 def _build_selector_function(selector
):
1930 if isinstance(selector
, list): # ,
1931 fs
= [_build_selector_function(s
) for s
in selector
]
1933 def selector_function(ctx
):
1936 return selector_function
1938 elif selector
.type == GROUP
: # ()
1939 selector_function
= _build_selector_function(selector
.selector
)
1941 elif selector
.type == PICKFIRST
: # /
1942 fs
= [_build_selector_function(s
) for s
in selector
.selector
]
1944 def selector_function(ctx
):
1946 picked_formats
= list(f(ctx
))
1948 return picked_formats
1951 elif selector
.type == MERGE
: # +
1952 selector_1
, selector_2
= map(_build_selector_function
, selector
.selector
)
1954 def selector_function(ctx
):
1955 for pair
in itertools
.product(
1956 selector_1(copy
.deepcopy(ctx
)), selector_2(copy
.deepcopy(ctx
))):
1959 elif selector
.type == SINGLE
: # atom
1960 format_spec
= selector
.selector
or 'best'
1962 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
1963 if format_spec
== 'all':
1964 def selector_function(ctx
):
1965 yield from _check_formats(ctx
['formats'])
1966 elif format_spec
== 'mergeall':
1967 def selector_function(ctx
):
1968 formats
= list(_check_formats(ctx
['formats']))
1971 merged_format
= formats
[-1]
1972 for f
in formats
[-2::-1]:
1973 merged_format
= _merge((merged_format
, f
))
1977 format_fallback
, format_reverse
, format_idx
= False, True, 1
1979 r
'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
1981 if mobj
is not None:
1982 format_idx
= int_or_none(mobj
.group('n'), default
=1)
1983 format_reverse
= mobj
.group('bw')[0] == 'b'
1984 format_type
= (mobj
.group('type') or [None])[0]
1985 not_format_type
= {'v': 'a', 'a': 'v'}
.get(format_type
)
1986 format_modified
= mobj
.group('mod') is not None
1988 format_fallback
= not format_type
and not format_modified
# for b, w
1990 (lambda f
: f
.get('%scodec' % format_type
) != 'none')
1991 if format_type
and format_modified
# bv*, ba*, wv*, wa*
1992 else (lambda f
: f
.get('%scodec' % not_format_type
) == 'none')
1993 if format_type
# bv, ba, wv, wa
1994 else (lambda f
: f
.get('vcodec') != 'none' and f
.get('acodec') != 'none')
1995 if not format_modified
# b, w
1996 else lambda f
: True) # b*, w*
1997 filter_f
= lambda f
: _filter_f(f
) and (
1998 f
.get('vcodec') != 'none' or f
.get('acodec') != 'none')
2000 if format_spec
in self
._format
_selection
_exts
['audio']:
2001 filter_f
= lambda f
: f
.get('ext') == format_spec
and f
.get('acodec') != 'none'
2002 elif format_spec
in self
._format
_selection
_exts
['video']:
2003 filter_f
= lambda f
: f
.get('ext') == format_spec
and f
.get('acodec') != 'none' and f
.get('vcodec') != 'none'
2004 elif format_spec
in self
._format
_selection
_exts
['storyboards']:
2005 filter_f
= lambda f
: f
.get('ext') == format_spec
and f
.get('acodec') == 'none' and f
.get('vcodec') == 'none'
2007 filter_f
= lambda f
: f
.get('format_id') == format_spec
# id
2009 def selector_function(ctx
):
2010 formats
= list(ctx
['formats'])
2011 matches
= list(filter(filter_f
, formats
)) if filter_f
is not None else formats
2012 if format_fallback
and ctx
['incomplete_formats'] and not matches
:
2013 # for extractors with incomplete formats (audio only (soundcloud)
2014 # or video only (imgur)) best/worst will fallback to
2015 # best/worst {video,audio}-only format
2017 matches
= LazyList(_check_formats(matches
[::-1 if format_reverse
else 1]))
2019 yield matches
[format_idx
- 1]
2023 filters
= [self
._build
_format
_filter
(f
) for f
in selector
.filters
]
2025 def final_selector(ctx
):
2026 ctx_copy
= copy
.deepcopy(ctx
)
2027 for _filter
in filters
:
2028 ctx_copy
['formats'] = list(filter(_filter
, ctx_copy
['formats']))
2029 return selector_function(ctx_copy
)
2030 return final_selector
2032 stream
= io
.BytesIO(format_spec
.encode('utf-8'))
2034 tokens
= list(_remove_unused_ops(compat_tokenize_tokenize(stream
.readline
)))
2035 except tokenize
.TokenError
:
2036 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec
)))
2038 class TokenIterator(object):
2039 def __init__(self
, tokens
):
2040 self
.tokens
= tokens
2047 if self
.counter
>= len(self
.tokens
):
2048 raise StopIteration()
2049 value
= self
.tokens
[self
.counter
]
2055 def restore_last_token(self
):
2058 parsed_selector
= _parse_format_selection(iter(TokenIterator(tokens
)))
2059 return _build_selector_function(parsed_selector
)
2061 def _calc_headers(self
, info_dict
):
2062 res
= std_headers
.copy()
2064 add_headers
= info_dict
.get('http_headers')
2066 res
.update(add_headers
)
2068 cookies
= self
._calc
_cookies
(info_dict
)
2070 res
['Cookie'] = cookies
2072 if 'X-Forwarded-For' not in res
:
2073 x_forwarded_for_ip
= info_dict
.get('__x_forwarded_for_ip')
2074 if x_forwarded_for_ip
:
2075 res
['X-Forwarded-For'] = x_forwarded_for_ip
def _calc_cookies(self, info_dict):
    """Return the value for the Cookie header matching the URL of *info_dict*.

    Cookies are taken from self.cookiejar; returns None when no stored
    cookie applies to the URL (urllib's get_header default).
    """
    request = sanitized_Request(info_dict['url'])
    self.cookiejar.add_cookie_header(request)
    return request.get_header('Cookie')
2084 def _sanitize_thumbnails(self
, info_dict
):
2085 thumbnails
= info_dict
.get('thumbnails')
2086 if thumbnails
is None:
2087 thumbnail
= info_dict
.get('thumbnail')
2089 info_dict
['thumbnails'] = thumbnails
= [{'url': thumbnail}
]
2091 thumbnails
.sort(key
=lambda t
: (
2092 t
.get('preference') if t
.get('preference') is not None else -1,
2093 t
.get('width') if t
.get('width') is not None else -1,
2094 t
.get('height') if t
.get('height') is not None else -1,
2095 t
.get('id') if t
.get('id') is not None else '',
2098 def thumbnail_tester():
2099 def test_thumbnail(t
):
2100 self
.to_screen(f
'[info] Testing thumbnail {t["id"]}')
2102 self
.urlopen(HEADRequest(t
['url']))
2103 except network_exceptions
as err
:
2104 self
.to_screen(f
'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2107 return test_thumbnail
2109 for i
, t
in enumerate(thumbnails
):
2110 if t
.get('id') is None:
2112 if t
.get('width') and t
.get('height'):
2113 t
['resolution'] = '%dx%d' % (t
['width'], t
['height'])
2114 t
['url'] = sanitize_url(t
['url'])
2116 if self
.params
.get('check_formats'):
2117 info_dict
['thumbnails'] = LazyList(filter(thumbnail_tester(), thumbnails
[::-1])).reverse()
2119 info_dict
['thumbnails'] = thumbnails
2121 def process_video_result(self
, info_dict
, download
=True):
2122 assert info_dict
.get('_type', 'video') == 'video'
2124 if 'id' not in info_dict
:
2125 raise ExtractorError('Missing "id" field in extractor result')
2126 if 'title' not in info_dict
:
2127 raise ExtractorError('Missing "title" field in extractor result',
2128 video_id
=info_dict
['id'], ie
=info_dict
['extractor'])
def report_force_conversion(field, field_not, conversion):
    # Warn that an extractor produced *field* with the wrong type and
    # that it is being coerced via *conversion*.
    message = '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
    self.report_warning(message % (field, field_not, conversion))
2135 def sanitize_string_field(info
, string_field
):
2136 field
= info
.get(string_field
)
2137 if field
is None or isinstance(field
, compat_str
):
2139 report_force_conversion(string_field
, 'a string', 'string')
2140 info
[string_field
] = compat_str(field
)
2142 def sanitize_numeric_fields(info
):
2143 for numeric_field
in self
._NUMERIC
_FIELDS
:
2144 field
= info
.get(numeric_field
)
2145 if field
is None or isinstance(field
, compat_numeric_types
):
2147 report_force_conversion(numeric_field
, 'numeric', 'int')
2148 info
[numeric_field
] = int_or_none(field
)
2150 sanitize_string_field(info_dict
, 'id')
2151 sanitize_numeric_fields(info_dict
)
2153 if 'playlist' not in info_dict
:
2154 # It isn't part of a playlist
2155 info_dict
['playlist'] = None
2156 info_dict
['playlist_index'] = None
2158 self
._sanitize
_thumbnails
(info_dict
)
2160 thumbnail
= info_dict
.get('thumbnail')
2161 thumbnails
= info_dict
.get('thumbnails')
2163 info_dict
['thumbnail'] = sanitize_url(thumbnail
)
2165 info_dict
['thumbnail'] = thumbnails
[-1]['url']
2167 if info_dict
.get('display_id') is None and 'id' in info_dict
:
2168 info_dict
['display_id'] = info_dict
['id']
2170 if info_dict
.get('duration') is not None:
2171 info_dict
['duration_string'] = formatSeconds(info_dict
['duration'])
2173 for ts_key
, date_key
in (
2174 ('timestamp', 'upload_date'),
2175 ('release_timestamp', 'release_date'),
2177 if info_dict
.get(date_key
) is None and info_dict
.get(ts_key
) is not None:
2178 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2179 # see http://bugs.python.org/issue1646728)
2181 upload_date
= datetime
.datetime
.utcfromtimestamp(info_dict
[ts_key
])
2182 info_dict
[date_key
] = upload_date
.strftime('%Y%m%d')
2183 except (ValueError, OverflowError, OSError):
2186 live_keys
= ('is_live', 'was_live')
2187 live_status
= info_dict
.get('live_status')
2188 if live_status
is None:
2189 for key
in live_keys
:
2190 if info_dict
.get(key
) is False:
2192 if info_dict
.get(key
):
2195 if all(info_dict
.get(key
) is False for key
in live_keys
):
2196 live_status
= 'not_live'
2198 info_dict
['live_status'] = live_status
2199 for key
in live_keys
:
2200 if info_dict
.get(key
) is None:
2201 info_dict
[key
] = (live_status
== key
)
2203 # Auto generate title fields corresponding to the *_number fields when missing
2204 # in order to always have clean titles. This is very common for TV series.
2205 for field
in ('chapter', 'season', 'episode'):
2206 if info_dict
.get('%s_number' % field
) is not None and not info_dict
.get(field
):
2207 info_dict
[field
] = '%s %d' % (field
.capitalize(), info_dict
['%s_number' % field
])
2209 for cc_kind
in ('subtitles', 'automatic_captions'):
2210 cc
= info_dict
.get(cc_kind
)
2212 for _
, subtitle
in cc
.items():
2213 for subtitle_format
in subtitle
:
2214 if subtitle_format
.get('url'):
2215 subtitle_format
['url'] = sanitize_url(subtitle_format
['url'])
2216 if subtitle_format
.get('ext') is None:
2217 subtitle_format
['ext'] = determine_ext(subtitle_format
['url']).lower()
2219 automatic_captions
= info_dict
.get('automatic_captions')
2220 subtitles
= info_dict
.get('subtitles')
2222 info_dict
['requested_subtitles'] = self
.process_subtitles(
2223 info_dict
['id'], subtitles
, automatic_captions
)
2225 # We now pick which formats have to be downloaded
2226 if info_dict
.get('formats') is None:
2227 # There's only one format available
2228 formats
= [info_dict
]
2230 formats
= info_dict
['formats']
2232 info_dict
['__has_drm'] = any(f
.get('has_drm') for f
in formats
)
2233 if not self
.params
.get('allow_unplayable_formats'):
2234 formats
= [f
for f
in formats
if not f
.get('has_drm')]
2237 self
.raise_no_formats(info_dict
)
2239 def is_wellformed(f
):
2242 self
.report_warning(
2243 '"url" field is missing or empty - skipping format, '
2244 'there is an error in extractor')
2246 if isinstance(url
, bytes):
2247 sanitize_string_field(f
, 'url')
2250 # Filter out malformed formats for better extraction robustness
2251 formats
= list(filter(is_wellformed
, formats
))
2255 # We check that all the formats have the format and format_id fields
2256 for i
, format
in enumerate(formats
):
2257 sanitize_string_field(format
, 'format_id')
2258 sanitize_numeric_fields(format
)
2259 format
['url'] = sanitize_url(format
['url'])
2260 if not format
.get('format_id'):
2261 format
['format_id'] = compat_str(i
)
2263 # Sanitize format_id from characters used in format selector expression
2264 format
['format_id'] = re
.sub(r
'[\s,/+\[\]()]', '_', format
['format_id'])
2265 format_id
= format
['format_id']
2266 if format_id
not in formats_dict
:
2267 formats_dict
[format_id
] = []
2268 formats_dict
[format_id
].append(format
)
2270 # Make sure all formats have unique format_id
2271 common_exts
= set(itertools
.chain(*self
._format
_selection
_exts
.values()))
2272 for format_id
, ambiguous_formats
in formats_dict
.items():
2273 ambigious_id
= len(ambiguous_formats
) > 1
2274 for i
, format
in enumerate(ambiguous_formats
):
2276 format
['format_id'] = '%s-%d' % (format_id
, i
)
2277 if format
.get('ext') is None:
2278 format
['ext'] = determine_ext(format
['url']).lower()
2279 # Ensure there is no conflict between id and ext in format selection
2280 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2281 if format
['format_id'] != format
['ext'] and format
['format_id'] in common_exts
:
2282 format
['format_id'] = 'f%s' % format
['format_id']
2284 for i
, format
in enumerate(formats
):
2285 if format
.get('format') is None:
2286 format
['format'] = '{id} - {res}{note}'.format(
2287 id=format
['format_id'],
2288 res
=self
.format_resolution(format
),
2289 note
=format_field(format
, 'format_note', ' (%s)'),
2291 if format
.get('protocol') is None:
2292 format
['protocol'] = determine_protocol(format
)
2293 if format
.get('resolution') is None:
2294 format
['resolution'] = self
.format_resolution(format
, default
=None)
2295 if format
.get('dynamic_range') is None and format
.get('vcodec') != 'none':
2296 format
['dynamic_range'] = 'SDR'
2297 # Add HTTP headers, so that external programs can use them from the
2299 full_format_info
= info_dict
.copy()
2300 full_format_info
.update(format
)
2301 format
['http_headers'] = self
._calc
_headers
(full_format_info
)
2302 # Remove private housekeeping stuff
2303 if '__x_forwarded_for_ip' in info_dict
:
2304 del info_dict
['__x_forwarded_for_ip']
2306 # TODO Central sorting goes here
2308 if not formats
or formats
[0] is not info_dict
:
2309 # only set the 'formats' fields if the original info_dict list them
2310 # otherwise we end up with a circular reference, the first (and unique)
2311 # element in the 'formats' field in info_dict is info_dict itself,
2312 # which can't be exported to json
2313 info_dict
['formats'] = formats
2315 info_dict
, _
= self
.pre_process(info_dict
)
2317 if self
.params
.get('list_thumbnails'):
2318 self
.list_thumbnails(info_dict
)
2319 if self
.params
.get('listformats'):
2320 if not info_dict
.get('formats') and not info_dict
.get('url'):
2321 self
.to_screen('%s has no formats' % info_dict
['id'])
2323 self
.list_formats(info_dict
)
2324 if self
.params
.get('listsubtitles'):
2325 if 'automatic_captions' in info_dict
:
2326 self
.list_subtitles(
2327 info_dict
['id'], automatic_captions
, 'automatic captions')
2328 self
.list_subtitles(info_dict
['id'], subtitles
, 'subtitles')
2329 list_only
= self
.params
.get('simulate') is None and (
2330 self
.params
.get('list_thumbnails') or self
.params
.get('listformats') or self
.params
.get('listsubtitles'))
2332 # Without this printing, -F --print-json will not work
2333 self
.__forced
_printings
(info_dict
, self
.prepare_filename(info_dict
), incomplete
=True)
2336 format_selector
= self
.format_selector
2337 if format_selector
is None:
2338 req_format
= self
._default
_format
_spec
(info_dict
, download
=download
)
2339 self
.write_debug('Default format spec: %s' % req_format
)
2340 format_selector
= self
.build_format_selector(req_format
)
2342 # While in format selection we may need to have an access to the original
2343 # format set in order to calculate some metrics or do some processing.
2344 # For now we need to be able to guess whether original formats provided
2345 # by extractor are incomplete or not (i.e. whether extractor provides only
2346 # video-only or audio-only formats) for proper formats selection for
2347 # extractors with such incomplete formats (see
2348 # https://github.com/ytdl-org/youtube-dl/pull/5556).
2349 # Since formats may be filtered during format selection and may not match
2350 # the original formats the results may be incorrect. Thus original formats
2351 # or pre-calculated metrics should be passed to format selection routines
2353 # We will pass a context object containing all necessary additional data
2354 # instead of just formats.
2355 # This fixes incorrect format selection issue (see
2356 # https://github.com/ytdl-org/youtube-dl/issues/10083).
2357 incomplete_formats
= (
2358 # All formats are video-only or
2359 all(f
.get('vcodec') != 'none' and f
.get('acodec') == 'none' for f
in formats
)
2360 # all formats are audio-only
2361 or all(f
.get('vcodec') == 'none' and f
.get('acodec') != 'none' for f
in formats
))
2365 'incomplete_formats': incomplete_formats
,
2368 formats_to_download
= list(format_selector(ctx
))
2369 if not formats_to_download
:
2370 if not self
.params
.get('ignore_no_formats_error'):
2371 raise ExtractorError('Requested format is not available', expected
=True,
2372 video_id
=info_dict
['id'], ie
=info_dict
['extractor'])
2374 self
.report_warning('Requested format is not available')
2375 # Process what we can, even without any available formats.
2376 self
.process_info(dict(info_dict
))
2379 '[info] %s: Downloading %d format(s): %s' % (
2380 info_dict
['id'], len(formats_to_download
),
2381 ", ".join([f
['format_id'] for f
in formats_to_download
])))
2382 for fmt
in formats_to_download
:
2383 new_info
= dict(info_dict
)
2384 # Save a reference to the original info_dict so that it can be modified in process_info if needed
2385 new_info
['__original_infodict'] = info_dict
2386 new_info
.update(fmt
)
2387 self
.process_info(new_info
)
2388 # We update the info dict with the selected best quality format (backwards compatibility)
2389 if formats_to_download
:
2390 info_dict
.update(formats_to_download
[-1])
2393 def process_subtitles(self
, video_id
, normal_subtitles
, automatic_captions
):
2394 """Select the requested subtitles and their format"""
2396 if normal_subtitles
and self
.params
.get('writesubtitles'):
2397 available_subs
.update(normal_subtitles
)
2398 if automatic_captions
and self
.params
.get('writeautomaticsub'):
2399 for lang
, cap_info
in automatic_captions
.items():
2400 if lang
not in available_subs
:
2401 available_subs
[lang
] = cap_info
2403 if (not self
.params
.get('writesubtitles') and not
2404 self
.params
.get('writeautomaticsub') or not
2408 all_sub_langs
= available_subs
.keys()
2409 if self
.params
.get('allsubtitles', False):
2410 requested_langs
= all_sub_langs
2411 elif self
.params
.get('subtitleslangs', False):
2412 # A list is used so that the order of languages will be the same as
2413 # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
2414 requested_langs
= []
2415 for lang_re
in self
.params
.get('subtitleslangs'):
2416 if lang_re
== 'all':
2417 requested_langs
.extend(all_sub_langs
)
2419 discard
= lang_re
[0] == '-'
2421 lang_re
= lang_re
[1:]
2422 current_langs
= filter(re
.compile(lang_re
+ '$').match
, all_sub_langs
)
2424 for lang
in current_langs
:
2425 while lang
in requested_langs
:
2426 requested_langs
.remove(lang
)
2428 requested_langs
.extend(current_langs
)
2429 requested_langs
= orderedSet(requested_langs
)
2430 elif 'en' in available_subs
:
2431 requested_langs
= ['en']
2433 requested_langs
= [list(all_sub_langs
)[0]]
2435 self
.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs
))
2437 formats_query
= self
.params
.get('subtitlesformat', 'best')
2438 formats_preference
= formats_query
.split('/') if formats_query
else []
2440 for lang
in requested_langs
:
2441 formats
= available_subs
.get(lang
)
2443 self
.report_warning('%s subtitles not available for %s' % (lang
, video_id
))
2445 for ext
in formats_preference
:
2449 matches
= list(filter(lambda f
: f
['ext'] == ext
, formats
))
2455 self
.report_warning(
2456 'No subtitle format found matching "%s" for language %s, '
2457 'using %s' % (formats_query
, lang
, f
['ext']))
2461 def __forced_printings(self
, info_dict
, filename
, incomplete
):
def print_mandatory(field, actual_field=None):
    # Emit info_dict[actual_field] on stdout when the corresponding
    # 'force<field>' option is enabled. For incomplete info_dicts the
    # value is only printed if it is actually present.
    if actual_field is None:
        actual_field = field
    forced = self.params.get('force%s' % field, False)
    printable = not incomplete or info_dict.get(actual_field) is not None
    if forced and printable:
        self.to_stdout(info_dict[actual_field])
def print_optional(field):
    # Emit info_dict[field] on stdout when 'force<field>' is enabled
    # and the field has a value.
    if info_dict.get(field) is None:
        return
    if self.params.get('force%s' % field, False):
        self.to_stdout(info_dict[field])
2474 info_dict
= info_dict
.copy()
2475 if filename
is not None:
2476 info_dict
['filename'] = filename
2477 if info_dict
.get('requested_formats') is not None:
2478 # For RTMP URLs, also include the playpath
2479 info_dict
['urls'] = '\n'.join(f
['url'] + f
.get('play_path', '') for f
in info_dict
['requested_formats'])
2480 elif 'url' in info_dict
:
2481 info_dict
['urls'] = info_dict
['url'] + info_dict
.get('play_path', '')
2483 if self
.params
.get('forceprint') or self
.params
.get('forcejson'):
2484 self
.post_extract(info_dict
)
2485 for tmpl
in self
.params
.get('forceprint', []):
2486 mobj
= re
.match(r
'\w+(=?)$', tmpl
)
2487 if mobj
and mobj
.group(1):
2488 tmpl
= f
'{tmpl[:-1]} = %({tmpl[:-1]})s'
2490 tmpl
= '%({})s'.format(tmpl
)
2491 self
.to_stdout(self
.evaluate_outtmpl(tmpl
, info_dict
))
2493 print_mandatory('title')
2494 print_mandatory('id')
2495 print_mandatory('url', 'urls')
2496 print_optional('thumbnail')
2497 print_optional('description')
2498 print_optional('filename')
2499 if self
.params
.get('forceduration') and info_dict
.get('duration') is not None:
2500 self
.to_stdout(formatSeconds(info_dict
['duration']))
2501 print_mandatory('format')
2503 if self
.params
.get('forcejson'):
2504 self
.to_stdout(json
.dumps(self
.sanitize_info(info_dict
)))
2506 def dl(self
, name
, info
, subtitle
=False, test
=False):
2507 if not info
.get('url'):
2508 self
.raise_no_formats(info
, True)
2511 verbose
= self
.params
.get('verbose')
2514 'quiet': self
.params
.get('quiet') or not verbose
,
2516 'noprogress': not verbose
,
2518 'skip_unavailable_fragments': False,
2519 'keep_fragments': False,
2521 '_no_ytdl_file': True,
2524 params
= self
.params
2525 fd
= get_suitable_downloader(info
, params
, to_stdout
=(name
== '-'))(self
, params
)
2527 for ph
in self
._progress
_hooks
:
2528 fd
.add_progress_hook(ph
)
2529 urls
= '", "'.join([f
['url'] for f
in info
.get('requested_formats', [])] or [info
['url']])
2530 self
.write_debug('Invoking downloader on "%s"' % urls
)
2532 new_info
= copy
.deepcopy(self
._copy
_infodict
(info
))
2533 if new_info
.get('http_headers') is None:
2534 new_info
['http_headers'] = self
._calc
_headers
(new_info
)
2535 return fd
.download(name
, new_info
, subtitle
)
2537 def process_info(self
, info_dict
):
2538 """Process a single resolved IE result."""
2540 assert info_dict
.get('_type', 'video') == 'video'
2542 max_downloads
= self
.params
.get('max_downloads')
2543 if max_downloads
is not None:
2544 if self
._num
_downloads
>= int(max_downloads
):
2545 raise MaxDownloadsReached()
2547 # TODO: backward compatibility, to be removed
2548 info_dict
['fulltitle'] = info_dict
['title']
2550 if 'format' not in info_dict
and 'ext' in info_dict
:
2551 info_dict
['format'] = info_dict
['ext']
2553 if self
._match
_entry
(info_dict
) is not None:
2556 self
.post_extract(info_dict
)
2557 self
._num
_downloads
+= 1
2559 # info_dict['_filename'] needs to be set for backward compatibility
2560 info_dict
['_filename'] = full_filename
= self
.prepare_filename(info_dict
, warn
=True)
2561 temp_filename
= self
.prepare_filename(info_dict
, 'temp')
2565 self
.__forced
_printings
(info_dict
, full_filename
, incomplete
=('format' not in info_dict
))
2567 if self
.params
.get('simulate'):
2568 if self
.params
.get('force_write_download_archive', False):
2569 self
.record_download_archive(info_dict
)
2570 # Do nothing else if in simulate mode
2573 if full_filename
is None:
2575 if not self
._ensure
_dir
_exists
(encodeFilename(full_filename
)):
2577 if not self
._ensure
_dir
_exists
(encodeFilename(temp_filename
)):
2580 if self
._write
_description
('video', info_dict
,
2581 self
.prepare_filename(info_dict
, 'description')) is None:
2584 sub_files
= self
._write
_subtitles
(info_dict
, temp_filename
)
2585 if sub_files
is None:
2587 files_to_move
.update(dict(sub_files
))
2589 thumb_files
= self
._write
_thumbnails
(
2590 'video', info_dict
, temp_filename
, self
.prepare_filename(info_dict
, 'thumbnail'))
2591 if thumb_files
is None:
2593 files_to_move
.update(dict(thumb_files
))
2595 infofn
= self
.prepare_filename(info_dict
, 'infojson')
2596 _infojson_written
= self
._write
_info
_json
('video', info_dict
, infofn
)
2597 if _infojson_written
:
2598 info_dict
['__infojson_filename'] = infofn
2599 elif _infojson_written
is None:
2602 # Note: Annotations are deprecated
2604 if self
.params
.get('writeannotations', False):
2605 annofn
= self
.prepare_filename(info_dict
, 'annotation')
2607 if not self
._ensure
_dir
_exists
(encodeFilename(annofn
)):
2609 if not self
.params
.get('overwrites', True) and os
.path
.exists(encodeFilename(annofn
)):
2610 self
.to_screen('[info] Video annotations are already present')
2611 elif not info_dict
.get('annotations'):
2612 self
.report_warning('There are no annotations to write.')
2615 self
.to_screen('[info] Writing video annotations to: ' + annofn
)
2616 with io
.open(encodeFilename(annofn
), 'w', encoding
='utf-8') as annofile
:
2617 annofile
.write(info_dict
['annotations'])
2618 except (KeyError, TypeError):
2619 self
.report_warning('There are no annotations to write.')
2620 except (OSError, IOError):
2621 self
.report_error('Cannot write annotations file: ' + annofn
)
2624 # Write internet shortcut files
2625 url_link
= webloc_link
= desktop_link
= False
2626 if self
.params
.get('writelink', False):
2627 if sys
.platform
== "darwin": # macOS.
2629 elif sys
.platform
.startswith("linux"):
2631 else: # if sys.platform in ['win32', 'cygwin']:
2633 if self
.params
.get('writeurllink', False):
2635 if self
.params
.get('writewebloclink', False):
2637 if self
.params
.get('writedesktoplink', False):
2640 if url_link
or webloc_link
or desktop_link
:
2641 if 'webpage_url' not in info_dict
:
2642 self
.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2644 ascii_url
= iri_to_uri(info_dict
['webpage_url'])
2646 def _write_link_file(extension
, template
, newline
, embed_filename
):
2647 linkfn
= replace_extension(full_filename
, extension
, info_dict
.get('ext'))
2648 if self
.params
.get('overwrites', True) and os
.path
.exists(encodeFilename(linkfn
)):
2649 self
.to_screen('[info] Internet shortcut is already present')
2652 self
.to_screen('[info] Writing internet shortcut to: ' + linkfn
)
2653 with io
.open(encodeFilename(to_high_limit_path(linkfn
)), 'w', encoding
='utf-8', newline
=newline
) as linkfile
:
2654 template_vars
= {'url': ascii_url}
2656 template_vars
['filename'] = linkfn
[:-(len(extension
) + 1)]
2657 linkfile
.write(template
% template_vars
)
2658 except (OSError, IOError):
2659 self
.report_error('Cannot write internet shortcut ' + linkfn
)
2664 if not _write_link_file('url', DOT_URL_LINK_TEMPLATE
, '\r\n', embed_filename
=False):
2667 if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE
, '\n', embed_filename
=False):
2670 if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE
, '\n', embed_filename
=True):
2674 info_dict
, files_to_move
= self
.pre_process(info_dict
, 'before_dl', files_to_move
)
2675 except PostProcessingError
as err
:
2676 self
.report_error('Preprocessing: %s' % str(err
))
2679 must_record_download_archive
= False
2680 if self
.params
.get('skip_download', False):
2681 info_dict
['filepath'] = temp_filename
2682 info_dict
['__finaldir'] = os
.path
.dirname(os
.path
.abspath(encodeFilename(full_filename
)))
2683 info_dict
['__files_to_move'] = files_to_move
2684 info_dict
= self
.run_pp(MoveFilesAfterDownloadPP(self
, False), info_dict
)
2687 info_dict
.setdefault('__postprocessors', [])
2690 def existing_file(*filepaths
):
2691 ext
= info_dict
.get('ext')
2692 final_ext
= self
.params
.get('final_ext', ext
)
2694 for file in orderedSet(filepaths
):
2695 if final_ext
!= ext
:
2696 converted
= replace_extension(file, final_ext
, ext
)
2697 if os
.path
.exists(encodeFilename(converted
)):
2698 existing_files
.append(converted
)
2699 if os
.path
.exists(encodeFilename(file)):
2700 existing_files
.append(file)
2702 if not existing_files
or self
.params
.get('overwrites', False):
2703 for file in orderedSet(existing_files
):
2704 self
.report_file_delete(file)
2705 os
.remove(encodeFilename(file))
2708 info_dict
['ext'] = os
.path
.splitext(existing_files
[0])[1][1:]
2709 return existing_files
[0]
2712 if info_dict
.get('requested_formats') is not None:
2714 def compatible_formats(formats
):
2715 # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2716 video_formats
= [format
for format
in formats
if format
.get('vcodec') != 'none']
2717 audio_formats
= [format
for format
in formats
if format
.get('acodec') != 'none']
2718 if len(video_formats
) > 2 or len(audio_formats
) > 2:
2722 exts
= set(format
.get('ext') for format
in formats
)
2724 set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2727 for ext_sets
in COMPATIBLE_EXTS
:
2728 if ext_sets
.issuperset(exts
):
2730 # TODO: Check acodec/vcodec
2733 requested_formats
= info_dict
['requested_formats']
2734 old_ext
= info_dict
['ext']
2735 if self
.params
.get('merge_output_format') is None:
2736 if not compatible_formats(requested_formats
):
2737 info_dict
['ext'] = 'mkv'
2738 self
.report_warning(
2739 'Requested formats are incompatible for merge and will be merged into mkv')
2740 if (info_dict
['ext'] == 'webm'
2741 and info_dict
.get('thumbnails')
2742 # check with type instead of pp_key, __name__, or isinstance
2743 # since we dont want any custom PPs to trigger this
2744 and any(type(pp
) == EmbedThumbnailPP
for pp
in self
._pps
['post_process'])):
2745 info_dict
['ext'] = 'mkv'
2746 self
.report_warning(
2747 'webm doesn\'t support embedding a thumbnail, mkv will be used')
2748 new_ext
= info_dict
['ext']
2750 def correct_ext(filename
, ext
=new_ext
):
2753 filename_real_ext
= os
.path
.splitext(filename
)[1][1:]
2755 os
.path
.splitext(filename
)[0]
2756 if filename_real_ext
in (old_ext
, new_ext
)
2758 return '%s.%s' % (filename_wo_ext
, ext
)
2760 # Ensure filename always has a correct extension for successful merge
2761 full_filename
= correct_ext(full_filename
)
2762 temp_filename
= correct_ext(temp_filename
)
2763 dl_filename
= existing_file(full_filename
, temp_filename
)
2764 info_dict
['__real_download'] = False
2766 if dl_filename
is not None:
2767 self
.report_file_already_downloaded(dl_filename
)
2768 elif get_suitable_downloader(info_dict
, self
.params
, to_stdout
=temp_filename
== '-'):
2769 info_dict
['url'] = '\n'.join(f
['url'] for f
in requested_formats
)
2770 success
, real_download
= self
.dl(temp_filename
, info_dict
)
2771 info_dict
['__real_download'] = real_download
2774 merger
= FFmpegMergerPP(self
)
2775 if self
.params
.get('allow_unplayable_formats'):
2776 self
.report_warning(
2777 'You have requested merging of multiple formats '
2778 'while also allowing unplayable formats to be downloaded. '
2779 'The formats won\'t be merged to prevent data corruption.')
2780 elif not merger
.available
:
2781 self
.report_warning(
2782 'You have requested merging of multiple formats but ffmpeg is not installed. '
2783 'The formats won\'t be merged.')
2785 if temp_filename
== '-':
2786 reason
= ('using a downloader other than ffmpeg' if FFmpegFD
.can_merge_formats(info_dict
)
2787 else 'but the formats are incompatible for simultaneous download' if merger
.available
2788 else 'but ffmpeg is not installed')
2789 self
.report_warning(
2790 f
'You have requested downloading multiple formats to stdout {reason}. '
2791 'The formats will be streamed one after the other')
2792 fname
= temp_filename
2793 for f
in requested_formats
:
2794 new_info
= dict(info_dict
)
2795 del new_info
['requested_formats']
2797 if temp_filename
!= '-':
2798 fname
= prepend_extension(
2799 correct_ext(temp_filename
, new_info
['ext']),
2800 'f%s' % f
['format_id'], new_info
['ext'])
2801 if not self
._ensure
_dir
_exists
(fname
):
2803 f
['filepath'] = fname
2804 downloaded
.append(fname
)
2805 partial_success
, real_download
= self
.dl(fname
, new_info
)
2806 info_dict
['__real_download'] = info_dict
['__real_download'] or real_download
2807 success
= success
and partial_success
2808 if merger
.available
and not self
.params
.get('allow_unplayable_formats'):
2809 info_dict
['__postprocessors'].append(merger
)
2810 info_dict
['__files_to_merge'] = downloaded
2811 # Even if there were no downloads, it is being merged only now
2812 info_dict
['__real_download'] = True
2814 for file in downloaded
:
2815 files_to_move
[file] = None
2817 # Just a single file
2818 dl_filename
= existing_file(full_filename
, temp_filename
)
2819 if dl_filename
is None or dl_filename
== temp_filename
:
2820 # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
2821 # So we should try to resume the download
2822 success
, real_download
= self
.dl(temp_filename
, info_dict
)
2823 info_dict
['__real_download'] = real_download
2825 self
.report_file_already_downloaded(dl_filename
)
2827 dl_filename
= dl_filename
or temp_filename
2828 info_dict
['__finaldir'] = os
.path
.dirname(os
.path
.abspath(encodeFilename(full_filename
)))
2830 except network_exceptions
as err
:
2831 self
.report_error('unable to download video data: %s' % error_to_compat_str(err
))
2833 except (OSError, IOError) as err
:
2834 raise UnavailableVideoError(err
)
2835 except (ContentTooShortError
, ) as err
:
2836 self
.report_error('content too short (expected %s bytes and served %s)' % (err
.expected
, err
.downloaded
))
2839 if success
and full_filename
!= '-':
2843 fixup_policy
= self
.params
.get('fixup')
2844 vid
= info_dict
['id']
2846 if fixup_policy
in ('ignore', 'never'):
2848 elif fixup_policy
== 'warn':
2850 elif fixup_policy
!= 'force':
2851 assert fixup_policy
in ('detect_or_warn', None)
2852 if not info_dict
.get('__real_download'):
2855 def ffmpeg_fixup(cndn
, msg
, cls
):
2859 self
.report_warning(f
'{vid}: {msg}')
2863 info_dict
['__postprocessors'].append(pp
)
2865 self
.report_warning(f
'{vid}: {msg}. Install ffmpeg to fix this automatically')
2867 stretched_ratio
= info_dict
.get('stretched_ratio')
2869 stretched_ratio
not in (1, None),
2870 f
'Non-uniform pixel ratio {stretched_ratio}',
2871 FFmpegFixupStretchedPP
)
2874 (info_dict
.get('requested_formats') is None
2875 and info_dict
.get('container') == 'm4a_dash'
2876 and info_dict
.get('ext') == 'm4a'),
2877 'writing DASH m4a. Only some players support this container',
2880 downloader
= get_suitable_downloader(info_dict
, self
.params
) if 'protocol' in info_dict
else None
2881 downloader
= downloader
.__name
__ if downloader
else None
2882 ffmpeg_fixup(info_dict
.get('requested_formats') is None and downloader
== 'HlsFD',
2883 'malformed AAC bitstream detected', FFmpegFixupM3u8PP
)
2884 ffmpeg_fixup(downloader
== 'WebSocketFragmentFD', 'malformed timestamps detected', FFmpegFixupTimestampPP
)
2885 ffmpeg_fixup(downloader
== 'WebSocketFragmentFD', 'malformed duration detected', FFmpegFixupDurationPP
)
2889 info_dict
= self
.post_process(dl_filename
, info_dict
, files_to_move
)
2890 except PostProcessingError
as err
:
2891 self
.report_error('Postprocessing: %s' % str(err
))
2894 for ph
in self
._post
_hooks
:
2895 ph(info_dict
['filepath'])
2896 except Exception as err
:
2897 self
.report_error('post hooks: %s' % str(err
))
2899 must_record_download_archive
= True
2901 if must_record_download_archive
or self
.params
.get('force_write_download_archive', False):
2902 self
.record_download_archive(info_dict
)
2903 max_downloads
= self
.params
.get('max_downloads')
2904 if max_downloads
is not None and self
._num
_downloads
>= int(max_downloads
):
2905 raise MaxDownloadsReached()
def download(self, url_list):
    """Download a given list of URLs; returns self._download_retcode."""
    outtmpl = self.outtmpl_dict['default']
    # Refuse to write several videos into one fixed file name
    # (multiple URLs + a template with no '%' placeholder + more than one allowed download).
    if (len(url_list) > 1
            and outtmpl != '-'
            and '%' not in outtmpl
            and self.params.get('max_downloads') != 1):
        raise SameFileError(outtmpl)

    for url in url_list:
        try:
            # It also downloads the videos
            res = self.extract_info(
                url, force_generic_extractor=self.params.get('force_generic_extractor', False))
        except UnavailableVideoError:
            self.report_error('unable to download video')
        except MaxDownloadsReached:
            self.to_screen('[info] Maximum number of downloads reached')
            raise
        except ExistingVideoReached:
            self.to_screen('[info] Encountered a video that is already in the archive, stopping due to --break-on-existing')
            raise
        except RejectedVideoReached:
            self.to_screen('[info] Encountered a video that did not match filter, stopping due to --break-on-reject')
            raise
        else:
            if self.params.get('dump_single_json', False):
                self.post_extract(res)
                self.to_stdout(json.dumps(self.sanitize_info(res)))

    return self._download_retcode
def download_with_info_file(self, info_filename):
    """Re-run processing/download from a previously written .info.json file.

    Falls back to re-extracting from 'webpage_url' if the stored info
    fails to download; returns self._download_retcode.
    """
    with contextlib.closing(fileinput.FileInput(
            [info_filename], mode='r',
            openhook=fileinput.hook_encoded('utf-8'))) as f:
        # FileInput doesn't have a read method, we can't call json.load
        info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
    try:
        self.process_ie_result(info, download=True)
    except (DownloadError, EntryNotInPlaylist, ThrottledDownload):
        webpage_url = info.get('webpage_url')
        if webpage_url is not None:
            self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
            return self.download([webpage_url])
        else:
            raise
    return self._download_retcode
def sanitize_info(info_dict, remove_private_keys=False):
    ''' Sanitize the infodict for converting to json.

    @param info_dict            The dict to sanitize (may be None; returned as-is then).
    @param remove_private_keys  Also strip keys starting with '_', the keys listed
                                below, and keys whose values are empty containers/None.
    @returns                    A recursively filtered copy (LazyList/list/tuple/set
                                become lists; dicts are rebuilt without rejected keys).
    '''
    if info_dict is None:
        return info_dict
    info_dict.setdefault('epoch', int(time.time()))
    remove_keys = {'__original_infodict'}  # Always remove this since this may contain a copy of the entire dict
    # BUGFIX: was `keep_keys = ['_type'],` — the trailing comma made this a
    # 1-tuple containing a list, so `'_type' not in keep_keys` was always True
    # and '_type' got stripped by the startswith('_') test below.
    keep_keys = {'_type'}  # Always keep this to facilitate load-info-json
    if remove_private_keys:
        remove_keys |= {
            'requested_formats', 'requested_subtitles', 'requested_entries',
            'filepath', 'entries', 'original_url', 'playlist_autonumber',
        }
        empty_values = (None, {}, [], set(), tuple())
        reject = lambda k, v: k not in keep_keys and (
            k.startswith('_') or k in remove_keys or v in empty_values)
    else:
        reject = lambda k, v: k in remove_keys
    filter_fn = lambda obj: (
        list(map(filter_fn, obj)) if isinstance(obj, (LazyList, list, tuple, set))
        else obj if not isinstance(obj, dict)
        else dict((k, filter_fn(v)) for k, v in obj.items() if not reject(k, v)))
    return filter_fn(info_dict)
def filter_requested_info(info_dict, actually_filter=True):
    ''' Alias of sanitize_info for backward compatibility '''
    return YoutubeDL.sanitize_info(info_dict, actually_filter)
def run_pp(self, pp, infodict):
    """Run a single postprocessor on infodict and handle its deletions.

    Returns the (possibly replaced) infodict. Files the PP asks to delete
    are removed unless --keepvideo, in which case they are queued for moving.
    """
    files_to_delete = []
    if '__files_to_move' not in infodict:
        infodict['__files_to_move'] = {}
    try:
        files_to_delete, infodict = pp.run(infodict)
    except PostProcessingError as e:
        # Must be True and not 'only_download'
        if self.params.get('ignoreerrors') is True:
            self.report_error(e)
            return infodict
        raise

    if not files_to_delete:
        return infodict
    if self.params.get('keepvideo', False):
        for f in files_to_delete:
            # Keep the originals; just register them to be moved with the output
            infodict['__files_to_move'].setdefault(f, '')
    else:
        for old_filename in set(files_to_delete):
            self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
            try:
                os.remove(encodeFilename(old_filename))
            except (IOError, OSError):
                self.report_warning('Unable to remove downloaded original file')
            if old_filename in infodict['__files_to_move']:
                del infodict['__files_to_move'][old_filename]
    return infodict
def post_extract(info_dict):
    """Resolve the deferred '__post_extractor' callable into the info dict.

    Recurses into playlist/multi_video entries; merges the extra fields into
    both the dict itself and its '__original_infodict' (when present), then
    drops the '__post_extractor' key.
    """
    def _merge(entry):
        if entry.get('_type') in ('playlist', 'multi_video'):
            for child in entry.get('entries', {}):
                _merge(child or {})
            return

        deferred = entry.get('__post_extractor') or (lambda: {})
        extra = deferred().items()
        entry.update(extra)
        entry.pop('__post_extractor', None)

        original = entry.get('__original_infodict') or {}
        original.update(extra)
        original.pop('__post_extractor', None)

    _merge(info_dict or {})
def pre_process(self, ie_info, key='pre_process', files_to_move=None):
    """Run all postprocessors registered under `key` on a copy of ie_info.

    Returns (info, files_to_move) where files_to_move is popped back out of
    the dict after the PPs have run.
    """
    info = dict(ie_info)
    info['__files_to_move'] = files_to_move or {}
    for pp in self._pps[key]:
        info = self.run_pp(pp, info)
    return info, info.pop('__files_to_move', None)
def post_process(self, filename, ie_info, files_to_move=None):
    """Run all the postprocessors on the given file."""
    info = dict(ie_info)
    info['filepath'] = filename
    info['__files_to_move'] = files_to_move or {}

    # Per-video PPs queued during download run before the global ones
    for pp in ie_info.get('__postprocessors', []) + self._pps['post_process']:
        info = self.run_pp(pp, info)
    info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
    del info['__files_to_move']
    for pp in self._pps['after_move']:
        info = self.run_pp(pp, info)
    return info
def _make_archive_id(self, info_dict):
    """Build the '<extractor> <id>' key used in the download archive,
    or None when the info is too incomplete to identify the video."""
    video_id = info_dict.get('id')
    if not video_id:
        return
    # Future-proof against any change in case
    # and backwards compatibility with prior versions
    extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
    if extractor is None:
        url = str_or_none(info_dict.get('url'))
        if not url:
            return
        # Try to find matching extractor for the URL and take its ie_key
        for ie_key, ie in self._ies.items():
            if ie.suitable(url):
                extractor = ie_key
                break
        else:
            return
    return '%s %s' % (extractor.lower(), video_id)
def in_download_archive(self, info_dict):
    """Return True if this video's archive id is already recorded."""
    fn = self.params.get('download_archive')
    if fn is None:
        return False

    vid_id = self._make_archive_id(info_dict)
    if not vid_id:
        return False  # Incomplete video information

    return vid_id in self.archive
def record_download_archive(self, info_dict):
    """Append this video's archive id to the archive file and in-memory set."""
    fn = self.params.get('download_archive')
    if fn is None:
        return
    vid_id = self._make_archive_id(info_dict)
    assert vid_id
    # locked_file guards against concurrent yt-dlp processes appending at once
    with locked_file(fn, 'a', encoding='utf-8') as archive_file:
        archive_file.write(vid_id + '\n')
    self.archive.add(vid_id)
def format_resolution(format, default='unknown'):
    """Return a display string for a format's resolution.

    'audio only' for audio formats, an explicit 'resolution' field verbatim,
    'WxH' / 'Hp' / 'Wx?' from width/height, 'images' for image formats,
    else `default`.
    """
    vcodec, acodec = format.get('vcodec'), format.get('acodec')
    is_images = vcodec == 'none' and acodec == 'none'
    if vcodec == 'none' and acodec != 'none':
        return 'audio only'
    explicit = format.get('resolution')
    if explicit is not None:
        return explicit
    width, height = format.get('width'), format.get('height')
    if width and height:
        res = '%dx%d' % (width, height)
    elif height:
        res = '%sp' % height
    elif width:
        res = '%dx?' % width
    elif is_images:
        return 'images'
    else:
        return default
    return f'{res} images' if is_images else res
def _format_note(self, fdict):
    """Build the free-form 'note' column text for the legacy format table.
    NOTE(review): several separator lines were lost in extraction and were
    reconstructed — verify the `if res:` glue logic against upstream."""
    res = ''
    if fdict.get('ext') in ['f4f', 'f4m']:
        res += '(unsupported) '
    if fdict.get('language'):
        if res:
            res += ' '
        res += '[%s] ' % fdict['language']
    if fdict.get('format_note') is not None:
        res += fdict['format_note'] + ' '
    if fdict.get('tbr') is not None:
        res += '%4dk ' % fdict['tbr']
    if fdict.get('container') is not None:
        if res:
            res += ', '
        res += '%s container' % fdict['container']
    if (fdict.get('vcodec') is not None
            and fdict.get('vcodec') != 'none'):
        if res:
            res += ', '
        res += fdict['vcodec']
        if fdict.get('vbr') is not None:
            res += '@'
    elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
        res += 'video@'
    if fdict.get('vbr') is not None:
        res += '%4dk' % fdict['vbr']
    if fdict.get('fps') is not None:
        if res:
            res += ', '
        res += '%sfps' % fdict['fps']
    if fdict.get('acodec') is not None:
        if res:
            res += ', '
        if fdict['acodec'] == 'none':
            res += 'video only'
        else:
            res += '%-5s' % fdict['acodec']
    elif fdict.get('abr') is not None:
        if res:
            res += ', '
        res += 'audio'
    if fdict.get('abr') is not None:
        res += '@%3dk' % fdict['abr']
    if fdict.get('asr') is not None:
        res += ' (%5dHz)' % fdict['asr']
    if fdict.get('filesize') is not None:
        if res:
            res += ', '
        res += format_bytes(fdict['filesize'])
    elif fdict.get('filesize_approx') is not None:
        if res:
            res += ', '
        res += '~' + format_bytes(fdict['filesize_approx'])
    return res
def list_formats(self, info_dict):
    """Print the table of available formats for a video.

    Uses the new multi-column layout unless the 'list-formats' compat-opt or
    listformats_table=False selects the legacy 4-column table.
    """
    formats = info_dict.get('formats', [info_dict])
    new_format = (
        'list-formats' not in self.params.get('compat_opts', [])
        and self.params.get('listformats_table', True) is not False)
    if new_format:
        table = [
            [
                format_field(f, 'format_id'),
                format_field(f, 'ext'),
                self.format_resolution(f),
                format_field(f, 'fps', '%d'),
                format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
                '|',
                format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
                format_field(f, 'tbr', '%4dk'),
                shorten_protocol_name(f.get('protocol', '').replace("native", "n")),
                '|',
                format_field(f, 'vcodec', default='unknown').replace('none', ''),
                format_field(f, 'vbr', '%4dk'),
                format_field(f, 'acodec', default='unknown').replace('none', ''),
                format_field(f, 'abr', '%3dk'),
                format_field(f, 'asr', '%5dHz'),
                ', '.join(filter(None, (
                    'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
                    format_field(f, 'language', '[%s]'),
                    format_field(f, 'format_note'),
                    format_field(f, 'container', ignore=(None, f.get('ext'))),
                ))),
            ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
        header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', 'HDR', '|', ' FILESIZE', ' TBR', 'PROTO',
                       '|', 'VCODEC', ' VBR', 'ACODEC', ' ABR', ' ASR', 'MORE INFO']
    else:
        # Legacy youtube-dl style table
        table = [
            [
                format_field(f, 'format_id'),
                format_field(f, 'ext'),
                self.format_resolution(f),
                self._format_note(f)]
            for f in formats
            if f.get('preference') is None or f['preference'] >= -1000]
        header_line = ['format code', 'extension', 'resolution', 'note']

    self.to_screen(
        '[info] Available formats for %s:' % info_dict['id'])
    self.to_stdout(render_table(
        header_line, table, delim=new_format, extraGap=(0 if new_format else 1), hideEmpty=new_format))
def list_thumbnails(self, info_dict):
    """Print the table of available thumbnails for a video."""
    # BUGFIX: `list(info_dict.get('thumbnails'))` raised
    # TypeError: 'NoneType' object is not iterable when the key is absent;
    # default to an empty list so the "no thumbnails" message is shown instead.
    thumbnails = list(info_dict.get('thumbnails') or [])
    if not thumbnails:
        self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
        return

    self.to_screen(
        '[info] Thumbnails for %s:' % info_dict['id'])
    self.to_stdout(render_table(
        ['ID', 'width', 'height', 'URL'],
        [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
def list_subtitles(self, video_id, subtitles, name='subtitles'):
    """Print the table of available subtitles (or automatic captions)."""
    if not subtitles:
        self.to_screen('%s has no %s' % (video_id, name))
        return
    self.to_screen(
        'Available %s for %s:' % (name, video_id))

    def _row(lang, formats):
        # Collapse the per-format names into one cell when they all agree
        exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
        if len(set(names)) == 1:
            names = [] if names[0] == 'unknown' else names[:1]
        return [lang, ', '.join(names), ', '.join(exts)]

    self.to_stdout(render_table(
        ['Language', 'Name', 'Formats'],
        [_row(lang, formats) for lang, formats in subtitles.items()],
        hideEmpty=True))
def urlopen(self, req):
    """ Start an HTTP download """
    # Accept a plain URL string as well as a Request object
    if isinstance(req, compat_basestring):
        req = sanitized_Request(req)
    return self._opener.open(req, timeout=self._socket_timeout)
def print_debug_header(self):
    """Write the verbose-mode debug header (versions, encodings, libraries,
    proxy map). No-op unless --verbose.

    NOTE(review): several lines of this method were lost in extraction and
    reconstructed (the logger branch, the git/except scaffolding) — verify
    against upstream before relying on exact behavior.
    """
    if not self.params.get('verbose'):
        return

    def get_encoding(stream):
        ret = getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__)
        if not supports_terminal_sequences(stream):
            ret += ' (No VT)'
        return ret

    encoding_str = 'Encodings: locale %s, fs %s, out %s, err %s, pref %s' % (
        locale.getpreferredencoding(),
        sys.getfilesystemencoding(),
        get_encoding(self._screen_file), get_encoding(self._err_file),
        self.get_encoding())

    logger = self.params.get('logger')
    if logger:
        write_debug = lambda msg: logger.debug(f'[debug] {msg}')
        write_debug(encoding_str)
    else:
        write_string(f'[debug] {encoding_str}', encoding=None)
        write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')

    source = detect_variant()
    write_debug('yt-dlp version %s%s' % (__version__, '' if source == 'unknown' else f' ({source})'))
    if not _LAZY_LOADER:
        if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
            write_debug('Lazy loading extractors is forcibly disabled')
        else:
            write_debug('Lazy loading extractors is disabled')
    if plugin_extractors or plugin_postprocessors:
        write_debug('Plugins: %s' % [
            '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
            for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
    if self.params.get('compat_opts'):
        write_debug('Compatibility options: %s' % ', '.join(self.params.get('compat_opts')))
    try:
        # Best-effort: report the git commit when running from a checkout
        sp = Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out, err = sp.communicate_or_kill()
        out = out.decode().strip()
        if re.match('[0-9a-f]+', out):
            write_debug('Git HEAD: %s' % out)
    except Exception:
        try:
            sys.exc_clear()
        except Exception:
            pass

    def python_implementation():
        impl_name = platform.python_implementation()
        if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
            return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
        return impl_name

    write_debug('Python version %s (%s %s) - %s' % (
        platform.python_version(),
        python_implementation(),
        platform.architecture()[0],
        platform_name()))

    exe_versions = FFmpegPostProcessor.get_versions(self)
    exe_versions['rtmpdump'] = rtmpdump_version()
    exe_versions['phantomjs'] = PhantomJSwrapper._version()
    exe_str = ', '.join(
        f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
    ) or 'none'
    write_debug('exe versions: %s' % exe_str)

    from .downloader.websocket import has_websockets
    from .postprocessor.embedthumbnail import has_mutagen
    from .cookies import SQLITE_AVAILABLE, KEYRING_AVAILABLE

    lib_str = ', '.join(sorted(filter(None, (
        compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
        has_websockets and 'websockets',
        has_mutagen and 'mutagen',
        SQLITE_AVAILABLE and 'sqlite',
        KEYRING_AVAILABLE and 'keyring',
    )))) or 'none'
    write_debug('Optional libraries: %s' % lib_str)

    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    write_debug(f'Proxy map: {proxy_map}')

    # Not implemented (guarded off with `if False`)
    if False and self.params.get('call_home'):
        ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
        write_debug('Public IP address: %s' % ipaddr)
        latest_version = self.urlopen(
            'https://yt-dl.org/latest/version').read().decode('utf-8')
        if version_tuple(latest_version) > version_tuple(__version__):
            self.report_warning(
                'You are using an outdated version (newest version: %s)! '
                'See https://yt-dl.org/update if you need help updating.' %
                latest_version)
def _setup_opener(self):
    """Build self._opener (urllib OpenerDirector) with cookie, proxy, HTTPS,
    redirect and data handlers, and set self._socket_timeout / self.cookiejar."""
    timeout_val = self.params.get('socket_timeout')
    self._socket_timeout = 20 if timeout_val is None else float(timeout_val)

    opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
    opts_cookiefile = self.params.get('cookiefile')
    opts_proxy = self.params.get('proxy')

    self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)

    cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
    if opts_proxy is not None:
        if opts_proxy == '':
            # Empty string means "disable proxies entirely"
            proxies = {}
        else:
            proxies = {'http': opts_proxy, 'https': opts_proxy}
    else:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    proxy_handler = PerRequestProxyHandler(proxies)

    debuglevel = 1 if self.params.get('debug_printtraffic') else 0
    https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
    redirect_handler = YoutubeDLRedirectHandler()
    data_handler = compat_urllib_request_DataHandler()

    # When passing our own FileHandler instance, build_opener won't add the
    # default FileHandler and allows us to disable the file protocol, which
    # can be used for malicious purposes (see
    # https://github.com/ytdl-org/youtube-dl/issues/8227)
    file_handler = compat_urllib_request.FileHandler()

    def file_open(*args, **kwargs):
        raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
    file_handler.file_open = file_open

    opener = compat_urllib_request.build_opener(
        proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)

    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener
def encode(self, s):
    """Encode str `s` with the configured encoding; bytes pass through."""
    if isinstance(s, bytes):
        return s  # Already encoded

    try:
        return s.encode(self.get_encoding())
    except UnicodeEncodeError as err:
        # Enrich the error with a hint before propagating it
        err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
        raise
def get_encoding(self):
    """Return the output encoding: the --encoding param, else the locale's."""
    encoding = self.params.get('encoding')
    if encoding is None:
        encoding = preferredencoding()
    return encoding
def _write_info_json(self, label, ie_result, infofn):
    ''' Write infojson and returns True = written, False = skip, None = error '''
    if not self.params.get('writeinfojson'):
        return False
    elif not infofn:
        self.write_debug(f'Skipping writing {label} infojson')
        return False
    elif not self._ensure_dir_exists(infofn):
        return None
    elif not self.params.get('overwrites', True) and os.path.exists(infofn):
        self.to_screen(f'[info] {label.title()} metadata is already present')
    else:
        self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
        try:
            write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
        except (OSError, IOError):
            self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
            return None
    return True
def _write_description(self, label, ie_result, descfn):
    ''' Write description and returns True = written, False = skip, None = error '''
    if not self.params.get('writedescription'):
        return False
    elif not descfn:
        self.write_debug(f'Skipping writing {label} description')
        return False
    elif not self._ensure_dir_exists(descfn):
        return None
    elif not self.params.get('overwrites', True) and os.path.exists(descfn):
        self.to_screen(f'[info] {label.title()} description is already present')
    elif ie_result.get('description') is None:
        self.report_warning(f'There\'s no {label} description to write')
        return False
    else:
        try:
            self.to_screen(f'[info] Writing {label} description to: {descfn}')
            with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                descfile.write(ie_result['description'])
        except (OSError, IOError):
            self.report_error(f'Cannot write {label} description file {descfn}')
            return None
    return True
def _write_subtitles(self, info_dict, filename):
    ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
    ret = []
    subtitles = info_dict.get('requested_subtitles')
    if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        return ret

    sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
    if not sub_filename_base:
        self.to_screen('[info] Skipping writing video subtitles')
        return ret
    for sub_lang, sub_info in subtitles.items():
        sub_format = sub_info['ext']
        sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
        sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
        if not self.params.get('overwrites', True) and os.path.exists(sub_filename):
            self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
            sub_info['filepath'] = sub_filename
            ret.append((sub_filename, sub_filename_final))
            continue

        self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
        if sub_info.get('data') is not None:
            # Subtitle content was delivered inline by the extractor
            try:
                # Use newline='' to prevent conversion of newline characters
                # See https://github.com/ytdl-org/youtube-dl/issues/10268
                with io.open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
                    subfile.write(sub_info['data'])
                sub_info['filepath'] = sub_filename
                ret.append((sub_filename, sub_filename_final))
                continue
            except (OSError, IOError):
                self.report_error(f'Cannot write video subtitles file {sub_filename}')
                return None

        try:
            # Otherwise download the subtitle from its URL
            sub_copy = sub_info.copy()
            sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
            self.dl(sub_filename, sub_copy, subtitle=True)
            sub_info['filepath'] = sub_filename
            ret.append((sub_filename, sub_filename_final))
        except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
            self.report_warning(f'Unable to download video subtitles for {sub_lang!r}: {err}')
            continue
    return ret
3513 def _write_thumbnails(self
, label
, info_dict
, filename
, thumb_filename_base
=None):
3514 ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
3515 write_all
= self
.params
.get('write_all_thumbnails', False)
3516 thumbnails
, ret
= [], []
3517 if write_all
or self
.params
.get('writethumbnail', False):
3518 thumbnails
= info_dict
.get('thumbnails') or []
3519 multiple
= write_all
and len(thumbnails
) > 1
3521 if thumb_filename_base
is None:
3522 thumb_filename_base
= filename
3523 if thumbnails
and not thumb_filename_base
:
3524 self
.write_debug(f
'Skipping writing {label} thumbnail')
3527 for t
in thumbnails
[::-1]:
3528 thumb_ext
= (f
'{t["id"]}.' if multiple
else '') + determine_ext(t
['url'], 'jpg')
3529 thumb_display_id
= f
'{label} thumbnail' + (f
' {t["id"]}' if multiple
else '')
3530 thumb_filename
= replace_extension(filename
, thumb_ext
, info_dict
.get('ext'))
3531 thumb_filename_final
= replace_extension(thumb_filename_base
, thumb_ext
, info_dict
.get('ext'))
3533 if not self
.params
.get('overwrites', True) and os
.path
.exists(thumb_filename
):
3534 ret
.append((thumb_filename
, thumb_filename_final
))
3535 t
['filepath'] = thumb_filename
3536 self
.to_screen(f
'[info] {thumb_display_id.title()} is already present')
3538 self
.to_screen(f
'[info] Downloading {thumb_display_id} ...')
3540 uf
= self
.urlopen(t
['url'])
3541 self
.to_screen(f
'[info] Writing {thumb_display_id} to: {thumb_filename}')
3542 with open(encodeFilename(thumb_filename
), 'wb') as thumbf
:
3543 shutil
.copyfileobj(uf
, thumbf
)
3544 ret
.append((thumb_filename
, thumb_filename_final
))
3545 t
['filepath'] = thumb_filename
3546 except network_exceptions
as err
:
3547 self
.report_warning(f
'Unable to download {thumb_display_id}: {err}')
3548 if ret
and not write_all
: