4 from __future__
import absolute_import
, unicode_literals
29 from string
import ascii_letters
34 compat_get_terminal_size
,
40 compat_tokenize_tokenize
,
42 compat_urllib_request
,
43 compat_urllib_request_DataHandler
,
55 DOT_DESKTOP_LINK_TEMPLATE
,
56 DOT_URL_LINK_TEMPLATE
,
57 DOT_WEBLOC_LINK_TEMPLATE
,
80 PerRequestProxyHandler
,
85 register_socks_protocols
,
99 UnavailableVideoError
,
105 YoutubeDLCookieProcessor
,
107 YoutubeDLRedirectHandler
,
108 process_communicate_or_kill
,
110 from .cache
import Cache
111 from .extractor
import get_info_extractor
, gen_extractor_classes
, _LAZY_LOADER
, _PLUGIN_CLASSES
112 from .extractor
.openload
import PhantomJSwrapper
113 from .downloader
import get_suitable_downloader
114 from .downloader
.rtmp
import rtmpdump_version
115 from .postprocessor
import (
118 FFmpegFixupStretchedPP
,
121 # FFmpegSubtitlesConvertorPP,
123 MoveFilesAfterDownloadPP
,
125 from .version
import __version__
127 if compat_os_name
== 'nt':
131 class YoutubeDL(object):
134 YoutubeDL objects are the ones responsible of downloading the
135 actual video file and writing it to disk if the user has requested
136 it, among some other tasks. In most cases there should be one per
137 program. As, given a video URL, the downloader doesn't know how to
138 extract all the needed information, task that InfoExtractors do, it
139 has to pass the URL to one of them.
141 For this, YoutubeDL objects have a method that allows
142 InfoExtractors to be registered in a given order. When it is passed
143 a URL, the YoutubeDL object handles it to the first InfoExtractor it
144 finds that reports being able to handle it. The InfoExtractor extracts
145 all the information about the video or videos the URL refers to, and
146 YoutubeDL process the extracted information, possibly using a File
147 Downloader to download the video.
149 YoutubeDL objects accept a lot of parameters. In order not to saturate
150 the object constructor with arguments, it receives a dictionary of
151 options instead. These options are available through the params
152 attribute for the InfoExtractors to use. The YoutubeDL also
153 registers itself as the downloader in charge for the InfoExtractors
154 that are added to it, so this is a "mutual registration".
158 username: Username for authentication purposes.
159 password: Password for authentication purposes.
160 videopassword: Password for accessing a video.
161 ap_mso: Adobe Pass multiple-system operator identifier.
162 ap_username: Multiple-system operator account username.
163 ap_password: Multiple-system operator account password.
164 usenetrc: Use netrc for authentication instead.
165 verbose: Print additional info to stdout.
166 quiet: Do not print messages to stdout.
167 no_warnings: Do not print out anything for warnings.
168 forceurl: Force printing final URL.
169 forcetitle: Force printing title.
170 forceid: Force printing ID.
171 forcethumbnail: Force printing thumbnail URL.
172 forcedescription: Force printing description.
173 forcefilename: Force printing final filename.
174 forceduration: Force printing duration.
175 forcejson: Force printing info_dict as JSON.
176 dump_single_json: Force printing the info_dict of the whole playlist
177 (or video) as a single JSON line.
178 force_write_download_archive: Force writing download archive regardless
179 of 'skip_download' or 'simulate'.
180 simulate: Do not download the video files.
181 format: Video format code. see "FORMAT SELECTION" for more details.
182 format_sort: How to sort the video formats. see "Sorting Formats"
184 format_sort_force: Force the given format_sort. see "Sorting Formats"
186 allow_multiple_video_streams: Allow multiple video streams to be merged
188 allow_multiple_audio_streams: Allow multiple audio streams to be merged
190 outtmpl: Dictionary of templates for output names. Allowed keys
191 are 'default' and the keys of OUTTMPL_TYPES (in utils.py)
192 outtmpl_na_placeholder: Placeholder for unavailable meta fields.
193 restrictfilenames: Do not allow "&" and spaces in file names
194 trim_file_name: Limit length of filename (extension excluded)
195 ignoreerrors: Do not stop on download errors
196 (Default True when running youtube-dlc,
197 but False when directly accessing YoutubeDL class)
198 force_generic_extractor: Force downloader to use the generic extractor
199 overwrites: Overwrite all video and metadata files if True,
200 overwrite only non-video files if None
201 and don't overwrite any file if False
202 playliststart: Playlist item to start at.
203 playlistend: Playlist item to end at.
204 playlist_items: Specific indices of playlist to download.
205 playlistreverse: Download playlist items in reverse order.
206 playlistrandom: Download playlist items in random order.
207 matchtitle: Download only matching titles.
208 rejecttitle: Reject downloads for matching titles.
209 logger: Log messages to a logging.Logger instance.
210 logtostderr: Log messages to stderr instead of stdout.
211 writedescription: Write the video description to a .description file
212 writeinfojson: Write the video description to a .info.json file
213 writecomments: Extract video comments. This will not be written to disk
214 unless writeinfojson is also given
215 writeannotations: Write the video annotations to a .annotations.xml file
216 writethumbnail: Write the thumbnail image to a file
217 allow_playlist_files: Whether to write playlists' description, infojson etc
218 also to disk when using the 'write*' options
219 write_all_thumbnails: Write all thumbnail formats to files
220 writelink: Write an internet shortcut file, depending on the
221 current platform (.url/.webloc/.desktop)
222 writeurllink: Write a Windows internet shortcut file (.url)
223 writewebloclink: Write a macOS internet shortcut file (.webloc)
224 writedesktoplink: Write a Linux internet shortcut file (.desktop)
225 writesubtitles: Write the video subtitles to a file
226 writeautomaticsub: Write the automatically generated subtitles to a file
227 allsubtitles: Downloads all the subtitles of the video
228 (requires writesubtitles or writeautomaticsub)
229 listsubtitles: Lists all available subtitles for the video
230 subtitlesformat: The format code for subtitles
231 subtitleslangs: List of languages of the subtitles to download
232 keepvideo: Keep the video file after post-processing
233 daterange: A DateRange object, download only if the upload_date is in the range.
234 skip_download: Skip the actual download of the video file
235 cachedir: Location of the cache files in the filesystem.
236 False to disable filesystem cache.
237 noplaylist: Download single video instead of a playlist if in doubt.
238 age_limit: An integer representing the user's age in years.
239 Unsuitable videos for the given age are skipped.
240 min_views: An integer representing the minimum view count the video
241 must have in order to not be skipped.
242 Videos without view count information are always
243 downloaded. None for no limit.
244 max_views: An integer representing the maximum view count.
245 Videos that are more popular than that are not
247 Videos without view count information are always
248 downloaded. None for no limit.
249 download_archive: File name of a file where all downloads are recorded.
250 Videos already present in the file are not downloaded
252 break_on_existing: Stop the download process after attempting to download a
253 file that is in the archive.
254 break_on_reject: Stop the download process when encountering a video that
255 has been filtered out.
256 cookiefile: File name where cookies should be read from and dumped to
257 nocheckcertificate:Do not verify SSL certificates
258 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
259 At the moment, this is only supported by YouTube.
260 proxy: URL of the proxy server to use
261 geo_verification_proxy: URL of the proxy to use for IP address verification
262 on geo-restricted sites.
263 socket_timeout: Time to wait for unresponsive hosts, in seconds
264 bidi_workaround: Work around buggy terminals without bidirectional text
265 support, using fridibi
266 debug_printtraffic:Print out sent and received HTTP traffic
267 include_ads: Download ads as well
268 default_search: Prepend this string if an input url is not valid.
269 'auto' for elaborate guessing
270 encoding: Use this encoding instead of the system-specified.
271 extract_flat: Do not resolve URLs, return the immediate result.
272 Pass in 'in_playlist' to only show this behavior for
274 postprocessors: A list of dictionaries, each with an entry
275 * key: The name of the postprocessor. See
276 youtube_dlc/postprocessor/__init__.py for a list.
277 * _after_move: Optional. If True, run this post_processor
278 after 'MoveFilesAfterDownload'
279 as well as any further keyword arguments for the
281 post_hooks: A list of functions that get called as the final step
282 for each video file, after all postprocessors have been
283 called. The filename will be passed as the only argument.
284 progress_hooks: A list of functions that get called on download
285 progress, with a dictionary with the entries
286 * status: One of "downloading", "error", or "finished".
287 Check this first and ignore unknown values.
289 If status is one of "downloading", or "finished", the
290 following properties may also be present:
291 * filename: The final filename (always present)
292 * tmpfilename: The filename we're currently writing to
293 * downloaded_bytes: Bytes on disk
294 * total_bytes: Size of the whole file, None if unknown
295 * total_bytes_estimate: Guess of the eventual file size,
297 * elapsed: The number of seconds since download started.
298 * eta: The estimated time in seconds, None if unknown
299 * speed: The download speed in bytes/second, None if
301 * fragment_index: The counter of the currently
302 downloaded video fragment.
303 * fragment_count: The number of fragments (= individual
304 files that will be merged)
306 Progress hooks are guaranteed to be called at least once
307 (with status "finished") if the download is successful.
308 merge_output_format: Extension to use when merging formats.
309 final_ext: Expected final extension; used to detect when the file was
310 already downloaded and converted. "merge_output_format" is
311 replaced by this extension when given
312 fixup: Automatically correct known faults of the file.
314 - "never": do nothing
315 - "warn": only emit a warning
316 - "detect_or_warn": check whether we can do anything
317 about it, warn otherwise (default)
318 source_address: Client-side IP address to bind to.
319 call_home: Boolean, true iff we are allowed to contact the
320 youtube-dlc servers for debugging.
321 sleep_interval: Number of seconds to sleep before each download when
322 used alone or a lower bound of a range for randomized
323 sleep before each download (minimum possible number
324 of seconds to sleep) when used along with
326 max_sleep_interval:Upper bound of a range for randomized sleep before each
327 download (maximum possible number of seconds to sleep).
328 Must only be used along with sleep_interval.
329 Actual sleep time will be a random float from range
330 [sleep_interval; max_sleep_interval].
331 listformats: Print an overview of available video formats and exit.
332 list_thumbnails: Print a table of all thumbnails and exit.
333 match_filter: A function that gets called with the info_dict of
335 If it returns a message, the video is ignored.
336 If it returns None, the video is downloaded.
337 match_filter_func in utils.py is one example for this.
338 no_color: Do not emit color codes in output.
339 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
342 Two-letter ISO 3166-2 country code that will be used for
343 explicit geographic restriction bypassing via faking
344 X-Forwarded-For HTTP header
346 IP range in CIDR notation that will be used similarly to
349 The following options determine which downloader is picked:
350 external_downloader: Executable of the external downloader to call.
351 None or unset for standard (built-in) downloader.
352 hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
353 if True, otherwise use ffmpeg/avconv if False, otherwise
354 use downloader suggested by extractor if None.
356 The following parameters are not used by YoutubeDL itself, they are used by
357 the downloader (see youtube_dlc/downloader/common.py):
358 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
359 noresizebuffer, retries, continuedl, noprogress, consoletitle,
360 xattr_set_filesize, external_downloader_args, hls_use_mpegts,
363 The following options are used by the post processors:
364 prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
365 otherwise prefer ffmpeg. (avconv support is deprecated)
366 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
367 to the binary or its containing directory.
368 postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
369 and a list of additional command-line arguments for the
370 postprocessor/executable. The dict can also have "PP+EXE" keys
371 which are used when the given exe is used by the given PP.
372 Use 'default' as the name for arguments to passed to all PP
373 The following options are used by the Youtube extractor:
374 youtube_include_dash_manifest: If True (default), DASH manifests and related
375 data will be downloaded and processed by extractor.
376 You can reduce network I/O by disabling it if you don't
380 _NUMERIC_FIELDS
= set((
381 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
382 'timestamp', 'upload_year', 'upload_month', 'upload_day',
383 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
384 'average_rating', 'comment_count', 'age_limit',
385 'start_time', 'end_time',
386 'chapter_number', 'season_number', 'episode_number',
387 'track_number', 'disc_number', 'release_year',
393 _pps
= {'beforedl': [], 'aftermove': [], 'normal': []}
394 __prepare_filename_warned
= False
395 _download_retcode
= None
396 _num_downloads
= None
398 _playlist_urls
= set()
401 def __init__(self
, params
=None, auto_init
=True):
402 """Create a FileDownloader object with the given options."""
406 self
._ies
_instances
= {}
407 self
._pps
= {'beforedl': [], 'aftermove': [], 'normal': []}
408 self
.__prepare
_filename
_warned
= False
409 self
._post
_hooks
= []
410 self
._progress
_hooks
= []
411 self
._download
_retcode
= 0
412 self
._num
_downloads
= 0
413 self
._screen
_file
= [sys
.stdout
, sys
.stderr
][params
.get('logtostderr', False)]
414 self
._err
_file
= sys
.stderr
417 'nocheckcertificate': False,
419 self
.params
.update(params
)
420 self
.cache
= Cache(self
)
423 """Preload the archive, if any is specified"""
424 def preload_download_archive(self
):
425 fn
= self
.params
.get('download_archive')
429 with locked_file(fn
, 'r', encoding
='utf-8') as archive_file
:
430 for line
in archive_file
:
431 self
.archive
.add(line
.strip())
432 except IOError as ioe
:
433 if ioe
.errno
!= errno
.ENOENT
:
438 def check_deprecated(param
, option
, suggestion
):
439 if self
.params
.get(param
) is not None:
441 '%s is deprecated. Use %s instead.' % (option
, suggestion
))
445 if self
.params
.get('verbose'):
446 self
.to_stdout('[debug] Loading archive file %r' % self
.params
.get('download_archive'))
448 preload_download_archive(self
)
450 if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
451 if self
.params
.get('geo_verification_proxy') is None:
452 self
.params
['geo_verification_proxy'] = self
.params
['cn_verification_proxy']
454 if self
.params
.get('final_ext'):
455 if self
.params
.get('merge_output_format'):
456 self
.report_warning('--merge-output-format will be ignored since --remux-video or --recode-video is given')
457 self
.params
['merge_output_format'] = self
.params
['final_ext']
459 if 'overwrites' in self
.params
and self
.params
['overwrites'] is None:
460 del self
.params
['overwrites']
462 check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N in the number of digits')
463 check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
464 check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
466 if params
.get('bidi_workaround', False):
469 master
, slave
= pty
.openpty()
470 width
= compat_get_terminal_size().columns
474 width_args
= ['-w', str(width
)]
476 stdin
=subprocess
.PIPE
,
478 stderr
=self
._err
_file
)
480 self
._output
_process
= subprocess
.Popen(
481 ['bidiv'] + width_args
, **sp_kwargs
484 self
._output
_process
= subprocess
.Popen(
485 ['fribidi', '-c', 'UTF-8'] + width_args
, **sp_kwargs
)
486 self
._output
_channel
= os
.fdopen(master
, 'rb')
487 except OSError as ose
:
488 if ose
.errno
== errno
.ENOENT
:
489 self
.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
493 if (sys
.platform
!= 'win32'
494 and sys
.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
495 and not params
.get('restrictfilenames', False)):
496 # Unicode filesystem API will throw errors (#1474, #13027)
498 'Assuming --restrict-filenames since file system encoding '
499 'cannot encode all characters. '
500 'Set the LC_ALL environment variable to fix this.')
501 self
.params
['restrictfilenames'] = True
503 self
.outtmpl_dict
= self
.parse_outtmpl()
508 self
.print_debug_header()
509 self
.add_default_info_extractors()
511 for pp_def_raw
in self
.params
.get('postprocessors', []):
512 pp_class
= get_postprocessor(pp_def_raw
['key'])
513 pp_def
= dict(pp_def_raw
)
516 when
= pp_def
['when']
520 pp
= pp_class(self
, **compat_kwargs(pp_def
))
521 self
.add_post_processor(pp
, when
=when
)
523 for ph
in self
.params
.get('post_hooks', []):
524 self
.add_post_hook(ph
)
526 for ph
in self
.params
.get('progress_hooks', []):
527 self
.add_progress_hook(ph
)
529 register_socks_protocols()
531 def warn_if_short_id(self
, argv
):
532 # short YouTube ID starting with dash?
534 i
for i
, a
in enumerate(argv
)
535 if re
.match(r
'^-[0-9A-Za-z_-]{10}$', a
)]
539 + [a
for i
, a
in enumerate(argv
) if i
not in idxs
]
540 + ['--'] + [argv
[i
] for i
in idxs
]
543 'Long argument string detected. '
544 'Use -- to separate parameters and URLs, like this:\n%s\n' %
545 args_to_str(correct_argv
))
547 def add_info_extractor(self
, ie
):
548 """Add an InfoExtractor object to the end of the list."""
550 if not isinstance(ie
, type):
551 self
._ies
_instances
[ie
.ie_key()] = ie
552 ie
.set_downloader(self
)
554 def get_info_extractor(self
, ie_key
):
556 Get an instance of an IE with name ie_key, it will try to get one from
557 the _ies list, if there's no instance it will create a new one and add
558 it to the extractor list.
560 ie
= self
._ies
_instances
.get(ie_key
)
562 ie
= get_info_extractor(ie_key
)()
563 self
.add_info_extractor(ie
)
566 def add_default_info_extractors(self
):
568 Add the InfoExtractors returned by gen_extractors to the end of the list
570 for ie
in gen_extractor_classes():
571 self
.add_info_extractor(ie
)
573 def add_post_processor(self
, pp
, when
='normal'):
574 """Add a PostProcessor object to the end of the chain."""
575 self
._pps
[when
].append(pp
)
576 pp
.set_downloader(self
)
578 def add_post_hook(self
, ph
):
579 """Add the post hook"""
580 self
._post
_hooks
.append(ph
)
582 def add_progress_hook(self
, ph
):
583 """Add the progress hook (currently only for the file downloader)"""
584 self
._progress
_hooks
.append(ph
)
586 def _bidi_workaround(self
, message
):
587 if not hasattr(self
, '_output_channel'):
590 assert hasattr(self
, '_output_process')
591 assert isinstance(message
, compat_str
)
592 line_count
= message
.count('\n') + 1
593 self
._output
_process
.stdin
.write((message
+ '\n').encode('utf-8'))
594 self
._output
_process
.stdin
.flush()
595 res
= ''.join(self
._output
_channel
.readline().decode('utf-8')
596 for _
in range(line_count
))
597 return res
[:-len('\n')]
599 def to_screen(self
, message
, skip_eol
=False):
600 """Print message to stdout if not in quiet mode."""
601 return self
.to_stdout(message
, skip_eol
, check_quiet
=True)
603 def _write_string(self
, s
, out
=None):
604 write_string(s
, out
=out
, encoding
=self
.params
.get('encoding'))
606 def to_stdout(self
, message
, skip_eol
=False, check_quiet
=False):
607 """Print message to stdout if not in quiet mode."""
608 if self
.params
.get('logger'):
609 self
.params
['logger'].debug(message
)
610 elif not check_quiet
or not self
.params
.get('quiet', False):
611 message
= self
._bidi
_workaround
(message
)
612 terminator
= ['\n', ''][skip_eol
]
613 output
= message
+ terminator
615 self
._write
_string
(output
, self
._screen
_file
)
617 def to_stderr(self
, message
):
618 """Print message to stderr."""
619 assert isinstance(message
, compat_str
)
620 if self
.params
.get('logger'):
621 self
.params
['logger'].error(message
)
623 message
= self
._bidi
_workaround
(message
)
624 output
= message
+ '\n'
625 self
._write
_string
(output
, self
._err
_file
)
627 def to_console_title(self
, message
):
628 if not self
.params
.get('consoletitle', False):
630 if compat_os_name
== 'nt':
631 if ctypes
.windll
.kernel32
.GetConsoleWindow():
632 # c_wchar_p() might not be necessary if `message` is
633 # already of type unicode()
634 ctypes
.windll
.kernel32
.SetConsoleTitleW(ctypes
.c_wchar_p(message
))
635 elif 'TERM' in os
.environ
:
636 self
._write
_string
('\033]0;%s\007' % message
, self
._screen
_file
)
638 def save_console_title(self
):
639 if not self
.params
.get('consoletitle', False):
641 if self
.params
.get('simulate', False):
643 if compat_os_name
!= 'nt' and 'TERM' in os
.environ
:
644 # Save the title on stack
645 self
._write
_string
('\033[22;0t', self
._screen
_file
)
647 def restore_console_title(self
):
648 if not self
.params
.get('consoletitle', False):
650 if self
.params
.get('simulate', False):
652 if compat_os_name
!= 'nt' and 'TERM' in os
.environ
:
653 # Restore the title from stack
654 self
._write
_string
('\033[23;0t', self
._screen
_file
)
657 self
.save_console_title()
660 def __exit__(self
, *args
):
661 self
.restore_console_title()
663 if self
.params
.get('cookiefile') is not None:
664 self
.cookiejar
.save(ignore_discard
=True, ignore_expires
=True)
666 def trouble(self
, message
=None, tb
=None):
667 """Determine action to take when a download problem appears.
669 Depending on if the downloader has been configured to ignore
670 download errors or not, this method may throw an exception or
671 not when errors are found, after printing the message.
673 tb, if given, is additional traceback information.
675 if message
is not None:
676 self
.to_stderr(message
)
677 if self
.params
.get('verbose'):
679 if sys
.exc_info()[0]: # if .trouble has been called from an except block
681 if hasattr(sys
.exc_info()[1], 'exc_info') and sys
.exc_info()[1].exc_info
[0]:
682 tb
+= ''.join(traceback
.format_exception(*sys
.exc_info()[1].exc_info
))
683 tb
+= encode_compat_str(traceback
.format_exc())
685 tb_data
= traceback
.format_list(traceback
.extract_stack())
686 tb
= ''.join(tb_data
)
688 if not self
.params
.get('ignoreerrors', False):
689 if sys
.exc_info()[0] and hasattr(sys
.exc_info()[1], 'exc_info') and sys
.exc_info()[1].exc_info
[0]:
690 exc_info
= sys
.exc_info()[1].exc_info
692 exc_info
= sys
.exc_info()
693 raise DownloadError(message
, exc_info
)
694 self
._download
_retcode
= 1
696 def report_warning(self
, message
):
698 Print the message to stderr, it will be prefixed with 'WARNING:'
699 If stderr is a tty file the 'WARNING:' will be colored
701 if self
.params
.get('logger') is not None:
702 self
.params
['logger'].warning(message
)
704 if self
.params
.get('no_warnings'):
706 if not self
.params
.get('no_color') and self
._err
_file
.isatty() and compat_os_name
!= 'nt':
707 _msg_header
= '\033[0;33mWARNING:\033[0m'
709 _msg_header
= 'WARNING:'
710 warning_message
= '%s %s' % (_msg_header
, message
)
711 self
.to_stderr(warning_message
)
713 def report_error(self
, message
, tb
=None):
715 Do the same as trouble, but prefixes the message with 'ERROR:', colored
716 in red if stderr is a tty file.
718 if not self
.params
.get('no_color') and self
._err
_file
.isatty() and compat_os_name
!= 'nt':
719 _msg_header
= '\033[0;31mERROR:\033[0m'
721 _msg_header
= 'ERROR:'
722 error_message
= '%s %s' % (_msg_header
, message
)
723 self
.trouble(error_message
, tb
)
725 def report_file_already_downloaded(self
, file_name
):
726 """Report file has already been fully downloaded."""
728 self
.to_screen('[download] %s has already been downloaded' % file_name
)
729 except UnicodeEncodeError:
730 self
.to_screen('[download] The file has already been downloaded')
732 def report_file_delete(self
, file_name
):
733 """Report that existing file will be deleted."""
735 self
.to_screen('Deleting existing file %s' % file_name
)
736 except UnicodeEncodeError:
737 self
.to_screen('Deleting existing file')
739 def parse_outtmpl(self
):
740 outtmpl_dict
= self
.params
.get('outtmpl', {})
741 if not isinstance(outtmpl_dict
, dict):
742 outtmpl_dict
= {'default': outtmpl_dict}
743 outtmpl_dict
.update({
744 k
: v
for k
, v
in DEFAULT_OUTTMPL
.items()
745 if not outtmpl_dict
.get(k
)})
746 for key
, val
in outtmpl_dict
.items():
747 if isinstance(val
, bytes):
749 'Parameter outtmpl is bytes, but should be a unicode string. '
750 'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
753 def _prepare_filename(self
, info_dict
, tmpl_type
='default'):
755 template_dict
= dict(info_dict
)
757 template_dict
['duration_string'] = ( # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
758 formatSeconds(info_dict
['duration'], '-')
759 if info_dict
.get('duration', None) is not None
762 template_dict
['epoch'] = int(time
.time())
763 autonumber_size
= self
.params
.get('autonumber_size')
764 if autonumber_size
is None:
766 template_dict
['autonumber'] = self
.params
.get('autonumber_start', 1) - 1 + self
._num
_downloads
767 if template_dict
.get('resolution') is None:
768 if template_dict
.get('width') and template_dict
.get('height'):
769 template_dict
['resolution'] = '%dx%d' % (template_dict
['width'], template_dict
['height'])
770 elif template_dict
.get('height'):
771 template_dict
['resolution'] = '%sp' % template_dict
['height']
772 elif template_dict
.get('width'):
773 template_dict
['resolution'] = '%dx?' % template_dict
['width']
775 sanitize
= lambda k
, v
: sanitize_filename(
777 restricted
=self
.params
.get('restrictfilenames'),
778 is_id
=(k
== 'id' or k
.endswith('_id')))
779 template_dict
= dict((k
, v
if isinstance(v
, compat_numeric_types
) else sanitize(k
, v
))
780 for k
, v
in template_dict
.items()
781 if v
is not None and not isinstance(v
, (list, tuple, dict)))
782 na
= self
.params
.get('outtmpl_na_placeholder', 'NA')
783 template_dict
= collections
.defaultdict(lambda: na
, template_dict
)
785 outtmpl
= self
.outtmpl_dict
.get(tmpl_type
, self
.outtmpl_dict
['default'])
786 force_ext
= OUTTMPL_TYPES
.get(tmpl_type
)
788 # For fields playlist_index and autonumber convert all occurrences
789 # of %(field)s to %(field)0Nd for backward compatibility
790 field_size_compat_map
= {
791 'playlist_index': len(str(template_dict
['n_entries'])),
792 'autonumber': autonumber_size
,
794 FIELD_SIZE_COMPAT_RE
= r
'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
795 mobj
= re
.search(FIELD_SIZE_COMPAT_RE
, outtmpl
)
798 FIELD_SIZE_COMPAT_RE
,
799 r
'%%(\1)0%dd' % field_size_compat_map
[mobj
.group('field')],
802 # As of [1] format syntax is:
803 # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
804 # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
808 \({0}\) # mapping key
809 (?:[#0\-+ ]+)? # conversion flags (optional)
810 (?:\d+)? # minimum field width (optional)
811 (?:\.\d+)? # precision (optional)
812 [hlL]? # length modifier (optional)
813 (?P<type>[diouxXeEfFgGcrs%]) # conversion type
816 numeric_fields
= list(self
._NUMERIC
_FIELDS
)
819 FORMAT_DATE_RE
= FORMAT_RE
.format(r
'(?P<key>(?P<field>\w+)>(?P<format>.+?))')
820 for mobj
in re
.finditer(FORMAT_DATE_RE
, outtmpl
):
821 conv_type
, field
, frmt
, key
= mobj
.group('type', 'field', 'format', 'key')
822 if key
in template_dict
:
824 value
= strftime_or_none(template_dict
.get(field
), frmt
, na
)
825 if conv_type
in 'crs': # string
826 value
= sanitize(field
, value
)
828 numeric_fields
.append(key
)
829 value
= float_or_none(value
, default
=None)
830 if value
is not None:
831 template_dict
[key
] = value
833 # Missing numeric fields used together with integer presentation types
834 # in format specification will break the argument substitution since
835 # string NA placeholder is returned for missing fields. We will patch
836 # output template for missing fields to meet string presentation type.
837 for numeric_field
in numeric_fields
:
838 if numeric_field
not in template_dict
:
840 FORMAT_RE
.format(re
.escape(numeric_field
)),
841 r
'%({0})s'.format(numeric_field
), outtmpl
)
843 # expand_path translates '%%' into '%' and '$$' into '$'
844 # correspondingly that is not what we want since we need to keep
845 # '%%' intact for template dict substitution step. Working around
846 # with boundary-alike separator hack.
847 sep
= ''.join([random
.choice(ascii_letters
) for _
in range(32)])
848 outtmpl
= outtmpl
.replace('%%', '%{0}%'.format(sep
)).replace('$$', '${0}$'.format(sep
))
850 # outtmpl should be expand_path'ed before template dict substitution
851 # because meta fields may contain env variables we don't want to
852 # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
853 # title "Hello $PATH", we don't want `$PATH` to be expanded.
854 filename
= expand_path(outtmpl
).replace(sep
, '') % template_dict
856 if force_ext
is not None:
857 filename
= replace_extension(filename
, force_ext
, template_dict
.get('ext'))
859 # https://github.com/blackjack4494/youtube-dlc/issues/85
860 trim_file_name
= self
.params
.get('trim_file_name', False)
862 fn_groups
= filename
.rsplit('.')
865 if len(fn_groups
) > 2:
866 sub_ext
= fn_groups
[-2]
867 filename
= '.'.join(filter(None, [fn_groups
[0][:trim_file_name
], sub_ext
, ext
]))
869 # Temporary fix for #4787
870 # 'Treat' all problem characters by passing filename through preferredencoding
871 # to workaround encoding issues with subprocess on python2 @ Windows
872 if sys
.version_info
< (3, 0) and sys
.platform
== 'win32':
873 filename
= encodeFilename(filename
, True).decode(preferredencoding())
874 filename
= sanitize_path(filename
)
877 except ValueError as err
:
878 self
.report_error('Error in output template: ' + str(err
) + ' (encoding: ' + repr(preferredencoding()) + ')')
881 def prepare_filename(self
, info_dict
, dir_type
='', warn
=False):
882 """Generate the output filename."""
883 paths
= self
.params
.get('paths', {})
884 assert isinstance(paths
, dict)
885 filename
= self
._prepare
_filename
(info_dict
, dir_type
or 'default')
887 if warn
and not self
.__prepare
_filename
_warned
:
890 elif filename
== '-':
891 self
.report_warning('--paths is ignored when an outputting to stdout')
892 elif os
.path
.isabs(filename
):
893 self
.report_warning('--paths is ignored since an absolute path is given in output template')
894 self
.__prepare
_filename
_warned
= True
895 if filename
== '-' or not filename
:
898 homepath
= expand_path(paths
.get('home', '').strip())
899 assert isinstance(homepath
, compat_str
)
900 subdir
= expand_path(paths
.get(dir_type
, '').strip()) if dir_type
else ''
901 assert isinstance(subdir
, compat_str
)
902 return sanitize_path(os
.path
.join(homepath
, subdir
, filename
))
def _match_entry(self, info_dict, incomplete):
    """ Returns None if the file should be downloaded """

    def check_filter():
        # Returns a human-readable rejection reason, or None to accept.
        video_title = info_dict.get('title', info_dict.get('id', 'video'))
        if 'title' in info_dict:
            # This can happen when we're just evaluating the playlist
            title = info_dict['title']
            matchtitle = self.params.get('matchtitle', False)
            if matchtitle:
                if not re.search(matchtitle, title, re.IGNORECASE):
                    return '"' + title + '" title did not match pattern "' + matchtitle + '"'
            rejecttitle = self.params.get('rejecttitle', False)
            if rejecttitle:
                if re.search(rejecttitle, title, re.IGNORECASE):
                    return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
        date = info_dict.get('upload_date')
        if date is not None:
            dateRange = self.params.get('daterange', DateRange())
            if date not in dateRange:
                return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
        view_count = info_dict.get('view_count')
        if view_count is not None:
            min_views = self.params.get('min_views')
            if min_views is not None and view_count < min_views:
                return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
            max_views = self.params.get('max_views')
            if max_views is not None and view_count > max_views:
                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
        if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
            return 'Skipping "%s" because it is age restricted' % video_title
        if self.in_download_archive(info_dict):
            return '%s has already been recorded in archive' % video_title

        # The custom match_filter is only consulted for complete entries
        # (playlist evaluation passes incomplete=True).
        if not incomplete:
            match_filter = self.params.get('match_filter')
            if match_filter is not None:
                ret = match_filter(info_dict)
                if ret is not None:
                    return ret
        return None

    reason = check_filter()
    if reason is not None:
        self.to_screen('[download] ' + reason)
        # BUG FIX: the suffix previously tested here was
        # 'has already been recorded in the archive', which can never match
        # the reason actually produced above ('... recorded in archive'),
        # so --break-on-existing could never trigger.
        if reason.endswith('has already been recorded in archive') and self.params.get('break_on_existing', False):
            raise ExistingVideoReached()
        elif self.params.get('break_on_reject', False):
            raise RejectedVideoReached()
    return reason
def add_extra_info(info_dict, extra_info):
    '''Set the keys from extra_info in info dict if they are missing'''
    # NOTE(review): call sites invoke this as self.add_extra_info(a, b) with
    # two positional arguments, so on the class this is declared as a
    # staticmethod (the decorator line is outside this span).
    absent = {key: value for key, value in extra_info.items()
              if key not in info_dict}
    info_dict.update(absent)
def extract_info(self, url, download=True, ie_key=None, info_dict=None, extra_info=None,
                 process=True, force_generic_extractor=False):
    '''
    Returns a list with a dictionary for each video we find.
    If 'download', also downloads the videos.
    extra_info is a dict containing the extra values to add to each result
    '''
    # BUG FIX: extra_info previously used the mutable default `{}`, which is
    # shared across calls; None is backward compatible and safe.
    if extra_info is None:
        extra_info = {}

    # NOTE(review): several original lines were missing from this extract;
    # the control flow below was restored to match the visible statements.
    if not ie_key and force_generic_extractor:
        ie_key = 'Generic'

    if ie_key:
        ies = [self.get_info_extractor(ie_key)]
    else:
        ies = self._ies

    for ie in ies:
        if not ie.suitable(url):
            continue

        ie_key = ie.ie_key()
        ie = self.get_info_extractor(ie_key)
        if not ie.working():
            self.report_warning('The program functionality for this site has been marked as broken, '
                                'and will probably not work.')

        try:
            # Cheap archive check on the bare id before doing a full extraction
            temp_id = str_or_none(
                ie.extract_id(url) if callable(getattr(ie, 'extract_id', None))
                else ie._match_id(url))
        except (AssertionError, IndexError, AttributeError):
            temp_id = None
        if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
            self.to_screen("[%s] %s: has already been recorded in archive" % (
                ie_key, temp_id))
            break
        return self.__extract_info(url, ie, download, extra_info, process, info_dict)
    else:
        self.report_error('no suitable InfoExtractor for URL %s' % url)
def __handle_extraction_exceptions(func):
    """Decorator for extraction entry points: report expected failures
    (geo-restriction, extractor errors) instead of propagating them, while
    letting stop signals (MaxDownloadsReached etc.) through unchanged."""
    import functools  # local import: file-level import block is outside this extract

    @functools.wraps(func)  # FIX: preserve the wrapped function's metadata
    def wrapper(self, *args, **kwargs):
        try:
            return func(self, *args, **kwargs)
        except GeoRestrictedError as e:
            msg = e.msg
            if e.countries:
                msg += '\nThis video is available in %s.' % ', '.join(
                    map(ISO3166Utils.short2full, e.countries))
            msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
            self.report_error(msg)
        except ExtractorError as e:  # An error we somewhat expected
            self.report_error(compat_str(e), e.format_traceback())
        except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached):
            # Deliberate control-flow exceptions: always propagate
            raise
        except Exception as e:
            if self.params.get('ignoreerrors', False):
                self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
            else:
                raise
    return wrapper
@__handle_extraction_exceptions
def __extract_info(self, url, ie, download, extra_info, process, info_dict):
    """Run one extractor on `url` and hand its result to process_ie_result
    (unless `process` is false, in which case the raw result is returned)."""
    ie_result = ie.extract(url)
    if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
        return
    if isinstance(ie_result, list):
        # Backwards compatibility: old IE result format
        ie_result = {
            '_type': 'compat_list',
            'entries': ie_result,
        }
    if info_dict:
        # Carry over id/title supplied by the caller's info_dict, if any
        if info_dict.get('id'):
            ie_result['id'] = info_dict['id']
        if info_dict.get('title'):
            ie_result['title'] = info_dict['title']
    self.add_default_extra_info(ie_result, ie, url)
    if process:
        return self.process_ie_result(ie_result, download, extra_info)
    return ie_result
def add_default_extra_info(self, ie_result, ie, url):
    """Attach the standard extractor/URL bookkeeping fields to a result."""
    # NOTE(review): the 'webpage_url' entry restores a line missing from this
    # extract; process_ie_result reads ie_result['webpage_url'] downstream.
    defaults = {
        'extractor': ie.IE_NAME,
        'webpage_url': url,
        'webpage_url_basename': url_basename(url),
        'extractor_key': ie.ie_key(),
    }
    self.add_extra_info(ie_result, defaults)
def process_ie_result(self, ie_result, download=True, extra_info=None):
    """
    Take the result of the ie(may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    Returns the resolved ie_result.
    """
    # BUG FIX: extra_info previously used the shared mutable default `{}`.
    if extra_info is None:
        extra_info = {}

    result_type = ie_result.get('_type', 'video')

    if result_type in ('url', 'url_transparent'):
        ie_result['url'] = sanitize_url(ie_result['url'])
        extract_flat = self.params.get('extract_flat', False)
        if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
                or extract_flat is True):
            self.__forced_printings(ie_result, self.prepare_filename(ie_result), incomplete=True)
            return ie_result

    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        return self.process_video_result(ie_result, download=download)
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(ie_result['url'],
                                 download, info_dict=ie_result,
                                 ie_key=ie_result.get('ie_key'),
                                 extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        # extract_info may return None when ignoreerrors is enabled and
        # extraction failed with an error, don't crash and return early
        # here
        if not info:
            return info

        force_properties = dict(
            (k, v) for k, v in ie_result.items() if v is not None)
        for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
            if f in force_properties:
                del force_properties[f]
        new_result = info.copy()
        new_result.update(force_properties)

        # Extracted info may not be a video result (i.e.
        # info.get('_type', 'video') != video) but rather an url or
        # url_transparent. In such cases outer metadata (from ie_result)
        # should be propagated to inner one (info). For this to happen
        # _type of info should be overridden with url_transparent. This
        # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
        if new_result.get('_type') == 'url':
            new_result['_type'] = 'url_transparent'

        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    elif result_type in ('playlist', 'multi_video'):
        # Protect from infinite recursion due to recursively nested playlists
        # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
        webpage_url = ie_result['webpage_url']
        if webpage_url in self._playlist_urls:
            # BUG FIX: the fallback to the playlist id was dead code, since
            # '%' binds tighter than 'or' and the formatted string
            # ('... playlist: None') is always truthy. Parenthesize the
            # title/id choice so the id is actually used when title is unset.
            self.to_screen(
                '[download] Skipping already downloaded playlist: %s'
                % (ie_result.get('title') or ie_result.get('id')))
            return

        self._playlist_level += 1
        self._playlist_urls.add(webpage_url)
        try:
            return self.__process_playlist(ie_result, download)
        finally:
            self._playlist_level -= 1
            if not self._playlist_level:
                self._playlist_urls.clear()
    elif result_type == 'compat_list':
        self.report_warning(
            'Extractor %s returned a compat_list result. '
            'It needs to be updated.' % ie_result.get('extractor'))

        def _fixup(r):
            self.add_extra_info(
                r,
                {
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                })
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)
def __process_playlist(self, ie_result, download):
    # We process each entry in the playlist.
    # NOTE(review): many original lines are missing from this extract; the
    # surrounding structure was restored from the visible statements.
    playlist = ie_result.get('title') or ie_result.get('id')
    self.to_screen('[download] Downloading playlist: %s' % playlist)

    if self.params.get('allow_playlist_files', True):
        # Build a copy carrying playlist_* fields so prepare_filename can
        # expand playlist-related template fields.
        ie_copy = {
            'playlist': playlist,
            'playlist_id': ie_result.get('id'),
            'playlist_title': ie_result.get('title'),
            'playlist_uploader': ie_result.get('uploader'),
            'playlist_uploader_id': ie_result.get('uploader_id'),
        }
        ie_copy.update(dict(ie_result))

        def ensure_dir_exists(path):
            return make_dir(path, self.report_error)

        if self.params.get('writeinfojson', False):
            infofn = self.prepare_filename(ie_copy, 'pl_infojson')
            if not ensure_dir_exists(encodeFilename(infofn)):
                return
            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
                self.to_screen('[info] Playlist metadata is already present')
            else:
                playlist_info = dict(ie_result)
                # playlist_info['entries'] = list(playlist_info['entries']) # Entries is a generator which shouldnot be resolved here
                del playlist_info['entries']
                self.to_screen('[info] Writing playlist metadata as JSON to: ' + infofn)
                try:
                    write_json_file(self.filter_requested_info(playlist_info), infofn)
                except (OSError, IOError):
                    self.report_error('Cannot write playlist metadata to JSON file ' + infofn)

        if self.params.get('writedescription', False):
            descfn = self.prepare_filename(ie_copy, 'pl_description')
            if not ensure_dir_exists(encodeFilename(descfn)):
                return
            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
                self.to_screen('[info] Playlist description is already present')
            elif ie_result.get('description') is None:
                self.report_warning('There\'s no playlist description to write.')
            else:
                try:
                    self.to_screen('[info] Writing playlist description to: ' + descfn)
                    with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                        descfile.write(ie_result['description'])
                except (OSError, IOError):
                    self.report_error('Cannot write playlist description file ' + descfn)
                    return

    playlist_results = []

    playliststart = self.params.get('playliststart', 1) - 1
    playlistend = self.params.get('playlistend')
    # For backwards compatibility, interpret -1 as whole list
    if playlistend == -1:
        playlistend = None

    playlistitems_str = self.params.get('playlist_items')
    playlistitems = None
    if playlistitems_str is not None:
        def iter_playlistitems(format):
            for string_segment in format.split(','):
                if '-' in string_segment:
                    start, end = string_segment.split('-')
                    for item in range(int(start), int(end) + 1):
                        yield int(item)
                else:
                    yield int(string_segment)
        playlistitems = orderedSet(iter_playlistitems(playlistitems_str))

    ie_entries = ie_result['entries']

    def make_playlistitems_entries(list_ie_entries):
        # 1-based indices, negative indices count from the end.
        num_entries = len(list_ie_entries)
        return [
            list_ie_entries[i - 1] for i in playlistitems
            if -num_entries <= i - 1 < num_entries]

    def report_download(num_entries):
        self.to_screen(
            '[%s] playlist %s: Downloading %d videos' %
            (ie_result['extractor'], playlist, num_entries))

    if isinstance(ie_entries, list):
        n_all_entries = len(ie_entries)
        if playlistitems:
            entries = make_playlistitems_entries(ie_entries)
        else:
            entries = ie_entries[playliststart:playlistend]
        n_entries = len(entries)
        self.to_screen(
            '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
            (ie_result['extractor'], playlist, n_all_entries, n_entries))
    elif isinstance(ie_entries, PagedList):
        if playlistitems:
            entries = []
            for item in playlistitems:
                entries.extend(ie_entries.getslice(
                    item - 1, item))
        else:
            entries = ie_entries.getslice(
                playliststart, playlistend)
        n_entries = len(entries)
        report_download(n_entries)
    else:  # plain iterable: slice lazily
        if playlistitems:
            entries = make_playlistitems_entries(list(itertools.islice(
                ie_entries, 0, max(playlistitems))))
        else:
            entries = list(itertools.islice(
                ie_entries, playliststart, playlistend))
        n_entries = len(entries)
        report_download(n_entries)

    if self.params.get('playlistreverse', False):
        entries = entries[::-1]

    if self.params.get('playlistrandom', False):
        random.shuffle(entries)

    x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

    for i, entry in enumerate(entries, 1):
        self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
        # This __x_forwarded_for_ip thing is a bit ugly but requires
        # minimal changes
        if x_forwarded_for:
            entry['__x_forwarded_for_ip'] = x_forwarded_for
        extra = {
            'n_entries': n_entries,
            'playlist': playlist,
            'playlist_id': ie_result.get('id'),
            'playlist_title': ie_result.get('title'),
            'playlist_uploader': ie_result.get('uploader'),
            'playlist_uploader_id': ie_result.get('uploader_id'),
            'playlist_index': playlistitems[i - 1] if playlistitems else i + playliststart,
            'extractor': ie_result['extractor'],
            'webpage_url': ie_result['webpage_url'],
            'webpage_url_basename': url_basename(ie_result['webpage_url']),
            'extractor_key': ie_result['extractor_key'],
        }

        if self._match_entry(entry, incomplete=True) is not None:
            continue

        entry_result = self.__process_iterable_entry(entry, download, extra)
        # TODO: skip failed (empty) entries?
        playlist_results.append(entry_result)

    ie_result['entries'] = playlist_results
    self.to_screen('[download] Finished downloading playlist: %s' % playlist)
    return ie_result
@__handle_extraction_exceptions
def __process_iterable_entry(self, entry, download, extra_info):
    """Resolve one playlist entry; extraction failures are reported (not
    raised) by the __handle_extraction_exceptions decorator."""
    result = self.process_ie_result(
        entry, download=download, extra_info=extra_info)
    return result
def _build_format_filter(self, filter_spec):
    " Returns a function to filter the formats according to the filter_spec "
    # NOTE(review): the numeric OPERATORS table was missing from this extract;
    # reconstructed from the visible regex keys and `op = OPERATORS[...]`.
    OPERATORS = {
        '<': operator.lt,
        '<=': operator.le,
        '>': operator.gt,
        '>=': operator.ge,
        '=': operator.eq,
        '!=': operator.ne,
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
        \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
        $
        ''' % '|'.join(map(re.escape, OPERATORS.keys())))
    m = operator_rex.search(filter_spec)
    if m:
        try:
            comparison_value = int(m.group('value'))
        except ValueError:
            # Not a plain integer: try to parse it as a size ('500K', '2MiB')
            comparison_value = parse_filesize(m.group('value'))
            if comparison_value is None:
                comparison_value = parse_filesize(m.group('value') + 'B')
            if comparison_value is None:
                raise ValueError(
                    'Invalid value %r in format specification %r' % (
                        m.group('value'), filter_spec))
        op = OPERATORS[m.group('op')]

    if not m:
        STR_OPERATORS = {
            '=': operator.eq,
            '^=': lambda attr, value: attr.startswith(value),
            '$=': lambda attr, value: attr.endswith(value),
            '*=': lambda attr, value: value in attr,
        }
        str_operator_rex = re.compile(r'''(?x)
            \s*(?P<key>[a-zA-Z0-9._-]+)
            \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
            \s*(?P<value>[a-zA-Z0-9._-]+)
            \s*$
            ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
        m = str_operator_rex.search(filter_spec)
        if m:
            comparison_value = m.group('value')
            str_op = STR_OPERATORS[m.group('op')]
            if m.group('negation'):
                op = lambda attr, value: not str_op(attr, value)
            else:
                op = str_op

    if not m:
        raise ValueError('Invalid filter specification %r' % filter_spec)

    def _filter(f):
        actual_value = f.get(m.group('key'))
        if actual_value is None:
            # '?' after the operator makes formats without the key pass
            return m.group('none_inclusive')
        return op(actual_value, comparison_value)
    return _filter
def _default_format_spec(self, info_dict, download=True):
    """Pick the default format selector string for this download.

    Prefers the pre-merged 'best' when merging is impossible or pointless
    (simulation, no download, live stream, output to stdout, no ffmpeg
    merger available); otherwise requests separate video+audio streams.
    """

    def can_merge():
        merger = FFmpegMergerPP(self)
        return merger.available and merger.can_merge()

    prefer_best = (
        not self.params.get('simulate', False)
        and download
        and (
            not can_merge()
            or info_dict.get('is_live', False)
            or self.outtmpl_dict['default'] == '-'))

    if prefer_best:
        return 'best/bestvideo+bestaudio'
    if not self.params.get('allow_multiple_audio_streams', False):
        return 'bestvideo*+bestaudio/best'
    return 'bestvideo+bestaudio/best'
def build_format_selector(self, format_spec):
    """Compile a format selector expression (e.g. 'bestvideo+bestaudio/best')
    into a function mapping a ctx dict ({'formats', 'incomplete_formats', ...})
    to an iterable of selected formats.

    NOTE(review): a number of original lines are missing from this extract;
    the parser/selector structure was restored around the visible statements.
    """
    def syntax_error(note, start):
        message = (
            'Invalid format specification: '
            '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
        return SyntaxError(message)

    PICKFIRST = 'PICKFIRST'
    MERGE = 'MERGE'
    SINGLE = 'SINGLE'
    GROUP = 'GROUP'
    FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

    allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
                              'video': self.params.get('allow_multiple_video_streams', False)}

    def _parse_filter(tokens):
        filter_parts = []
        for type, string, start, _, _ in tokens:
            if type == tokenize.OP and string == ']':
                return ''.join(filter_parts)
            else:
                filter_parts.append(string)

    def _remove_unused_ops(tokens):
        # Remove operators that we don't use and join them with the surrounding strings
        # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
        ALLOWED_OPS = ('/', '+', ',', '(', ')')
        last_string, last_start, last_end, last_line = None, None, None, None
        for type, string, start, end, line in tokens:
            if type == tokenize.OP and string == '[':
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield type, string, start, end, line
                # everything inside brackets will be handled by _parse_filter
                for type, string, start, end, line in tokens:
                    yield type, string, start, end, line
                    if type == tokenize.OP and string == ']':
                        break
            elif type == tokenize.OP and string in ALLOWED_OPS:
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield type, string, start, end, line
            elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                if not last_string:
                    last_string = string
                    last_start = start
                    last_end = end
                else:
                    last_string += string
        if last_string:
            yield tokenize.NAME, last_string, last_start, last_end, last_line

    def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
        selectors = []
        current_selector = None
        for type, string, start, _, _ in tokens:
            # ENCODING is only defined in python 3.x
            if type == getattr(tokenize, 'ENCODING', None):
                continue
            elif type in [tokenize.NAME, tokenize.NUMBER]:
                current_selector = FormatSelector(SINGLE, string, [])
            elif type == tokenize.OP:
                if string == ')':
                    if not inside_group:
                        # ')' will be handled by the parentheses group
                        tokens.restore_last_token()
                    break
                elif inside_merge and string in ['/', ',']:
                    tokens.restore_last_token()
                    break
                elif inside_choice and string == ',':
                    tokens.restore_last_token()
                    break
                elif string == ',':
                    if not current_selector:
                        raise syntax_error('"," must follow a format selector', start)
                    selectors.append(current_selector)
                    current_selector = None
                elif string == '/':
                    if not current_selector:
                        raise syntax_error('"/" must follow a format selector', start)
                    first_choice = current_selector
                    second_choice = _parse_format_selection(tokens, inside_choice=True)
                    current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                elif string == '[':
                    if not current_selector:
                        current_selector = FormatSelector(SINGLE, 'best', [])
                    format_filter = _parse_filter(tokens)
                    current_selector.filters.append(format_filter)
                elif string == '(':
                    if current_selector:
                        raise syntax_error('Unexpected "("', start)
                    group = _parse_format_selection(tokens, inside_group=True)
                    current_selector = FormatSelector(GROUP, group, [])
                elif string == '+':
                    if not current_selector:
                        raise syntax_error('Unexpected "+"', start)
                    selector_1 = current_selector
                    selector_2 = _parse_format_selection(tokens, inside_merge=True)
                    if not selector_2:
                        raise syntax_error('Expected a selector', start)
                    current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                else:
                    raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
            elif type == tokenize.ENDMARKER:
                break
        if current_selector:
            selectors.append(current_selector)
        return selectors

    def _build_selector_function(selector):
        if isinstance(selector, list):  # ,
            fs = [_build_selector_function(s) for s in selector]

            def selector_function(ctx):
                for f in fs:
                    for format in f(ctx):
                        yield format
            return selector_function

        elif selector.type == GROUP:  # ()
            selector_function = _build_selector_function(selector.selector)

        elif selector.type == PICKFIRST:  # /
            fs = [_build_selector_function(s) for s in selector.selector]

            def selector_function(ctx):
                for f in fs:
                    picked_formats = list(f(ctx))
                    if picked_formats:
                        return picked_formats
                return []

        elif selector.type == SINGLE:  # atom
            format_spec = selector.selector if selector.selector is not None else 'best'

            if format_spec == 'all':
                def selector_function(ctx):
                    formats = list(ctx['formats'])
                    if formats:
                        for f in formats:
                            yield f
            else:
                format_fallback = False
                format_spec_obj = re.match(r'(best|worst|b|w)(video|audio|v|a)?(\*)?$', format_spec)
                if format_spec_obj is not None:
                    format_idx = 0 if format_spec_obj.group(1)[0] == 'w' else -1
                    format_type = format_spec_obj.group(2)[0] if format_spec_obj.group(2) else False
                    not_format_type = 'v' if format_type == 'a' else 'a'
                    format_modified = format_spec_obj.group(3) is not None

                    format_fallback = not format_type and not format_modified  # for b, w
                    filter_f = ((lambda f: f.get(format_type + 'codec') != 'none')
                                if format_type and format_modified  # bv*, ba*, wv*, wa*
                                else (lambda f: f.get(not_format_type + 'codec') == 'none')
                                if format_type  # bv, ba, wv, wa
                                else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                                if not format_modified  # b, w
                                else None)  # b*, w*
                else:
                    format_idx = -1
                    filter_f = ((lambda f: f.get('ext') == format_spec)
                                if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']  # extension
                                else (lambda f: f.get('format_id') == format_spec))  # id

                def selector_function(ctx):
                    formats = list(ctx['formats'])
                    if not formats:
                        return
                    matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                    if matches:
                        yield matches[format_idx]
                    elif format_fallback == 'force' or (format_fallback and ctx['incomplete_formats']):
                        # for extractors with incomplete formats (audio only (soundcloud)
                        # or video only (imgur)) best/worst will fallback to
                        # best/worst {video,audio}-only format
                        yield formats[format_idx]

        elif selector.type == MERGE:  # +
            def _merge(formats_pair):
                format_1, format_2 = formats_pair

                formats_info = []
                formats_info.extend(format_1.get('requested_formats', (format_1,)))
                formats_info.extend(format_2.get('requested_formats', (format_2,)))

                if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
                    get_no_more = {"video": False, "audio": False}
                    for (i, fmt_info) in enumerate(formats_info):
                        for aud_vid in ["audio", "video"]:
                            if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
                                if get_no_more[aud_vid]:
                                    formats_info.pop(i)
                                get_no_more[aud_vid] = True

                if len(formats_info) == 1:
                    return formats_info[0]

                video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
                audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']

                the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
                the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None

                output_ext = self.params.get('merge_output_format')
                if not output_ext:
                    if the_only_video:
                        output_ext = the_only_video['ext']
                    elif the_only_audio and not video_fmts:
                        output_ext = the_only_audio['ext']
                    else:
                        output_ext = 'mkv'

                new_dict = {
                    'requested_formats': formats_info,
                    'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
                    'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
                    'ext': output_ext,
                }

                if the_only_video:
                    new_dict.update({
                        'width': the_only_video.get('width'),
                        'height': the_only_video.get('height'),
                        'resolution': the_only_video.get('resolution'),
                        'fps': the_only_video.get('fps'),
                        'vcodec': the_only_video.get('vcodec'),
                        'vbr': the_only_video.get('vbr'),
                        'stretched_ratio': the_only_video.get('stretched_ratio'),
                    })

                if the_only_audio:
                    new_dict.update({
                        'acodec': the_only_audio.get('acodec'),
                        'abr': the_only_audio.get('abr'),
                    })

                return new_dict

            selector_1, selector_2 = map(_build_selector_function, selector.selector)

            def selector_function(ctx):
                for pair in itertools.product(
                        selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
                    yield _merge(pair)

        filters = [self._build_format_filter(f) for f in selector.filters]

        def final_selector(ctx):
            ctx_copy = copy.deepcopy(ctx)
            for _filter in filters:
                ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
            return selector_function(ctx_copy)
        return final_selector

    stream = io.BytesIO(format_spec.encode('utf-8'))
    try:
        tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
    except tokenize.TokenError:
        raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

    class TokenIterator(object):
        def __init__(self, tokens):
            self.tokens = tokens
            self.counter = 0

        def __iter__(self):
            return self

        def __next__(self):
            if self.counter >= len(self.tokens):
                raise StopIteration()
            value = self.tokens[self.counter]
            self.counter += 1
            return value

        next = __next__

        def restore_last_token(self):
            self.counter -= 1

    parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
    return _build_selector_function(parsed_selector)
def _calc_headers(self, info_dict):
    """Build the HTTP headers for a download: std_headers overlaid with the
    format's own http_headers, plus Cookie and X-Forwarded-For if known."""
    res = std_headers.copy()

    add_headers = info_dict.get('http_headers')
    if add_headers:
        res.update(add_headers)

    cookies = self._calc_cookies(info_dict)
    if cookies:
        res['Cookie'] = cookies

    if 'X-Forwarded-For' not in res:
        x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
        if x_forwarded_for_ip:
            res['X-Forwarded-For'] = x_forwarded_for_ip

    return res
def _calc_cookies(self, info_dict):
    """Return the Cookie header value the cookiejar would send for this URL
    (or None if no cookies apply)."""
    request = sanitized_Request(info_dict['url'])
    self.cookiejar.add_cookie_header(request)
    return request.get_header('Cookie')
1710 def process_video_result(self
, info_dict
, download
=True):
1711 assert info_dict
.get('_type', 'video') == 'video'
1713 if 'id' not in info_dict
:
1714 raise ExtractorError('Missing "id" field in extractor result')
1715 if 'title' not in info_dict
:
1716 raise ExtractorError('Missing "title" field in extractor result')
1718 def report_force_conversion(field
, field_not
, conversion
):
1719 self
.report_warning(
1720 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
1721 % (field
, field_not
, conversion
))
1723 def sanitize_string_field(info
, string_field
):
1724 field
= info
.get(string_field
)
1725 if field
is None or isinstance(field
, compat_str
):
1727 report_force_conversion(string_field
, 'a string', 'string')
1728 info
[string_field
] = compat_str(field
)
1730 def sanitize_numeric_fields(info
):
1731 for numeric_field
in self
._NUMERIC
_FIELDS
:
1732 field
= info
.get(numeric_field
)
1733 if field
is None or isinstance(field
, compat_numeric_types
):
1735 report_force_conversion(numeric_field
, 'numeric', 'int')
1736 info
[numeric_field
] = int_or_none(field
)
1738 sanitize_string_field(info_dict
, 'id')
1739 sanitize_numeric_fields(info_dict
)
1741 if 'playlist' not in info_dict
:
1742 # It isn't part of a playlist
1743 info_dict
['playlist'] = None
1744 info_dict
['playlist_index'] = None
1746 thumbnails
= info_dict
.get('thumbnails')
1747 if thumbnails
is None:
1748 thumbnail
= info_dict
.get('thumbnail')
1750 info_dict
['thumbnails'] = thumbnails
= [{'url': thumbnail}
]
1752 thumbnails
.sort(key
=lambda t
: (
1753 t
.get('preference') if t
.get('preference') is not None else -1,
1754 t
.get('width') if t
.get('width') is not None else -1,
1755 t
.get('height') if t
.get('height') is not None else -1,
1756 t
.get('id') if t
.get('id') is not None else '', t
.get('url')))
1757 for i
, t
in enumerate(thumbnails
):
1758 t
['url'] = sanitize_url(t
['url'])
1759 if t
.get('width') and t
.get('height'):
1760 t
['resolution'] = '%dx%d' % (t
['width'], t
['height'])
1761 if t
.get('id') is None:
1764 if self
.params
.get('list_thumbnails'):
1765 self
.list_thumbnails(info_dict
)
1768 thumbnail
= info_dict
.get('thumbnail')
1770 info_dict
['thumbnail'] = sanitize_url(thumbnail
)
1772 info_dict
['thumbnail'] = thumbnails
[-1]['url']
1774 if 'display_id' not in info_dict
and 'id' in info_dict
:
1775 info_dict
['display_id'] = info_dict
['id']
1777 if info_dict
.get('upload_date') is None and info_dict
.get('timestamp') is not None:
1778 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
1779 # see http://bugs.python.org/issue1646728)
1781 upload_date
= datetime
.datetime
.utcfromtimestamp(info_dict
['timestamp'])
1782 info_dict
['upload_date'] = upload_date
.strftime('%Y%m%d')
1783 except (ValueError, OverflowError, OSError):
1786 # Auto generate title fields corresponding to the *_number fields when missing
1787 # in order to always have clean titles. This is very common for TV series.
1788 for field
in ('chapter', 'season', 'episode'):
1789 if info_dict
.get('%s_number' % field
) is not None and not info_dict
.get(field
):
1790 info_dict
[field
] = '%s %d' % (field
.capitalize(), info_dict
['%s_number' % field
])
1792 for cc_kind
in ('subtitles', 'automatic_captions'):
1793 cc
= info_dict
.get(cc_kind
)
1795 for _
, subtitle
in cc
.items():
1796 for subtitle_format
in subtitle
:
1797 if subtitle_format
.get('url'):
1798 subtitle_format
['url'] = sanitize_url(subtitle_format
['url'])
1799 if subtitle_format
.get('ext') is None:
1800 subtitle_format
['ext'] = determine_ext(subtitle_format
['url']).lower()
1802 automatic_captions
= info_dict
.get('automatic_captions')
1803 subtitles
= info_dict
.get('subtitles')
1805 if self
.params
.get('listsubtitles', False):
1806 if 'automatic_captions' in info_dict
:
1807 self
.list_subtitles(
1808 info_dict
['id'], automatic_captions
, 'automatic captions')
1809 self
.list_subtitles(info_dict
['id'], subtitles
, 'subtitles')
1812 info_dict
['requested_subtitles'] = self
.process_subtitles(
1813 info_dict
['id'], subtitles
, automatic_captions
)
1815 # We now pick which formats have to be downloaded
1816 if info_dict
.get('formats') is None:
1817 # There's only one format available
1818 formats
= [info_dict
]
1820 formats
= info_dict
['formats']
1823 raise ExtractorError('No video formats found!')
1825 def is_wellformed(f
):
1828 self
.report_warning(
1829 '"url" field is missing or empty - skipping format, '
1830 'there is an error in extractor')
1832 if isinstance(url
, bytes):
1833 sanitize_string_field(f
, 'url')
1836 # Filter out malformed formats for better extraction robustness
1837 formats
= list(filter(is_wellformed
, formats
))
1841 # We check that all the formats have the format and format_id fields
1842 for i
, format
in enumerate(formats
):
1843 sanitize_string_field(format
, 'format_id')
1844 sanitize_numeric_fields(format
)
1845 format
['url'] = sanitize_url(format
['url'])
1846 if not format
.get('format_id'):
1847 format
['format_id'] = compat_str(i
)
1849 # Sanitize format_id from characters used in format selector expression
1850 format
['format_id'] = re
.sub(r
'[\s,/+\[\]()]', '_', format
['format_id'])
1851 format_id
= format
['format_id']
1852 if format_id
not in formats_dict
:
1853 formats_dict
[format_id
] = []
1854 formats_dict
[format_id
].append(format
)
1856 # Make sure all formats have unique format_id
1857 for format_id
, ambiguous_formats
in formats_dict
.items():
1858 if len(ambiguous_formats
) > 1:
1859 for i
, format
in enumerate(ambiguous_formats
):
1860 format
['format_id'] = '%s-%d' % (format_id
, i
)
1862 for i
, format
in enumerate(formats
):
1863 if format
.get('format') is None:
1864 format
['format'] = '{id} - {res}{note}'.format(
1865 id=format
['format_id'],
1866 res
=self
.format_resolution(format
),
1867 note
=' ({0})'.format(format
['format_note']) if format
.get('format_note') is not None else '',
1869 # Automatically determine file extension if missing
1870 if format
.get('ext') is None:
1871 format
['ext'] = determine_ext(format
['url']).lower()
1872 # Automatically determine protocol if missing (useful for format
1873 # selection purposes)
1874 if format
.get('protocol') is None:
1875 format
['protocol'] = determine_protocol(format
)
1876 # Add HTTP headers, so that external programs can use them from the
1878 full_format_info
= info_dict
.copy()
1879 full_format_info
.update(format
)
1880 format
['http_headers'] = self
._calc
_headers
(full_format_info
)
1881 # Remove private housekeeping stuff
1882 if '__x_forwarded_for_ip' in info_dict
:
1883 del info_dict
['__x_forwarded_for_ip']
1885 # TODO Central sorting goes here
1887 if formats
[0] is not info_dict
:
1888 # only set the 'formats' fields if the original info_dict list them
1889 # otherwise we end up with a circular reference, the first (and unique)
1890 # element in the 'formats' field in info_dict is info_dict itself,
1891 # which can't be exported to json
1892 info_dict
['formats'] = formats
1893 if self
.params
.get('listformats'):
1894 self
.list_formats(info_dict
)
1897 req_format
= self
.params
.get('format')
1898 if req_format
is None:
1899 req_format
= self
._default
_format
_spec
(info_dict
, download
=download
)
1900 if self
.params
.get('verbose'):
1901 self
.to_screen('[debug] Default format spec: %s' % req_format
)
1903 format_selector
= self
.build_format_selector(req_format
)
1905 # While in format selection we may need to have an access to the original
1906 # format set in order to calculate some metrics or do some processing.
1907 # For now we need to be able to guess whether original formats provided
1908 # by extractor are incomplete or not (i.e. whether extractor provides only
1909 # video-only or audio-only formats) for proper formats selection for
1910 # extractors with such incomplete formats (see
1911 # https://github.com/ytdl-org/youtube-dl/pull/5556).
1912 # Since formats may be filtered during format selection and may not match
1913 # the original formats the results may be incorrect. Thus original formats
1914 # or pre-calculated metrics should be passed to format selection routines
1916 # We will pass a context object containing all necessary additional data
1917 # instead of just formats.
1918 # This fixes incorrect format selection issue (see
1919 # https://github.com/ytdl-org/youtube-dl/issues/10083).
1920 incomplete_formats
= (
1921 # All formats are video-only or
1922 all(f
.get('vcodec') != 'none' and f
.get('acodec') == 'none' for f
in formats
)
1923 # all formats are audio-only
1924 or all(f
.get('vcodec') == 'none' and f
.get('acodec') != 'none' for f
in formats
))
1928 'incomplete_formats': incomplete_formats
,
1931 formats_to_download
= list(format_selector(ctx
))
1932 if not formats_to_download
:
1933 raise ExtractorError('requested format not available',
1937 self
.to_screen('[info] Downloading format(s) %s' % ", ".join([f
['format_id'] for f
in formats_to_download
]))
1938 if len(formats_to_download
) > 1:
1939 self
.to_screen('[info] %s: downloading video in %s formats' % (info_dict
['id'], len(formats_to_download
)))
1940 for format
in formats_to_download
:
1941 new_info
= dict(info_dict
)
1942 new_info
.update(format
)
1943 self
.process_info(new_info
)
1944 # We update the info dict with the best quality format (backwards compatibility)
1945 info_dict
.update(formats_to_download
[-1])
1948 def process_subtitles(self
, video_id
, normal_subtitles
, automatic_captions
):
1949 """Select the requested subtitles and their format"""
1951 if normal_subtitles
and self
.params
.get('writesubtitles'):
1952 available_subs
.update(normal_subtitles
)
1953 if automatic_captions
and self
.params
.get('writeautomaticsub'):
1954 for lang
, cap_info
in automatic_captions
.items():
1955 if lang
not in available_subs
:
1956 available_subs
[lang
] = cap_info
1958 if (not self
.params
.get('writesubtitles') and not
1959 self
.params
.get('writeautomaticsub') or not
1963 if self
.params
.get('allsubtitles', False):
1964 requested_langs
= available_subs
.keys()
1966 if self
.params
.get('subtitleslangs', False):
1967 requested_langs
= self
.params
.get('subtitleslangs')
1968 elif 'en' in available_subs
:
1969 requested_langs
= ['en']
1971 requested_langs
= [list(available_subs
.keys())[0]]
1973 formats_query
= self
.params
.get('subtitlesformat', 'best')
1974 formats_preference
= formats_query
.split('/') if formats_query
else []
1976 for lang
in requested_langs
:
1977 formats
= available_subs
.get(lang
)
1979 self
.report_warning('%s subtitles not available for %s' % (lang
, video_id
))
1981 for ext
in formats_preference
:
1985 matches
= list(filter(lambda f
: f
['ext'] == ext
, formats
))
1991 self
.report_warning(
1992 'No subtitle format found matching "%s" for language %s, '
1993 'using %s' % (formats_query
, lang
, f
['ext']))
1997 def __forced_printings(self
, info_dict
, filename
, incomplete
):
1998 def print_mandatory(field
):
1999 if (self
.params
.get('force%s' % field
, False)
2000 and (not incomplete
or info_dict
.get(field
) is not None)):
2001 self
.to_stdout(info_dict
[field
])
2003 def print_optional(field
):
2004 if (self
.params
.get('force%s' % field
, False)
2005 and info_dict
.get(field
) is not None):
2006 self
.to_stdout(info_dict
[field
])
2008 print_mandatory('title')
2009 print_mandatory('id')
2010 if self
.params
.get('forceurl', False) and not incomplete
:
2011 if info_dict
.get('requested_formats') is not None:
2012 for f
in info_dict
['requested_formats']:
2013 self
.to_stdout(f
['url'] + f
.get('play_path', ''))
2015 # For RTMP URLs, also include the playpath
2016 self
.to_stdout(info_dict
['url'] + info_dict
.get('play_path', ''))
2017 print_optional('thumbnail')
2018 print_optional('description')
2019 if self
.params
.get('forcefilename', False) and filename
is not None:
2020 self
.to_stdout(filename
)
2021 if self
.params
.get('forceduration', False) and info_dict
.get('duration') is not None:
2022 self
.to_stdout(formatSeconds(info_dict
['duration']))
2023 print_mandatory('format')
2024 if self
.params
.get('forcejson', False):
2025 self
.to_stdout(json
.dumps(info_dict
))
2027 def process_info(self
, info_dict
):
2028 """Process a single resolved IE result."""
2030 assert info_dict
.get('_type', 'video') == 'video'
2032 info_dict
.setdefault('__postprocessors', [])
2034 max_downloads
= self
.params
.get('max_downloads')
2035 if max_downloads
is not None:
2036 if self
._num
_downloads
>= int(max_downloads
):
2037 raise MaxDownloadsReached()
2039 # TODO: backward compatibility, to be removed
2040 info_dict
['fulltitle'] = info_dict
['title']
2042 if 'format' not in info_dict
:
2043 info_dict
['format'] = info_dict
['ext']
2045 if self
._match
_entry
(info_dict
, incomplete
=False) is not None:
2048 self
._num
_downloads
+= 1
2050 info_dict
= self
.pre_process(info_dict
)
2052 info_dict
['_filename'] = full_filename
= self
.prepare_filename(info_dict
, warn
=True)
2053 temp_filename
= self
.prepare_filename(info_dict
, 'temp')
2055 skip_dl
= self
.params
.get('skip_download', False)
2058 self
.__forced
_printings
(info_dict
, full_filename
, incomplete
=False)
2060 if self
.params
.get('simulate', False):
2061 if self
.params
.get('force_write_download_archive', False):
2062 self
.record_download_archive(info_dict
)
2064 # Do nothing else if in simulate mode
2067 if full_filename
is None:
2070 def ensure_dir_exists(path
):
2071 return make_dir(path
, self
.report_error
)
2073 if not ensure_dir_exists(encodeFilename(full_filename
)):
2075 if not ensure_dir_exists(encodeFilename(temp_filename
)):
2078 if self
.params
.get('writedescription', False):
2079 descfn
= self
.prepare_filename(info_dict
, 'description')
2080 if not ensure_dir_exists(encodeFilename(descfn
)):
2082 if not self
.params
.get('overwrites', True) and os
.path
.exists(encodeFilename(descfn
)):
2083 self
.to_screen('[info] Video description is already present')
2084 elif info_dict
.get('description') is None:
2085 self
.report_warning('There\'s no description to write.')
2088 self
.to_screen('[info] Writing video description to: ' + descfn
)
2089 with io
.open(encodeFilename(descfn
), 'w', encoding
='utf-8') as descfile
:
2090 descfile
.write(info_dict
['description'])
2091 except (OSError, IOError):
2092 self
.report_error('Cannot write description file ' + descfn
)
2095 if self
.params
.get('writeannotations', False):
2096 annofn
= self
.prepare_filename(info_dict
, 'annotation')
2097 if not ensure_dir_exists(encodeFilename(annofn
)):
2099 if not self
.params
.get('overwrites', True) and os
.path
.exists(encodeFilename(annofn
)):
2100 self
.to_screen('[info] Video annotations are already present')
2101 elif not info_dict
.get('annotations'):
2102 self
.report_warning('There are no annotations to write.')
2105 self
.to_screen('[info] Writing video annotations to: ' + annofn
)
2106 with io
.open(encodeFilename(annofn
), 'w', encoding
='utf-8') as annofile
:
2107 annofile
.write(info_dict
['annotations'])
2108 except (KeyError, TypeError):
2109 self
.report_warning('There are no annotations to write.')
2110 except (OSError, IOError):
2111 self
.report_error('Cannot write annotations file: ' + annofn
)
2114 def dl(name
, info
, subtitle
=False):
2115 fd
= get_suitable_downloader(info
, self
.params
)(self
, self
.params
)
2116 for ph
in self
._progress
_hooks
:
2117 fd
.add_progress_hook(ph
)
2118 if self
.params
.get('verbose'):
2119 self
.to_screen('[debug] Invoking downloader on %r' % info
.get('url'))
2120 return fd
.download(name
, info
, subtitle
)
2122 subtitles_are_requested
= any([self
.params
.get('writesubtitles', False),
2123 self
.params
.get('writeautomaticsub')])
2125 if subtitles_are_requested
and info_dict
.get('requested_subtitles'):
2126 # subtitles download errors are already managed as troubles in relevant IE
2127 # that way it will silently go on when used with unsupporting IE
2128 subtitles
= info_dict
['requested_subtitles']
2129 # ie = self.get_info_extractor(info_dict['extractor_key'])
2130 for sub_lang
, sub_info
in subtitles
.items():
2131 sub_format
= sub_info
['ext']
2132 sub_fn
= self
.prepare_filename(info_dict
, 'subtitle')
2133 sub_filename
= subtitles_filename(
2134 temp_filename
if not skip_dl
else sub_fn
,
2135 sub_lang
, sub_format
, info_dict
.get('ext'))
2136 sub_filename_final
= subtitles_filename(sub_fn
, sub_lang
, sub_format
, info_dict
.get('ext'))
2137 if not self
.params
.get('overwrites', True) and os
.path
.exists(encodeFilename(sub_filename
)):
2138 self
.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang
, sub_format
))
2139 files_to_move
[sub_filename
] = sub_filename_final
2141 self
.to_screen('[info] Writing video subtitles to: ' + sub_filename
)
2142 if sub_info
.get('data') is not None:
2144 # Use newline='' to prevent conversion of newline characters
2145 # See https://github.com/ytdl-org/youtube-dl/issues/10268
2146 with io
.open(encodeFilename(sub_filename
), 'w', encoding
='utf-8', newline
='') as subfile
:
2147 subfile
.write(sub_info
['data'])
2148 files_to_move
[sub_filename
] = sub_filename_final
2149 except (OSError, IOError):
2150 self
.report_error('Cannot write subtitles file ' + sub_filename
)
2154 dl(sub_filename
, sub_info
, subtitle
=True)
2156 if self.params.get('sleep_interval_subtitles', False):
2157 dl(sub_filename, sub_info)
2159 sub_data = ie._request_webpage(
2160 sub_info['url'], info_dict['id'], note=False).read()
2161 with io.open(encodeFilename(sub_filename), 'wb') as subfile:
2162 subfile.write(sub_data)
2164 files_to_move
[sub_filename
] = sub_filename_final
2165 except (ExtractorError
, IOError, OSError, ValueError, compat_urllib_error
.URLError
, compat_http_client
.HTTPException
, socket
.error
) as err
:
2166 self
.report_warning('Unable to download subtitle for "%s": %s' %
2167 (sub_lang
, error_to_compat_str(err
)))
2171 if self
.params
.get('convertsubtitles', False):
2172 # subconv = FFmpegSubtitlesConvertorPP(self, format=self.params.get('convertsubtitles'))
2173 filename_real_ext
= os
.path
.splitext(full_filename
)[1][1:]
2175 os
.path
.splitext(full_filename
)[0]
2176 if filename_real_ext
== info_dict
['ext']
2178 afilename
= '%s.%s' % (filename_wo_ext
, self
.params
.get('convertsubtitles'))
2179 # if subconv.available:
2180 # info_dict['__postprocessors'].append(subconv)
2181 if os
.path
.exists(encodeFilename(afilename
)):
2183 '[download] %s has already been downloaded and '
2184 'converted' % afilename
)
2187 self
.post_process(full_filename
, info_dict
, files_to_move
)
2188 except PostProcessingError
as err
:
2189 self
.report_error('Postprocessing: %s' % str(err
))
2192 if self
.params
.get('writeinfojson', False):
2193 infofn
= self
.prepare_filename(info_dict
, 'infojson')
2194 if not ensure_dir_exists(encodeFilename(infofn
)):
2196 if not self
.params
.get('overwrites', True) and os
.path
.exists(encodeFilename(infofn
)):
2197 self
.to_screen('[info] Video metadata is already present')
2199 self
.to_screen('[info] Writing video metadata as JSON to: ' + infofn
)
2201 write_json_file(self
.filter_requested_info(info_dict
), infofn
)
2202 except (OSError, IOError):
2203 self
.report_error('Cannot write video metadata to JSON file ' + infofn
)
2205 info_dict
['__infojson_filename'] = infofn
2207 thumbfn
= self
.prepare_filename(info_dict
, 'thumbnail')
2208 thumb_fn_temp
= temp_filename
if not skip_dl
else thumbfn
2209 for thumb_ext
in self
._write
_thumbnails
(info_dict
, thumb_fn_temp
):
2210 thumb_filename_temp
= replace_extension(thumb_fn_temp
, thumb_ext
, info_dict
.get('ext'))
2211 thumb_filename
= replace_extension(thumbfn
, thumb_ext
, info_dict
.get('ext'))
2212 files_to_move
[thumb_filename_temp
] = info_dict
['__thumbnail_filename'] = thumb_filename
2214 # Write internet shortcut files
2215 url_link
= webloc_link
= desktop_link
= False
2216 if self
.params
.get('writelink', False):
2217 if sys
.platform
== "darwin": # macOS.
2219 elif sys
.platform
.startswith("linux"):
2221 else: # if sys.platform in ['win32', 'cygwin']:
2223 if self
.params
.get('writeurllink', False):
2225 if self
.params
.get('writewebloclink', False):
2227 if self
.params
.get('writedesktoplink', False):
2230 if url_link
or webloc_link
or desktop_link
:
2231 if 'webpage_url' not in info_dict
:
2232 self
.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2234 ascii_url
= iri_to_uri(info_dict
['webpage_url'])
2236 def _write_link_file(extension
, template
, newline
, embed_filename
):
2237 linkfn
= replace_extension(full_filename
, extension
, info_dict
.get('ext'))
2238 if self
.params
.get('overwrites', True) and os
.path
.exists(encodeFilename(linkfn
)):
2239 self
.to_screen('[info] Internet shortcut is already present')
2242 self
.to_screen('[info] Writing internet shortcut to: ' + linkfn
)
2243 with io
.open(encodeFilename(to_high_limit_path(linkfn
)), 'w', encoding
='utf-8', newline
=newline
) as linkfile
:
2244 template_vars
= {'url': ascii_url}
2246 template_vars
['filename'] = linkfn
[:-(len(extension
) + 1)]
2247 linkfile
.write(template
% template_vars
)
2248 except (OSError, IOError):
2249 self
.report_error('Cannot write internet shortcut ' + linkfn
)
2254 if not _write_link_file('url', DOT_URL_LINK_TEMPLATE
, '\r\n', embed_filename
=False):
2257 if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE
, '\n', embed_filename
=False):
2260 if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE
, '\n', embed_filename
=True):
2264 must_record_download_archive
= False
2268 def existing_file(*filepaths
):
2269 ext
= info_dict
.get('ext')
2270 final_ext
= self
.params
.get('final_ext', ext
)
2272 for file in orderedSet(filepaths
):
2273 if final_ext
!= ext
:
2274 converted
= replace_extension(file, final_ext
, ext
)
2275 if os
.path
.exists(encodeFilename(converted
)):
2276 existing_files
.append(converted
)
2277 if os
.path
.exists(encodeFilename(file)):
2278 existing_files
.append(file)
2280 if not existing_files
or self
.params
.get('overwrites', False):
2281 for file in orderedSet(existing_files
):
2282 self
.report_file_delete(file)
2283 os
.remove(encodeFilename(file))
2286 self
.report_file_already_downloaded(existing_files
[0])
2287 info_dict
['ext'] = os
.path
.splitext(existing_files
[0])[1][1:]
2288 return existing_files
[0]
2291 if info_dict
.get('requested_formats') is not None:
2293 merger
= FFmpegMergerPP(self
)
2294 if not merger
.available
:
2295 self
.report_warning('You have requested multiple '
2296 'formats but ffmpeg is not installed.'
2297 ' The formats won\'t be merged.')
2299 def compatible_formats(formats
):
2300 # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2301 video_formats
= [format
for format
in formats
if format
.get('vcodec') != 'none']
2302 audio_formats
= [format
for format
in formats
if format
.get('acodec') != 'none']
2303 if len(video_formats
) > 2 or len(audio_formats
) > 2:
2307 exts
= set(format
.get('ext') for format
in formats
)
2309 set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2312 for ext_sets
in COMPATIBLE_EXTS
:
2313 if ext_sets
.issuperset(exts
):
2315 # TODO: Check acodec/vcodec
2318 requested_formats
= info_dict
['requested_formats']
2319 old_ext
= info_dict
['ext']
2320 if self
.params
.get('merge_output_format') is None and not compatible_formats(requested_formats
):
2321 info_dict
['ext'] = 'mkv'
2322 self
.report_warning(
2323 'Requested formats are incompatible for merge and will be merged into mkv.')
2325 def correct_ext(filename
):
2326 filename_real_ext
= os
.path
.splitext(filename
)[1][1:]
2328 os
.path
.splitext(filename
)[0]
2329 if filename_real_ext
== old_ext
2331 return '%s.%s' % (filename_wo_ext
, info_dict
['ext'])
2333 # Ensure filename always has a correct extension for successful merge
2334 full_filename
= correct_ext(full_filename
)
2335 temp_filename
= correct_ext(temp_filename
)
2336 dl_filename
= existing_file(full_filename
, temp_filename
)
2337 if dl_filename
is None:
2338 for f
in requested_formats
:
2339 new_info
= dict(info_dict
)
2341 fname
= prepend_extension(
2342 self
.prepare_filename(new_info
, 'temp'),
2343 'f%s' % f
['format_id'], new_info
['ext'])
2344 if not ensure_dir_exists(fname
):
2346 downloaded
.append(fname
)
2347 partial_success
, real_download
= dl(fname
, new_info
)
2348 success
= success
and partial_success
2349 if merger
.available
:
2350 info_dict
['__postprocessors'].append(merger
)
2351 info_dict
['__files_to_merge'] = downloaded
2352 # Even if there were no downloads, it is being merged only now
2353 info_dict
['__real_download'] = True
2355 # Just a single file
2356 dl_filename
= existing_file(full_filename
, temp_filename
)
2357 if dl_filename
is None:
2358 success
, real_download
= dl(temp_filename
, info_dict
)
2359 info_dict
['__real_download'] = real_download
2361 dl_filename
= dl_filename
or temp_filename
2362 info_dict
['__finaldir'] = os
.path
.dirname(os
.path
.abspath(encodeFilename(full_filename
)))
2364 except (compat_urllib_error
.URLError
, compat_http_client
.HTTPException
, socket
.error
) as err
:
2365 self
.report_error('unable to download video data: %s' % error_to_compat_str(err
))
2367 except (OSError, IOError) as err
:
2368 raise UnavailableVideoError(err
)
2369 except (ContentTooShortError
, ) as err
:
2370 self
.report_error('content too short (expected %s bytes and served %s)' % (err
.expected
, err
.downloaded
))
2373 if success
and full_filename
!= '-':
2375 fixup_policy
= self
.params
.get('fixup')
2376 if fixup_policy
is None:
2377 fixup_policy
= 'detect_or_warn'
2379 INSTALL_FFMPEG_MESSAGE
= 'Install ffmpeg to fix this automatically.'
2381 stretched_ratio
= info_dict
.get('stretched_ratio')
2382 if stretched_ratio
is not None and stretched_ratio
!= 1:
2383 if fixup_policy
== 'warn':
2384 self
.report_warning('%s: Non-uniform pixel ratio (%s)' % (
2385 info_dict
['id'], stretched_ratio
))
2386 elif fixup_policy
== 'detect_or_warn':
2387 stretched_pp
= FFmpegFixupStretchedPP(self
)
2388 if stretched_pp
.available
:
2389 info_dict
['__postprocessors'].append(stretched_pp
)
2391 self
.report_warning(
2392 '%s: Non-uniform pixel ratio (%s). %s'
2393 % (info_dict
['id'], stretched_ratio
, INSTALL_FFMPEG_MESSAGE
))
2395 assert fixup_policy
in ('ignore', 'never')
2397 if (info_dict
.get('requested_formats') is None
2398 and info_dict
.get('container') == 'm4a_dash'
2399 and info_dict
.get('ext') == 'm4a'):
2400 if fixup_policy
== 'warn':
2401 self
.report_warning(
2402 '%s: writing DASH m4a. '
2403 'Only some players support this container.'
2405 elif fixup_policy
== 'detect_or_warn':
2406 fixup_pp
= FFmpegFixupM4aPP(self
)
2407 if fixup_pp
.available
:
2408 info_dict
['__postprocessors'].append(fixup_pp
)
2410 self
.report_warning(
2411 '%s: writing DASH m4a. '
2412 'Only some players support this container. %s'
2413 % (info_dict
['id'], INSTALL_FFMPEG_MESSAGE
))
2415 assert fixup_policy
in ('ignore', 'never')
2417 if (info_dict
.get('protocol') == 'm3u8_native'
2418 or info_dict
.get('protocol') == 'm3u8'
2419 and self
.params
.get('hls_prefer_native')):
2420 if fixup_policy
== 'warn':
2421 self
.report_warning('%s: malformed AAC bitstream detected.' % (
2423 elif fixup_policy
== 'detect_or_warn':
2424 fixup_pp
= FFmpegFixupM3u8PP(self
)
2425 if fixup_pp
.available
:
2426 info_dict
['__postprocessors'].append(fixup_pp
)
2428 self
.report_warning(
2429 '%s: malformed AAC bitstream detected. %s'
2430 % (info_dict
['id'], INSTALL_FFMPEG_MESSAGE
))
2432 assert fixup_policy
in ('ignore', 'never')
2435 self
.post_process(dl_filename
, info_dict
, files_to_move
)
2436 except PostProcessingError
as err
:
2437 self
.report_error('Postprocessing: %s' % str(err
))
2440 for ph
in self
._post
_hooks
:
2442 except Exception as err
:
2443 self
.report_error('post hooks: %s' % str(err
))
2445 must_record_download_archive
= True
2447 if must_record_download_archive
or self
.params
.get('force_write_download_archive', False):
2448 self
.record_download_archive(info_dict
)
2449 max_downloads
= self
.params
.get('max_downloads')
2450 if max_downloads
is not None and self
._num
_downloads
>= int(max_downloads
):
2451 raise MaxDownloadsReached()
2453 def download(self
, url_list
):
2454 """Download a given list of URLs."""
2455 outtmpl
= self
.outtmpl_dict
['default']
2456 if (len(url_list
) > 1
2458 and '%' not in outtmpl
2459 and self
.params
.get('max_downloads') != 1):
2460 raise SameFileError(outtmpl
)
2462 for url
in url_list
:
2464 # It also downloads the videos
2465 res
= self
.extract_info(
2466 url
, force_generic_extractor
=self
.params
.get('force_generic_extractor', False))
2467 except UnavailableVideoError
:
2468 self
.report_error('unable to download video')
2469 except MaxDownloadsReached
:
2470 self
.to_screen('[info] Maximum number of downloaded files reached')
2472 except ExistingVideoReached
:
2473 self
.to_screen('[info] Encountered a file that is already in the archive, stopping due to --break-on-existing')
2475 except RejectedVideoReached
:
2476 self
.to_screen('[info] Encountered a file that did not match filter, stopping due to --break-on-reject')
2479 if self
.params
.get('dump_single_json', False):
2480 self
.to_stdout(json
.dumps(res
))
2482 return self
._download
_retcode
2484 def download_with_info_file(self
, info_filename
):
2485 with contextlib
.closing(fileinput
.FileInput(
2486 [info_filename
], mode
='r',
2487 openhook
=fileinput
.hook_encoded('utf-8'))) as f
:
2488 # FileInput doesn't have a read method, we can't call json.load
2489 info
= self
.filter_requested_info(json
.loads('\n'.join(f
)))
2491 self
.process_ie_result(info
, download
=True)
2492 except DownloadError
:
2493 webpage_url
= info
.get('webpage_url')
2494 if webpage_url
is not None:
2495 self
.report_warning('The info failed to download, trying with "%s"' % webpage_url
)
2496 return self
.download([webpage_url
])
2499 return self
._download
_retcode
2502 def filter_requested_info(info_dict
):
2503 fields_to_remove
= ('requested_formats', 'requested_subtitles')
2505 (k
, v
) for k
, v
in info_dict
.items()
2506 if (k
[0] != '_' or k
== '_type') and k
not in fields_to_remove
)
2508 def run_pp(self
, pp
, infodict
, files_to_move
={}):
2509 files_to_delete
= []
2510 files_to_delete
, infodict
= pp
.run(infodict
)
2511 if not files_to_delete
:
2512 return files_to_move
, infodict
2514 if self
.params
.get('keepvideo', False):
2515 for f
in files_to_delete
:
2516 files_to_move
.setdefault(f
, '')
2518 for old_filename
in set(files_to_delete
):
2519 self
.to_screen('Deleting original file %s (pass -k to keep)' % old_filename
)
2521 os
.remove(encodeFilename(old_filename
))
2522 except (IOError, OSError):
2523 self
.report_warning('Unable to remove downloaded original file')
2524 if old_filename
in files_to_move
:
2525 del files_to_move
[old_filename
]
2526 return files_to_move
, infodict
2528 def pre_process(self
, ie_info
):
2529 info
= dict(ie_info
)
2530 for pp
in self
._pps
['beforedl']:
2531 info
= self
.run_pp(pp
, info
)[1]
2534 def post_process(self
, filename
, ie_info
, files_to_move
={}):
2535 """Run all the postprocessors on the given file."""
2536 info
= dict(ie_info
)
2537 info
['filepath'] = filename
2538 info
['__files_to_move'] = {}
2540 for pp
in ie_info
.get('__postprocessors', []) + self
._pps
['normal']:
2541 files_to_move
, info
= self
.run_pp(pp
, info
, files_to_move
)
2542 info
= self
.run_pp(MoveFilesAfterDownloadPP(self
, files_to_move
), info
)[1]
2543 for pp
in self
._pps
['aftermove']:
2544 info
= self
.run_pp(pp
, info
, {})[1]
2546 def _make_archive_id(self
, info_dict
):
2547 video_id
= info_dict
.get('id')
2550 # Future-proof against any change in case
2551 # and backwards compatibility with prior versions
2552 extractor
= info_dict
.get('extractor_key') or info_dict
.get('ie_key') # key in a playlist
2553 if extractor
is None:
2554 url
= str_or_none(info_dict
.get('url'))
2557 # Try to find matching extractor for the URL and take its ie_key
2558 for ie
in self
._ies
:
2559 if ie
.suitable(url
):
2560 extractor
= ie
.ie_key()
2564 return '%s %s' % (extractor
.lower(), video_id
)
2566 def in_download_archive(self
, info_dict
):
2567 fn
= self
.params
.get('download_archive')
2571 vid_id
= self
._make
_archive
_id
(info_dict
)
2573 return False # Incomplete video information
2575 return vid_id
in self
.archive
2577 def record_download_archive(self
, info_dict
):
2578 fn
= self
.params
.get('download_archive')
2581 vid_id
= self
._make
_archive
_id
(info_dict
)
2583 with locked_file(fn
, 'a', encoding
='utf-8') as archive_file
:
2584 archive_file
.write(vid_id
+ '\n')
2585 self
.archive
.add(vid_id
)
2588 def format_resolution(format
, default
='unknown'):
2589 if format
.get('vcodec') == 'none':
2591 if format
.get('resolution') is not None:
2592 return format
['resolution']
2593 if format
.get('height') is not None:
2594 if format
.get('width') is not None:
2595 res
= '%sx%s' % (format
['width'], format
['height'])
2597 res
= '%sp' % format
['height']
2598 elif format
.get('width') is not None:
2599 res
= '%dx?' % format
['width']
2604 def _format_note(self
, fdict
):
2606 if fdict
.get('ext') in ['f4f', 'f4m']:
2607 res
+= '(unsupported) '
2608 if fdict
.get('language'):
2611 res
+= '[%s] ' % fdict
['language']
2612 if fdict
.get('format_note') is not None:
2613 res
+= fdict
['format_note'] + ' '
2614 if fdict
.get('tbr') is not None:
2615 res
+= '%4dk ' % fdict
['tbr']
2616 if fdict
.get('container') is not None:
2619 res
+= '%s container' % fdict
['container']
2620 if (fdict
.get('vcodec') is not None
2621 and fdict
.get('vcodec') != 'none'):
2624 res
+= fdict
['vcodec']
2625 if fdict
.get('vbr') is not None:
2627 elif fdict
.get('vbr') is not None and fdict
.get('abr') is not None:
2629 if fdict
.get('vbr') is not None:
2630 res
+= '%4dk' % fdict
['vbr']
2631 if fdict
.get('fps') is not None:
2634 res
+= '%sfps' % fdict
['fps']
2635 if fdict
.get('acodec') is not None:
2638 if fdict
['acodec'] == 'none':
2641 res
+= '%-5s' % fdict
['acodec']
2642 elif fdict
.get('abr') is not None:
2646 if fdict
.get('abr') is not None:
2647 res
+= '@%3dk' % fdict
['abr']
2648 if fdict
.get('asr') is not None:
2649 res
+= ' (%5dHz)' % fdict
['asr']
2650 if fdict
.get('filesize') is not None:
2653 res
+= format_bytes(fdict
['filesize'])
2654 elif fdict
.get('filesize_approx') is not None:
2657 res
+= '~' + format_bytes(fdict
['filesize_approx'])
2660 def _format_note_table(self
, f
):
2661 def join_fields(*vargs
):
2662 return ', '.join((val
for val
in vargs
if val
!= ''))
2665 'UNSUPPORTED' if f
.get('ext') in ('f4f', 'f4m') else '',
2666 format_field(f
, 'language', '[%s]'),
2667 format_field(f
, 'format_note'),
2668 format_field(f
, 'container', ignore
=(None, f
.get('ext'))),
2669 format_field(f
, 'asr', '%5dHz'))
2671 def list_formats(self
, info_dict
):
2672 formats
= info_dict
.get('formats', [info_dict
])
2673 new_format
= self
.params
.get('listformats_table', False)
2677 format_field(f
, 'format_id'),
2678 format_field(f
, 'ext'),
2679 self
.format_resolution(f
),
2680 format_field(f
, 'fps', '%d'),
2682 format_field(f
, 'filesize', ' %s', func
=format_bytes
) + format_field(f
, 'filesize_approx', '~%s', func
=format_bytes
),
2683 format_field(f
, 'tbr', '%4dk'),
2684 f
.get('protocol').replace('http_dash_segments', 'dash').replace("native", "n").replace('niconico_', ''),
2686 format_field(f
, 'vcodec', default
='unknown').replace('none', ''),
2687 format_field(f
, 'vbr', '%4dk'),
2688 format_field(f
, 'acodec', default
='unknown').replace('none', ''),
2689 format_field(f
, 'abr', '%3dk'),
2690 format_field(f
, 'asr', '%5dHz'),
2691 self
._format
_note
_table
(f
)]
2693 if f
.get('preference') is None or f
['preference'] >= -1000]
2694 header_line
= ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', ' TBR', 'PROTO',
2695 '|', 'VCODEC', ' VBR', 'ACODEC', ' ABR', ' ASR', 'NOTE']
2699 format_field(f
, 'format_id'),
2700 format_field(f
, 'ext'),
2701 self
.format_resolution(f
),
2702 self
._format
_note
(f
)]
2704 if f
.get('preference') is None or f
['preference'] >= -1000]
2705 header_line
= ['format code', 'extension', 'resolution', 'note']
2707 # if len(formats) > 1:
2708 # table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
2710 '[info] Available formats for %s:\n%s' % (info_dict
['id'], render_table(
2714 extraGap
=(0 if new_format
else 1),
2715 hideEmpty
=new_format
)))
2717 def list_thumbnails(self
, info_dict
):
2718 thumbnails
= info_dict
.get('thumbnails')
2720 self
.to_screen('[info] No thumbnails present for %s' % info_dict
['id'])
2724 '[info] Thumbnails for %s:' % info_dict
['id'])
2725 self
.to_screen(render_table(
2726 ['ID', 'width', 'height', 'URL'],
2727 [[t
['id'], t
.get('width', 'unknown'), t
.get('height', 'unknown'), t
['url']] for t
in thumbnails
]))
def list_subtitles(self, video_id, subtitles, name='subtitles'):
    # List the available subtitle (or automatic-caption) languages and,
    # per language, the comma-separated subtitle formats on offer.
    if not subtitles:
        self.to_screen('%s has no %s' % (video_id, name))
        return
    self.to_screen('Available %s for %s:' % (name, video_id))

    def _formats_column(formats):
        # reversed() so the highest-preference format is listed first.
        return ', '.join(f['ext'] for f in reversed(formats))

    rows = [[lang, _formats_column(formats)]
            for lang, formats in subtitles.items()]
    self.to_screen(render_table(['Language', 'formats'], rows))
def urlopen(self, req):
    """ Start an HTTP download """
    # Accept either a prepared Request object or a bare URL string;
    # strings are wrapped into a sanitized request before being handed
    # to the shared opener (built in _setup_opener).
    request = sanitized_Request(req) if isinstance(req, compat_basestring) else req
    return self._opener.open(request, timeout=self._socket_timeout)
def print_debug_header(self):
    # Emit the '[debug] ...' header block (version, encodings, git
    # commit, helper-exe versions, proxy map) when --verbose is set.
    if not self.params.get('verbose'):
        return

    if type('') is not compat_str:
        # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
        self.report_warning(
            'Your Python is broken! Update to a newer and supported version')

    stdout_encoding = getattr(
        sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
    encoding_str = (
        '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
            locale.getpreferredencoding(),
            sys.getfilesystemencoding(),
            stdout_encoding,
            self.get_encoding()))
    write_string(encoding_str, encoding=None)

    self._write_string('[debug] yt-dlp version %s\n' % __version__)
    if _LAZY_LOADER:
        self._write_string('[debug] Lazy loading extractors enabled\n')
    if _PLUGIN_CLASSES:
        self._write_string(
            '[debug] Plugin Extractors: %s\n' % [ie.ie_key() for ie in _PLUGIN_CLASSES])
    try:
        # Best effort: report the git commit when running from a checkout.
        sp = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out, err = process_communicate_or_kill(sp)
        out = out.decode().strip()
        if re.match('[0-9a-f]+', out):
            self._write_string('[debug] Git HEAD: %s\n' % out)
    except Exception:
        # Not a git checkout (or git unavailable) -- silently skip.
        try:
            sys.exc_clear()
        except Exception:
            pass

    def python_implementation():
        # CPython/PyPy name, with the PyPy version appended when known.
        impl_name = platform.python_implementation()
        if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
            return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
        return impl_name

    self._write_string('[debug] Python version %s (%s) - %s\n' % (
        platform.python_version(), python_implementation(), platform_name()))

    exe_versions = FFmpegPostProcessor.get_versions(self)
    exe_versions['rtmpdump'] = rtmpdump_version()
    exe_versions['phantomjs'] = PhantomJSwrapper._version()
    exe_str = ', '.join(
        '%s %s' % (exe, v)
        for exe, v in sorted(exe_versions.items())
        if v
    )
    if not exe_str:
        exe_str = 'none'
    self._write_string('[debug] exe versions: %s\n' % exe_str)

    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

    if self.params.get('call_home', False):
        ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
        self._write_string('[debug] Public IP address: %s\n' % ipaddr)
        latest_version = self.urlopen(
            'https://yt-dl.org/latest/version').read().decode('utf-8')
        if version_tuple(latest_version) > version_tuple(__version__):
            self.report_warning(
                'You are using an outdated version (newest version: %s)! '
                'See https://yt-dl.org/update if you need help updating.' %
                latest_version)
def _setup_opener(self):
    # Build the urllib opener (cookie jar, proxies, TLS handler,
    # redirects, data: URLs) used by self.urlopen(), and store it on
    # self._opener along with the socket timeout.
    timeout_val = self.params.get('socket_timeout')
    self._socket_timeout = 600 if timeout_val is None else float(timeout_val)

    opts_cookiefile = self.params.get('cookiefile')
    opts_proxy = self.params.get('proxy')

    if opts_cookiefile is None:
        self.cookiejar = compat_cookiejar.CookieJar()
    else:
        opts_cookiefile = expand_path(opts_cookiefile)
        self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
        if os.access(opts_cookiefile, os.R_OK):
            self.cookiejar.load(ignore_discard=True, ignore_expires=True)

    cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
    if opts_proxy is not None:
        # An explicit empty --proxy disables proxying entirely.
        if opts_proxy == '':
            proxies = {}
        else:
            proxies = {'http': opts_proxy, 'https': opts_proxy}
    else:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    proxy_handler = PerRequestProxyHandler(proxies)

    debuglevel = 1 if self.params.get('debug_printtraffic') else 0
    https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
    redirect_handler = YoutubeDLRedirectHandler()
    data_handler = compat_urllib_request_DataHandler()

    # When passing our own FileHandler instance, build_opener won't add the
    # default FileHandler and allows us to disable the file protocol, which
    # can be used for malicious purposes (see
    # https://github.com/ytdl-org/youtube-dl/issues/8227)
    file_handler = compat_urllib_request.FileHandler()

    def file_open(*args, **kwargs):
        raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dlc for security reasons')
    file_handler.file_open = file_open

    opener = compat_urllib_request.build_opener(
        proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)

    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener
def encode(self, s):
    # Encode text to the configured output encoding; bytes pass through
    # untouched. On failure, the encoding hint is appended to the error
    # before it is re-raised.
    if isinstance(s, bytes):
        return s  # Already encoded

    try:
        return s.encode(self.get_encoding())
    except UnicodeEncodeError as err:
        err.reason += '. Check your system encoding configuration or use the --encoding option.'
        raise
def get_encoding(self):
    # Output encoding: the user-supplied 'encoding' param when present,
    # otherwise the locale's preferred encoding.
    encoding = self.params.get('encoding')
    return preferredencoding() if encoding is None else encoding
2895 def _write_thumbnails(self
, info_dict
, filename
): # return the extensions
2896 write_all
= self
.params
.get('write_all_thumbnails', False)
2898 if write_all
or self
.params
.get('writethumbnail', False):
2899 thumbnails
= info_dict
.get('thumbnails') or []
2900 multiple
= write_all
and len(thumbnails
) > 1
2903 for t
in thumbnails
[::1 if write_all
else -1]:
2904 thumb_ext
= determine_ext(t
['url'], 'jpg')
2905 suffix
= '%s.' % t
['id'] if multiple
else ''
2906 thumb_display_id
= '%s ' % t
['id'] if multiple
else ''
2907 t
['filename'] = thumb_filename
= replace_extension(filename
, suffix
+ thumb_ext
, info_dict
.get('ext'))
2909 if not self
.params
.get('overwrites', True) and os
.path
.exists(encodeFilename(thumb_filename
)):
2910 ret
.append(suffix
+ thumb_ext
)
2911 self
.to_screen('[%s] %s: Thumbnail %sis already present' %
2912 (info_dict
['extractor'], info_dict
['id'], thumb_display_id
))
2914 self
.to_screen('[%s] %s: Downloading thumbnail %s...' %
2915 (info_dict
['extractor'], info_dict
['id'], thumb_display_id
))
2917 uf
= self
.urlopen(t
['url'])
2918 with open(encodeFilename(thumb_filename
), 'wb') as thumbf
:
2919 shutil
.copyfileobj(uf
, thumbf
)
2920 ret
.append(suffix
+ thumb_ext
)
2921 self
.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2922 (info_dict
['extractor'], info_dict
['id'], thumb_display_id
, thumb_filename
))
2923 except (compat_urllib_error
.URLError
, compat_http_client
.HTTPException
, socket
.error
) as err
:
2924 self
.report_warning('Unable to download thumbnail "%s": %s' %
2925 (t
['url'], error_to_compat_str(err
)))
2926 if ret
and not write_all
: