4 from __future__
import absolute_import
, unicode_literals
29 from string
import ascii_letters
34 compat_get_terminal_size
,
40 compat_tokenize_tokenize
,
42 compat_urllib_request
,
43 compat_urllib_request_DataHandler
,
54 DOT_DESKTOP_LINK_TEMPLATE
,
55 DOT_URL_LINK_TEMPLATE
,
56 DOT_WEBLOC_LINK_TEMPLATE
,
79 PerRequestProxyHandler
,
84 register_socks_protocols
,
98 UnavailableVideoError
,
104 YoutubeDLCookieProcessor
,
106 YoutubeDLRedirectHandler
,
107 process_communicate_or_kill
,
109 from .cache
import Cache
110 from .extractor
import get_info_extractor
, gen_extractor_classes
, _LAZY_LOADER
, _PLUGIN_CLASSES
111 from .extractor
.openload
import PhantomJSwrapper
112 from .downloader
import get_suitable_downloader
113 from .downloader
.rtmp
import rtmpdump_version
114 from .postprocessor
import (
117 FFmpegFixupStretchedPP
,
120 # FFmpegSubtitlesConvertorPP,
122 MoveFilesAfterDownloadPP
,
124 from .version
import __version__
126 if compat_os_name
== 'nt':
130 class YoutubeDL(object):
133 YoutubeDL objects are the ones responsible of downloading the
134 actual video file and writing it to disk if the user has requested
135 it, among some other tasks. In most cases there should be one per
136 program. As, given a video URL, the downloader doesn't know how to
137 extract all the needed information, task that InfoExtractors do, it
138 has to pass the URL to one of them.
140 For this, YoutubeDL objects have a method that allows
141 InfoExtractors to be registered in a given order. When it is passed
142 a URL, the YoutubeDL object handles it to the first InfoExtractor it
143 finds that reports being able to handle it. The InfoExtractor extracts
144 all the information about the video or videos the URL refers to, and
145 YoutubeDL process the extracted information, possibly using a File
146 Downloader to download the video.
148 YoutubeDL objects accept a lot of parameters. In order not to saturate
149 the object constructor with arguments, it receives a dictionary of
150 options instead. These options are available through the params
151 attribute for the InfoExtractors to use. The YoutubeDL also
152 registers itself as the downloader in charge for the InfoExtractors
153 that are added to it, so this is a "mutual registration".
157 username: Username for authentication purposes.
158 password: Password for authentication purposes.
159 videopassword: Password for accessing a video.
160 ap_mso: Adobe Pass multiple-system operator identifier.
161 ap_username: Multiple-system operator account username.
162 ap_password: Multiple-system operator account password.
163 usenetrc: Use netrc for authentication instead.
164 verbose: Print additional info to stdout.
165 quiet: Do not print messages to stdout.
166 no_warnings: Do not print out anything for warnings.
167 forceurl: Force printing final URL.
168 forcetitle: Force printing title.
169 forceid: Force printing ID.
170 forcethumbnail: Force printing thumbnail URL.
171 forcedescription: Force printing description.
172 forcefilename: Force printing final filename.
173 forceduration: Force printing duration.
174 forcejson: Force printing info_dict as JSON.
175 dump_single_json: Force printing the info_dict of the whole playlist
176 (or video) as a single JSON line.
177 force_write_download_archive: Force writing download archive regardless of
178 'skip_download' or 'simulate'.
179 simulate: Do not download the video files.
180 format: Video format code. see "FORMAT SELECTION" for more details.
181 format_sort: How to sort the video formats. see "Sorting Formats" for more details.
182 format_sort_force: Force the given format_sort. see "Sorting Formats" for more details.
183 allow_multiple_video_streams: Allow multiple video streams to be merged into a single file
184 allow_multiple_audio_streams: Allow multiple audio streams to be merged into a single file
185 outtmpl: Template for output names.
186 outtmpl_na_placeholder: Placeholder for unavailable meta fields.
187 restrictfilenames: Do not allow "&" and spaces in file names
188 trim_file_name: Limit length of filename (extension excluded)
189 ignoreerrors: Do not stop on download errors
190 (Default True when running youtube-dlc,
191 but False when directly accessing YoutubeDL class)
192 force_generic_extractor: Force downloader to use the generic extractor
193 overwrites: Overwrite all video and metadata files if True,
194 overwrite only non-video files if None
195 and don't overwrite any file if False
196 playliststart: Playlist item to start at.
197 playlistend: Playlist item to end at.
198 playlist_items: Specific indices of playlist to download.
199 playlistreverse: Download playlist items in reverse order.
200 playlistrandom: Download playlist items in random order.
201 matchtitle: Download only matching titles.
202 rejecttitle: Reject downloads for matching titles.
203 logger: Log messages to a logging.Logger instance.
204 logtostderr: Log messages to stderr instead of stdout.
205 writedescription: Write the video description to a .description file
206 writeinfojson: Write the video description to a .info.json file
207 writecomments: Extract video comments. This will not be written to disk
208 unless writeinfojson is also given
209 writeannotations: Write the video annotations to a .annotations.xml file
210 writethumbnail: Write the thumbnail image to a file
211 allow_playlist_files: Also write playlists' description, infojson etc in a separate file
212 write_all_thumbnails: Write all thumbnail formats to files
213 writelink: Write an internet shortcut file, depending on the
214 current platform (.url/.webloc/.desktop)
215 writeurllink: Write a Windows internet shortcut file (.url)
216 writewebloclink: Write a macOS internet shortcut file (.webloc)
217 writedesktoplink: Write a Linux internet shortcut file (.desktop)
218 writesubtitles: Write the video subtitles to a file
219 writeautomaticsub: Write the automatically generated subtitles to a file
220 allsubtitles: Downloads all the subtitles of the video
221 (requires writesubtitles or writeautomaticsub)
222 listsubtitles: Lists all available subtitles for the video
223 subtitlesformat: The format code for subtitles
224 subtitleslangs: List of languages of the subtitles to download
225 keepvideo: Keep the video file after post-processing
226 daterange: A DateRange object, download only if the upload_date is in the range.
227 skip_download: Skip the actual download of the video file
228 cachedir: Location of the cache files in the filesystem.
229 False to disable filesystem cache.
230 noplaylist: Download single video instead of a playlist if in doubt.
231 age_limit: An integer representing the user's age in years.
232 Unsuitable videos for the given age are skipped.
233 min_views: An integer representing the minimum view count the video
234 must have in order to not be skipped.
235 Videos without view count information are always
236 downloaded. None for no limit.
237 max_views: An integer representing the maximum view count.
238 Videos that are more popular than that are not
240 Videos without view count information are always
241 downloaded. None for no limit.
242 download_archive: File name of a file where all downloads are recorded.
243 Videos already present in the file are not downloaded
245 break_on_existing: Stop the download process after attempting to download a
246 file that is in the archive.
247 break_on_reject: Stop the download process when encountering a video that
248 has been filtered out.
249 cookiefile: File name where cookies should be read from and dumped to
250 nocheckcertificate:Do not verify SSL certificates
251 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
252 At the moment, this is only supported by YouTube.
253 proxy: URL of the proxy server to use
254 geo_verification_proxy: URL of the proxy to use for IP address verification
255 on geo-restricted sites.
256 socket_timeout: Time to wait for unresponsive hosts, in seconds
257 bidi_workaround: Work around buggy terminals without bidirectional text
258 support, using fribidi
259 debug_printtraffic:Print out sent and received HTTP traffic
260 include_ads: Download ads as well
261 default_search: Prepend this string if an input url is not valid.
262 'auto' for elaborate guessing
263 encoding: Use this encoding instead of the system-specified.
264 extract_flat: Do not resolve URLs, return the immediate result.
265 Pass in 'in_playlist' to only show this behavior for
267 postprocessors: A list of dictionaries, each with an entry
268 * key: The name of the postprocessor. See
269 youtube_dlc/postprocessor/__init__.py for a list.
270 * _after_move: Optional. If True, run this post_processor
271 after 'MoveFilesAfterDownload'
272 as well as any further keyword arguments for the
274 post_hooks: A list of functions that get called as the final step
275 for each video file, after all postprocessors have been
276 called. The filename will be passed as the only argument.
277 progress_hooks: A list of functions that get called on download
278 progress, with a dictionary with the entries
279 * status: One of "downloading", "error", or "finished".
280 Check this first and ignore unknown values.
282 If status is one of "downloading", or "finished", the
283 following properties may also be present:
284 * filename: The final filename (always present)
285 * tmpfilename: The filename we're currently writing to
286 * downloaded_bytes: Bytes on disk
287 * total_bytes: Size of the whole file, None if unknown
288 * total_bytes_estimate: Guess of the eventual file size,
290 * elapsed: The number of seconds since download started.
291 * eta: The estimated time in seconds, None if unknown
292 * speed: The download speed in bytes/second, None if
294 * fragment_index: The counter of the currently
295 downloaded video fragment.
296 * fragment_count: The number of fragments (= individual
297 files that will be merged)
299 Progress hooks are guaranteed to be called at least once
300 (with status "finished") if the download is successful.
301 merge_output_format: Extension to use when merging formats.
302 final_ext: Expected final extension; used to detect when the file was
303 already downloaded and converted. "merge_output_format" is
304 replaced by this extension when given
305 fixup: Automatically correct known faults of the file.
307 - "never": do nothing
308 - "warn": only emit a warning
309 - "detect_or_warn": check whether we can do anything
310 about it, warn otherwise (default)
311 source_address: Client-side IP address to bind to.
312 call_home: Boolean, true iff we are allowed to contact the
313 youtube-dlc servers for debugging.
314 sleep_interval: Number of seconds to sleep before each download when
315 used alone or a lower bound of a range for randomized
316 sleep before each download (minimum possible number
317 of seconds to sleep) when used along with
319 max_sleep_interval:Upper bound of a range for randomized sleep before each
320 download (maximum possible number of seconds to sleep).
321 Must only be used along with sleep_interval.
322 Actual sleep time will be a random float from range
323 [sleep_interval; max_sleep_interval].
324 listformats: Print an overview of available video formats and exit.
325 list_thumbnails: Print a table of all thumbnails and exit.
326 match_filter: A function that gets called with the info_dict of
328 If it returns a message, the video is ignored.
329 If it returns None, the video is downloaded.
330 match_filter_func in utils.py is one example for this.
331 no_color: Do not emit color codes in output.
332 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
335 Two-letter ISO 3166-2 country code that will be used for
336 explicit geographic restriction bypassing via faking
337 X-Forwarded-For HTTP header
339 IP range in CIDR notation that will be used similarly to
342 The following options determine which downloader is picked:
343 external_downloader: Executable of the external downloader to call.
344 None or unset for standard (built-in) downloader.
345 hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
346 if True, otherwise use ffmpeg/avconv if False, otherwise
347 use downloader suggested by extractor if None.
349 The following parameters are not used by YoutubeDL itself, they are used by
350 the downloader (see youtube_dlc/downloader/common.py):
351 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
352 noresizebuffer, retries, continuedl, noprogress, consoletitle,
353 xattr_set_filesize, external_downloader_args, hls_use_mpegts,
356 The following options are used by the post processors:
357 prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
358 otherwise prefer ffmpeg. (avconv support is deprecated)
359 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
360 to the binary or its containing directory.
361 postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
362 and a list of additional command-line arguments for the
363 postprocessor/executable. The dict can also have "PP+EXE" keys
364 which are used when the given exe is used by the given PP.
365 Use 'default' as the name for arguments to passed to all PP
366 The following options are used by the Youtube extractor:
367 youtube_include_dash_manifest: If True (default), DASH manifests and related
368 data will be downloaded and processed by extractor.
369 You can reduce network I/O by disabling it if you don't
373 _NUMERIC_FIELDS
= set((
374 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
375 'timestamp', 'upload_year', 'upload_month', 'upload_day',
376 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
377 'average_rating', 'comment_count', 'age_limit',
378 'start_time', 'end_time',
379 'chapter_number', 'season_number', 'episode_number',
380 'track_number', 'disc_number', 'release_year',
386 _pps
= {'beforedl': [], 'aftermove': [], 'normal': []}
387 __prepare_filename_warned
= False
388 _download_retcode
= None
389 _num_downloads
= None
391 _playlist_urls
= set()
394 def __init__(self
, params
=None, auto_init
=True):
395 """Create a FileDownloader object with the given options."""
399 self
._ies
_instances
= {}
400 self
._pps
= {'beforedl': [], 'aftermove': [], 'normal': []}
401 self
.__prepare
_filename
_warned
= False
402 self
._post
_hooks
= []
403 self
._progress
_hooks
= []
404 self
._download
_retcode
= 0
405 self
._num
_downloads
= 0
406 self
._screen
_file
= [sys
.stdout
, sys
.stderr
][params
.get('logtostderr', False)]
407 self
._err
_file
= sys
.stderr
410 'nocheckcertificate': False,
412 self
.params
.update(params
)
413 self
.cache
= Cache(self
)
416 """Preload the archive, if any is specified"""
417 def preload_download_archive(self
):
418 fn
= self
.params
.get('download_archive')
422 with locked_file(fn
, 'r', encoding
='utf-8') as archive_file
:
423 for line
in archive_file
:
424 self
.archive
.add(line
.strip())
425 except IOError as ioe
:
426 if ioe
.errno
!= errno
.ENOENT
:
431 def check_deprecated(param
, option
, suggestion
):
432 if self
.params
.get(param
) is not None:
434 '%s is deprecated. Use %s instead.' % (option
, suggestion
))
438 if self
.params
.get('verbose'):
439 self
.to_stdout('[debug] Loading archive file %r' % self
.params
.get('download_archive'))
441 preload_download_archive(self
)
443 if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
444 if self
.params
.get('geo_verification_proxy') is None:
445 self
.params
['geo_verification_proxy'] = self
.params
['cn_verification_proxy']
447 if self
.params
.get('final_ext'):
448 if self
.params
.get('merge_output_format'):
449 self
.report_warning('--merge-output-format will be ignored since --remux-video or --recode-video is given')
450 self
.params
['merge_output_format'] = self
.params
['final_ext']
452 if 'overwrites' in self
.params
and self
.params
['overwrites'] is None:
453 del self
.params
['overwrites']
455 check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N in the number of digits')
456 check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
457 check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
459 if params
.get('bidi_workaround', False):
462 master
, slave
= pty
.openpty()
463 width
= compat_get_terminal_size().columns
467 width_args
= ['-w', str(width
)]
469 stdin
=subprocess
.PIPE
,
471 stderr
=self
._err
_file
)
473 self
._output
_process
= subprocess
.Popen(
474 ['bidiv'] + width_args
, **sp_kwargs
477 self
._output
_process
= subprocess
.Popen(
478 ['fribidi', '-c', 'UTF-8'] + width_args
, **sp_kwargs
)
479 self
._output
_channel
= os
.fdopen(master
, 'rb')
480 except OSError as ose
:
481 if ose
.errno
== errno
.ENOENT
:
482 self
.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
486 if (sys
.platform
!= 'win32'
487 and sys
.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
488 and not params
.get('restrictfilenames', False)):
489 # Unicode filesystem API will throw errors (#1474, #13027)
491 'Assuming --restrict-filenames since file system encoding '
492 'cannot encode all characters. '
493 'Set the LC_ALL environment variable to fix this.')
494 self
.params
['restrictfilenames'] = True
496 if isinstance(params
.get('outtmpl'), bytes):
498 'Parameter outtmpl is bytes, but should be a unicode string. '
499 'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
504 self
.print_debug_header()
505 self
.add_default_info_extractors()
507 for pp_def_raw
in self
.params
.get('postprocessors', []):
508 pp_class
= get_postprocessor(pp_def_raw
['key'])
509 pp_def
= dict(pp_def_raw
)
512 when
= pp_def
['when']
516 pp
= pp_class(self
, **compat_kwargs(pp_def
))
517 self
.add_post_processor(pp
, when
=when
)
519 for ph
in self
.params
.get('post_hooks', []):
520 self
.add_post_hook(ph
)
522 for ph
in self
.params
.get('progress_hooks', []):
523 self
.add_progress_hook(ph
)
525 register_socks_protocols()
527 def warn_if_short_id(self
, argv
):
528 # short YouTube ID starting with dash?
530 i
for i
, a
in enumerate(argv
)
531 if re
.match(r
'^-[0-9A-Za-z_-]{10}$', a
)]
535 + [a
for i
, a
in enumerate(argv
) if i
not in idxs
]
536 + ['--'] + [argv
[i
] for i
in idxs
]
539 'Long argument string detected. '
540 'Use -- to separate parameters and URLs, like this:\n%s\n' %
541 args_to_str(correct_argv
))
543 def add_info_extractor(self
, ie
):
544 """Add an InfoExtractor object to the end of the list."""
546 if not isinstance(ie
, type):
547 self
._ies
_instances
[ie
.ie_key()] = ie
548 ie
.set_downloader(self
)
550 def get_info_extractor(self
, ie_key
):
552 Get an instance of an IE with name ie_key, it will try to get one from
553 the _ies list, if there's no instance it will create a new one and add
554 it to the extractor list.
556 ie
= self
._ies
_instances
.get(ie_key
)
558 ie
= get_info_extractor(ie_key
)()
559 self
.add_info_extractor(ie
)
562 def add_default_info_extractors(self
):
564 Add the InfoExtractors returned by gen_extractors to the end of the list
566 for ie
in gen_extractor_classes():
567 self
.add_info_extractor(ie
)
def add_post_processor(self, pp, when='normal'):
    """Add a PostProcessor object to the end of the chain."""
    # Post-processors are bucketed by stage; self._pps has the keys
    # 'beforedl', 'normal' and 'aftermove'.
    stage_chain = self._pps[when]
    stage_chain.append(pp)
    pp.set_downloader(self)
def add_post_hook(self, ph):
    """Add the post hook"""
    # Post hooks are called as the final step for each video file
    # (see the 'post_hooks' option documented on the class).
    self._post_hooks += [ph]
def add_progress_hook(self, ph):
    """Add the progress hook (currently only for the file downloader)"""
    self._progress_hooks += [ph]
582 def _bidi_workaround(self
, message
):
583 if not hasattr(self
, '_output_channel'):
586 assert hasattr(self
, '_output_process')
587 assert isinstance(message
, compat_str
)
588 line_count
= message
.count('\n') + 1
589 self
._output
_process
.stdin
.write((message
+ '\n').encode('utf-8'))
590 self
._output
_process
.stdin
.flush()
591 res
= ''.join(self
._output
_channel
.readline().decode('utf-8')
592 for _
in range(line_count
))
593 return res
[:-len('\n')]
def to_screen(self, message, skip_eol=False):
    """Print message to stdout if not in quiet mode."""
    # Thin wrapper over to_stdout with quiet-mode checking enabled.
    return self.to_stdout(message, skip_eol=skip_eol, check_quiet=True)
def _write_string(self, s, out=None):
    """Write the string *s* to *out* using the user-configured
    output encoding (params['encoding'], if any)."""
    encoding = self.params.get('encoding')
    write_string(s, out=out, encoding=encoding)
def to_stdout(self, message, skip_eol=False, check_quiet=False):
    """Print message to stdout if not in quiet mode."""
    logger = self.params.get('logger')
    if logger:
        # A configured logger takes the message instead of the screen.
        logger.debug(message)
        return
    # Suppress output only when the caller asked for quiet-checking
    # and quiet mode is actually enabled.
    if check_quiet and self.params.get('quiet', False):
        return
    message = self._bidi_workaround(message)
    output = message if skip_eol else message + '\n'
    self._write_string(output, self._screen_file)
613 def to_stderr(self
, message
):
614 """Print message to stderr."""
615 assert isinstance(message
, compat_str
)
616 if self
.params
.get('logger'):
617 self
.params
['logger'].error(message
)
619 message
= self
._bidi
_workaround
(message
)
620 output
= message
+ '\n'
621 self
._write
_string
(output
, self
._err
_file
)
623 def to_console_title(self
, message
):
624 if not self
.params
.get('consoletitle', False):
626 if compat_os_name
== 'nt':
627 if ctypes
.windll
.kernel32
.GetConsoleWindow():
628 # c_wchar_p() might not be necessary if `message` is
629 # already of type unicode()
630 ctypes
.windll
.kernel32
.SetConsoleTitleW(ctypes
.c_wchar_p(message
))
631 elif 'TERM' in os
.environ
:
632 self
._write
_string
('\033]0;%s\007' % message
, self
._screen
_file
)
634 def save_console_title(self
):
635 if not self
.params
.get('consoletitle', False):
637 if self
.params
.get('simulate', False):
639 if compat_os_name
!= 'nt' and 'TERM' in os
.environ
:
640 # Save the title on stack
641 self
._write
_string
('\033[22;0t', self
._screen
_file
)
643 def restore_console_title(self
):
644 if not self
.params
.get('consoletitle', False):
646 if self
.params
.get('simulate', False):
648 if compat_os_name
!= 'nt' and 'TERM' in os
.environ
:
649 # Restore the title from stack
650 self
._write
_string
('\033[23;0t', self
._screen
_file
)
653 self
.save_console_title()
656 def __exit__(self
, *args
):
657 self
.restore_console_title()
659 if self
.params
.get('cookiefile') is not None:
660 self
.cookiejar
.save(ignore_discard
=True, ignore_expires
=True)
662 def trouble(self
, message
=None, tb
=None):
663 """Determine action to take when a download problem appears.
665 Depending on if the downloader has been configured to ignore
666 download errors or not, this method may throw an exception or
667 not when errors are found, after printing the message.
669 tb, if given, is additional traceback information.
671 if message
is not None:
672 self
.to_stderr(message
)
673 if self
.params
.get('verbose'):
675 if sys
.exc_info()[0]: # if .trouble has been called from an except block
677 if hasattr(sys
.exc_info()[1], 'exc_info') and sys
.exc_info()[1].exc_info
[0]:
678 tb
+= ''.join(traceback
.format_exception(*sys
.exc_info()[1].exc_info
))
679 tb
+= encode_compat_str(traceback
.format_exc())
681 tb_data
= traceback
.format_list(traceback
.extract_stack())
682 tb
= ''.join(tb_data
)
684 if not self
.params
.get('ignoreerrors', False):
685 if sys
.exc_info()[0] and hasattr(sys
.exc_info()[1], 'exc_info') and sys
.exc_info()[1].exc_info
[0]:
686 exc_info
= sys
.exc_info()[1].exc_info
688 exc_info
= sys
.exc_info()
689 raise DownloadError(message
, exc_info
)
690 self
._download
_retcode
= 1
692 def report_warning(self
, message
):
694 Print the message to stderr, it will be prefixed with 'WARNING:'
695 If stderr is a tty file the 'WARNING:' will be colored
697 if self
.params
.get('logger') is not None:
698 self
.params
['logger'].warning(message
)
700 if self
.params
.get('no_warnings'):
702 if not self
.params
.get('no_color') and self
._err
_file
.isatty() and compat_os_name
!= 'nt':
703 _msg_header
= '\033[0;33mWARNING:\033[0m'
705 _msg_header
= 'WARNING:'
706 warning_message
= '%s %s' % (_msg_header
, message
)
707 self
.to_stderr(warning_message
)
709 def report_error(self
, message
, tb
=None):
711 Do the same as trouble, but prefixes the message with 'ERROR:', colored
712 in red if stderr is a tty file.
714 if not self
.params
.get('no_color') and self
._err
_file
.isatty() and compat_os_name
!= 'nt':
715 _msg_header
= '\033[0;31mERROR:\033[0m'
717 _msg_header
= 'ERROR:'
718 error_message
= '%s %s' % (_msg_header
, message
)
719 self
.trouble(error_message
, tb
)
721 def report_file_already_downloaded(self
, file_name
):
722 """Report file has already been fully downloaded."""
724 self
.to_screen('[download] %s has already been downloaded' % file_name
)
725 except UnicodeEncodeError:
726 self
.to_screen('[download] The file has already been downloaded')
728 def report_file_delete(self
, file_name
):
729 """Report that existing file will be deleted."""
731 self
.to_screen('Deleting already existent file %s' % file_name
)
732 except UnicodeEncodeError:
733 self
.to_screen('Deleting already existent file')
735 def prepare_filename(self
, info_dict
, warn
=False):
736 """Generate the output filename."""
738 template_dict
= dict(info_dict
)
740 template_dict
['duration_string'] = ( # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
741 formatSeconds(info_dict
['duration'], '-')
742 if info_dict
.get('duration', None) is not None
745 template_dict
['epoch'] = int(time
.time())
746 autonumber_size
= self
.params
.get('autonumber_size')
747 if autonumber_size
is None:
749 template_dict
['autonumber'] = self
.params
.get('autonumber_start', 1) - 1 + self
._num
_downloads
750 if template_dict
.get('resolution') is None:
751 if template_dict
.get('width') and template_dict
.get('height'):
752 template_dict
['resolution'] = '%dx%d' % (template_dict
['width'], template_dict
['height'])
753 elif template_dict
.get('height'):
754 template_dict
['resolution'] = '%sp' % template_dict
['height']
755 elif template_dict
.get('width'):
756 template_dict
['resolution'] = '%dx?' % template_dict
['width']
758 sanitize
= lambda k
, v
: sanitize_filename(
760 restricted
=self
.params
.get('restrictfilenames'),
761 is_id
=(k
== 'id' or k
.endswith('_id')))
762 template_dict
= dict((k
, v
if isinstance(v
, compat_numeric_types
) else sanitize(k
, v
))
763 for k
, v
in template_dict
.items()
764 if v
is not None and not isinstance(v
, (list, tuple, dict)))
765 na
= self
.params
.get('outtmpl_na_placeholder', 'NA')
766 template_dict
= collections
.defaultdict(lambda: na
, template_dict
)
768 outtmpl
= self
.params
.get('outtmpl', DEFAULT_OUTTMPL
)
770 # For fields playlist_index and autonumber convert all occurrences
771 # of %(field)s to %(field)0Nd for backward compatibility
772 field_size_compat_map
= {
773 'playlist_index': len(str(template_dict
['n_entries'])),
774 'autonumber': autonumber_size
,
776 FIELD_SIZE_COMPAT_RE
= r
'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
777 mobj
= re
.search(FIELD_SIZE_COMPAT_RE
, outtmpl
)
780 FIELD_SIZE_COMPAT_RE
,
781 r
'%%(\1)0%dd' % field_size_compat_map
[mobj
.group('field')],
784 # As of [1] format syntax is:
785 # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
786 # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
790 \({0}\) # mapping key
791 (?:[#0\-+ ]+)? # conversion flags (optional)
792 (?:\d+)? # minimum field width (optional)
793 (?:\.\d+)? # precision (optional)
794 [hlL]? # length modifier (optional)
795 (?P<type>[diouxXeEfFgGcrs%]) # conversion type
798 numeric_fields
= list(self
._NUMERIC
_FIELDS
)
801 FORMAT_DATE_RE
= FORMAT_RE
.format(r
'(?P<key>(?P<field>\w+)>(?P<format>.+?))')
802 for mobj
in re
.finditer(FORMAT_DATE_RE
, outtmpl
):
803 conv_type
, field
, frmt
, key
= mobj
.group('type', 'field', 'format', 'key')
804 if key
in template_dict
:
806 value
= strftime_or_none(template_dict
.get(field
), frmt
, na
)
807 if conv_type
in 'crs': # string
808 value
= sanitize(field
, value
)
810 numeric_fields
.append(key
)
811 value
= float_or_none(value
, default
=None)
812 if value
is not None:
813 template_dict
[key
] = value
815 # Missing numeric fields used together with integer presentation types
816 # in format specification will break the argument substitution since
817 # string NA placeholder is returned for missing fields. We will patch
818 # output template for missing fields to meet string presentation type.
819 for numeric_field
in numeric_fields
:
820 if numeric_field
not in template_dict
:
822 FORMAT_RE
.format(re
.escape(numeric_field
)),
823 r
'%({0})s'.format(numeric_field
), outtmpl
)
825 # expand_path translates '%%' into '%' and '$$' into '$'
826 # correspondingly that is not what we want since we need to keep
827 # '%%' intact for template dict substitution step. Working around
828 # with boundary-alike separator hack.
829 sep
= ''.join([random
.choice(ascii_letters
) for _
in range(32)])
830 outtmpl
= outtmpl
.replace('%%', '%{0}%'.format(sep
)).replace('$$', '${0}$'.format(sep
))
832 # outtmpl should be expand_path'ed before template dict substitution
833 # because meta fields may contain env variables we don't want to
834 # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
835 # title "Hello $PATH", we don't want `$PATH` to be expanded.
836 filename
= expand_path(outtmpl
).replace(sep
, '') % template_dict
838 # https://github.com/blackjack4494/youtube-dlc/issues/85
839 trim_file_name
= self
.params
.get('trim_file_name', False)
841 fn_groups
= filename
.rsplit('.')
844 if len(fn_groups
) > 2:
845 sub_ext
= fn_groups
[-2]
846 filename
= '.'.join(filter(None, [fn_groups
[0][:trim_file_name
], sub_ext
, ext
]))
848 # Temporary fix for #4787
849 # 'Treat' all problem characters by passing filename through preferredencoding
850 # to workaround encoding issues with subprocess on python2 @ Windows
851 if sys
.version_info
< (3, 0) and sys
.platform
== 'win32':
852 filename
= encodeFilename(filename
, True).decode(preferredencoding())
853 filename
= sanitize_path(filename
)
855 if warn
and not self
.__prepare
_filename
_warned
:
856 if not self
.params
.get('paths'):
858 elif filename
== '-':
859 self
.report_warning('--paths is ignored when an outputting to stdout')
860 elif os
.path
.isabs(filename
):
861 self
.report_warning('--paths is ignored since an absolute path is given in output template')
862 self
.__prepare
_filename
_warned
= True
865 except ValueError as err
:
866 self
.report_error('Error in output template: ' + str(err
) + ' (encoding: ' + repr(preferredencoding()) + ')')
869 def prepare_filepath(self
, filename
, dir_type
=''):
872 paths
= self
.params
.get('paths', {})
873 assert isinstance(paths
, dict)
874 homepath
= expand_path(paths
.get('home', '').strip())
875 assert isinstance(homepath
, compat_str
)
876 subdir
= expand_path(paths
.get(dir_type
, '').strip()) if dir_type
else ''
877 assert isinstance(subdir
, compat_str
)
878 return sanitize_path(os
.path
.join(homepath
, subdir
, filename
))
def _match_entry(self, info_dict, incomplete):
    """ Returns None if the file should be downloaded """
    # NOTE(review): several source lines are missing from this excerpt,
    # including the nested `check_filter` definition that the call below
    # refers to and the `if matchtitle:` / `if rejecttitle:` /
    # `if date is not None:` guards — reconstruct from the original file
    # before editing; the surviving fragments are kept verbatim.
    video_title = info_dict.get('title', info_dict.get('id', 'video'))
    if 'title' in info_dict:
        # This can happen when we're just evaluating the playlist
        title = info_dict['title']
        # --match-title: skip unless the title matches the pattern
        matchtitle = self.params.get('matchtitle', False)
        if not re.search(matchtitle, title, re.IGNORECASE):
            return '"' + title + '" title did not match pattern "' + matchtitle + '"'
        # --reject-title: skip when the title matches the pattern
        rejecttitle = self.params.get('rejecttitle', False)
        if re.search(rejecttitle, title, re.IGNORECASE):
            return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
    # --date / --datebefore / --dateafter filtering
    date = info_dict.get('upload_date')
    dateRange = self.params.get('daterange', DateRange())
    if date not in dateRange:
        return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
    # --min-views / --max-views filtering (only when views were extracted)
    view_count = info_dict.get('view_count')
    if view_count is not None:
        min_views = self.params.get('min_views')
        if min_views is not None and view_count < min_views:
            return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
        max_views = self.params.get('max_views')
        if max_views is not None and view_count > max_views:
            return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
    # --age-limit filtering
    if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
        return 'Skipping "%s" because it is age restricted' % video_title
    # --download-archive filtering
    if self.in_download_archive(info_dict):
        return '%s has already been recorded in archive' % video_title
    # --match-filter: user-supplied predicate over the info dict
    match_filter = self.params.get('match_filter')
    if match_filter is not None:
        ret = match_filter(info_dict)
    # `check_filter` is defined in source lines missing from this excerpt
    reason = check_filter()
    if reason is not None:
        self.to_screen('[download] ' + reason)
        # --break-on-existing / --break-on-reject abort the whole run
        if reason.endswith('has already been recorded in the archive') and self.params.get('break_on_existing', False):
            raise ExistingVideoReached()
        elif self.params.get('break_on_reject', False):
            raise RejectedVideoReached()
def add_extra_info(info_dict, extra_info):
    """Copy each key of *extra_info* into *info_dict* unless already set."""
    for extra_key in extra_info:
        info_dict.setdefault(extra_key, extra_info[extra_key])
def extract_info(self, url, download=True, ie_key=None, info_dict=None, extra_info={},
                 process=True, force_generic_extractor=False):
    """
    Returns a list with a dictionary for each video we find.
    If 'download', also downloads the videos.
    extra_info is a dict containing the extra values to add to each result
    """
    # NOTE(review): this excerpt is missing many source lines (the extractor
    # iteration loop, `try:` openers, `else`/`continue` branches and a
    # closing parenthesis of the to_screen call); the surviving fragments
    # below are kept verbatim and are not runnable on their own.
    if not ie_key and force_generic_extractor:
        # when an extractor key is known, only that extractor is consulted
        ies = [self.get_info_extractor(ie_key)]
        if not ie.suitable(url):
        ie = self.get_info_extractor(ie_key)
        # extractors flagged as broken still run, but with a warning
        self.report_warning('The program functionality for this site has been marked as broken, '
                            'and will probably not work.')
        # cheap archive pre-check: try to get the video id without extracting
        temp_id = str_or_none(
            ie.extract_id(url) if callable(getattr(ie, 'extract_id', None))
            else ie._match_id(url))
    except (AssertionError, IndexError, AttributeError):
        if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
            self.to_screen("[%s] %s: has already been recorded in archive" % (
        return self.__extract_info(url, ie, download, extra_info, process, info_dict)
    # reached when no registered extractor declared itself suitable
    self.report_error('no suitable InfoExtractor for URL %s' % url)
def __handle_extraction_exceptions(func):
    # Decorator: wraps an extraction method so that expected extraction
    # failures are reported via report_error() instead of propagating,
    # while control-flow exceptions (MaxDownloadsReached & co.) pass through.
    # NOTE(review): the `try:` opener, the `raise` under the control-flow
    # branch, the `else: raise` of the generic handler and the final
    # `return wrapper` are in source lines missing from this excerpt.
    def wrapper(self, *args, **kwargs):
            return func(self, *args, **kwargs)
        except GeoRestrictedError as e:
            # augment the message with the allowed countries, if known
            msg += '\nThis video is available in %s.' % ', '.join(
                map(ISO3166Utils.short2full, e.countries))
            msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
            self.report_error(msg)
        except ExtractorError as e:  # An error we somewhat expected
            self.report_error(compat_str(e), e.format_traceback())
        except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached):
        except Exception as e:
            # --ignore-errors: report and continue instead of crashing
            if self.params.get('ignoreerrors', False):
                self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
@__handle_extraction_exceptions
def __extract_info(self, url, ie, download, extra_info, process, info_dict):
    # Run the chosen extractor on `url`, normalise legacy list results and
    # hand the result to process_ie_result().
    # NOTE(review): source lines are missing from this excerpt (the early
    # `return`, the dict-literal opener wrapping the compat_list entries,
    # and the `if process:` guard); fragments are kept verbatim.
    ie_result = ie.extract(url)
    if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
    if isinstance(ie_result, list):
        # Backwards compatibility: old IE result format
            '_type': 'compat_list',
            'entries': ie_result,
    # propagate id/title from the parent info dict when the extractor
    # did not provide them
    if info_dict.get('id'):
        ie_result['id'] = info_dict['id']
    if info_dict.get('title'):
        ie_result['title'] = info_dict['title']
    self.add_default_extra_info(ie_result, ie, url)
    return self.process_ie_result(ie_result, download, extra_info)
def add_default_extra_info(self, ie_result, ie, url):
    # Attach standard extractor bookkeeping fields to a freshly extracted
    # result (only where they are not already present — see add_extra_info).
    # NOTE(review): the closing `})` of this call (and, per upstream, a
    # 'webpage_url' entry) are in source lines missing from this excerpt.
    self.add_extra_info(ie_result, {
        'extractor': ie.IE_NAME,
        'webpage_url_basename': url_basename(url),
        'extractor_key': ie.ie_key(),
def process_ie_result(self, ie_result, download=True, extra_info={}):
    """
    Take the result of the ie(may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    Returns the resolved ie_result.
    """
    # NOTE(review): this excerpt is missing a number of source lines (the
    # early return of the flat-extraction branch, the `if not info:` guard
    # of url_transparent, the try/finally around playlist recursion, and
    # the `_fixup` helper of the compat_list branch); the surviving
    # fragments are kept verbatim and are not runnable on their own.
    result_type = ie_result.get('_type', 'video')

    if result_type in ('url', 'url_transparent'):
        ie_result['url'] = sanitize_url(ie_result['url'])
        extract_flat = self.params.get('extract_flat', False)
        # --flat-playlist: record/print the entry without descending into it
        if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
                or extract_flat is True):
            self.__forced_printings(
                self.prepare_filepath(self.prepare_filename(ie_result)),

    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        return self.process_video_result(ie_result, download=download)
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(ie_result['url'],
                                 download, info_dict=ie_result,
                                 ie_key=ie_result.get('ie_key'),
                                 extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        # extract_info may return None when ignoreerrors is enabled and
        # extraction failed with an error, don't crash and return early
        # non-None values from the embedding page win over the inner result
        force_properties = dict(
            (k, v) for k, v in ie_result.items() if v is not None)
        for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
            if f in force_properties:
                del force_properties[f]
        new_result = info.copy()
        new_result.update(force_properties)

        # Extracted info may not be a video result (i.e.
        # info.get('_type', 'video') != video) but rather an url or
        # url_transparent. In such cases outer metadata (from ie_result)
        # should be propagated to inner one (info). For this to happen
        # _type of info should be overridden with url_transparent. This
        # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
        if new_result.get('_type') == 'url':
            new_result['_type'] = 'url_transparent'

        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    elif result_type in ('playlist', 'multi_video'):
        # Protect from infinite recursion due to recursively nested playlists
        # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
        webpage_url = ie_result['webpage_url']
        if webpage_url in self._playlist_urls:
            '[download] Skipping already downloaded playlist: %s'
            % ie_result.get('title') or ie_result.get('id'))
        # NOTE(review): the try/finally wrapping the recursion is in missing
        # source lines — the decrement below runs on the finally path upstream
        self._playlist_level += 1
        self._playlist_urls.add(webpage_url)
        return self.__process_playlist(ie_result, download)
        self._playlist_level -= 1
        if not self._playlist_level:
            self._playlist_urls.clear()
    elif result_type == 'compat_list':
        self.report_warning(
            'Extractor %s returned a compat_list result. '
            'It needs to be updated.' % ie_result.get('extractor'))
        self.add_extra_info(
            'extractor': ie_result['extractor'],
            'webpage_url': ie_result['webpage_url'],
            'webpage_url_basename': url_basename(ie_result['webpage_url']),
            'extractor_key': ie_result['extractor_key'],
        # resolve every legacy entry recursively
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
    raise Exception('Invalid result type: %s' % result_type)
def __process_playlist(self, ie_result, download):
    # Resolve a playlist/multi_video result: optionally write playlist-level
    # metadata files, slice the entry list per --playlist-start/-end/-items,
    # then process each entry.
    # NOTE(review): many source lines are missing from this excerpt (dict
    # openers, `else:`/`return`/`break`/`continue` lines and getslice
    # arguments); the surviving fragments are kept verbatim and annotated.
    # We process each entry in the playlist
    playlist = ie_result.get('title') or ie_result.get('id')
    self.to_screen('[download] Downloading playlist: %s' % playlist)

    if self.params.get('allow_playlist_files', True):
        # copy used only for filename preparation of playlist-level files;
        # the opening `ie_copy = {` is in a missing source line
        'playlist': playlist,
        'playlist_id': ie_result.get('id'),
        'playlist_title': ie_result.get('title'),
        'playlist_uploader': ie_result.get('uploader'),
        'playlist_uploader_id': ie_result.get('uploader_id'),
        ie_copy.update(dict(ie_result))

        def ensure_dir_exists(path):
            # create parent dirs, routing failures through report_error
            return make_dir(path, self.report_error)

        if self.params.get('writeinfojson', False):
            infofn = replace_extension(
                self.prepare_filepath(self.prepare_filename(ie_copy), 'infojson'),
                'info.json', ie_result.get('ext'))
            if not ensure_dir_exists(encodeFilename(infofn)):
            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
                self.to_screen('[info] Playlist metadata is already present')
            playlist_info = dict(ie_result)
            # playlist_info['entries'] = list(playlist_info['entries']) # Entries is a generator which shouldnot be resolved here
            del playlist_info['entries']
            self.to_screen('[info] Writing playlist metadata as JSON to: ' + infofn)
            write_json_file(self.filter_requested_info(playlist_info), infofn)
            except (OSError, IOError):
                self.report_error('Cannot write playlist metadata to JSON file ' + infofn)

        if self.params.get('writedescription', False):
            descfn = replace_extension(
                self.prepare_filepath(self.prepare_filename(ie_copy), 'description'),
                'description', ie_result.get('ext'))
            if not ensure_dir_exists(encodeFilename(descfn)):
            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
                self.to_screen('[info] Playlist description is already present')
            elif ie_result.get('description') is None:
                self.report_warning('There\'s no playlist description to write.')
            self.to_screen('[info] Writing playlist description to: ' + descfn)
            with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                descfile.write(ie_result['description'])
            except (OSError, IOError):
                self.report_error('Cannot write playlist description file ' + descfn)

    playlist_results = []

    # --playlist-start is 1-based on the CLI, 0-based here
    playliststart = self.params.get('playliststart', 1) - 1
    playlistend = self.params.get('playlistend')
    # For backwards compatibility, interpret -1 as whole list
    if playlistend == -1:

    playlistitems_str = self.params.get('playlist_items')
    playlistitems = None
    if playlistitems_str is not None:
        def iter_playlistitems(format):
            # parse "1-3,7,10-12" style specs into individual indices
            for string_segment in format.split(','):
                if '-' in string_segment:
                    start, end = string_segment.split('-')
                    for item in range(int(start), int(end) + 1):
                    yield int(string_segment)
        playlistitems = orderedSet(iter_playlistitems(playlistitems_str))

    ie_entries = ie_result['entries']

    def make_playlistitems_entries(list_ie_entries):
        # select the 1-based --playlist-items indices, tolerating
        # negative (from-the-end) indices
        num_entries = len(list_ie_entries)
            list_ie_entries[i - 1] for i in playlistitems
            if -num_entries <= i - 1 < num_entries]

    def report_download(num_entries):
            '[%s] playlist %s: Downloading %d videos' %
            (ie_result['extractor'], playlist, num_entries))

    # Three shapes of 'entries': plain list, PagedList, arbitrary iterable
    if isinstance(ie_entries, list):
        n_all_entries = len(ie_entries)
        entries = make_playlistitems_entries(ie_entries)
        entries = ie_entries[playliststart:playlistend]
        n_entries = len(entries)
            '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
            (ie_result['extractor'], playlist, n_all_entries, n_entries))
    elif isinstance(ie_entries, PagedList):
        for item in playlistitems:
            entries.extend(ie_entries.getslice(
        entries = ie_entries.getslice(
            playliststart, playlistend)
        n_entries = len(entries)
        report_download(n_entries)
        # generic iterable: materialise only the needed slice
        entries = make_playlistitems_entries(list(itertools.islice(
            ie_entries, 0, max(playlistitems))))
        entries = list(itertools.islice(
            ie_entries, playliststart, playlistend))
        n_entries = len(entries)
        report_download(n_entries)

    if self.params.get('playlistreverse', False):
        entries = entries[::-1]

    if self.params.get('playlistrandom', False):
        random.shuffle(entries)

    x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

    for i, entry in enumerate(entries, 1):
        self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
        # This __x_forwarded_for_ip thing is a bit ugly but requires
        entry['__x_forwarded_for_ip'] = x_forwarded_for
        # per-entry extra info; the opening `extra = {` is in a missing line
        'n_entries': n_entries,
        'playlist': playlist,
        'playlist_id': ie_result.get('id'),
        'playlist_title': ie_result.get('title'),
        'playlist_uploader': ie_result.get('uploader'),
        'playlist_uploader_id': ie_result.get('uploader_id'),
        'playlist_index': playlistitems[i - 1] if playlistitems else i + playliststart,
        'extractor': ie_result['extractor'],
        'webpage_url': ie_result['webpage_url'],
        'webpage_url_basename': url_basename(ie_result['webpage_url']),
        'extractor_key': ie_result['extractor_key'],
        # filters are re-checked per entry now that full metadata is known
        if self._match_entry(entry, incomplete=True) is not None:
        entry_result = self.__process_iterable_entry(entry, download, extra)
        # TODO: skip failed (empty) entries?
        playlist_results.append(entry_result)
    ie_result['entries'] = playlist_results
    self.to_screen('[download] Finished downloading playlist: %s' % playlist)
@__handle_extraction_exceptions
def __process_iterable_entry(self, entry, download, extra_info):
    """Resolve a single playlist entry.

    Extraction errors are reported (not propagated) by the
    __handle_extraction_exceptions decorator.
    """
    resolved = self.process_ie_result(entry, download=download, extra_info=extra_info)
    return resolved
def _build_format_filter(self, filter_spec):
    " Returns a function to filter the formats according to the filter_spec "
    # NOTE(review): the numeric OPERATORS dict, several `if m:`/`try:`
    # branches, the `_filter` def line and the final `return _filter` are in
    # source lines missing from this excerpt; fragments are kept verbatim.
    # First try a numeric comparison spec, e.g. "height<=720" or
    # "filesize>100M" (value may carry a size suffix).
    operator_rex = re.compile(r'''(?x)\s*
        (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
        \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
        ''' % '|'.join(map(re.escape, OPERATORS.keys())))
    m = operator_rex.search(filter_spec)
    comparison_value = int(m.group('value'))
    # not a plain integer: try parsing it as a file size ("100M", "1.2GiB")
    comparison_value = parse_filesize(m.group('value'))
    if comparison_value is None:
        comparison_value = parse_filesize(m.group('value') + 'B')
    if comparison_value is None:
        'Invalid value %r in format specification %r' % (
            m.group('value'), filter_spec))
    op = OPERATORS[m.group('op')]
    # String comparison spec, e.g. "ext=mp4" or "format_id!^=hls"
    # (the dict opener and '=' entry are in missing source lines)
    '^=': lambda attr, value: attr.startswith(value),
    '$=': lambda attr, value: attr.endswith(value),
    '*=': lambda attr, value: value in attr,
    str_operator_rex = re.compile(r'''(?x)
        \s*(?P<key>[a-zA-Z0-9._-]+)
        \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
        \s*(?P<value>[a-zA-Z0-9._-]+)
        ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
    m = str_operator_rex.search(filter_spec)
    comparison_value = m.group('value')
    str_op = STR_OPERATORS[m.group('op')]
    # '!' prefix negates the string operator
    if m.group('negation'):
        op = lambda attr, value: not str_op(attr, value)
    raise ValueError('Invalid filter specification %r' % filter_spec)
    # body of the generated _filter(f) predicate:
    actual_value = f.get(m.group('key'))
    if actual_value is None:
        # '?' suffix means "keep formats that lack this field"
        return m.group('none_inclusive')
    return op(actual_value, comparison_value)
def _default_format_spec(self, info_dict, download=True):
    # Choose the default --format spec depending on whether ffmpeg can merge
    # separate audio/video streams and whether output goes to stdout/live.
    # NOTE(review): the nested helper `def` lines (upstream: `can_merge`,
    # `prefer_best`) and the opening of the final return expression are in
    # source lines missing from this excerpt; fragments are kept verbatim.
        merger = FFmpegMergerPP(self)
        return merger.available and merger.can_merge()

        not self.params.get('simulate', False)
        or info_dict.get('is_live', False)
        or self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-'))

        'best/bestvideo+bestaudio'
        else 'bestvideo*+bestaudio/best'
        if not self.params.get('allow_multiple_audio_streams', False)
        else 'bestvideo+bestaudio/best')
def build_format_selector(self, format_spec):
    # Compile a --format specification string into a selector function that
    # maps a context dict ({'formats': [...], 'incomplete_formats': ...})
    # to the chosen format(s).
    # NOTE(review): many source lines are missing from this excerpt
    # (constants, `try:`/`else:`/`break`/`continue` lines, dict openers and
    # several closing brackets); the surviving fragments are kept verbatim
    # and are not runnable on their own.
    def syntax_error(note, start):
        # builds (does not raise) a SyntaxError pointing at column start[1];
        # the `message = (` opener is in a missing source line
        'Invalid format specification: '
        '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
        return SyntaxError(message)

    # selector node kinds (MERGE/SINGLE/GROUP constants are in missing lines)
    PICKFIRST = 'PICKFIRST'
    FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

    allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
                              'video': self.params.get('allow_multiple_video_streams', False)}

    def _parse_filter(tokens):
        # Collect tokens up to the matching ']' into one filter string
        # (the `filter_parts = []` initializer is in a missing line).
        for type, string, start, _, _ in tokens:
            if type == tokenize.OP and string == ']':
                return ''.join(filter_parts)
            filter_parts.append(string)

    def _remove_unused_ops(tokens):
        # Remove operators that we don't use and join them with the surrounding strings
        # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
        ALLOWED_OPS = ('/', '+', ',', '(', ')')
        last_string, last_start, last_end, last_line = None, None, None, None
        for type, string, start, end, line in tokens:
            if type == tokenize.OP and string == '[':
                yield tokenize.NAME, last_string, last_start, last_end, last_line
                yield type, string, start, end, line
                # everything inside brackets will be handled by _parse_filter
                for type, string, start, end, line in tokens:
                    yield type, string, start, end, line
                    if type == tokenize.OP and string == ']':
            elif type == tokenize.OP and string in ALLOWED_OPS:
                yield tokenize.NAME, last_string, last_start, last_end, last_line
                yield type, string, start, end, line
            elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                last_string = string
                last_string += string
        yield tokenize.NAME, last_string, last_start, last_end, last_line

    def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
        # Recursive-descent parse of the token stream into FormatSelector
        # trees (the `selectors = []` initializer and the per-operator
        # dispatch lines `if string == ...` are in missing source lines).
        current_selector = None
        for type, string, start, _, _ in tokens:
            # ENCODING is only defined in python 3.x
            if type == getattr(tokenize, 'ENCODING', None):
            elif type in [tokenize.NAME, tokenize.NUMBER]:
                current_selector = FormatSelector(SINGLE, string, [])
            elif type == tokenize.OP:
                if not inside_group:
                    # ')' will be handled by the parentheses group
                    tokens.restore_last_token()
                elif inside_merge and string in ['/', ',']:
                    tokens.restore_last_token()
                elif inside_choice and string == ',':
                    tokens.restore_last_token()
                if not current_selector:
                    raise syntax_error('"," must follow a format selector', start)
                selectors.append(current_selector)
                current_selector = None
                if not current_selector:
                    raise syntax_error('"/" must follow a format selector', start)
                first_choice = current_selector
                second_choice = _parse_format_selection(tokens, inside_choice=True)
                current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                if not current_selector:
                    current_selector = FormatSelector(SINGLE, 'best', [])
                format_filter = _parse_filter(tokens)
                current_selector.filters.append(format_filter)
                if current_selector:
                    raise syntax_error('Unexpected "("', start)
                group = _parse_format_selection(tokens, inside_group=True)
                current_selector = FormatSelector(GROUP, group, [])
                if not current_selector:
                    raise syntax_error('Unexpected "+"', start)
                selector_1 = current_selector
                selector_2 = _parse_format_selection(tokens, inside_merge=True)
                raise syntax_error('Expected a selector', start)
                current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
            elif type == tokenize.ENDMARKER:
        if current_selector:
            selectors.append(current_selector)

    def _build_selector_function(selector):
        # Turn a FormatSelector tree into a callable ctx -> formats.
        if isinstance(selector, list):  # ,
            fs = [_build_selector_function(s) for s in selector]

            def selector_function(ctx):
                for format in f(ctx):
            return selector_function
        elif selector.type == GROUP:  # ()
            selector_function = _build_selector_function(selector.selector)
        elif selector.type == PICKFIRST:  # /
            fs = [_build_selector_function(s) for s in selector.selector]

            def selector_function(ctx):
                picked_formats = list(f(ctx))
                return picked_formats
        elif selector.type == SINGLE:  # atom
            format_spec = selector.selector if selector.selector is not None else 'best'

            if format_spec == 'all':
                def selector_function(ctx):
                    formats = list(ctx['formats'])

            # best/worst [video|audio][*] specs
            format_fallback = False
            format_spec_obj = re.match(r'(best|worst|b|w)(video|audio|v|a)?(\*)?$', format_spec)
            if format_spec_obj is not None:
                format_idx = 0 if format_spec_obj.group(1)[0] == 'w' else -1
                format_type = format_spec_obj.group(2)[0] if format_spec_obj.group(2) else False
                not_format_type = 'v' if format_type == 'a' else 'a'
                format_modified = format_spec_obj.group(3) is not None

                format_fallback = not format_type and not format_modified  # for b, w
                filter_f = ((lambda f: f.get(format_type + 'codec') != 'none')
                            if format_type and format_modified  # bv*, ba*, wv*, wa*
                            else (lambda f: f.get(not_format_type + 'codec') == 'none')
                            if format_type  # bv, ba, wv, wa
                            else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                            if not format_modified  # b, w
                # otherwise the atom is an extension or a format_id
                filter_f = ((lambda f: f.get('ext') == format_spec)
                            if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']  # extension
                            else (lambda f: f.get('format_id') == format_spec))  # id

                def selector_function(ctx):
                    formats = list(ctx['formats'])
                    matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                    yield matches[format_idx]
                    elif format_fallback == 'force' or (format_fallback and ctx['incomplete_formats']):
                        # for extractors with incomplete formats (audio only (soundcloud)
                        # or video only (imgur)) best/worst will fallback to
                        # best/worst {video,audio}-only format
                        yield formats[format_idx]
        elif selector.type == MERGE:  # +
            def _merge(formats_pair):
                # merge one video+audio pair into a synthetic format dict
                # (the `formats_info = []` initializer, the output dict
                # opener and its closers are in missing source lines)
                format_1, format_2 = formats_pair

                formats_info.extend(format_1.get('requested_formats', (format_1,)))
                formats_info.extend(format_2.get('requested_formats', (format_2,)))

                if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
                    # keep only the first stream of each kind when multiple
                    # streams of that kind are not allowed
                    get_no_more = {"video": False, "audio": False}
                    for (i, fmt_info) in enumerate(formats_info):
                        for aud_vid in ["audio", "video"]:
                            if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
                                if get_no_more[aud_vid]:
                                get_no_more[aud_vid] = True

                if len(formats_info) == 1:
                    return formats_info[0]

                video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
                audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']

                the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
                the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None

                output_ext = self.params.get('merge_output_format')
                output_ext = the_only_video['ext']
                elif the_only_audio and not video_fmts:
                    output_ext = the_only_audio['ext']

                'requested_formats': formats_info,
                'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
                'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),

                'width': the_only_video.get('width'),
                'height': the_only_video.get('height'),
                'resolution': the_only_video.get('resolution'),
                'fps': the_only_video.get('fps'),
                'vcodec': the_only_video.get('vcodec'),
                'vbr': the_only_video.get('vbr'),
                'stretched_ratio': the_only_video.get('stretched_ratio'),

                'acodec': the_only_audio.get('acodec'),
                'abr': the_only_audio.get('abr'),

            selector_1, selector_2 = map(_build_selector_function, selector.selector)

            def selector_function(ctx):
                # deep-copy ctx so each side sees the unfiltered format list
                for pair in itertools.product(
                        selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):

        # wrap the selector with this node's [...] filters
        filters = [self._build_format_filter(f) for f in selector.filters]

        def final_selector(ctx):
            ctx_copy = copy.deepcopy(ctx)
            for _filter in filters:
                ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
            return selector_function(ctx_copy)
        return final_selector

    # tokenize the spec string (the `try:` opener is in a missing line)
    stream = io.BytesIO(format_spec.encode('utf-8'))
    tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
    except tokenize.TokenError:
        raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

    class TokenIterator(object):
        # iterator over the token list with one-token pushback support
        def __init__(self, tokens):
            self.tokens = tokens

        # fragment of __next__ (its def line and counter bookkeeping are in
        # missing source lines)
            if self.counter >= len(self.tokens):
                raise StopIteration()
            value = self.tokens[self.counter]

        def restore_last_token(self):

    parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
    return _build_selector_function(parsed_selector)
def _calc_headers(self, info_dict):
    # Build the final HTTP headers for a format: the global std_headers
    # overlaid with the extractor-supplied 'http_headers', plus the
    # cookiejar's Cookie value and the faked X-Forwarded-For IP (if any).
    # NOTE(review): the `if add_headers:` / `if cookies:` guards and the
    # final `return res` are in source lines missing from this excerpt.
    res = std_headers.copy()

    add_headers = info_dict.get('http_headers')
    res.update(add_headers)

    cookies = self._calc_cookies(info_dict)
    res['Cookie'] = cookies

    if 'X-Forwarded-For' not in res:
        x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
        if x_forwarded_for_ip:
            res['X-Forwarded-For'] = x_forwarded_for_ip
def _calc_cookies(self, info_dict):
    """Return the Cookie header value the cookiejar produces for this URL."""
    request = sanitized_Request(info_dict['url'])
    self.cookiejar.add_cookie_header(request)
    return request.get_header('Cookie')
1693 def process_video_result(self
, info_dict
, download
=True):
1694 assert info_dict
.get('_type', 'video') == 'video'
1696 if 'id' not in info_dict
:
1697 raise ExtractorError('Missing "id" field in extractor result')
1698 if 'title' not in info_dict
:
1699 raise ExtractorError('Missing "title" field in extractor result')
1701 def report_force_conversion(field
, field_not
, conversion
):
1702 self
.report_warning(
1703 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
1704 % (field
, field_not
, conversion
))
1706 def sanitize_string_field(info
, string_field
):
1707 field
= info
.get(string_field
)
1708 if field
is None or isinstance(field
, compat_str
):
1710 report_force_conversion(string_field
, 'a string', 'string')
1711 info
[string_field
] = compat_str(field
)
1713 def sanitize_numeric_fields(info
):
1714 for numeric_field
in self
._NUMERIC
_FIELDS
:
1715 field
= info
.get(numeric_field
)
1716 if field
is None or isinstance(field
, compat_numeric_types
):
1718 report_force_conversion(numeric_field
, 'numeric', 'int')
1719 info
[numeric_field
] = int_or_none(field
)
1721 sanitize_string_field(info_dict
, 'id')
1722 sanitize_numeric_fields(info_dict
)
1724 if 'playlist' not in info_dict
:
1725 # It isn't part of a playlist
1726 info_dict
['playlist'] = None
1727 info_dict
['playlist_index'] = None
1729 thumbnails
= info_dict
.get('thumbnails')
1730 if thumbnails
is None:
1731 thumbnail
= info_dict
.get('thumbnail')
1733 info_dict
['thumbnails'] = thumbnails
= [{'url': thumbnail}
]
1735 thumbnails
.sort(key
=lambda t
: (
1736 t
.get('preference') if t
.get('preference') is not None else -1,
1737 t
.get('width') if t
.get('width') is not None else -1,
1738 t
.get('height') if t
.get('height') is not None else -1,
1739 t
.get('id') if t
.get('id') is not None else '', t
.get('url')))
1740 for i
, t
in enumerate(thumbnails
):
1741 t
['url'] = sanitize_url(t
['url'])
1742 if t
.get('width') and t
.get('height'):
1743 t
['resolution'] = '%dx%d' % (t
['width'], t
['height'])
1744 if t
.get('id') is None:
1747 if self
.params
.get('list_thumbnails'):
1748 self
.list_thumbnails(info_dict
)
1751 thumbnail
= info_dict
.get('thumbnail')
1753 info_dict
['thumbnail'] = sanitize_url(thumbnail
)
1755 info_dict
['thumbnail'] = thumbnails
[-1]['url']
1757 if 'display_id' not in info_dict
and 'id' in info_dict
:
1758 info_dict
['display_id'] = info_dict
['id']
1760 if info_dict
.get('upload_date') is None and info_dict
.get('timestamp') is not None:
1761 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
1762 # see http://bugs.python.org/issue1646728)
1764 upload_date
= datetime
.datetime
.utcfromtimestamp(info_dict
['timestamp'])
1765 info_dict
['upload_date'] = upload_date
.strftime('%Y%m%d')
1766 except (ValueError, OverflowError, OSError):
1769 # Auto generate title fields corresponding to the *_number fields when missing
1770 # in order to always have clean titles. This is very common for TV series.
1771 for field
in ('chapter', 'season', 'episode'):
1772 if info_dict
.get('%s_number' % field
) is not None and not info_dict
.get(field
):
1773 info_dict
[field
] = '%s %d' % (field
.capitalize(), info_dict
['%s_number' % field
])
1775 for cc_kind
in ('subtitles', 'automatic_captions'):
1776 cc
= info_dict
.get(cc_kind
)
1778 for _
, subtitle
in cc
.items():
1779 for subtitle_format
in subtitle
:
1780 if subtitle_format
.get('url'):
1781 subtitle_format
['url'] = sanitize_url(subtitle_format
['url'])
1782 if subtitle_format
.get('ext') is None:
1783 subtitle_format
['ext'] = determine_ext(subtitle_format
['url']).lower()
1785 automatic_captions
= info_dict
.get('automatic_captions')
1786 subtitles
= info_dict
.get('subtitles')
1788 if self
.params
.get('listsubtitles', False):
1789 if 'automatic_captions' in info_dict
:
1790 self
.list_subtitles(
1791 info_dict
['id'], automatic_captions
, 'automatic captions')
1792 self
.list_subtitles(info_dict
['id'], subtitles
, 'subtitles')
1795 info_dict
['requested_subtitles'] = self
.process_subtitles(
1796 info_dict
['id'], subtitles
, automatic_captions
)
1798 # We now pick which formats have to be downloaded
1799 if info_dict
.get('formats') is None:
1800 # There's only one format available
1801 formats
= [info_dict
]
1803 formats
= info_dict
['formats']
1806 raise ExtractorError('No video formats found!')
1808 def is_wellformed(f
):
1811 self
.report_warning(
1812 '"url" field is missing or empty - skipping format, '
1813 'there is an error in extractor')
1815 if isinstance(url
, bytes):
1816 sanitize_string_field(f
, 'url')
1819 # Filter out malformed formats for better extraction robustness
1820 formats
= list(filter(is_wellformed
, formats
))
1824 # We check that all the formats have the format and format_id fields
1825 for i
, format
in enumerate(formats
):
1826 sanitize_string_field(format
, 'format_id')
1827 sanitize_numeric_fields(format
)
1828 format
['url'] = sanitize_url(format
['url'])
1829 if not format
.get('format_id'):
1830 format
['format_id'] = compat_str(i
)
1832 # Sanitize format_id from characters used in format selector expression
1833 format
['format_id'] = re
.sub(r
'[\s,/+\[\]()]', '_', format
['format_id'])
1834 format_id
= format
['format_id']
1835 if format_id
not in formats_dict
:
1836 formats_dict
[format_id
] = []
1837 formats_dict
[format_id
].append(format
)
1839 # Make sure all formats have unique format_id
1840 for format_id
, ambiguous_formats
in formats_dict
.items():
1841 if len(ambiguous_formats
) > 1:
1842 for i
, format
in enumerate(ambiguous_formats
):
1843 format
['format_id'] = '%s-%d' % (format_id
, i
)
1845 for i
, format
in enumerate(formats
):
1846 if format
.get('format') is None:
1847 format
['format'] = '{id} - {res}{note}'.format(
1848 id=format
['format_id'],
1849 res
=self
.format_resolution(format
),
1850 note
=' ({0})'.format(format
['format_note']) if format
.get('format_note') is not None else '',
1852 # Automatically determine file extension if missing
1853 if format
.get('ext') is None:
1854 format
['ext'] = determine_ext(format
['url']).lower()
1855 # Automatically determine protocol if missing (useful for format
1856 # selection purposes)
1857 if format
.get('protocol') is None:
1858 format
['protocol'] = determine_protocol(format
)
1859 # Add HTTP headers, so that external programs can use them from the
1861 full_format_info
= info_dict
.copy()
1862 full_format_info
.update(format
)
1863 format
['http_headers'] = self
._calc
_headers
(full_format_info
)
1864 # Remove private housekeeping stuff
1865 if '__x_forwarded_for_ip' in info_dict
:
1866 del info_dict
['__x_forwarded_for_ip']
1868 # TODO Central sorting goes here
1870 if formats
[0] is not info_dict
:
1871 # only set the 'formats' fields if the original info_dict list them
1872 # otherwise we end up with a circular reference, the first (and unique)
1873 # element in the 'formats' field in info_dict is info_dict itself,
1874 # which can't be exported to json
1875 info_dict
['formats'] = formats
1876 if self
.params
.get('listformats'):
1877 self
.list_formats(info_dict
)
1880 req_format
= self
.params
.get('format')
1881 if req_format
is None:
1882 req_format
= self
._default
_format
_spec
(info_dict
, download
=download
)
1883 if self
.params
.get('verbose'):
1884 self
.to_screen('[debug] Default format spec: %s' % req_format
)
1886 format_selector
= self
.build_format_selector(req_format
)
1888 # While in format selection we may need to have an access to the original
1889 # format set in order to calculate some metrics or do some processing.
1890 # For now we need to be able to guess whether original formats provided
1891 # by extractor are incomplete or not (i.e. whether extractor provides only
1892 # video-only or audio-only formats) for proper formats selection for
1893 # extractors with such incomplete formats (see
1894 # https://github.com/ytdl-org/youtube-dl/pull/5556).
1895 # Since formats may be filtered during format selection and may not match
1896 # the original formats the results may be incorrect. Thus original formats
1897 # or pre-calculated metrics should be passed to format selection routines
1899 # We will pass a context object containing all necessary additional data
1900 # instead of just formats.
1901 # This fixes incorrect format selection issue (see
1902 # https://github.com/ytdl-org/youtube-dl/issues/10083).
1903 incomplete_formats
= (
1904 # All formats are video-only or
1905 all(f
.get('vcodec') != 'none' and f
.get('acodec') == 'none' for f
in formats
)
1906 # all formats are audio-only
1907 or all(f
.get('vcodec') == 'none' and f
.get('acodec') != 'none' for f
in formats
))
1911 'incomplete_formats': incomplete_formats
,
1914 formats_to_download
= list(format_selector(ctx
))
1915 if not formats_to_download
:
1916 raise ExtractorError('requested format not available',
1920 self
.to_screen('[info] Downloading format(s) %s' % ", ".join([f
['format_id'] for f
in formats_to_download
]))
1921 if len(formats_to_download
) > 1:
1922 self
.to_screen('[info] %s: downloading video in %s formats' % (info_dict
['id'], len(formats_to_download
)))
1923 for format
in formats_to_download
:
1924 new_info
= dict(info_dict
)
1925 new_info
.update(format
)
1926 self
.process_info(new_info
)
1927 # We update the info dict with the best quality format (backwards compatibility)
1928 info_dict
.update(formats_to_download
[-1])
1931 def process_subtitles(self
, video_id
, normal_subtitles
, automatic_captions
):
1932 """Select the requested subtitles and their format"""
1934 if normal_subtitles
and self
.params
.get('writesubtitles'):
1935 available_subs
.update(normal_subtitles
)
1936 if automatic_captions
and self
.params
.get('writeautomaticsub'):
1937 for lang
, cap_info
in automatic_captions
.items():
1938 if lang
not in available_subs
:
1939 available_subs
[lang
] = cap_info
1941 if (not self
.params
.get('writesubtitles') and not
1942 self
.params
.get('writeautomaticsub') or not
1946 if self
.params
.get('allsubtitles', False):
1947 requested_langs
= available_subs
.keys()
1949 if self
.params
.get('subtitleslangs', False):
1950 requested_langs
= self
.params
.get('subtitleslangs')
1951 elif 'en' in available_subs
:
1952 requested_langs
= ['en']
1954 requested_langs
= [list(available_subs
.keys())[0]]
1956 formats_query
= self
.params
.get('subtitlesformat', 'best')
1957 formats_preference
= formats_query
.split('/') if formats_query
else []
1959 for lang
in requested_langs
:
1960 formats
= available_subs
.get(lang
)
1962 self
.report_warning('%s subtitles not available for %s' % (lang
, video_id
))
1964 for ext
in formats_preference
:
1968 matches
= list(filter(lambda f
: f
['ext'] == ext
, formats
))
1974 self
.report_warning(
1975 'No subtitle format found matching "%s" for language %s, '
1976 'using %s' % (formats_query
, lang
, f
['ext']))
1980 def __forced_printings(self
, info_dict
, filename
, incomplete
):
def print_mandatory(field):
    # Emit the field to stdout when its --force<field> flag is set; for
    # incomplete results the field must actually be present in info_dict.
    forced = self.params.get('force%s' % field, False)
    if forced and (not incomplete or info_dict.get(field) is not None):
        self.to_stdout(info_dict[field])
def print_optional(field):
    # Emit the field to stdout when its --force<field> flag is set and the
    # field is actually available in the info dict.
    forced = self.params.get('force%s' % field, False)
    if forced and info_dict.get(field) is not None:
        self.to_stdout(info_dict[field])
1991 print_mandatory('title')
1992 print_mandatory('id')
1993 if self
.params
.get('forceurl', False) and not incomplete
:
1994 if info_dict
.get('requested_formats') is not None:
1995 for f
in info_dict
['requested_formats']:
1996 self
.to_stdout(f
['url'] + f
.get('play_path', ''))
1998 # For RTMP URLs, also include the playpath
1999 self
.to_stdout(info_dict
['url'] + info_dict
.get('play_path', ''))
2000 print_optional('thumbnail')
2001 print_optional('description')
2002 if self
.params
.get('forcefilename', False) and filename
is not None:
2003 self
.to_stdout(filename
)
2004 if self
.params
.get('forceduration', False) and info_dict
.get('duration') is not None:
2005 self
.to_stdout(formatSeconds(info_dict
['duration']))
2006 print_mandatory('format')
2007 if self
.params
.get('forcejson', False):
2008 self
.to_stdout(json
.dumps(info_dict
))
2010 def process_info(self
, info_dict
):
2011 """Process a single resolved IE result."""
2013 assert info_dict
.get('_type', 'video') == 'video'
2015 info_dict
.setdefault('__postprocessors', [])
2017 max_downloads
= self
.params
.get('max_downloads')
2018 if max_downloads
is not None:
2019 if self
._num
_downloads
>= int(max_downloads
):
2020 raise MaxDownloadsReached()
2022 # TODO: backward compatibility, to be removed
2023 info_dict
['fulltitle'] = info_dict
['title']
2025 if 'format' not in info_dict
:
2026 info_dict
['format'] = info_dict
['ext']
2028 if self
._match
_entry
(info_dict
, incomplete
=False) is not None:
2031 self
._num
_downloads
+= 1
2033 info_dict
= self
.pre_process(info_dict
)
2035 filename
= self
.prepare_filename(info_dict
, warn
=True)
2036 info_dict
['_filename'] = full_filename
= self
.prepare_filepath(filename
)
2037 temp_filename
= self
.prepare_filepath(filename
, 'temp')
2041 self
.__forced
_printings
(info_dict
, full_filename
, incomplete
=False)
2043 if self
.params
.get('simulate', False):
2044 if self
.params
.get('force_write_download_archive', False):
2045 self
.record_download_archive(info_dict
)
2047 # Do nothing else if in simulate mode
2050 if filename
is None:
def ensure_dir_exists(path):
    # Thin wrapper: delegate directory creation for *path* to the project
    # helper make_dir, routing failures to self.report_error.
    # NOTE(review): callers treat a falsy return as "could not create" —
    # presumably make_dir returns False on failure; confirm its contract.
    return make_dir(path, self.report_error)
2056 if not ensure_dir_exists(encodeFilename(full_filename
)):
2058 if not ensure_dir_exists(encodeFilename(temp_filename
)):
2061 if self
.params
.get('writedescription', False):
2062 descfn
= replace_extension(
2063 self
.prepare_filepath(filename
, 'description'),
2064 'description', info_dict
.get('ext'))
2065 if not ensure_dir_exists(encodeFilename(descfn
)):
2067 if not self
.params
.get('overwrites', True) and os
.path
.exists(encodeFilename(descfn
)):
2068 self
.to_screen('[info] Video description is already present')
2069 elif info_dict
.get('description') is None:
2070 self
.report_warning('There\'s no description to write.')
2073 self
.to_screen('[info] Writing video description to: ' + descfn
)
2074 with io
.open(encodeFilename(descfn
), 'w', encoding
='utf-8') as descfile
:
2075 descfile
.write(info_dict
['description'])
2076 except (OSError, IOError):
2077 self
.report_error('Cannot write description file ' + descfn
)
2080 if self
.params
.get('writeannotations', False):
2081 annofn
= replace_extension(
2082 self
.prepare_filepath(filename
, 'annotation'),
2083 'annotations.xml', info_dict
.get('ext'))
2084 if not ensure_dir_exists(encodeFilename(annofn
)):
2086 if not self
.params
.get('overwrites', True) and os
.path
.exists(encodeFilename(annofn
)):
2087 self
.to_screen('[info] Video annotations are already present')
2088 elif not info_dict
.get('annotations'):
2089 self
.report_warning('There are no annotations to write.')
2092 self
.to_screen('[info] Writing video annotations to: ' + annofn
)
2093 with io
.open(encodeFilename(annofn
), 'w', encoding
='utf-8') as annofile
:
2094 annofile
.write(info_dict
['annotations'])
2095 except (KeyError, TypeError):
2096 self
.report_warning('There are no annotations to write.')
2097 except (OSError, IOError):
2098 self
.report_error('Cannot write annotations file: ' + annofn
)
def dl(name, info, subtitle=False):
    # Pick the downloader appropriate for *info*, wire up the registered
    # progress hooks, and run it on the target file *name*.
    downloader = get_suitable_downloader(info, self.params)(self, self.params)
    for hook in self._progress_hooks:
        downloader.add_progress_hook(hook)
    if self.params.get('verbose'):
        self.to_screen('[debug] Invoking downloader on %r' % info.get('url'))
    return downloader.download(name, info, subtitle)
2109 subtitles_are_requested
= any([self
.params
.get('writesubtitles', False),
2110 self
.params
.get('writeautomaticsub')])
2112 if subtitles_are_requested
and info_dict
.get('requested_subtitles'):
2113 # subtitles download errors are already managed as troubles in relevant IE
2114 # that way it will silently go on when used with unsupporting IE
2115 subtitles
= info_dict
['requested_subtitles']
2116 # ie = self.get_info_extractor(info_dict['extractor_key'])
2117 for sub_lang
, sub_info
in subtitles
.items():
2118 sub_format
= sub_info
['ext']
2119 sub_filename
= subtitles_filename(temp_filename
, sub_lang
, sub_format
, info_dict
.get('ext'))
2120 sub_filename_final
= subtitles_filename(
2121 self
.prepare_filepath(filename
, 'subtitle'),
2122 sub_lang
, sub_format
, info_dict
.get('ext'))
2123 if not self
.params
.get('overwrites', True) and os
.path
.exists(encodeFilename(sub_filename
)):
2124 self
.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang
, sub_format
))
2125 files_to_move
[sub_filename
] = sub_filename_final
2127 self
.to_screen('[info] Writing video subtitles to: ' + sub_filename
)
2128 if sub_info
.get('data') is not None:
2130 # Use newline='' to prevent conversion of newline characters
2131 # See https://github.com/ytdl-org/youtube-dl/issues/10268
2132 with io
.open(encodeFilename(sub_filename
), 'w', encoding
='utf-8', newline
='') as subfile
:
2133 subfile
.write(sub_info
['data'])
2134 files_to_move
[sub_filename
] = sub_filename_final
2135 except (OSError, IOError):
2136 self
.report_error('Cannot write subtitles file ' + sub_filename
)
2140 dl(sub_filename
, sub_info
, subtitle
=True)
2142 if self.params.get('sleep_interval_subtitles', False):
2143 dl(sub_filename, sub_info)
2145 sub_data = ie._request_webpage(
2146 sub_info['url'], info_dict['id'], note=False).read()
2147 with io.open(encodeFilename(sub_filename), 'wb') as subfile:
2148 subfile.write(sub_data)
2150 files_to_move
[sub_filename
] = sub_filename_final
2151 except (ExtractorError
, IOError, OSError, ValueError, compat_urllib_error
.URLError
, compat_http_client
.HTTPException
, socket
.error
) as err
:
2152 self
.report_warning('Unable to download subtitle for "%s": %s' %
2153 (sub_lang
, error_to_compat_str(err
)))
2156 if self
.params
.get('skip_download', False):
2157 if self
.params
.get('convertsubtitles', False):
2158 # subconv = FFmpegSubtitlesConvertorPP(self, format=self.params.get('convertsubtitles'))
2159 filename_real_ext
= os
.path
.splitext(filename
)[1][1:]
2161 os
.path
.splitext(full_filename
)[0]
2162 if filename_real_ext
== info_dict
['ext']
2164 afilename
= '%s.%s' % (filename_wo_ext
, self
.params
.get('convertsubtitles'))
2165 # if subconv.available:
2166 # info_dict['__postprocessors'].append(subconv)
2167 if os
.path
.exists(encodeFilename(afilename
)):
2169 '[download] %s has already been downloaded and '
2170 'converted' % afilename
)
2173 self
.post_process(full_filename
, info_dict
, files_to_move
)
2174 except PostProcessingError
as err
:
2175 self
.report_error('Postprocessing: %s' % str(err
))
2178 if self
.params
.get('writeinfojson', False):
2179 infofn
= replace_extension(
2180 self
.prepare_filepath(filename
, 'infojson'),
2181 'info.json', info_dict
.get('ext'))
2182 if not ensure_dir_exists(encodeFilename(infofn
)):
2184 if not self
.params
.get('overwrites', True) and os
.path
.exists(encodeFilename(infofn
)):
2185 self
.to_screen('[info] Video metadata is already present')
2187 self
.to_screen('[info] Writing video metadata as JSON to: ' + infofn
)
2189 write_json_file(self
.filter_requested_info(info_dict
), infofn
)
2190 except (OSError, IOError):
2191 self
.report_error('Cannot write video metadata to JSON file ' + infofn
)
2193 info_dict
['__infojson_filepath'] = infofn
2195 thumbdir
= os
.path
.dirname(self
.prepare_filepath(filename
, 'thumbnail'))
2196 for thumbfn
in self
._write
_thumbnails
(info_dict
, temp_filename
):
2197 files_to_move
[thumbfn
] = os
.path
.join(thumbdir
, os
.path
.basename(thumbfn
))
2199 # Write internet shortcut files
2200 url_link
= webloc_link
= desktop_link
= False
2201 if self
.params
.get('writelink', False):
2202 if sys
.platform
== "darwin": # macOS.
2204 elif sys
.platform
.startswith("linux"):
2206 else: # if sys.platform in ['win32', 'cygwin']:
2208 if self
.params
.get('writeurllink', False):
2210 if self
.params
.get('writewebloclink', False):
2212 if self
.params
.get('writedesktoplink', False):
2215 if url_link
or webloc_link
or desktop_link
:
2216 if 'webpage_url' not in info_dict
:
2217 self
.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2219 ascii_url
= iri_to_uri(info_dict
['webpage_url'])
2221 def _write_link_file(extension
, template
, newline
, embed_filename
):
2222 linkfn
= replace_extension(full_filename
, extension
, info_dict
.get('ext'))
2223 if self
.params
.get('overwrites', True) and os
.path
.exists(encodeFilename(linkfn
)):
2224 self
.to_screen('[info] Internet shortcut is already present')
2227 self
.to_screen('[info] Writing internet shortcut to: ' + linkfn
)
2228 with io
.open(encodeFilename(to_high_limit_path(linkfn
)), 'w', encoding
='utf-8', newline
=newline
) as linkfile
:
2229 template_vars
= {'url': ascii_url}
2231 template_vars
['filename'] = linkfn
[:-(len(extension
) + 1)]
2232 linkfile
.write(template
% template_vars
)
2233 except (OSError, IOError):
2234 self
.report_error('Cannot write internet shortcut ' + linkfn
)
2239 if not _write_link_file('url', DOT_URL_LINK_TEMPLATE
, '\r\n', embed_filename
=False):
2242 if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE
, '\n', embed_filename
=False):
2245 if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE
, '\n', embed_filename
=True):
2249 must_record_download_archive
= False
2250 if not self
.params
.get('skip_download', False):
2253 def existing_file(*filepaths
):
2254 ext
= info_dict
.get('ext')
2255 final_ext
= self
.params
.get('final_ext', ext
)
2257 for file in orderedSet(filepaths
):
2258 if final_ext
!= ext
:
2259 converted
= replace_extension(file, final_ext
, ext
)
2260 if os
.path
.exists(encodeFilename(converted
)):
2261 existing_files
.append(converted
)
2262 if os
.path
.exists(encodeFilename(file)):
2263 existing_files
.append(file)
2265 if not existing_files
or self
.params
.get('overwrites', False):
2266 for file in orderedSet(existing_files
):
2267 self
.report_file_delete(file)
2268 os
.remove(encodeFilename(file))
2271 self
.report_file_already_downloaded(existing_files
[0])
2272 info_dict
['ext'] = os
.path
.splitext(existing_files
[0])[1][1:]
2273 return existing_files
[0]
2276 if info_dict
.get('requested_formats') is not None:
2278 merger
= FFmpegMergerPP(self
)
2279 if not merger
.available
:
2281 self
.report_warning('You have requested multiple '
2282 'formats but ffmpeg is not installed.'
2283 ' The formats won\'t be merged.')
2285 postprocessors
= [merger
]
2287 def compatible_formats(formats
):
2288 # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2289 video_formats
= [format
for format
in formats
if format
.get('vcodec') != 'none']
2290 audio_formats
= [format
for format
in formats
if format
.get('acodec') != 'none']
2291 if len(video_formats
) > 2 or len(audio_formats
) > 2:
2295 exts
= set(format
.get('ext') for format
in formats
)
2297 set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2300 for ext_sets
in COMPATIBLE_EXTS
:
2301 if ext_sets
.issuperset(exts
):
2303 # TODO: Check acodec/vcodec
2306 requested_formats
= info_dict
['requested_formats']
2307 old_ext
= info_dict
['ext']
2308 if self
.params
.get('merge_output_format') is None and not compatible_formats(requested_formats
):
2309 info_dict
['ext'] = 'mkv'
2310 self
.report_warning(
2311 'Requested formats are incompatible for merge and will be merged into mkv.')
2313 def correct_ext(filename
):
2314 filename_real_ext
= os
.path
.splitext(filename
)[1][1:]
2316 os
.path
.splitext(filename
)[0]
2317 if filename_real_ext
== old_ext
2319 return '%s.%s' % (filename_wo_ext
, info_dict
['ext'])
2321 # Ensure filename always has a correct extension for successful merge
2322 full_filename
= correct_ext(full_filename
)
2323 temp_filename
= correct_ext(temp_filename
)
2324 dl_filename
= existing_file(full_filename
, temp_filename
)
2325 if dl_filename
is None:
2326 for f
in requested_formats
:
2327 new_info
= dict(info_dict
)
2329 fname
= prepend_extension(
2330 self
.prepare_filepath(self
.prepare_filename(new_info
), 'temp'),
2331 'f%s' % f
['format_id'], new_info
['ext'])
2332 if not ensure_dir_exists(fname
):
2334 downloaded
.append(fname
)
2335 partial_success
, real_download
= dl(fname
, new_info
)
2336 success
= success
and partial_success
2337 info_dict
['__postprocessors'] = postprocessors
2338 info_dict
['__files_to_merge'] = downloaded
2339 # Even if there were no downloads, it is being merged only now
2340 info_dict
['__real_download'] = True
2342 # Just a single file
2343 dl_filename
= existing_file(full_filename
, temp_filename
)
2344 if dl_filename
is None:
2345 success
, real_download
= dl(temp_filename
, info_dict
)
2346 info_dict
['__real_download'] = real_download
2348 dl_filename
= dl_filename
or temp_filename
2349 info_dict
['__finaldir'] = os
.path
.dirname(os
.path
.abspath(encodeFilename(full_filename
)))
2351 except (compat_urllib_error
.URLError
, compat_http_client
.HTTPException
, socket
.error
) as err
:
2352 self
.report_error('unable to download video data: %s' % error_to_compat_str(err
))
2354 except (OSError, IOError) as err
:
2355 raise UnavailableVideoError(err
)
2356 except (ContentTooShortError
, ) as err
:
2357 self
.report_error('content too short (expected %s bytes and served %s)' % (err
.expected
, err
.downloaded
))
2360 if success
and filename
!= '-':
2362 fixup_policy
= self
.params
.get('fixup')
2363 if fixup_policy
is None:
2364 fixup_policy
= 'detect_or_warn'
2366 INSTALL_FFMPEG_MESSAGE
= 'Install ffmpeg to fix this automatically.'
2368 stretched_ratio
= info_dict
.get('stretched_ratio')
2369 if stretched_ratio
is not None and stretched_ratio
!= 1:
2370 if fixup_policy
== 'warn':
2371 self
.report_warning('%s: Non-uniform pixel ratio (%s)' % (
2372 info_dict
['id'], stretched_ratio
))
2373 elif fixup_policy
== 'detect_or_warn':
2374 stretched_pp
= FFmpegFixupStretchedPP(self
)
2375 if stretched_pp
.available
:
2376 info_dict
['__postprocessors'].append(stretched_pp
)
2378 self
.report_warning(
2379 '%s: Non-uniform pixel ratio (%s). %s'
2380 % (info_dict
['id'], stretched_ratio
, INSTALL_FFMPEG_MESSAGE
))
2382 assert fixup_policy
in ('ignore', 'never')
2384 if (info_dict
.get('requested_formats') is None
2385 and info_dict
.get('container') == 'm4a_dash'
2386 and info_dict
.get('ext') == 'm4a'):
2387 if fixup_policy
== 'warn':
2388 self
.report_warning(
2389 '%s: writing DASH m4a. '
2390 'Only some players support this container.'
2392 elif fixup_policy
== 'detect_or_warn':
2393 fixup_pp
= FFmpegFixupM4aPP(self
)
2394 if fixup_pp
.available
:
2395 info_dict
['__postprocessors'].append(fixup_pp
)
2397 self
.report_warning(
2398 '%s: writing DASH m4a. '
2399 'Only some players support this container. %s'
2400 % (info_dict
['id'], INSTALL_FFMPEG_MESSAGE
))
2402 assert fixup_policy
in ('ignore', 'never')
2404 if (info_dict
.get('protocol') == 'm3u8_native'
2405 or info_dict
.get('protocol') == 'm3u8'
2406 and self
.params
.get('hls_prefer_native')):
2407 if fixup_policy
== 'warn':
2408 self
.report_warning('%s: malformed AAC bitstream detected.' % (
2410 elif fixup_policy
== 'detect_or_warn':
2411 fixup_pp
= FFmpegFixupM3u8PP(self
)
2412 if fixup_pp
.available
:
2413 info_dict
['__postprocessors'].append(fixup_pp
)
2415 self
.report_warning(
2416 '%s: malformed AAC bitstream detected. %s'
2417 % (info_dict
['id'], INSTALL_FFMPEG_MESSAGE
))
2419 assert fixup_policy
in ('ignore', 'never')
2422 self
.post_process(dl_filename
, info_dict
, files_to_move
)
2423 except PostProcessingError
as err
:
2424 self
.report_error('Postprocessing: %s' % str(err
))
2427 for ph
in self
._post
_hooks
:
2429 except Exception as err
:
2430 self
.report_error('post hooks: %s' % str(err
))
2432 must_record_download_archive
= True
2434 if must_record_download_archive
or self
.params
.get('force_write_download_archive', False):
2435 self
.record_download_archive(info_dict
)
2436 max_downloads
= self
.params
.get('max_downloads')
2437 if max_downloads
is not None and self
._num
_downloads
>= int(max_downloads
):
2438 raise MaxDownloadsReached()
2440 def download(self
, url_list
):
2441 """Download a given list of URLs."""
2442 outtmpl
= self
.params
.get('outtmpl', DEFAULT_OUTTMPL
)
2443 if (len(url_list
) > 1
2445 and '%' not in outtmpl
2446 and self
.params
.get('max_downloads') != 1):
2447 raise SameFileError(outtmpl
)
2449 for url
in url_list
:
2451 # It also downloads the videos
2452 res
= self
.extract_info(
2453 url
, force_generic_extractor
=self
.params
.get('force_generic_extractor', False))
2454 except UnavailableVideoError
:
2455 self
.report_error('unable to download video')
2456 except MaxDownloadsReached
:
2457 self
.to_screen('[info] Maximum number of downloaded files reached')
2459 except ExistingVideoReached
:
2460 self
.to_screen('[info] Encountered a file that is already in the archive, stopping due to --break-on-existing')
2462 except RejectedVideoReached
:
2463 self
.to_screen('[info] Encountered a file that did not match filter, stopping due to --break-on-reject')
2466 if self
.params
.get('dump_single_json', False):
2467 self
.to_stdout(json
.dumps(res
))
2469 return self
._download
_retcode
2471 def download_with_info_file(self
, info_filename
):
2472 with contextlib
.closing(fileinput
.FileInput(
2473 [info_filename
], mode
='r',
2474 openhook
=fileinput
.hook_encoded('utf-8'))) as f
:
2475 # FileInput doesn't have a read method, we can't call json.load
2476 info
= self
.filter_requested_info(json
.loads('\n'.join(f
)))
2478 self
.process_ie_result(info
, download
=True)
2479 except DownloadError
:
2480 webpage_url
= info
.get('webpage_url')
2481 if webpage_url
is not None:
2482 self
.report_warning('The info failed to download, trying with "%s"' % webpage_url
)
2483 return self
.download([webpage_url
])
2486 return self
._download
_retcode
def filter_requested_info(info_dict):
    """Return a copy of info_dict stripped of internal bookkeeping fields.

    Keys starting with '_' are dropped (except the special '_type' marker),
    as are the 'requested_formats' and 'requested_subtitles' entries.
    """
    fields_to_remove = ('requested_formats', 'requested_subtitles')
    return {k: v for k, v in info_dict.items()
            if (k[0] != '_' or k == '_type') and k not in fields_to_remove}
2495 def run_pp(self
, pp
, infodict
, files_to_move
={}):
2496 files_to_delete
= []
2497 files_to_delete
, infodict
= pp
.run(infodict
)
2498 if not files_to_delete
:
2499 return files_to_move
, infodict
2501 if self
.params
.get('keepvideo', False):
2502 for f
in files_to_delete
:
2503 files_to_move
.setdefault(f
, '')
2505 for old_filename
in set(files_to_delete
):
2506 self
.to_screen('Deleting original file %s (pass -k to keep)' % old_filename
)
2508 os
.remove(encodeFilename(old_filename
))
2509 except (IOError, OSError):
2510 self
.report_warning('Unable to remove downloaded original file')
2511 if old_filename
in files_to_move
:
2512 del files_to_move
[old_filename
]
2513 return files_to_move
, infodict
2515 def pre_process(self
, ie_info
):
2516 info
= dict(ie_info
)
2517 for pp
in self
._pps
['beforedl']:
2518 info
= self
.run_pp(pp
, info
)[1]
def post_process(self, filename, ie_info, files_to_move=None):
    """Run all the postprocessors on the given file.

    Args:
        filename: path of the downloaded media file; exposed to the
            postprocessors via info['filepath'].
        ie_info: info dict of the media. A shallow copy is processed so
            the caller's top-level keys are not overwritten (nested
            values are still shared).
        files_to_move: optional mapping of temporary file paths to their
            final destinations, threaded through each postprocessor run.

    Fix: the original signature used a mutable default (files_to_move={}).
    That single dict is shared across every call and is mutated downstream
    by run_pp (setdefault/del), so entries leaked between invocations.
    Using a None sentinel and a fresh dict per call is backward-compatible
    and removes the shared state.
    """
    if files_to_move is None:
        files_to_move = {}
    info = dict(ie_info)
    info['filepath'] = filename

    # Extractor-scheduled postprocessors run first, then the configured ones.
    for pp in ie_info.get('__postprocessors', []) + self._pps['normal']:
        files_to_move, info = self.run_pp(pp, info, files_to_move)
    # Relocate any temporary files to their final destinations.
    info = self.run_pp(MoveFilesAfterDownloadPP(self, files_to_move), info, files_to_move)[1]
    for pp in self._pps['aftermove']:
        files_to_move, info = self.run_pp(pp, info, {})
2532 def _make_archive_id(self
, info_dict
):
2533 video_id
= info_dict
.get('id')
2536 # Future-proof against any change in case
2537 # and backwards compatibility with prior versions
2538 extractor
= info_dict
.get('extractor_key') or info_dict
.get('ie_key') # key in a playlist
2539 if extractor
is None:
2540 url
= str_or_none(info_dict
.get('url'))
2543 # Try to find matching extractor for the URL and take its ie_key
2544 for ie
in self
._ies
:
2545 if ie
.suitable(url
):
2546 extractor
= ie
.ie_key()
2550 return '%s %s' % (extractor
.lower(), video_id
)
2552 def in_download_archive(self
, info_dict
):
2553 fn
= self
.params
.get('download_archive')
2557 vid_id
= self
._make
_archive
_id
(info_dict
)
2559 return False # Incomplete video information
2561 return vid_id
in self
.archive
2563 def record_download_archive(self
, info_dict
):
2564 fn
= self
.params
.get('download_archive')
2567 vid_id
= self
._make
_archive
_id
(info_dict
)
2569 with locked_file(fn
, 'a', encoding
='utf-8') as archive_file
:
2570 archive_file
.write(vid_id
+ '\n')
2571 self
.archive
.add(vid_id
)
2574 def format_resolution(format
, default
='unknown'):
2575 if format
.get('vcodec') == 'none':
2577 if format
.get('resolution') is not None:
2578 return format
['resolution']
2579 if format
.get('height') is not None:
2580 if format
.get('width') is not None:
2581 res
= '%sx%s' % (format
['width'], format
['height'])
2583 res
= '%sp' % format
['height']
2584 elif format
.get('width') is not None:
2585 res
= '%dx?' % format
['width']
2590 def _format_note(self
, fdict
):
2592 if fdict
.get('ext') in ['f4f', 'f4m']:
2593 res
+= '(unsupported) '
2594 if fdict
.get('language'):
2597 res
+= '[%s] ' % fdict
['language']
2598 if fdict
.get('format_note') is not None:
2599 res
+= fdict
['format_note'] + ' '
2600 if fdict
.get('tbr') is not None:
2601 res
+= '%4dk ' % fdict
['tbr']
2602 if fdict
.get('container') is not None:
2605 res
+= '%s container' % fdict
['container']
2606 if (fdict
.get('vcodec') is not None
2607 and fdict
.get('vcodec') != 'none'):
2610 res
+= fdict
['vcodec']
2611 if fdict
.get('vbr') is not None:
2613 elif fdict
.get('vbr') is not None and fdict
.get('abr') is not None:
2615 if fdict
.get('vbr') is not None:
2616 res
+= '%4dk' % fdict
['vbr']
2617 if fdict
.get('fps') is not None:
2620 res
+= '%sfps' % fdict
['fps']
2621 if fdict
.get('acodec') is not None:
2624 if fdict
['acodec'] == 'none':
2627 res
+= '%-5s' % fdict
['acodec']
2628 elif fdict
.get('abr') is not None:
2632 if fdict
.get('abr') is not None:
2633 res
+= '@%3dk' % fdict
['abr']
2634 if fdict
.get('asr') is not None:
2635 res
+= ' (%5dHz)' % fdict
['asr']
2636 if fdict
.get('filesize') is not None:
2639 res
+= format_bytes(fdict
['filesize'])
2640 elif fdict
.get('filesize_approx') is not None:
2643 res
+= '~' + format_bytes(fdict
['filesize_approx'])
2646 def _format_note_table(self
, f
):
def join_fields(*vargs):
    # Comma-separate the given values, skipping empty-string entries.
    non_empty = [val for val in vargs if val != '']
    return ', '.join(non_empty)
2651 'UNSUPPORTED' if f
.get('ext') in ('f4f', 'f4m') else '',
2652 format_field(f
, 'language', '[%s]'),
2653 format_field(f
, 'format_note'),
2654 format_field(f
, 'container', ignore
=(None, f
.get('ext'))),
2655 format_field(f
, 'asr', '%5dHz'))
2657 def list_formats(self
, info_dict
):
2658 formats
= info_dict
.get('formats', [info_dict
])
2659 new_format
= self
.params
.get('listformats_table', False)
2663 format_field(f
, 'format_id'),
2664 format_field(f
, 'ext'),
2665 self
.format_resolution(f
),
2666 format_field(f
, 'fps', '%d'),
2668 format_field(f
, 'filesize', ' %s', func
=format_bytes
) + format_field(f
, 'filesize_approx', '~%s', func
=format_bytes
),
2669 format_field(f
, 'tbr', '%4dk'),
2670 f
.get('protocol').replace('http_dash_segments', 'dash').replace("native", "n"),
2672 format_field(f
, 'vcodec', default
='unknown').replace('none', ''),
2673 format_field(f
, 'vbr', '%4dk'),
2674 format_field(f
, 'acodec', default
='unknown').replace('none', ''),
2675 format_field(f
, 'abr', '%3dk'),
2676 format_field(f
, 'asr', '%5dHz'),
2677 self
._format
_note
_table
(f
)]
2679 if f
.get('preference') is None or f
['preference'] >= -1000]
2680 header_line
= ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', ' TBR', 'PROTO',
2681 '|', 'VCODEC', ' VBR', 'ACODEC', ' ABR', ' ASR', 'NOTE']
2685 format_field(f
, 'format_id'),
2686 format_field(f
, 'ext'),
2687 self
.format_resolution(f
),
2688 self
._format
_note
(f
)]
2690 if f
.get('preference') is None or f
['preference'] >= -1000]
2691 header_line
= ['format code', 'extension', 'resolution', 'note']
2693 # if len(formats) > 1:
2694 # table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
2696 '[info] Available formats for %s:\n%s' % (info_dict
['id'], render_table(
2700 extraGap
=(0 if new_format
else 1),
2701 hideEmpty
=new_format
)))
2703 def list_thumbnails(self
, info_dict
):
2704 thumbnails
= info_dict
.get('thumbnails')
2706 self
.to_screen('[info] No thumbnails present for %s' % info_dict
['id'])
2710 '[info] Thumbnails for %s:' % info_dict
['id'])
2711 self
.to_screen(render_table(
2712 ['ID', 'width', 'height', 'URL'],
2713 [[t
['id'], t
.get('width', 'unknown'), t
.get('height', 'unknown'), t
['url']] for t
in thumbnails
]))
2715 def list_subtitles(self
, video_id
, subtitles
, name
='subtitles'):
2717 self
.to_screen('%s has no %s' % (video_id
, name
))
2720 'Available %s for %s:' % (name
, video_id
))
2721 self
.to_screen(render_table(
2722 ['Language', 'formats'],
2723 [[lang
, ', '.join(f
['ext'] for f
in reversed(formats
))]
2724 for lang
, formats
in subtitles
.items()]))
def urlopen(self, req):
    """Open *req* through the configured opener, wrapping bare URL strings first."""
    # Accept either a prepared request object or a plain URL string.
    request = sanitized_Request(req) if isinstance(req, compat_basestring) else req
    return self._opener.open(request, timeout=self._socket_timeout)
def print_debug_header(self):
    """Write verbose diagnostic information to the debug output:
    program/Python/exe versions, encodings, git revision and proxy map.
    No-op unless the 'verbose' option is set."""
    if not self.params.get('verbose'):
        return

    if type('') is not compat_str:
        # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
        self.report_warning(
            'Your Python is broken! Update to a newer and supported version')

    # Report every encoding in play so mojibake bug reports can be diagnosed.
    stdout_encoding = getattr(
        sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
    encoding_str = (
        '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
            locale.getpreferredencoding(),
            sys.getfilesystemencoding(),
            stdout_encoding,
            self.get_encoding()))
    # encoding=None: the debug line must not itself be re-encoded.
    write_string(encoding_str, encoding=None)

    self._write_string('[debug] yt-dlp version %s\n' % __version__)
    if _LAZY_LOADER:
        self._write_string('[debug] Lazy loading extractors enabled\n')
    if _PLUGIN_CLASSES:
        self._write_string(
            '[debug] Plugin Extractors: %s\n' % [ie.ie_key() for ie in _PLUGIN_CLASSES])
    try:
        # Best effort: report the git revision when running from a checkout.
        sp = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out, err = process_communicate_or_kill(sp)
        out = out.decode().strip()
        if re.match('[0-9a-f]+', out):
            self._write_string('[debug] Git HEAD: %s\n' % out)
    except Exception:
        # Not a git checkout, or no git binary: silently skip.
        # NOTE(review): the exception-handler body was not visible in this
        # view; upstream additionally calls sys.exc_clear() on Python 2.
        try:
            sys.exc_clear()
        except Exception:
            pass

    def python_implementation():
        # PyPy appends its own version to the implementation name.
        impl_name = platform.python_implementation()
        if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
            return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
        return impl_name

    # NOTE(review): the third format argument was not visible in this view;
    # upstream passes platform_name() here — confirm against the full file.
    self._write_string('[debug] Python version %s (%s) - %s\n' % (
        platform.python_version(), python_implementation(),
        platform_name()))

    # Versions of the external programs we may shell out to.
    exe_versions = FFmpegPostProcessor.get_versions(self)
    exe_versions['rtmpdump'] = rtmpdump_version()
    exe_versions['phantomjs'] = PhantomJSwrapper._version()
    exe_str = ', '.join(
        '%s %s' % (exe, v)
        for exe, v in sorted(exe_versions.items())
        if v)
    if not exe_str:
        exe_str = 'none'
    self._write_string('[debug] exe versions: %s\n' % exe_str)

    # Collect the proxies actually installed on the opener's handlers.
    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

    if self.params.get('call_home', False):
        # Opt-in only: contacts yt-dl.org for the public IP address and the
        # latest released version number.
        ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
        self._write_string('[debug] Public IP address: %s\n' % ipaddr)
        latest_version = self.urlopen(
            'https://yt-dl.org/latest/version').read().decode('utf-8')
        if version_tuple(latest_version) > version_tuple(__version__):
            self.report_warning(
                'You are using an outdated version (newest version: %s)! '
                'See https://yt-dl.org/update if you need help updating.' %
                latest_version)
def _setup_opener(self):
    """Build the urllib opener used for all HTTP(S) requests — cookie jar,
    proxy handling, TLS, redirects, data: URLs — and store it on
    self._opener.  Also sets self._socket_timeout and self.cookiejar."""
    timeout_val = self.params.get('socket_timeout')
    # Default socket timeout is 600 seconds (10 minutes).
    self._socket_timeout = 600 if timeout_val is None else float(timeout_val)

    opts_cookiefile = self.params.get('cookiefile')
    opts_proxy = self.params.get('proxy')

    if opts_cookiefile is None:
        # No cookie file configured: keep cookies in memory only.
        self.cookiejar = compat_cookiejar.CookieJar()
    else:
        opts_cookiefile = expand_path(opts_cookiefile)
        self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
        # Load existing cookies only if the file is actually readable;
        # an unreadable/missing file still yields a usable (empty) jar.
        if os.access(opts_cookiefile, os.R_OK):
            self.cookiejar.load(ignore_discard=True, ignore_expires=True)

    cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
    if opts_proxy is not None:
        if opts_proxy == '':
            # NOTE(review): this branch was not visible in this view;
            # upstream treats '' as "disable proxies entirely".
            proxies = {}
        else:
            proxies = {'http': opts_proxy, 'https': opts_proxy}
    else:
        # No explicit proxy: fall back to the environment (http_proxy etc.).
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    proxy_handler = PerRequestProxyHandler(proxies)

    debuglevel = 1 if self.params.get('debug_printtraffic') else 0
    https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
    redirect_handler = YoutubeDLRedirectHandler()
    data_handler = compat_urllib_request_DataHandler()

    # When passing our own FileHandler instance, build_opener won't add the
    # default FileHandler and allows us to disable the file protocol, which
    # can be used for malicious purposes (see
    # https://github.com/ytdl-org/youtube-dl/issues/8227)
    file_handler = compat_urllib_request.FileHandler()

    def file_open(*args, **kwargs):
        raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dlc for security reasons')
    file_handler.file_open = file_open

    opener = compat_urllib_request.build_opener(
        proxy_handler, https_handler, cookie_processor, ydlh,
        redirect_handler, data_handler, file_handler)

    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener
def encode(self, s):
    """Encode *s* with the configured output encoding; bytes pass through."""
    if isinstance(s, bytes):  # already encoded
        return s

    try:
        return s.encode(self.get_encoding())
    except UnicodeEncodeError as err:
        # Enrich the error with a hint before letting it propagate.
        err.reason += '. Check your system encoding configuration or use the --encoding option.'
        raise
def get_encoding(self):
    """Return the user-selected output encoding, or the locale's
    preferred encoding when the option is unset."""
    configured = self.params.get('encoding')
    return preferredencoding() if configured is None else configured
2881 def _write_thumbnails(self
, info_dict
, filename
):
2882 if self
.params
.get('writethumbnail', False):
2883 thumbnails
= info_dict
.get('thumbnails')
2885 thumbnails
= [thumbnails
[-1]]
2886 elif self
.params
.get('write_all_thumbnails', False):
2887 thumbnails
= info_dict
.get('thumbnails') or []
2892 for t
in thumbnails
:
2893 thumb_ext
= determine_ext(t
['url'], 'jpg')
2894 suffix
= '_%s' % t
['id'] if len(thumbnails
) > 1 else ''
2895 thumb_display_id
= '%s ' % t
['id'] if len(thumbnails
) > 1 else ''
2896 t
['filename'] = thumb_filename
= replace_extension(filename
+ suffix
, thumb_ext
, info_dict
.get('ext'))
2898 if not self
.params
.get('overwrites', True) and os
.path
.exists(encodeFilename(thumb_filename
)):
2899 ret
.append(thumb_filename
)
2900 self
.to_screen('[%s] %s: Thumbnail %sis already present' %
2901 (info_dict
['extractor'], info_dict
['id'], thumb_display_id
))
2903 self
.to_screen('[%s] %s: Downloading thumbnail %s...' %
2904 (info_dict
['extractor'], info_dict
['id'], thumb_display_id
))
2906 uf
= self
.urlopen(t
['url'])
2907 with open(encodeFilename(thumb_filename
), 'wb') as thumbf
:
2908 shutil
.copyfileobj(uf
, thumbf
)
2909 ret
.append(thumb_filename
)
2910 self
.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2911 (info_dict
['extractor'], info_dict
['id'], thumb_display_id
, thumb_filename
))
2912 except (compat_urllib_error
.URLError
, compat_http_client
.HTTPException
, socket
.error
) as err
:
2913 self
.report_warning('Unable to download thumbnail "%s": %s' %
2914 (t
['url'], error_to_compat_str(err
)))