4 from __future__
import absolute_import
, unicode_literals
29 from string
import ascii_letters
34 compat_get_terminal_size
,
40 compat_tokenize_tokenize
,
42 compat_urllib_request
,
43 compat_urllib_request_DataHandler
,
55 DOT_DESKTOP_LINK_TEMPLATE
,
56 DOT_URL_LINK_TEMPLATE
,
57 DOT_WEBLOC_LINK_TEMPLATE
,
80 PerRequestProxyHandler
,
85 register_socks_protocols
,
99 UnavailableVideoError
,
105 YoutubeDLCookieProcessor
,
107 YoutubeDLRedirectHandler
,
108 process_communicate_or_kill
,
110 from .cache
import Cache
111 from .extractor
import get_info_extractor
, gen_extractor_classes
, _LAZY_LOADER
, _PLUGIN_CLASSES
112 from .extractor
.openload
import PhantomJSwrapper
113 from .downloader
import get_suitable_downloader
114 from .downloader
.rtmp
import rtmpdump_version
115 from .postprocessor
import (
118 FFmpegFixupStretchedPP
,
121 # FFmpegSubtitlesConvertorPP,
123 MoveFilesAfterDownloadPP
,
125 from .version
import __version__
127 if compat_os_name
== 'nt':
131 class YoutubeDL(object):
134 YoutubeDL objects are the ones responsible of downloading the
135 actual video file and writing it to disk if the user has requested
136 it, among some other tasks. In most cases there should be one per
137 program. As, given a video URL, the downloader doesn't know how to
138 extract all the needed information, task that InfoExtractors do, it
139 has to pass the URL to one of them.
141 For this, YoutubeDL objects have a method that allows
142 InfoExtractors to be registered in a given order. When it is passed
143 a URL, the YoutubeDL object handles it to the first InfoExtractor it
144 finds that reports being able to handle it. The InfoExtractor extracts
145 all the information about the video or videos the URL refers to, and
146 YoutubeDL process the extracted information, possibly using a File
147 Downloader to download the video.
149 YoutubeDL objects accept a lot of parameters. In order not to saturate
150 the object constructor with arguments, it receives a dictionary of
151 options instead. These options are available through the params
152 attribute for the InfoExtractors to use. The YoutubeDL also
153 registers itself as the downloader in charge for the InfoExtractors
154 that are added to it, so this is a "mutual registration".
158 username: Username for authentication purposes.
159 password: Password for authentication purposes.
160 videopassword: Password for accessing a video.
161 ap_mso: Adobe Pass multiple-system operator identifier.
162 ap_username: Multiple-system operator account username.
163 ap_password: Multiple-system operator account password.
164 usenetrc: Use netrc for authentication instead.
165 verbose: Print additional info to stdout.
166 quiet: Do not print messages to stdout.
167 no_warnings: Do not print out anything for warnings.
168 forceurl: Force printing final URL.
169 forcetitle: Force printing title.
170 forceid: Force printing ID.
171 forcethumbnail: Force printing thumbnail URL.
172 forcedescription: Force printing description.
173 forcefilename: Force printing final filename.
174 forceduration: Force printing duration.
175 forcejson: Force printing info_dict as JSON.
176 dump_single_json: Force printing the info_dict of the whole playlist
177 (or video) as a single JSON line.
178 force_write_download_archive: Force writing download archive regardless of
179 'skip_download' or 'simulate'.
180 simulate: Do not download the video files.
181 format: Video format code. see "FORMAT SELECTION" for more details.
182 format_sort: How to sort the video formats. see "Sorting Formats" for more details.
183 format_sort_force: Force the given format_sort. see "Sorting Formats" for more details.
184 allow_multiple_video_streams: Allow multiple video streams to be merged into a single file
185 allow_multiple_audio_streams: Allow multiple audio streams to be merged into a single file
186 outtmpl: Dictionary of templates for output names. Allowed keys
187 are 'default' and the keys of OUTTMPL_TYPES (in utils.py)
188 outtmpl_na_placeholder: Placeholder for unavailable meta fields.
189 restrictfilenames: Do not allow "&" and spaces in file names
190 trim_file_name: Limit length of filename (extension excluded)
191 ignoreerrors: Do not stop on download errors
192 (Default True when running youtube-dlc,
193 but False when directly accessing YoutubeDL class)
194 force_generic_extractor: Force downloader to use the generic extractor
195 overwrites: Overwrite all video and metadata files if True,
196 overwrite only non-video files if None
197 and don't overwrite any file if False
198 playliststart: Playlist item to start at.
199 playlistend: Playlist item to end at.
200 playlist_items: Specific indices of playlist to download.
201 playlistreverse: Download playlist items in reverse order.
202 playlistrandom: Download playlist items in random order.
203 matchtitle: Download only matching titles.
204 rejecttitle: Reject downloads for matching titles.
205 logger: Log messages to a logging.Logger instance.
206 logtostderr: Log messages to stderr instead of stdout.
207 writedescription: Write the video description to a .description file
208 writeinfojson: Write the video description to a .info.json file
209 writecomments: Extract video comments. This will not be written to disk
210 unless writeinfojson is also given
211 writeannotations: Write the video annotations to a .annotations.xml file
212 writethumbnail: Write the thumbnail image to a file
213 allow_playlist_files: Also write playlists' description, infojson etc in a separate file
214 write_all_thumbnails: Write all thumbnail formats to files
215 writelink: Write an internet shortcut file, depending on the
216 current platform (.url/.webloc/.desktop)
217 writeurllink: Write a Windows internet shortcut file (.url)
218 writewebloclink: Write a macOS internet shortcut file (.webloc)
219 writedesktoplink: Write a Linux internet shortcut file (.desktop)
220 writesubtitles: Write the video subtitles to a file
221 writeautomaticsub: Write the automatically generated subtitles to a file
222 allsubtitles: Downloads all the subtitles of the video
223 (requires writesubtitles or writeautomaticsub)
224 listsubtitles: Lists all available subtitles for the video
225 subtitlesformat: The format code for subtitles
226 subtitleslangs: List of languages of the subtitles to download
227 keepvideo: Keep the video file after post-processing
228 daterange: A DateRange object, download only if the upload_date is in the range.
229 skip_download: Skip the actual download of the video file
230 cachedir: Location of the cache files in the filesystem.
231 False to disable filesystem cache.
232 noplaylist: Download single video instead of a playlist if in doubt.
233 age_limit: An integer representing the user's age in years.
234 Unsuitable videos for the given age are skipped.
235 min_views: An integer representing the minimum view count the video
236 must have in order to not be skipped.
237 Videos without view count information are always
238 downloaded. None for no limit.
239 max_views: An integer representing the maximum view count.
240 Videos that are more popular than that are not
242 Videos without view count information are always
243 downloaded. None for no limit.
244 download_archive: File name of a file where all downloads are recorded.
245 Videos already present in the file are not downloaded
247 break_on_existing: Stop the download process after attempting to download a
248 file that is in the archive.
249 break_on_reject: Stop the download process when encountering a video that
250 has been filtered out.
251 cookiefile: File name where cookies should be read from and dumped to
252 nocheckcertificate:Do not verify SSL certificates
253 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
254 At the moment, this is only supported by YouTube.
255 proxy: URL of the proxy server to use
256 geo_verification_proxy: URL of the proxy to use for IP address verification
257 on geo-restricted sites.
258 socket_timeout: Time to wait for unresponsive hosts, in seconds
259 bidi_workaround: Work around buggy terminals without bidirectional text
260 support, using fribidi
261 debug_printtraffic:Print out sent and received HTTP traffic
262 include_ads: Download ads as well
263 default_search: Prepend this string if an input url is not valid.
264 'auto' for elaborate guessing
265 encoding: Use this encoding instead of the system-specified.
266 extract_flat: Do not resolve URLs, return the immediate result.
267 Pass in 'in_playlist' to only show this behavior for
269 postprocessors: A list of dictionaries, each with an entry
270 * key: The name of the postprocessor. See
271 youtube_dlc/postprocessor/__init__.py for a list.
272 * _after_move: Optional. If True, run this post_processor
273 after 'MoveFilesAfterDownload'
274 as well as any further keyword arguments for the
276 post_hooks: A list of functions that get called as the final step
277 for each video file, after all postprocessors have been
278 called. The filename will be passed as the only argument.
279 progress_hooks: A list of functions that get called on download
280 progress, with a dictionary with the entries
281 * status: One of "downloading", "error", or "finished".
282 Check this first and ignore unknown values.
284 If status is one of "downloading", or "finished", the
285 following properties may also be present:
286 * filename: The final filename (always present)
287 * tmpfilename: The filename we're currently writing to
288 * downloaded_bytes: Bytes on disk
289 * total_bytes: Size of the whole file, None if unknown
290 * total_bytes_estimate: Guess of the eventual file size,
292 * elapsed: The number of seconds since download started.
293 * eta: The estimated time in seconds, None if unknown
294 * speed: The download speed in bytes/second, None if
296 * fragment_index: The counter of the currently
297 downloaded video fragment.
298 * fragment_count: The number of fragments (= individual
299 files that will be merged)
301 Progress hooks are guaranteed to be called at least once
302 (with status "finished") if the download is successful.
303 merge_output_format: Extension to use when merging formats.
304 final_ext: Expected final extension; used to detect when the file was
305 already downloaded and converted. "merge_output_format" is
306 replaced by this extension when given
307 fixup: Automatically correct known faults of the file.
309 - "never": do nothing
310 - "warn": only emit a warning
311 - "detect_or_warn": check whether we can do anything
312 about it, warn otherwise (default)
313 source_address: Client-side IP address to bind to.
314 call_home: Boolean, true iff we are allowed to contact the
315 youtube-dlc servers for debugging.
316 sleep_interval: Number of seconds to sleep before each download when
317 used alone or a lower bound of a range for randomized
318 sleep before each download (minimum possible number
319 of seconds to sleep) when used along with
321 max_sleep_interval:Upper bound of a range for randomized sleep before each
322 download (maximum possible number of seconds to sleep).
323 Must only be used along with sleep_interval.
324 Actual sleep time will be a random float from range
325 [sleep_interval; max_sleep_interval].
326 listformats: Print an overview of available video formats and exit.
327 list_thumbnails: Print a table of all thumbnails and exit.
328 match_filter: A function that gets called with the info_dict of
330 If it returns a message, the video is ignored.
331 If it returns None, the video is downloaded.
332 match_filter_func in utils.py is one example for this.
333 no_color: Do not emit color codes in output.
334 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
337 Two-letter ISO 3166-2 country code that will be used for
338 explicit geographic restriction bypassing via faking
339 X-Forwarded-For HTTP header
341 IP range in CIDR notation that will be used similarly to
344 The following options determine which downloader is picked:
345 external_downloader: Executable of the external downloader to call.
346 None or unset for standard (built-in) downloader.
347 hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
348 if True, otherwise use ffmpeg/avconv if False, otherwise
349 use downloader suggested by extractor if None.
351 The following parameters are not used by YoutubeDL itself, they are used by
352 the downloader (see youtube_dlc/downloader/common.py):
353 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
354 noresizebuffer, retries, continuedl, noprogress, consoletitle,
355 xattr_set_filesize, external_downloader_args, hls_use_mpegts,
358 The following options are used by the post processors:
359 prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
360 otherwise prefer ffmpeg. (avconv support is deprecated)
361 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
362 to the binary or its containing directory.
363 postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
364 and a list of additional command-line arguments for the
365 postprocessor/executable. The dict can also have "PP+EXE" keys
366 which are used when the given exe is used by the given PP.
367 Use 'default' as the name for arguments to passed to all PP
368 The following options are used by the Youtube extractor:
369 youtube_include_dash_manifest: If True (default), DASH manifests and related
370 data will be downloaded and processed by extractor.
371 You can reduce network I/O by disabling it if you don't
375 _NUMERIC_FIELDS
= set((
376 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
377 'timestamp', 'upload_year', 'upload_month', 'upload_day',
378 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
379 'average_rating', 'comment_count', 'age_limit',
380 'start_time', 'end_time',
381 'chapter_number', 'season_number', 'episode_number',
382 'track_number', 'disc_number', 'release_year',
388 _pps
= {'beforedl': [], 'aftermove': [], 'normal': []}
389 __prepare_filename_warned
= False
390 _download_retcode
= None
391 _num_downloads
= None
393 _playlist_urls
= set()
396 def __init__(self
, params
=None, auto_init
=True):
397 """Create a FileDownloader object with the given options."""
401 self
._ies
_instances
= {}
402 self
._pps
= {'beforedl': [], 'aftermove': [], 'normal': []}
403 self
.__prepare
_filename
_warned
= False
404 self
._post
_hooks
= []
405 self
._progress
_hooks
= []
406 self
._download
_retcode
= 0
407 self
._num
_downloads
= 0
408 self
._screen
_file
= [sys
.stdout
, sys
.stderr
][params
.get('logtostderr', False)]
409 self
._err
_file
= sys
.stderr
412 'nocheckcertificate': False,
414 self
.params
.update(params
)
415 self
.cache
= Cache(self
)
418 """Preload the archive, if any is specified"""
419 def preload_download_archive(self
):
420 fn
= self
.params
.get('download_archive')
424 with locked_file(fn
, 'r', encoding
='utf-8') as archive_file
:
425 for line
in archive_file
:
426 self
.archive
.add(line
.strip())
427 except IOError as ioe
:
428 if ioe
.errno
!= errno
.ENOENT
:
433 def check_deprecated(param
, option
, suggestion
):
434 if self
.params
.get(param
) is not None:
436 '%s is deprecated. Use %s instead.' % (option
, suggestion
))
440 if self
.params
.get('verbose'):
441 self
.to_stdout('[debug] Loading archive file %r' % self
.params
.get('download_archive'))
443 preload_download_archive(self
)
445 if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
446 if self
.params
.get('geo_verification_proxy') is None:
447 self
.params
['geo_verification_proxy'] = self
.params
['cn_verification_proxy']
449 if self
.params
.get('final_ext'):
450 if self
.params
.get('merge_output_format'):
451 self
.report_warning('--merge-output-format will be ignored since --remux-video or --recode-video is given')
452 self
.params
['merge_output_format'] = self
.params
['final_ext']
454 if 'overwrites' in self
.params
and self
.params
['overwrites'] is None:
455 del self
.params
['overwrites']
457 check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N in the number of digits')
458 check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
459 check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
461 if params
.get('bidi_workaround', False):
464 master
, slave
= pty
.openpty()
465 width
= compat_get_terminal_size().columns
469 width_args
= ['-w', str(width
)]
471 stdin
=subprocess
.PIPE
,
473 stderr
=self
._err
_file
)
475 self
._output
_process
= subprocess
.Popen(
476 ['bidiv'] + width_args
, **sp_kwargs
479 self
._output
_process
= subprocess
.Popen(
480 ['fribidi', '-c', 'UTF-8'] + width_args
, **sp_kwargs
)
481 self
._output
_channel
= os
.fdopen(master
, 'rb')
482 except OSError as ose
:
483 if ose
.errno
== errno
.ENOENT
:
484 self
.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
488 if (sys
.platform
!= 'win32'
489 and sys
.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
490 and not params
.get('restrictfilenames', False)):
491 # Unicode filesystem API will throw errors (#1474, #13027)
493 'Assuming --restrict-filenames since file system encoding '
494 'cannot encode all characters. '
495 'Set the LC_ALL environment variable to fix this.')
496 self
.params
['restrictfilenames'] = True
498 self
.outtmpl_dict
= self
.parse_outtmpl()
503 self
.print_debug_header()
504 self
.add_default_info_extractors()
506 for pp_def_raw
in self
.params
.get('postprocessors', []):
507 pp_class
= get_postprocessor(pp_def_raw
['key'])
508 pp_def
= dict(pp_def_raw
)
511 when
= pp_def
['when']
515 pp
= pp_class(self
, **compat_kwargs(pp_def
))
516 self
.add_post_processor(pp
, when
=when
)
518 for ph
in self
.params
.get('post_hooks', []):
519 self
.add_post_hook(ph
)
521 for ph
in self
.params
.get('progress_hooks', []):
522 self
.add_progress_hook(ph
)
524 register_socks_protocols()
526 def warn_if_short_id(self
, argv
):
527 # short YouTube ID starting with dash?
529 i
for i
, a
in enumerate(argv
)
530 if re
.match(r
'^-[0-9A-Za-z_-]{10}$', a
)]
534 + [a
for i
, a
in enumerate(argv
) if i
not in idxs
]
535 + ['--'] + [argv
[i
] for i
in idxs
]
538 'Long argument string detected. '
539 'Use -- to separate parameters and URLs, like this:\n%s\n' %
540 args_to_str(correct_argv
))
542 def add_info_extractor(self
, ie
):
543 """Add an InfoExtractor object to the end of the list."""
545 if not isinstance(ie
, type):
546 self
._ies
_instances
[ie
.ie_key()] = ie
547 ie
.set_downloader(self
)
def get_info_extractor(self, ie_key):
    """
    Get an instance of an IE with name ie_key, it will try to get one from
    the _ies list, if there's no instance it will create a new one and add
    it to the extractor list.
    """
    cached = self._ies_instances.get(ie_key)
    if cached is not None:
        return cached
    # NOTE(review): the None-check and the final return (orig. lines 556 and
    # 559) are missing from the extracted source; they are reconstructed from
    # the docstring's stated contract — confirm against the original file.
    ie = get_info_extractor(ie_key)()
    self.add_info_extractor(ie)
    return ie
def add_default_info_extractors(self):
    """
    Add the InfoExtractors returned by gen_extractors to the end of the list
    """
    # Register every known extractor class with this downloader.
    for extractor in gen_extractor_classes():
        self.add_info_extractor(extractor)
def add_post_processor(self, pp, when='normal'):
    """Add a PostProcessor object to the end of the chain.

    `when` selects the stage bucket in self._pps ('beforedl', 'normal'
    or 'aftermove', per the _pps initializer).
    """
    stage = self._pps[when]
    stage.append(pp)
    # Let the post-processor talk back to this downloader.
    pp.set_downloader(self)
def add_post_hook(self, ph):
    """Add the post hook"""
    # Hooks registered here run once per finished video file (see the
    # 'post_hooks' option documented on the class).
    self._post_hooks += [ph]
def add_progress_hook(self, ph):
    """Add the progress hook (currently only for the file downloader)"""
    self._progress_hooks += [ph]
581 def _bidi_workaround(self
, message
):
582 if not hasattr(self
, '_output_channel'):
585 assert hasattr(self
, '_output_process')
586 assert isinstance(message
, compat_str
)
587 line_count
= message
.count('\n') + 1
588 self
._output
_process
.stdin
.write((message
+ '\n').encode('utf-8'))
589 self
._output
_process
.stdin
.flush()
590 res
= ''.join(self
._output
_channel
.readline().decode('utf-8')
591 for _
in range(line_count
))
592 return res
[:-len('\n')]
def to_screen(self, message, skip_eol=False):
    """Print message to stdout if not in quiet mode."""
    # Delegate to to_stdout with quiet-mode checking switched on.
    result = self.to_stdout(message, skip_eol, check_quiet=True)
    return result
def _write_string(self, s, out=None):
    # Route all raw output through write_string, honouring the
    # user-configured 'encoding' parameter.
    encoding = self.params.get('encoding')
    write_string(s, out=out, encoding=encoding)
def to_stdout(self, message, skip_eol=False, check_quiet=False):
    """Print message to stdout if not in quiet mode."""
    logger = self.params.get('logger')
    if logger:
        # A user-supplied logger takes precedence over screen output.
        logger.debug(message)
    elif not check_quiet or not self.params.get('quiet', False):
        text = self._bidi_workaround(message)
        if not skip_eol:
            text += '\n'
        self._write_string(text, self._screen_file)
def to_stderr(self, message):
    """Print message to stderr."""
    assert isinstance(message, compat_str)
    logger = self.params.get('logger')
    if logger:
        logger.error(message)
    else:
        # NOTE(review): the 'else:' line (orig. 617) is absent from the
        # extracted source; it is implied by the two mutually-exclusive
        # output paths — confirm against the original file.
        text = self._bidi_workaround(message) + '\n'
        self._write_string(text, self._err_file)
def to_console_title(self, message):
    """Set the terminal/console window title, when 'consoletitle' is enabled."""
    if not self.params.get('consoletitle', False):
        return  # NOTE(review): guard return reconstructed (orig. 624 missing)
    if compat_os_name == 'nt':
        kernel32 = ctypes.windll.kernel32
        if kernel32.GetConsoleWindow():
            # c_wchar_p() might not be necessary if `message` is
            # already of type unicode()
            kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
    elif 'TERM' in os.environ:
        # xterm-style OSC escape sets the window title.
        self._write_string('\033]0;%s\007' % message, self._screen_file)
def save_console_title(self):
    """Push the current terminal title onto the xterm title stack."""
    if not self.params.get('consoletitle', False):
        return  # NOTE(review): guard return reconstructed (orig. 635 missing)
    if self.params.get('simulate', False):
        return  # NOTE(review): guard return reconstructed (orig. 637 missing)
    if compat_os_name != 'nt' and 'TERM' in os.environ:
        # Save the title on stack
        self._write_string('\033[22;0t', self._screen_file)
def restore_console_title(self):
    """Pop the previously saved terminal title off the xterm title stack."""
    if not self.params.get('consoletitle', False):
        return  # NOTE(review): guard return reconstructed (orig. 644 missing)
    if self.params.get('simulate', False):
        return  # NOTE(review): guard return reconstructed (orig. 646 missing)
    if compat_os_name != 'nt' and 'TERM' in os.environ:
        # Restore the title from stack
        self._write_string('\033[23;0t', self._screen_file)
652 self
.save_console_title()
def __exit__(self, *args):
    # Leaving the context: undo the console-title change made on entry.
    self.restore_console_title()

    cookie_file = self.params.get('cookiefile')
    if cookie_file is not None:
        # Persist session cookies, keeping discardable/expired entries too.
        self.cookiejar.save(ignore_discard=True, ignore_expires=True)
661 def trouble(self
, message
=None, tb
=None):
662 """Determine action to take when a download problem appears.
664 Depending on if the downloader has been configured to ignore
665 download errors or not, this method may throw an exception or
666 not when errors are found, after printing the message.
668 tb, if given, is additional traceback information.
670 if message
is not None:
671 self
.to_stderr(message
)
672 if self
.params
.get('verbose'):
674 if sys
.exc_info()[0]: # if .trouble has been called from an except block
676 if hasattr(sys
.exc_info()[1], 'exc_info') and sys
.exc_info()[1].exc_info
[0]:
677 tb
+= ''.join(traceback
.format_exception(*sys
.exc_info()[1].exc_info
))
678 tb
+= encode_compat_str(traceback
.format_exc())
680 tb_data
= traceback
.format_list(traceback
.extract_stack())
681 tb
= ''.join(tb_data
)
683 if not self
.params
.get('ignoreerrors', False):
684 if sys
.exc_info()[0] and hasattr(sys
.exc_info()[1], 'exc_info') and sys
.exc_info()[1].exc_info
[0]:
685 exc_info
= sys
.exc_info()[1].exc_info
687 exc_info
= sys
.exc_info()
688 raise DownloadError(message
, exc_info
)
689 self
._download
_retcode
= 1
def report_warning(self, message):
    '''
    Print the message to stderr, it will be prefixed with 'WARNING:'
    If stderr is a tty file the 'WARNING:' will be colored
    '''
    logger = self.params.get('logger')
    if logger is not None:
        logger.warning(message)
        return
    # NOTE(review): the branch structure below reconstructs control lines
    # (orig. 698, 700) that are missing from the extracted source — confirm
    # against the original file.
    if self.params.get('no_warnings'):
        return
    if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
        _msg_header = '\033[0;33mWARNING:\033[0m'
    else:
        _msg_header = 'WARNING:'
    self.to_stderr('%s %s' % (_msg_header, message))
def report_error(self, message, tb=None):
    '''
    Do the same as trouble, but prefixes the message with 'ERROR:', colored
    in red if stderr is a tty file.
    '''
    # NOTE(review): the 'else:' line (orig. 715) is missing from the extracted
    # source; it is forced by the two alternative header assignments.
    colorize = (not self.params.get('no_color')
                and self._err_file.isatty()
                and compat_os_name != 'nt')
    _msg_header = '\033[0;31mERROR:\033[0m' if colorize else 'ERROR:'
    error_message = '%s %s' % (_msg_header, message)
    self.trouble(error_message, tb)
def report_file_already_downloaded(self, file_name):
    """Report file has already been fully downloaded."""
    # NOTE(review): the 'try:' line (orig. 722) is missing from the extracted
    # source; it is forced by the present 'except UnicodeEncodeError:'.
    try:
        self.to_screen('[download] %s has already been downloaded' % file_name)
    except UnicodeEncodeError:
        # file_name may not be printable with the console encoding.
        self.to_screen('[download] The file has already been downloaded')
def report_file_delete(self, file_name):
    """Report that existing file will be deleted."""
    # NOTE(review): the 'try:' line (orig. 729) is missing from the extracted
    # source; it is forced by the present 'except UnicodeEncodeError:'.
    try:
        self.to_screen('Deleting already existent file %s' % file_name)
    except UnicodeEncodeError:
        # file_name may not be printable with the console encoding.
        self.to_screen('Deleting already existent file')
def parse_outtmpl(self):
    """Normalize the 'outtmpl' option to a dict, filling unset keys from DEFAULT_OUTTMPL."""
    outtmpl_dict = self.params.get('outtmpl', {})
    if not isinstance(outtmpl_dict, dict):
        # A bare template value means "the default template".
        outtmpl_dict = {'default': outtmpl_dict}
    for tmpl_key, tmpl_val in DEFAULT_OUTTMPL.items():
        if not outtmpl_dict.get(tmpl_key):
            outtmpl_dict[tmpl_key] = tmpl_val
    for key, val in outtmpl_dict.items():
        if isinstance(val, bytes):
            self.report_warning(
                'Parameter outtmpl is bytes, but should be a unicode string. '
                'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
    # NOTE(review): the report_warning( opener and this return (orig. 743,
    # 747) are missing from the extracted source; the return is grounded by
    # the caller `self.outtmpl_dict = self.parse_outtmpl()` in __init__.
    return outtmpl_dict
748 def _prepare_filename(self
, info_dict
, tmpl_type
='default'):
750 template_dict
= dict(info_dict
)
752 template_dict
['duration_string'] = ( # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
753 formatSeconds(info_dict
['duration'], '-')
754 if info_dict
.get('duration', None) is not None
757 template_dict
['epoch'] = int(time
.time())
758 autonumber_size
= self
.params
.get('autonumber_size')
759 if autonumber_size
is None:
761 template_dict
['autonumber'] = self
.params
.get('autonumber_start', 1) - 1 + self
._num
_downloads
762 if template_dict
.get('resolution') is None:
763 if template_dict
.get('width') and template_dict
.get('height'):
764 template_dict
['resolution'] = '%dx%d' % (template_dict
['width'], template_dict
['height'])
765 elif template_dict
.get('height'):
766 template_dict
['resolution'] = '%sp' % template_dict
['height']
767 elif template_dict
.get('width'):
768 template_dict
['resolution'] = '%dx?' % template_dict
['width']
770 sanitize
= lambda k
, v
: sanitize_filename(
772 restricted
=self
.params
.get('restrictfilenames'),
773 is_id
=(k
== 'id' or k
.endswith('_id')))
774 template_dict
= dict((k
, v
if isinstance(v
, compat_numeric_types
) else sanitize(k
, v
))
775 for k
, v
in template_dict
.items()
776 if v
is not None and not isinstance(v
, (list, tuple, dict)))
777 na
= self
.params
.get('outtmpl_na_placeholder', 'NA')
778 template_dict
= collections
.defaultdict(lambda: na
, template_dict
)
780 outtmpl
= self
.outtmpl_dict
.get(tmpl_type
, self
.outtmpl_dict
['default'])
781 force_ext
= OUTTMPL_TYPES
.get(tmpl_type
)
783 # For fields playlist_index and autonumber convert all occurrences
784 # of %(field)s to %(field)0Nd for backward compatibility
785 field_size_compat_map
= {
786 'playlist_index': len(str(template_dict
['n_entries'])),
787 'autonumber': autonumber_size
,
789 FIELD_SIZE_COMPAT_RE
= r
'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
790 mobj
= re
.search(FIELD_SIZE_COMPAT_RE
, outtmpl
)
793 FIELD_SIZE_COMPAT_RE
,
794 r
'%%(\1)0%dd' % field_size_compat_map
[mobj
.group('field')],
797 # As of [1] format syntax is:
798 # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
799 # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
803 \({0}\) # mapping key
804 (?:[#0\-+ ]+)? # conversion flags (optional)
805 (?:\d+)? # minimum field width (optional)
806 (?:\.\d+)? # precision (optional)
807 [hlL]? # length modifier (optional)
808 (?P<type>[diouxXeEfFgGcrs%]) # conversion type
811 numeric_fields
= list(self
._NUMERIC
_FIELDS
)
814 FORMAT_DATE_RE
= FORMAT_RE
.format(r
'(?P<key>(?P<field>\w+)>(?P<format>.+?))')
815 for mobj
in re
.finditer(FORMAT_DATE_RE
, outtmpl
):
816 conv_type
, field
, frmt
, key
= mobj
.group('type', 'field', 'format', 'key')
817 if key
in template_dict
:
819 value
= strftime_or_none(template_dict
.get(field
), frmt
, na
)
820 if conv_type
in 'crs': # string
821 value
= sanitize(field
, value
)
823 numeric_fields
.append(key
)
824 value
= float_or_none(value
, default
=None)
825 if value
is not None:
826 template_dict
[key
] = value
828 # Missing numeric fields used together with integer presentation types
829 # in format specification will break the argument substitution since
830 # string NA placeholder is returned for missing fields. We will patch
831 # output template for missing fields to meet string presentation type.
832 for numeric_field
in numeric_fields
:
833 if numeric_field
not in template_dict
:
835 FORMAT_RE
.format(re
.escape(numeric_field
)),
836 r
'%({0})s'.format(numeric_field
), outtmpl
)
838 # expand_path translates '%%' into '%' and '$$' into '$'
839 # correspondingly that is not what we want since we need to keep
840 # '%%' intact for template dict substitution step. Working around
841 # with boundary-alike separator hack.
842 sep
= ''.join([random
.choice(ascii_letters
) for _
in range(32)])
843 outtmpl
= outtmpl
.replace('%%', '%{0}%'.format(sep
)).replace('$$', '${0}$'.format(sep
))
845 # outtmpl should be expand_path'ed before template dict substitution
846 # because meta fields may contain env variables we don't want to
847 # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
848 # title "Hello $PATH", we don't want `$PATH` to be expanded.
849 filename
= expand_path(outtmpl
).replace(sep
, '') % template_dict
851 if force_ext
is not None:
852 filename
= replace_extension(filename
, force_ext
, template_dict
.get('ext'))
854 # https://github.com/blackjack4494/youtube-dlc/issues/85
855 trim_file_name
= self
.params
.get('trim_file_name', False)
857 fn_groups
= filename
.rsplit('.')
860 if len(fn_groups
) > 2:
861 sub_ext
= fn_groups
[-2]
862 filename
= '.'.join(filter(None, [fn_groups
[0][:trim_file_name
], sub_ext
, ext
]))
864 # Temporary fix for #4787
865 # 'Treat' all problem characters by passing filename through preferredencoding
866 # to workaround encoding issues with subprocess on python2 @ Windows
867 if sys
.version_info
< (3, 0) and sys
.platform
== 'win32':
868 filename
= encodeFilename(filename
, True).decode(preferredencoding())
869 filename
= sanitize_path(filename
)
872 except ValueError as err
:
873 self
.report_error('Error in output template: ' + str(err
) + ' (encoding: ' + repr(preferredencoding()) + ')')
876 def prepare_filename(self
, info_dict
, dir_type
='', warn
=False):
877 """Generate the output filename."""
# NOTE(review): this chunk is a whitespace-mangled paste; original lines
# 881, 883-884 and 891-892 are missing here, so some branch bodies are
# not visible in this view.
878 paths
= self
.params
.get('paths', {})
879 assert isinstance(paths
, dict)
# Build the bare filename from the output template for the requested
# directory type, falling back to the 'default' template.
880 filename
= self
._prepare
_filename
(info_dict
, dir_type
or 'default')
# Warn at most once per run when --paths cannot be honoured; the
# __prepare_filename_warned flag below suppresses repeats.
882 if warn
and not self
.__prepare
_filename
_warned
:
885 elif filename
== '-':
# NOTE(review): "when an outputting to stdout" is a grammar typo in a
# user-facing string; left byte-identical here.
886 self
.report_warning('--paths is ignored when an outputting to stdout')
887 elif os
.path
.isabs(filename
):
888 self
.report_warning('--paths is ignored since an absolute path is given in output template')
889 self
.__prepare
_filename
_warned
= True
890 if filename
== '-' or not filename
:
# Join --paths home directory, the per-type subdirectory and the
# generated filename into a single sanitized path.
893 homepath
= expand_path(paths
.get('home', '').strip())
894 assert isinstance(homepath
, compat_str
)
895 subdir
= expand_path(paths
.get(dir_type
, '').strip()) if dir_type
else ''
896 assert isinstance(subdir
, compat_str
)
897 return sanitize_path(os
.path
.join(homepath
, subdir
, filename
))
899 def _match_entry(self
, info_dict
, incomplete
):
900 """ Returns None if the file should be downloaded """
# NOTE(review): mangled paste; original lines 901-902, 908, 912, 916,
# 932-933 and 937-940 (including the def of check_filter used below)
# are missing from this chunk.
903 video_title
= info_dict
.get('title', info_dict
.get('id', 'video'))
904 if 'title' in info_dict
:
905 # This can happen when we're just evaluating the playlist
906 title
= info_dict
['title']
# --matchtitle / --rejecttitle: case-insensitive regex checks on the title.
907 matchtitle
= self
.params
.get('matchtitle', False)
909 if not re
.search(matchtitle
, title
, re
.IGNORECASE
):
910 return '"' + title
+ '" title did not match pattern "' + matchtitle
+ '"'
911 rejecttitle
= self
.params
.get('rejecttitle', False)
913 if re
.search(rejecttitle
, title
, re
.IGNORECASE
):
914 return '"' + title
+ '" title matched reject pattern "' + rejecttitle
+ '"'
# --datebefore/--dateafter: reject uploads outside the configured range.
915 date
= info_dict
.get('upload_date')
917 dateRange
= self
.params
.get('daterange', DateRange())
918 if date
not in dateRange
:
919 return '%s upload date is not in range %s' % (date_from_str(date
).isoformat(), dateRange
)
# --min-views / --max-views, only when the extractor reported a count.
920 view_count
= info_dict
.get('view_count')
921 if view_count
is not None:
922 min_views
= self
.params
.get('min_views')
923 if min_views
is not None and view_count
< min_views
:
924 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title
, view_count
, min_views
)
925 max_views
= self
.params
.get('max_views')
926 if max_views
is not None and view_count
> max_views
:
927 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title
, view_count
, max_views
)
928 if age_restricted(info_dict
.get('age_limit'), self
.params
.get('age_limit')):
929 return 'Skipping "%s" because it is age restricted' % video_title
930 if self
.in_download_archive(info_dict
):
931 return '%s has already been recorded in archive' % video_title
# --match-filter callable supplied via the API; its return value is the
# rejection reason (None means accept).
934 match_filter
= self
.params
.get('match_filter')
935 if match_filter
is not None:
936 ret
= match_filter(info_dict
)
# check_filter is defined on the missing lines above; a non-None reason
# means the entry is skipped, and may escalate to the two exceptions
# below when --break-on-existing / --break-on-reject are set.
941 reason
= check_filter()
942 if reason
is not None:
943 self
.to_screen('[download] ' + reason
)
944 if reason
.endswith('has already been recorded in the archive') and self
.params
.get('break_on_existing', False):
945 raise ExistingVideoReached()
946 elif self
.params
.get('break_on_reject', False):
947 raise RejectedVideoReached()
def add_extra_info(info_dict, extra_info):
    """Merge extra_info into info_dict without overwriting existing keys."""
    for key, value in extra_info.items():
        if key not in info_dict:
            info_dict[key] = value
# NOTE(review): mangled paste; original lines 958, 962-963, 965-967,
# 969-972, 974-976, 978, 981-982, 987, 990-991 and 993 are missing from
# this chunk, so the loop/try structure around the fragments below is
# not visible here.
# NOTE(review): extra_info={} is a mutable default argument shared across
# calls -- a known hazard; cannot be changed in a comments-only edit.
956 def extract_info(self
, url
, download
=True, ie_key
=None, info_dict
=None, extra_info
={},
957 process
=True, force_generic_extractor
=False):
959 Returns a list with a dictionary for each video we find.
960 If 'download', also downloads the videos.
961 extra_info is a dict containing the extra values to add to each result
964 if not ie_key
and force_generic_extractor
:
# When an extractor key is known, restrict the candidate list to it.
968 ies
= [self
.get_info_extractor(ie_key
)]
973 if not ie
.suitable(url
):
977 ie
= self
.get_info_extractor(ie_key
)
979 self
.report_warning('The program functionality for this site has been marked as broken, '
980 'and will probably not work.')
# Try to get a video id cheaply (without full extraction) so the archive
# check below can skip already-downloaded videos early.
983 temp_id
= str_or_none(
984 ie
.extract_id(url
) if callable(getattr(ie
, 'extract_id', None))
985 else ie
._match
_id
(url
))
986 except (AssertionError, IndexError, AttributeError):
988 if temp_id
is not None and self
.in_download_archive({'id': temp_id, 'ie_key': ie_key}
):
989 self
.to_screen("[%s] %s: has already been recorded in archive" % (
992 return self
.__extract
_info
(url
, ie
, download
, extra_info
, process
, info_dict
)
994 self
.report_error('no suitable InfoExtractor for URL %s' % url
)
# Decorator: wraps an extraction method so expected extraction failures
# are reported instead of propagating (except the control-flow exceptions
# re-raised on the missing line after 1009).
# NOTE(review): mangled paste; original lines 998 (the try:), 1001-1002,
# 1010 and 1014-1016 (including the `return wrapper`) are missing here.
996 def __handle_extraction_exceptions(func
):
997 def wrapper(self
, *args
, **kwargs
):
999 return func(self
, *args
, **kwargs
)
1000 except GeoRestrictedError
as e
:
# Append the list of countries where the video IS available, if known.
1003 msg
+= '\nThis video is available in %s.' % ', '.join(
1004 map(ISO3166Utils
.short2full
, e
.countries
))
1005 msg
+= '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1006 self
.report_error(msg
)
1007 except ExtractorError
as e
: # An error we somewhat expected
1008 self
.report_error(compat_str(e
), e
.format_traceback())
1009 except (MaxDownloadsReached
, ExistingVideoReached
, RejectedVideoReached
):
1011 except Exception as e
:
# Unexpected errors are only swallowed (and reported with a traceback)
# when the user asked for --ignore-errors.
1012 if self
.params
.get('ignoreerrors', False):
1013 self
.report_error(error_to_compat_str(e
), tb
=encode_compat_str(traceback
.format_exc()))
# Run the actual extraction for one URL and hand the result to
# process_ie_result. Exceptions are handled by the decorator.
# NOTE(review): mangled paste; original lines 1022, 1025, 1028-1029 and
# 1035 are missing from this chunk.
1018 @__handle_extraction_exceptions
1019 def __extract_info(self
, url
, ie
, download
, extra_info
, process
, info_dict
):
1020 ie_result
= ie
.extract(url
)
1021 if ie_result
is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
1023 if isinstance(ie_result
, list):
1024 # Backwards compatibility: old IE result format
1026 '_type': 'compat_list',
1027 'entries': ie_result
,
# Propagate id/title from the caller-supplied info_dict (used when this
# extraction came from a 'url' result that already carried metadata).
1030 if info_dict
.get('id'):
1031 ie_result
['id'] = info_dict
['id']
1032 if info_dict
.get('title'):
1033 ie_result
['title'] = info_dict
['title']
1034 self
.add_default_extra_info(ie_result
, ie
, url
)
1036 return self
.process_ie_result(ie_result
, download
, extra_info
)
# Stamp extractor provenance fields onto a result (only where missing,
# per add_extra_info semantics).
# NOTE(review): mangled paste; original lines 1043 and 1046 (the
# closing of the dict literal) are missing from this chunk.
1040 def add_default_extra_info(self
, ie_result
, ie
, url
):
1041 self
.add_extra_info(ie_result
, {
1042 'extractor': ie
.IE_NAME
,
1044 'webpage_url_basename': url_basename(url
),
1045 'extractor_key': ie
.ie_key(),
# NOTE(review): mangled paste with many elided original lines (1049,
# 1052, 1055, 1057, 1064-1065, 1081, 1084-1087, 1095, 1104, 1112,
# 1115-1116, 1119, 1121, 1129-1130, 1132-1133, 1138-1140, 1144-1146);
# surrounding control structure is partly invisible here.
# NOTE(review): extra_info={} is a mutable default argument -- hazard.
1048 def process_ie_result(self
, ie_result
, download
=True, extra_info
={}):
1050 Take the result of the ie(may be modified) and resolve all unresolved
1051 references (URLs, playlist items).
1053 It will also download the videos if 'download'.
1054 Returns the resolved ie_result.
# Dispatch on the result's '_type' (default: a plain video).
1056 result_type
= ie_result
.get('_type', 'video')
1058 if result_type
in ('url', 'url_transparent'):
1059 ie_result
['url'] = sanitize_url(ie_result
['url'])
1060 extract_flat
= self
.params
.get('extract_flat', False)
1061 if ((extract_flat
== 'in_playlist' and 'playlist' in extra_info
)
1062 or extract_flat
is True):
1063 self
.__forced
_printings
(ie_result
, self
.prepare_filename(ie_result
), incomplete
=True)
1066 if result_type
== 'video':
1067 self
.add_extra_info(ie_result
, extra_info
)
1068 return self
.process_video_result(ie_result
, download
=download
)
1069 elif result_type
== 'url':
1070 # We have to add extra_info to the results because it may be
1071 # contained in a playlist
1072 return self
.extract_info(ie_result
['url'],
1073 download
, info_dict
=ie_result
,
1074 ie_key
=ie_result
.get('ie_key'),
1075 extra_info
=extra_info
)
1076 elif result_type
== 'url_transparent':
1077 # Use the information from the embedding page
1078 info
= self
.extract_info(
1079 ie_result
['url'], ie_key
=ie_result
.get('ie_key'),
1080 extra_info
=extra_info
, download
=False, process
=False)
1082 # extract_info may return None when ignoreerrors is enabled and
1083 # extraction failed with an error, don't crash and return early
# Outer (embedding-page) metadata overrides the inner result, except
# identity/dispatch fields which must come from the inner extraction.
1088 force_properties
= dict(
1089 (k
, v
) for k
, v
in ie_result
.items() if v
is not None)
1090 for f
in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1091 if f
in force_properties
:
1092 del force_properties
[f
]
1093 new_result
= info
.copy()
1094 new_result
.update(force_properties
)
1096 # Extracted info may not be a video result (i.e.
1097 # info.get('_type', 'video') != video) but rather an url or
1098 # url_transparent. In such cases outer metadata (from ie_result)
1099 # should be propagated to inner one (info). For this to happen
1100 # _type of info should be overridden with url_transparent. This
1101 # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1102 if new_result
.get('_type') == 'url':
1103 new_result
['_type'] = 'url_transparent'
1105 return self
.process_ie_result(
1106 new_result
, download
=download
, extra_info
=extra_info
)
1107 elif result_type
in ('playlist', 'multi_video'):
1108 # Protect from infinite recursion due to recursively nested playlists
1109 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1110 webpage_url
= ie_result
['webpage_url']
1111 if webpage_url
in self
._playlist
_urls
:
# NOTE(review): precedence bug on the next two fragments -- '%' binds
# tighter than 'or', so this parses as ("...%s" % title) or id; the id
# fallback only fires when the *formatted string* is falsy (never),
# and would then print the bare id with no message prefix.
1113 '[download] Skipping already downloaded playlist: %s'
1114 % ie_result
.get('title') or ie_result
.get('id'))
1117 self
._playlist
_level
+= 1
1118 self
._playlist
_urls
.add(webpage_url
)
1120 return self
.__process
_playlist
(ie_result
, download
)
# Runs on the missing 1121 (`finally:`-style cleanup per numbering gap --
# TODO confirm): unwind nesting level and clear the seen-URL set at depth 0.
1122 self
._playlist
_level
-= 1
1123 if not self
._playlist
_level
:
1124 self
._playlist
_urls
.clear()
1125 elif result_type
== 'compat_list':
1126 self
.report_warning(
1127 'Extractor %s returned a compat_list result. '
1128 'It needs to be updated.' % ie_result
.get('extractor'))
1131 self
.add_extra_info(
1134 'extractor': ie_result
['extractor'],
1135 'webpage_url': ie_result
['webpage_url'],
1136 'webpage_url_basename': url_basename(ie_result
['webpage_url']),
1137 'extractor_key': ie_result
['extractor_key'],
# Re-process every legacy entry through the normal pipeline.
1141 ie_result
['entries'] = [
1142 self
.process_ie_result(_fixup(r
), download
, extra_info
)
1143 for r
in ie_result
['entries']
1147 raise Exception('Invalid result type: %s' % result_type
)
# Resolve and (optionally) download every selected entry of a playlist
# result, writing playlist-level metadata/description files first.
# NOTE(review): mangled paste with many elided original lines (e.g.
# 1153, 1155, 1161-1162, 1164, 1167, 1171, 1174, 1179, 1183, 1187,
# 1192-1193, 1199-1200, 1202, 1207-1208, 1217-1218, 1221, 1223, 1226,
# 1229, 1231, 1234, 1237, 1239, 1242, 1246-1247, 1250-1252, 1257-1258,
# 1261, 1266, 1269, 1272, 1274, 1278-1279, 1281, 1293-1294, 1296-1297);
# several if/else and try bodies are therefore invisible in this view.
1149 def __process_playlist(self
, ie_result
, download
):
1150 # We process each entry in the playlist
1151 playlist
= ie_result
.get('title') or ie_result
.get('id')
1152 self
.to_screen('[download] Downloading playlist: %s' % playlist
)
# Build a copy of the playlist metadata used to render the pl_* output
# templates for the metadata/description files below.
1154 if self
.params
.get('allow_playlist_files', True):
1156 'playlist': playlist
,
1157 'playlist_id': ie_result
.get('id'),
1158 'playlist_title': ie_result
.get('title'),
1159 'playlist_uploader': ie_result
.get('uploader'),
1160 'playlist_uploader_id': ie_result
.get('uploader_id'),
1163 ie_copy
.update(dict(ie_result
))
1165 def ensure_dir_exists(path
):
1166 return make_dir(path
, self
.report_error
)
# --write-info-json at playlist level.
1168 if self
.params
.get('writeinfojson', False):
1169 infofn
= self
.prepare_filename(ie_copy
, 'pl_infojson')
1170 if not ensure_dir_exists(encodeFilename(infofn
)):
1172 if not self
.params
.get('overwrites', True) and os
.path
.exists(encodeFilename(infofn
)):
1173 self
.to_screen('[info] Playlist metadata is already present')
1175 playlist_info
= dict(ie_result
)
1176 # playlist_info['entries'] = list(playlist_info['entries']) # Entries is a generator which shouldnot be resolved here
1177 del playlist_info
['entries']
1178 self
.to_screen('[info] Writing playlist metadata as JSON to: ' + infofn
)
1180 write_json_file(self
.filter_requested_info(playlist_info
), infofn
)
1181 except (OSError, IOError):
1182 self
.report_error('Cannot write playlist metadata to JSON file ' + infofn
)
# --write-description at playlist level.
1184 if self
.params
.get('writedescription', False):
1185 descfn
= self
.prepare_filename(ie_copy
, 'pl_description')
1186 if not ensure_dir_exists(encodeFilename(descfn
)):
1188 if not self
.params
.get('overwrites', True) and os
.path
.exists(encodeFilename(descfn
)):
1189 self
.to_screen('[info] Playlist description is already present')
1190 elif ie_result
.get('description') is None:
1191 self
.report_warning('There\'s no playlist description to write.')
1194 self
.to_screen('[info] Writing playlist description to: ' + descfn
)
1195 with io
.open(encodeFilename(descfn
), 'w', encoding
='utf-8') as descfile
:
1196 descfile
.write(ie_result
['description'])
1197 except (OSError, IOError):
1198 self
.report_error('Cannot write playlist description file ' + descfn
)
1201 playlist_results
= []
# Selection window: --playlist-start is 1-based, converted to a 0-based
# slice start here; --playlist-end of -1 means "to the end".
1203 playliststart
= self
.params
.get('playliststart', 1) - 1
1204 playlistend
= self
.params
.get('playlistend')
1205 # For backwards compatibility, interpret -1 as whole list
1206 if playlistend
== -1:
1209 playlistitems_str
= self
.params
.get('playlist_items')
1210 playlistitems
= None
1211 if playlistitems_str
is not None:
# Parse --playlist-items ("1,3,5-7") into an ordered set of 1-based
# indices. NOTE(review): the parameter name `format` shadows a builtin.
1212 def iter_playlistitems(format
):
1213 for string_segment
in format
.split(','):
1214 if '-' in string_segment
:
1215 start
, end
= string_segment
.split('-')
1216 for item
in range(int(start
), int(end
) + 1):
1219 yield int(string_segment
)
1220 playlistitems
= orderedSet(iter_playlistitems(playlistitems_str
))
1222 ie_entries
= ie_result
['entries']
# Apply the --playlist-items selection to a fully materialised list;
# out-of-range indices (beyond +/- num_entries) are silently dropped.
1224 def make_playlistitems_entries(list_ie_entries
):
1225 num_entries
= len(list_ie_entries
)
1227 list_ie_entries
[i
- 1] for i
in playlistitems
1228 if -num_entries
<= i
- 1 < num_entries
]
1230 def report_download(num_entries
):
1232 '[%s] playlist %s: Downloading %d videos' %
1233 (ie_result
['extractor'], playlist
, num_entries
))
# Three entry-source shapes: plain list, lazily-paged PagedList, or an
# arbitrary iterable (handled via itertools.islice below).
1235 if isinstance(ie_entries
, list):
1236 n_all_entries
= len(ie_entries
)
1238 entries
= make_playlistitems_entries(ie_entries
)
1240 entries
= ie_entries
[playliststart
:playlistend
]
1241 n_entries
= len(entries
)
1243 '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
1244 (ie_result
['extractor'], playlist
, n_all_entries
, n_entries
))
1245 elif isinstance(ie_entries
, PagedList
):
1248 for item
in playlistitems
:
1249 entries
.extend(ie_entries
.getslice(
1253 entries
= ie_entries
.getslice(
1254 playliststart
, playlistend
)
1255 n_entries
= len(entries
)
1256 report_download(n_entries
)
1259 entries
= make_playlistitems_entries(list(itertools
.islice(
1260 ie_entries
, 0, max(playlistitems
))))
1262 entries
= list(itertools
.islice(
1263 ie_entries
, playliststart
, playlistend
))
1264 n_entries
= len(entries
)
1265 report_download(n_entries
)
1267 if self
.params
.get('playlistreverse', False):
1268 entries
= entries
[::-1]
1270 if self
.params
.get('playlistrandom', False):
1271 random
.shuffle(entries
)
1273 x_forwarded_for
= ie_result
.get('__x_forwarded_for_ip')
# Per-entry loop: i is the 1-based position within the selection.
1275 for i
, entry
in enumerate(entries
, 1):
1276 self
.to_screen('[download] Downloading video %s of %s' % (i
, n_entries
))
1277 # This __x_forwarded_for_ip thing is a bit ugly but requires
1280 entry
['__x_forwarded_for_ip'] = x_forwarded_for
# Extra metadata handed to each entry (the enclosing dict literal is on
# the missing line 1281).
1282 'n_entries': n_entries
,
1283 'playlist': playlist
,
1284 'playlist_id': ie_result
.get('id'),
1285 'playlist_title': ie_result
.get('title'),
1286 'playlist_uploader': ie_result
.get('uploader'),
1287 'playlist_uploader_id': ie_result
.get('uploader_id'),
1288 'playlist_index': playlistitems
[i
- 1] if playlistitems
else i
+ playliststart
,
1289 'extractor': ie_result
['extractor'],
1290 'webpage_url': ie_result
['webpage_url'],
1291 'webpage_url_basename': url_basename(ie_result
['webpage_url']),
1292 'extractor_key': ie_result
['extractor_key'],
# Skip entries rejected by the download filters.
1295 if self
._match
_entry
(entry
, incomplete
=True) is not None:
1298 entry_result
= self
.__process
_iterable
_entry
(entry
, download
, extra
)
1299 # TODO: skip failed (empty) entries?
1300 playlist_results
.append(entry_result
)
1301 ie_result
['entries'] = playlist_results
1302 self
.to_screen('[download] Finished downloading playlist: %s' % playlist
)
@__handle_extraction_exceptions
def __process_iterable_entry(self, entry, download, extra_info):
    """Resolve one playlist entry through the normal result pipeline.

    Extraction errors are reported (not raised) by the
    __handle_extraction_exceptions wrapper.
    """
    result = self.process_ie_result(entry, download=download, extra_info=extra_info)
    return result
# NOTE(review): mangled paste; original lines 1312-1320, 1325,
# 1328-1329, 1331, 1336, 1340-1343, 1347, 1352, 1355, 1360-1363,
# 1365-1366 are missing (including the OPERATORS table and the final
# `return _filter`-style tail), so parts of the numeric/string branch
# structure are invisible here.
1310 def _build_format_filter(self
, filter_spec
):
1311 " Returns a function to filter the formats according to the filter_spec "
# Numeric comparisons: key OP value, where value may carry a size
# suffix (k/M/G/... with optional i and B) parsed by parse_filesize.
1321 operator_rex
= re
.compile(r
'''(?x)\s*
1322 (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
1323 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1324 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
1326 ''' % '|'.join(map(re
.escape
, OPERATORS
.keys())))
1327 m
= operator_rex
.search(filter_spec
)
1330 comparison_value
= int(m
.group('value'))
# Fall back to filesize parsing, then retry with an explicit 'B' unit.
1332 comparison_value
= parse_filesize(m
.group('value'))
1333 if comparison_value
is None:
1334 comparison_value
= parse_filesize(m
.group('value') + 'B')
1335 if comparison_value
is None:
1337 'Invalid value %r in format specification %r' % (
1338 m
.group('value'), filter_spec
))
1339 op
= OPERATORS
[m
.group('op')]
# String comparisons: startswith / endswith / contains, optionally
# negated with a leading '!'.
1344 '^=': lambda attr
, value
: attr
.startswith(value
),
1345 '$=': lambda attr
, value
: attr
.endswith(value
),
1346 '*=': lambda attr
, value
: value
in attr
,
1348 str_operator_rex
= re
.compile(r
'''(?x)
1349 \s*(?P<key>[a-zA-Z0-9._-]+)
1350 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
1351 \s*(?P<value>[a-zA-Z0-9._-]+)
1353 ''' % '|'.join(map(re
.escape
, STR_OPERATORS
.keys())))
1354 m
= str_operator_rex
.search(filter_spec
)
1356 comparison_value
= m
.group('value')
1357 str_op
= STR_OPERATORS
[m
.group('op')]
1358 if m
.group('negation'):
1359 op
= lambda attr
, value
: not str_op(attr
, value
)
1364 raise ValueError('Invalid filter specification %r' % filter_spec
)
# The returned predicate: a missing attribute passes only when the
# spec used the '?' (none-inclusive) marker.
1367 actual_value
= f
.get(m
.group('key'))
1368 if actual_value
is None:
1369 return m
.group('none_inclusive')
1370 return op(actual_value
, comparison_value
)
# Choose the default --format spec based on whether ffmpeg can merge
# separate video+audio streams and whether output goes to stdout/live.
# NOTE(review): mangled paste; original lines 1374-1375, 1378-1379,
# 1381-1383, 1386-1387 and 1389 are missing, so the helper defs and the
# conditional skeleton joining these fragments are invisible here.
1373 def _default_format_spec(self
, info_dict
, download
=True):
# Merging requires an available ffmpeg that supports it.
1376 merger
= FFmpegMergerPP(self
)
1377 return merger
.available
and merger
.can_merge()
1380 not self
.params
.get('simulate', False)
1384 or info_dict
.get('is_live', False)
1385 or self
.outtmpl_dict
['default'] == '-'))
# Candidate specs: pre-merged 'best' preferred when merging is not
# possible; otherwise bestvideo+bestaudio variants.
1388 'best/bestvideo+bestaudio'
1390 else 'bestvideo*+bestaudio/best'
1391 if not self
.params
.get('allow_multiple_audio_streams', False)
1392 else 'bestvideo+bestaudio/best')
# Compile a --format specification string into a selector function
# (ctx dict -> iterator of chosen formats). The spec is tokenized with
# Python's own tokenizer, parsed into FormatSelector nodes, then
# compiled into nested closures.
# NOTE(review): mangled paste with many elided original lines (e.g.
# 1396 with the `message =` binding, 1400, 1402-1404, 1406, 1409, 1411,
# 1415, 1417, 1425, 1427, 1433, 1435, 1437, 1440, 1442-1444, 1446,
# 1448, 1450, 1455, 1459, 1463, 1466, 1469-1470, 1475, 1481, 1486,
# 1491, 1496, 1499, 1502, 1505-1506, 1510, 1512, 1514, 1516, 1519,
# 1522, 1524, 1526, 1528-1529, 1532, 1536-1540, 1548, 1556-1558, 1562,
# 1565-1566, 1568, 1575, 1579-1580, 1583, 1590, 1592, 1595, 1598, 1601,
# 1603-1604, 1608-1611, 1615-1619, 1627-1630, 1633-1636, 1638,
# 1642-1643, 1645, 1652, 1654, 1658, 1662-1667, 1671-1675, 1677-1678);
# several branch/loop bodies are therefore invisible here.
# NOTE(review): the loop variables `type`, `format` and the use of
# `filter` as a parameter-like name shadow builtins throughout.
1394 def build_format_selector(self
, format_spec
):
1395 def syntax_error(note
, start
):
1397 'Invalid format specification: '
1398 '{0}\n\t{1}\n\t{2}^'.format(note
, format_spec
, ' ' * start
[1]))
1399 return SyntaxError(message
)
1401 PICKFIRST
= 'PICKFIRST'
1405 FormatSelector
= collections
.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1407 allow_multiple_streams
= {'audio': self
.params
.get('allow_multiple_audio_streams', False),
1408 'video': self
.params
.get('allow_multiple_video_streams', False)}
# Consume tokens up to the closing ']' and return the raw filter text.
1410 def _parse_filter(tokens
):
1412 for type, string
, start
, _
, _
in tokens
:
1413 if type == tokenize
.OP
and string
== ']':
1414 return ''.join(filter_parts
)
1416 filter_parts
.append(string
)
1418 def _remove_unused_ops(tokens
):
1419 # Remove operators that we don't use and join them with the surrounding strings
1420 # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1421 ALLOWED_OPS
= ('/', '+', ',', '(', ')')
1422 last_string
, last_start
, last_end
, last_line
= None, None, None, None
1423 for type, string
, start
, end
, line
in tokens
:
1424 if type == tokenize
.OP
and string
== '[':
1426 yield tokenize
.NAME
, last_string
, last_start
, last_end
, last_line
1428 yield type, string
, start
, end
, line
1429 # everything inside brackets will be handled by _parse_filter
1430 for type, string
, start
, end
, line
in tokens
:
1431 yield type, string
, start
, end
, line
1432 if type == tokenize
.OP
and string
== ']':
1434 elif type == tokenize
.OP
and string
in ALLOWED_OPS
:
1436 yield tokenize
.NAME
, last_string
, last_start
, last_end
, last_line
1438 yield type, string
, start
, end
, line
1439 elif type in [tokenize
.NAME
, tokenize
.NUMBER
, tokenize
.OP
]:
1441 last_string
= string
1445 last_string
+= string
1447 yield tokenize
.NAME
, last_string
, last_start
, last_end
, last_line
# Recursive-descent parser over the cleaned token stream; the inside_*
# flags tell nested calls which delimiters end their sub-expression.
1449 def _parse_format_selection(tokens
, inside_merge
=False, inside_choice
=False, inside_group
=False):
1451 current_selector
= None
1452 for type, string
, start
, _
, _
in tokens
:
1453 # ENCODING is only defined in python 3.x
1454 if type == getattr(tokenize
, 'ENCODING', None):
1456 elif type in [tokenize
.NAME
, tokenize
.NUMBER
]:
1457 current_selector
= FormatSelector(SINGLE
, string
, [])
1458 elif type == tokenize
.OP
:
1460 if not inside_group
:
1461 # ')' will be handled by the parentheses group
1462 tokens
.restore_last_token()
1464 elif inside_merge
and string
in ['/', ',']:
1465 tokens
.restore_last_token()
1467 elif inside_choice
and string
== ',':
1468 tokens
.restore_last_token()
1471 if not current_selector
:
1472 raise syntax_error('"," must follow a format selector', start
)
1473 selectors
.append(current_selector
)
1474 current_selector
= None
1476 if not current_selector
:
1477 raise syntax_error('"/" must follow a format selector', start
)
1478 first_choice
= current_selector
1479 second_choice
= _parse_format_selection(tokens
, inside_choice
=True)
1480 current_selector
= FormatSelector(PICKFIRST
, (first_choice
, second_choice
), [])
1482 if not current_selector
:
1483 current_selector
= FormatSelector(SINGLE
, 'best', [])
1484 format_filter
= _parse_filter(tokens
)
1485 current_selector
.filters
.append(format_filter
)
1487 if current_selector
:
1488 raise syntax_error('Unexpected "("', start
)
1489 group
= _parse_format_selection(tokens
, inside_group
=True)
1490 current_selector
= FormatSelector(GROUP
, group
, [])
1492 if not current_selector
:
1493 raise syntax_error('Unexpected "+"', start
)
1494 selector_1
= current_selector
1495 selector_2
= _parse_format_selection(tokens
, inside_merge
=True)
1497 raise syntax_error('Expected a selector', start
)
1498 current_selector
= FormatSelector(MERGE
, (selector_1
, selector_2
), [])
1500 raise syntax_error('Operator not recognized: "{0}"'.format(string
), start
)
1501 elif type == tokenize
.ENDMARKER
:
1503 if current_selector
:
1504 selectors
.append(current_selector
)
# Compile a FormatSelector node (or a list of them, meaning ',') into a
# generator function over ctx['formats'].
1507 def _build_selector_function(selector
):
1508 if isinstance(selector
, list): # ,
1509 fs
= [_build_selector_function(s
) for s
in selector
]
1511 def selector_function(ctx
):
1513 for format
in f(ctx
):
1515 return selector_function
1517 elif selector
.type == GROUP
: # ()
1518 selector_function
= _build_selector_function(selector
.selector
)
1520 elif selector
.type == PICKFIRST
: # /
1521 fs
= [_build_selector_function(s
) for s
in selector
.selector
]
1523 def selector_function(ctx
):
1525 picked_formats
= list(f(ctx
))
1527 return picked_formats
1530 elif selector
.type == SINGLE
: # atom
1531 format_spec
= selector
.selector
if selector
.selector
is not None else 'best'
1533 if format_spec
== 'all':
1534 def selector_function(ctx
):
1535 formats
= list(ctx
['formats'])
# best/worst atoms: group(1) best-vs-worst, group(2) video/audio
# restriction, group(3) the '*' "modified" marker.
1541 format_fallback
= False
1542 format_spec_obj
= re
.match(r
'(best|worst|b|w)(video|audio|v|a)?(\*)?$', format_spec
)
1543 if format_spec_obj
is not None:
1544 format_idx
= 0 if format_spec_obj
.group(1)[0] == 'w' else -1
1545 format_type
= format_spec_obj
.group(2)[0] if format_spec_obj
.group(2) else False
1546 not_format_type
= 'v' if format_type
== 'a' else 'a'
1547 format_modified
= format_spec_obj
.group(3) is not None
1549 format_fallback
= not format_type
and not format_modified
# for b, w
1550 filter_f
= ((lambda f
: f
.get(format_type
+ 'codec') != 'none')
1551 if format_type
and format_modified
# bv*, ba*, wv*, wa*
1552 else (lambda f
: f
.get(not_format_type
+ 'codec') == 'none')
1553 if format_type
# bv, ba, wv, wa
1554 else (lambda f
: f
.get('vcodec') != 'none' and f
.get('acodec') != 'none')
1555 if not format_modified
# b, w
# Otherwise the atom is an extension name or a literal format_id.
1559 filter_f
= ((lambda f
: f
.get('ext') == format_spec
)
1560 if format_spec
in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav'] # extension
1561 else (lambda f
: f
.get('format_id') == format_spec
)) # id
1563 def selector_function(ctx
):
1564 formats
= list(ctx
['formats'])
1567 matches
= list(filter(filter_f
, formats
)) if filter_f
is not None else formats
1569 yield matches
[format_idx
]
1570 elif format_fallback
== 'force' or (format_fallback
and ctx
['incomplete_formats']):
1571 # for extractors with incomplete formats (audio only (soundcloud)
1572 # or video only (imgur)) best/worst will fallback to
1573 # best/worst {video,audio}-only format
1574 yield formats
[format_idx
]
1576 elif selector
.type == MERGE
: # +
# Combine a (video, audio) pair into one synthetic merged format dict.
1577 def _merge(formats_pair
):
1578 format_1
, format_2
= formats_pair
1581 formats_info
.extend(format_1
.get('requested_formats', (format_1
,)))
1582 formats_info
.extend(format_2
.get('requested_formats', (format_2
,)))
# Drop surplus streams of a kind when multiple streams of that kind
# are not allowed (keeps only the first of each disallowed kind).
1584 if not allow_multiple_streams
['video'] or not allow_multiple_streams
['audio']:
1585 get_no_more
= {"video": False, "audio": False}
1586 for (i
, fmt_info
) in enumerate(formats_info
):
1587 for aud_vid
in ["audio", "video"]:
1588 if not allow_multiple_streams
[aud_vid
] and fmt_info
.get(aud_vid
[0] + 'codec') != 'none':
1589 if get_no_more
[aud_vid
]:
1591 get_no_more
[aud_vid
] = True
1593 if len(formats_info
) == 1:
1594 return formats_info
[0]
1596 video_fmts
= [fmt_info
for fmt_info
in formats_info
if fmt_info
.get('vcodec') != 'none']
1597 audio_fmts
= [fmt_info
for fmt_info
in formats_info
if fmt_info
.get('acodec') != 'none']
1599 the_only_video
= video_fmts
[0] if len(video_fmts
) == 1 else None
1600 the_only_audio
= audio_fmts
[0] if len(audio_fmts
) == 1 else None
# Container for the merged output: --merge-output-format wins, else
# the single stream's own extension.
1602 output_ext
= self
.params
.get('merge_output_format')
1605 output_ext
= the_only_video
['ext']
1606 elif the_only_audio
and not video_fmts
:
1607 output_ext
= the_only_audio
['ext']
# Fields of the synthetic merged format (dict literal opening is on a
# missing line).
1612 'requested_formats': formats_info
,
1613 'format': '+'.join(fmt_info
.get('format') for fmt_info
in formats_info
),
1614 'format_id': '+'.join(fmt_info
.get('format_id') for fmt_info
in formats_info
),
1620 'width': the_only_video
.get('width'),
1621 'height': the_only_video
.get('height'),
1622 'resolution': the_only_video
.get('resolution'),
1623 'fps': the_only_video
.get('fps'),
1624 'vcodec': the_only_video
.get('vcodec'),
1625 'vbr': the_only_video
.get('vbr'),
1626 'stretched_ratio': the_only_video
.get('stretched_ratio'),
1631 'acodec': the_only_audio
.get('acodec'),
1632 'abr': the_only_audio
.get('abr'),
1637 selector_1
, selector_2
= map(_build_selector_function
, selector
.selector
)
# Cartesian product of both sides; ctx is deep-copied so each side's
# filters cannot mutate the other's view of the formats.
1639 def selector_function(ctx
):
1640 for pair
in itertools
.product(
1641 selector_1(copy
.deepcopy(ctx
)), selector_2(copy
.deepcopy(ctx
))):
# Apply the node's [filters] on a copied ctx before selecting.
1644 filters
= [self
._build
_format
_filter
(f
) for f
in selector
.filters
]
1646 def final_selector(ctx
):
1647 ctx_copy
= copy
.deepcopy(ctx
)
1648 for _filter
in filters
:
1649 ctx_copy
['formats'] = list(filter(_filter
, ctx_copy
['formats']))
1650 return selector_function(ctx_copy
)
1651 return final_selector
# Tokenize the spec with the stdlib tokenizer (fed via a BytesIO).
1653 stream
= io
.BytesIO(format_spec
.encode('utf-8'))
1655 tokens
= list(_remove_unused_ops(compat_tokenize_tokenize(stream
.readline
)))
1656 except tokenize
.TokenError
:
1657 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec
)))
# One-token-lookback iterator so the parser can push a token back.
1659 class TokenIterator(object):
1660 def __init__(self
, tokens
):
1661 self
.tokens
= tokens
1668 if self
.counter
>= len(self
.tokens
):
1669 raise StopIteration()
1670 value
= self
.tokens
[self
.counter
]
1676 def restore_last_token(self
):
1679 parsed_selector
= _parse_format_selection(iter(TokenIterator(tokens
)))
1680 return _build_selector_function(parsed_selector
)
# Compute the effective HTTP headers for one info_dict: global defaults,
# extractor-supplied headers, cookies, and the per-request X-Forwarded-For.
# NOTE(review): mangled paste; original lines 1684, 1686, 1688, 1690,
# 1692 and 1697-1698 (including the guards around the two updates and
# the final `return res`) are missing from this chunk.
1682 def _calc_headers(self
, info_dict
):
# Start from the module-wide default headers.
1683 res
= std_headers
.copy()
1685 add_headers
= info_dict
.get('http_headers')
1687 res
.update(add_headers
)
1689 cookies
= self
._calc
_cookies
(info_dict
)
1691 res
['Cookie'] = cookies
# Only inject the spoofed source IP when no header set it already.
1693 if 'X-Forwarded-For' not in res
:
1694 x_forwarded_for_ip
= info_dict
.get('__x_forwarded_for_ip')
1695 if x_forwarded_for_ip
:
1696 res
['X-Forwarded-For'] = x_forwarded_for_ip
def _calc_cookies(self, info_dict):
    """Return the Cookie header value the cookiejar would send for
    info_dict['url'] (None when no cookies apply)."""
    request = sanitized_Request(info_dict['url'])
    self.cookiejar.add_cookie_header(request)
    return request.get_header('Cookie')
1705 def process_video_result(self
, info_dict
, download
=True):
1706 assert info_dict
.get('_type', 'video') == 'video'
1708 if 'id' not in info_dict
:
1709 raise ExtractorError('Missing "id" field in extractor result')
1710 if 'title' not in info_dict
:
1711 raise ExtractorError('Missing "title" field in extractor result')
1713 def report_force_conversion(field
, field_not
, conversion
):
1714 self
.report_warning(
1715 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
1716 % (field
, field_not
, conversion
))
1718 def sanitize_string_field(info
, string_field
):
1719 field
= info
.get(string_field
)
1720 if field
is None or isinstance(field
, compat_str
):
1722 report_force_conversion(string_field
, 'a string', 'string')
1723 info
[string_field
] = compat_str(field
)
1725 def sanitize_numeric_fields(info
):
1726 for numeric_field
in self
._NUMERIC
_FIELDS
:
1727 field
= info
.get(numeric_field
)
1728 if field
is None or isinstance(field
, compat_numeric_types
):
1730 report_force_conversion(numeric_field
, 'numeric', 'int')
1731 info
[numeric_field
] = int_or_none(field
)
1733 sanitize_string_field(info_dict
, 'id')
1734 sanitize_numeric_fields(info_dict
)
1736 if 'playlist' not in info_dict
:
1737 # It isn't part of a playlist
1738 info_dict
['playlist'] = None
1739 info_dict
['playlist_index'] = None
1741 thumbnails
= info_dict
.get('thumbnails')
1742 if thumbnails
is None:
1743 thumbnail
= info_dict
.get('thumbnail')
1745 info_dict
['thumbnails'] = thumbnails
= [{'url': thumbnail}
]
1747 thumbnails
.sort(key
=lambda t
: (
1748 t
.get('preference') if t
.get('preference') is not None else -1,
1749 t
.get('width') if t
.get('width') is not None else -1,
1750 t
.get('height') if t
.get('height') is not None else -1,
1751 t
.get('id') if t
.get('id') is not None else '', t
.get('url')))
1752 for i
, t
in enumerate(thumbnails
):
1753 t
['url'] = sanitize_url(t
['url'])
1754 if t
.get('width') and t
.get('height'):
1755 t
['resolution'] = '%dx%d' % (t
['width'], t
['height'])
1756 if t
.get('id') is None:
1759 if self
.params
.get('list_thumbnails'):
1760 self
.list_thumbnails(info_dict
)
1763 thumbnail
= info_dict
.get('thumbnail')
1765 info_dict
['thumbnail'] = sanitize_url(thumbnail
)
1767 info_dict
['thumbnail'] = thumbnails
[-1]['url']
1769 if 'display_id' not in info_dict
and 'id' in info_dict
:
1770 info_dict
['display_id'] = info_dict
['id']
1772 if info_dict
.get('upload_date') is None and info_dict
.get('timestamp') is not None:
1773 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
1774 # see http://bugs.python.org/issue1646728)
1776 upload_date
= datetime
.datetime
.utcfromtimestamp(info_dict
['timestamp'])
1777 info_dict
['upload_date'] = upload_date
.strftime('%Y%m%d')
1778 except (ValueError, OverflowError, OSError):
1781 # Auto generate title fields corresponding to the *_number fields when missing
1782 # in order to always have clean titles. This is very common for TV series.
1783 for field
in ('chapter', 'season', 'episode'):
1784 if info_dict
.get('%s_number' % field
) is not None and not info_dict
.get(field
):
1785 info_dict
[field
] = '%s %d' % (field
.capitalize(), info_dict
['%s_number' % field
])
1787 for cc_kind
in ('subtitles', 'automatic_captions'):
1788 cc
= info_dict
.get(cc_kind
)
1790 for _
, subtitle
in cc
.items():
1791 for subtitle_format
in subtitle
:
1792 if subtitle_format
.get('url'):
1793 subtitle_format
['url'] = sanitize_url(subtitle_format
['url'])
1794 if subtitle_format
.get('ext') is None:
1795 subtitle_format
['ext'] = determine_ext(subtitle_format
['url']).lower()
1797 automatic_captions
= info_dict
.get('automatic_captions')
1798 subtitles
= info_dict
.get('subtitles')
1800 if self
.params
.get('listsubtitles', False):
1801 if 'automatic_captions' in info_dict
:
1802 self
.list_subtitles(
1803 info_dict
['id'], automatic_captions
, 'automatic captions')
1804 self
.list_subtitles(info_dict
['id'], subtitles
, 'subtitles')
1807 info_dict
['requested_subtitles'] = self
.process_subtitles(
1808 info_dict
['id'], subtitles
, automatic_captions
)
1810 # We now pick which formats have to be downloaded
1811 if info_dict
.get('formats') is None:
1812 # There's only one format available
1813 formats
= [info_dict
]
1815 formats
= info_dict
['formats']
1818 raise ExtractorError('No video formats found!')
1820 def is_wellformed(f
):
1823 self
.report_warning(
1824 '"url" field is missing or empty - skipping format, '
1825 'there is an error in extractor')
1827 if isinstance(url
, bytes):
1828 sanitize_string_field(f
, 'url')
1831 # Filter out malformed formats for better extraction robustness
1832 formats
= list(filter(is_wellformed
, formats
))
1836 # We check that all the formats have the format and format_id fields
1837 for i
, format
in enumerate(formats
):
1838 sanitize_string_field(format
, 'format_id')
1839 sanitize_numeric_fields(format
)
1840 format
['url'] = sanitize_url(format
['url'])
1841 if not format
.get('format_id'):
1842 format
['format_id'] = compat_str(i
)
1844 # Sanitize format_id from characters used in format selector expression
1845 format
['format_id'] = re
.sub(r
'[\s,/+\[\]()]', '_', format
['format_id'])
1846 format_id
= format
['format_id']
1847 if format_id
not in formats_dict
:
1848 formats_dict
[format_id
] = []
1849 formats_dict
[format_id
].append(format
)
1851 # Make sure all formats have unique format_id
1852 for format_id
, ambiguous_formats
in formats_dict
.items():
1853 if len(ambiguous_formats
) > 1:
1854 for i
, format
in enumerate(ambiguous_formats
):
1855 format
['format_id'] = '%s-%d' % (format_id
, i
)
1857 for i
, format
in enumerate(formats
):
1858 if format
.get('format') is None:
1859 format
['format'] = '{id} - {res}{note}'.format(
1860 id=format
['format_id'],
1861 res
=self
.format_resolution(format
),
1862 note
=' ({0})'.format(format
['format_note']) if format
.get('format_note') is not None else '',
1864 # Automatically determine file extension if missing
1865 if format
.get('ext') is None:
1866 format
['ext'] = determine_ext(format
['url']).lower()
1867 # Automatically determine protocol if missing (useful for format
1868 # selection purposes)
1869 if format
.get('protocol') is None:
1870 format
['protocol'] = determine_protocol(format
)
1871 # Add HTTP headers, so that external programs can use them from the
1873 full_format_info
= info_dict
.copy()
1874 full_format_info
.update(format
)
1875 format
['http_headers'] = self
._calc
_headers
(full_format_info
)
1876 # Remove private housekeeping stuff
1877 if '__x_forwarded_for_ip' in info_dict
:
1878 del info_dict
['__x_forwarded_for_ip']
1880 # TODO Central sorting goes here
1882 if formats
[0] is not info_dict
:
1883 # only set the 'formats' fields if the original info_dict list them
1884 # otherwise we end up with a circular reference, the first (and unique)
1885 # element in the 'formats' field in info_dict is info_dict itself,
1886 # which can't be exported to json
1887 info_dict
['formats'] = formats
1888 if self
.params
.get('listformats'):
1889 self
.list_formats(info_dict
)
1892 req_format
= self
.params
.get('format')
1893 if req_format
is None:
1894 req_format
= self
._default
_format
_spec
(info_dict
, download
=download
)
1895 if self
.params
.get('verbose'):
1896 self
.to_screen('[debug] Default format spec: %s' % req_format
)
1898 format_selector
= self
.build_format_selector(req_format
)
1900 # While in format selection we may need to have an access to the original
1901 # format set in order to calculate some metrics or do some processing.
1902 # For now we need to be able to guess whether original formats provided
1903 # by extractor are incomplete or not (i.e. whether extractor provides only
1904 # video-only or audio-only formats) for proper formats selection for
1905 # extractors with such incomplete formats (see
1906 # https://github.com/ytdl-org/youtube-dl/pull/5556).
1907 # Since formats may be filtered during format selection and may not match
1908 # the original formats the results may be incorrect. Thus original formats
1909 # or pre-calculated metrics should be passed to format selection routines
1911 # We will pass a context object containing all necessary additional data
1912 # instead of just formats.
1913 # This fixes incorrect format selection issue (see
1914 # https://github.com/ytdl-org/youtube-dl/issues/10083).
1915 incomplete_formats
= (
1916 # All formats are video-only or
1917 all(f
.get('vcodec') != 'none' and f
.get('acodec') == 'none' for f
in formats
)
1918 # all formats are audio-only
1919 or all(f
.get('vcodec') == 'none' and f
.get('acodec') != 'none' for f
in formats
))
1923 'incomplete_formats': incomplete_formats
,
1926 formats_to_download
= list(format_selector(ctx
))
1927 if not formats_to_download
:
1928 raise ExtractorError('requested format not available',
1932 self
.to_screen('[info] Downloading format(s) %s' % ", ".join([f
['format_id'] for f
in formats_to_download
]))
1933 if len(formats_to_download
) > 1:
1934 self
.to_screen('[info] %s: downloading video in %s formats' % (info_dict
['id'], len(formats_to_download
)))
1935 for format
in formats_to_download
:
1936 new_info
= dict(info_dict
)
1937 new_info
.update(format
)
1938 self
.process_info(new_info
)
1939 # We update the info dict with the best quality format (backwards compatibility)
1940 info_dict
.update(formats_to_download
[-1])
1943 def process_subtitles(self
, video_id
, normal_subtitles
, automatic_captions
):
1944 """Select the requested subtitles and their format"""
1946 if normal_subtitles
and self
.params
.get('writesubtitles'):
1947 available_subs
.update(normal_subtitles
)
1948 if automatic_captions
and self
.params
.get('writeautomaticsub'):
1949 for lang
, cap_info
in automatic_captions
.items():
1950 if lang
not in available_subs
:
1951 available_subs
[lang
] = cap_info
1953 if (not self
.params
.get('writesubtitles') and not
1954 self
.params
.get('writeautomaticsub') or not
1958 if self
.params
.get('allsubtitles', False):
1959 requested_langs
= available_subs
.keys()
1961 if self
.params
.get('subtitleslangs', False):
1962 requested_langs
= self
.params
.get('subtitleslangs')
1963 elif 'en' in available_subs
:
1964 requested_langs
= ['en']
1966 requested_langs
= [list(available_subs
.keys())[0]]
1968 formats_query
= self
.params
.get('subtitlesformat', 'best')
1969 formats_preference
= formats_query
.split('/') if formats_query
else []
1971 for lang
in requested_langs
:
1972 formats
= available_subs
.get(lang
)
1974 self
.report_warning('%s subtitles not available for %s' % (lang
, video_id
))
1976 for ext
in formats_preference
:
1980 matches
= list(filter(lambda f
: f
['ext'] == ext
, formats
))
1986 self
.report_warning(
1987 'No subtitle format found matching "%s" for language %s, '
1988 'using %s' % (formats_query
, lang
, f
['ext']))
1992 def __forced_printings(self
, info_dict
, filename
, incomplete
):
1993 def print_mandatory(field
):
1994 if (self
.params
.get('force%s' % field
, False)
1995 and (not incomplete
or info_dict
.get(field
) is not None)):
1996 self
.to_stdout(info_dict
[field
])
1998 def print_optional(field
):
1999 if (self
.params
.get('force%s' % field
, False)
2000 and info_dict
.get(field
) is not None):
2001 self
.to_stdout(info_dict
[field
])
2003 print_mandatory('title')
2004 print_mandatory('id')
2005 if self
.params
.get('forceurl', False) and not incomplete
:
2006 if info_dict
.get('requested_formats') is not None:
2007 for f
in info_dict
['requested_formats']:
2008 self
.to_stdout(f
['url'] + f
.get('play_path', ''))
2010 # For RTMP URLs, also include the playpath
2011 self
.to_stdout(info_dict
['url'] + info_dict
.get('play_path', ''))
2012 print_optional('thumbnail')
2013 print_optional('description')
2014 if self
.params
.get('forcefilename', False) and filename
is not None:
2015 self
.to_stdout(filename
)
2016 if self
.params
.get('forceduration', False) and info_dict
.get('duration') is not None:
2017 self
.to_stdout(formatSeconds(info_dict
['duration']))
2018 print_mandatory('format')
2019 if self
.params
.get('forcejson', False):
2020 self
.to_stdout(json
.dumps(info_dict
))
2022 def process_info(self
, info_dict
):
2023 """Process a single resolved IE result."""
2025 assert info_dict
.get('_type', 'video') == 'video'
2027 info_dict
.setdefault('__postprocessors', [])
2029 max_downloads
= self
.params
.get('max_downloads')
2030 if max_downloads
is not None:
2031 if self
._num
_downloads
>= int(max_downloads
):
2032 raise MaxDownloadsReached()
2034 # TODO: backward compatibility, to be removed
2035 info_dict
['fulltitle'] = info_dict
['title']
2037 if 'format' not in info_dict
:
2038 info_dict
['format'] = info_dict
['ext']
2040 if self
._match
_entry
(info_dict
, incomplete
=False) is not None:
2043 self
._num
_downloads
+= 1
2045 info_dict
= self
.pre_process(info_dict
)
2047 info_dict
['_filename'] = full_filename
= self
.prepare_filename(info_dict
, warn
=True)
2048 temp_filename
= self
.prepare_filename(info_dict
, 'temp')
2050 skip_dl
= self
.params
.get('skip_download', False)
2053 self
.__forced
_printings
(info_dict
, full_filename
, incomplete
=False)
2055 if self
.params
.get('simulate', False):
2056 if self
.params
.get('force_write_download_archive', False):
2057 self
.record_download_archive(info_dict
)
2059 # Do nothing else if in simulate mode
2062 if full_filename
is None:
2065 def ensure_dir_exists(path
):
2066 return make_dir(path
, self
.report_error
)
2068 if not ensure_dir_exists(encodeFilename(full_filename
)):
2070 if not ensure_dir_exists(encodeFilename(temp_filename
)):
2073 if self
.params
.get('writedescription', False):
2074 descfn
= self
.prepare_filename(info_dict
, 'description')
2075 if not ensure_dir_exists(encodeFilename(descfn
)):
2077 if not self
.params
.get('overwrites', True) and os
.path
.exists(encodeFilename(descfn
)):
2078 self
.to_screen('[info] Video description is already present')
2079 elif info_dict
.get('description') is None:
2080 self
.report_warning('There\'s no description to write.')
2083 self
.to_screen('[info] Writing video description to: ' + descfn
)
2084 with io
.open(encodeFilename(descfn
), 'w', encoding
='utf-8') as descfile
:
2085 descfile
.write(info_dict
['description'])
2086 except (OSError, IOError):
2087 self
.report_error('Cannot write description file ' + descfn
)
2090 if self
.params
.get('writeannotations', False):
2091 annofn
= self
.prepare_filename(info_dict
, 'annotation')
2092 if not ensure_dir_exists(encodeFilename(annofn
)):
2094 if not self
.params
.get('overwrites', True) and os
.path
.exists(encodeFilename(annofn
)):
2095 self
.to_screen('[info] Video annotations are already present')
2096 elif not info_dict
.get('annotations'):
2097 self
.report_warning('There are no annotations to write.')
2100 self
.to_screen('[info] Writing video annotations to: ' + annofn
)
2101 with io
.open(encodeFilename(annofn
), 'w', encoding
='utf-8') as annofile
:
2102 annofile
.write(info_dict
['annotations'])
2103 except (KeyError, TypeError):
2104 self
.report_warning('There are no annotations to write.')
2105 except (OSError, IOError):
2106 self
.report_error('Cannot write annotations file: ' + annofn
)
2109 def dl(name
, info
, subtitle
=False):
2110 fd
= get_suitable_downloader(info
, self
.params
)(self
, self
.params
)
2111 for ph
in self
._progress
_hooks
:
2112 fd
.add_progress_hook(ph
)
2113 if self
.params
.get('verbose'):
2114 self
.to_screen('[debug] Invoking downloader on %r' % info
.get('url'))
2115 return fd
.download(name
, info
, subtitle
)
2117 subtitles_are_requested
= any([self
.params
.get('writesubtitles', False),
2118 self
.params
.get('writeautomaticsub')])
2120 if subtitles_are_requested
and info_dict
.get('requested_subtitles'):
2121 # subtitles download errors are already managed as troubles in relevant IE
2122 # that way it will silently go on when used with unsupporting IE
2123 subtitles
= info_dict
['requested_subtitles']
2124 # ie = self.get_info_extractor(info_dict['extractor_key'])
2125 for sub_lang
, sub_info
in subtitles
.items():
2126 sub_format
= sub_info
['ext']
2127 sub_fn
= self
.prepare_filename(info_dict
, 'subtitle')
2128 sub_filename
= subtitles_filename(
2129 temp_filename
if not skip_dl
else sub_fn
,
2130 sub_lang
, sub_format
, info_dict
.get('ext'))
2131 sub_filename_final
= subtitles_filename(sub_fn
, sub_lang
, sub_format
, info_dict
.get('ext'))
2132 if not self
.params
.get('overwrites', True) and os
.path
.exists(encodeFilename(sub_filename
)):
2133 self
.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang
, sub_format
))
2134 files_to_move
[sub_filename
] = sub_filename_final
2136 self
.to_screen('[info] Writing video subtitles to: ' + sub_filename
)
2137 if sub_info
.get('data') is not None:
2139 # Use newline='' to prevent conversion of newline characters
2140 # See https://github.com/ytdl-org/youtube-dl/issues/10268
2141 with io
.open(encodeFilename(sub_filename
), 'w', encoding
='utf-8', newline
='') as subfile
:
2142 subfile
.write(sub_info
['data'])
2143 files_to_move
[sub_filename
] = sub_filename_final
2144 except (OSError, IOError):
2145 self
.report_error('Cannot write subtitles file ' + sub_filename
)
2149 dl(sub_filename
, sub_info
, subtitle
=True)
2151 if self.params.get('sleep_interval_subtitles', False):
2152 dl(sub_filename, sub_info)
2154 sub_data = ie._request_webpage(
2155 sub_info['url'], info_dict['id'], note=False).read()
2156 with io.open(encodeFilename(sub_filename), 'wb') as subfile:
2157 subfile.write(sub_data)
2159 files_to_move
[sub_filename
] = sub_filename_final
2160 except (ExtractorError
, IOError, OSError, ValueError, compat_urllib_error
.URLError
, compat_http_client
.HTTPException
, socket
.error
) as err
:
2161 self
.report_warning('Unable to download subtitle for "%s": %s' %
2162 (sub_lang
, error_to_compat_str(err
)))
2166 if self
.params
.get('convertsubtitles', False):
2167 # subconv = FFmpegSubtitlesConvertorPP(self, format=self.params.get('convertsubtitles'))
2168 filename_real_ext
= os
.path
.splitext(full_filename
)[1][1:]
2170 os
.path
.splitext(full_filename
)[0]
2171 if filename_real_ext
== info_dict
['ext']
2173 afilename
= '%s.%s' % (filename_wo_ext
, self
.params
.get('convertsubtitles'))
2174 # if subconv.available:
2175 # info_dict['__postprocessors'].append(subconv)
2176 if os
.path
.exists(encodeFilename(afilename
)):
2178 '[download] %s has already been downloaded and '
2179 'converted' % afilename
)
2182 self
.post_process(full_filename
, info_dict
, files_to_move
)
2183 except PostProcessingError
as err
:
2184 self
.report_error('Postprocessing: %s' % str(err
))
2187 if self
.params
.get('writeinfojson', False):
2188 infofn
= self
.prepare_filename(info_dict
, 'infojson')
2189 if not ensure_dir_exists(encodeFilename(infofn
)):
2191 if not self
.params
.get('overwrites', True) and os
.path
.exists(encodeFilename(infofn
)):
2192 self
.to_screen('[info] Video metadata is already present')
2194 self
.to_screen('[info] Writing video metadata as JSON to: ' + infofn
)
2196 write_json_file(self
.filter_requested_info(info_dict
), infofn
)
2197 except (OSError, IOError):
2198 self
.report_error('Cannot write video metadata to JSON file ' + infofn
)
2200 info_dict
['__infojson_filename'] = infofn
2202 thumbfn
= self
.prepare_filename(info_dict
, 'thumbnail')
2203 thumb_fn_temp
= temp_filename
if not skip_dl
else thumbfn
2204 for thumb_ext
in self
._write
_thumbnails
(info_dict
, thumb_fn_temp
):
2205 thumb_filename_temp
= replace_extension(thumb_fn_temp
, thumb_ext
, info_dict
.get('ext'))
2206 thumb_filename
= replace_extension(thumbfn
, thumb_ext
, info_dict
.get('ext'))
2207 files_to_move
[thumb_filename_temp
] = info_dict
['__thumbnail_filename'] = thumb_filename
2209 # Write internet shortcut files
2210 url_link
= webloc_link
= desktop_link
= False
2211 if self
.params
.get('writelink', False):
2212 if sys
.platform
== "darwin": # macOS.
2214 elif sys
.platform
.startswith("linux"):
2216 else: # if sys.platform in ['win32', 'cygwin']:
2218 if self
.params
.get('writeurllink', False):
2220 if self
.params
.get('writewebloclink', False):
2222 if self
.params
.get('writedesktoplink', False):
2225 if url_link
or webloc_link
or desktop_link
:
2226 if 'webpage_url' not in info_dict
:
2227 self
.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2229 ascii_url
= iri_to_uri(info_dict
['webpage_url'])
2231 def _write_link_file(extension
, template
, newline
, embed_filename
):
2232 linkfn
= replace_extension(full_filename
, extension
, info_dict
.get('ext'))
2233 if self
.params
.get('overwrites', True) and os
.path
.exists(encodeFilename(linkfn
)):
2234 self
.to_screen('[info] Internet shortcut is already present')
2237 self
.to_screen('[info] Writing internet shortcut to: ' + linkfn
)
2238 with io
.open(encodeFilename(to_high_limit_path(linkfn
)), 'w', encoding
='utf-8', newline
=newline
) as linkfile
:
2239 template_vars
= {'url': ascii_url}
2241 template_vars
['filename'] = linkfn
[:-(len(extension
) + 1)]
2242 linkfile
.write(template
% template_vars
)
2243 except (OSError, IOError):
2244 self
.report_error('Cannot write internet shortcut ' + linkfn
)
2249 if not _write_link_file('url', DOT_URL_LINK_TEMPLATE
, '\r\n', embed_filename
=False):
2252 if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE
, '\n', embed_filename
=False):
2255 if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE
, '\n', embed_filename
=True):
2259 must_record_download_archive
= False
2263 def existing_file(*filepaths
):
2264 ext
= info_dict
.get('ext')
2265 final_ext
= self
.params
.get('final_ext', ext
)
2267 for file in orderedSet(filepaths
):
2268 if final_ext
!= ext
:
2269 converted
= replace_extension(file, final_ext
, ext
)
2270 if os
.path
.exists(encodeFilename(converted
)):
2271 existing_files
.append(converted
)
2272 if os
.path
.exists(encodeFilename(file)):
2273 existing_files
.append(file)
2275 if not existing_files
or self
.params
.get('overwrites', False):
2276 for file in orderedSet(existing_files
):
2277 self
.report_file_delete(file)
2278 os
.remove(encodeFilename(file))
2281 self
.report_file_already_downloaded(existing_files
[0])
2282 info_dict
['ext'] = os
.path
.splitext(existing_files
[0])[1][1:]
2283 return existing_files
[0]
2286 if info_dict
.get('requested_formats') is not None:
2288 merger
= FFmpegMergerPP(self
)
2289 if not merger
.available
:
2291 self
.report_warning('You have requested multiple '
2292 'formats but ffmpeg is not installed.'
2293 ' The formats won\'t be merged.')
2295 postprocessors
= [merger
]
2297 def compatible_formats(formats
):
2298 # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2299 video_formats
= [format
for format
in formats
if format
.get('vcodec') != 'none']
2300 audio_formats
= [format
for format
in formats
if format
.get('acodec') != 'none']
2301 if len(video_formats
) > 2 or len(audio_formats
) > 2:
2305 exts
= set(format
.get('ext') for format
in formats
)
2307 set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2310 for ext_sets
in COMPATIBLE_EXTS
:
2311 if ext_sets
.issuperset(exts
):
2313 # TODO: Check acodec/vcodec
2316 requested_formats
= info_dict
['requested_formats']
2317 old_ext
= info_dict
['ext']
2318 if self
.params
.get('merge_output_format') is None and not compatible_formats(requested_formats
):
2319 info_dict
['ext'] = 'mkv'
2320 self
.report_warning(
2321 'Requested formats are incompatible for merge and will be merged into mkv.')
2323 def correct_ext(filename
):
2324 filename_real_ext
= os
.path
.splitext(filename
)[1][1:]
2326 os
.path
.splitext(filename
)[0]
2327 if filename_real_ext
== old_ext
2329 return '%s.%s' % (filename_wo_ext
, info_dict
['ext'])
2331 # Ensure filename always has a correct extension for successful merge
2332 full_filename
= correct_ext(full_filename
)
2333 temp_filename
= correct_ext(temp_filename
)
2334 dl_filename
= existing_file(full_filename
, temp_filename
)
2335 if dl_filename
is None:
2336 for f
in requested_formats
:
2337 new_info
= dict(info_dict
)
2339 fname
= prepend_extension(
2340 self
.prepare_filename(new_info
, 'temp'),
2341 'f%s' % f
['format_id'], new_info
['ext'])
2342 if not ensure_dir_exists(fname
):
2344 downloaded
.append(fname
)
2345 partial_success
, real_download
= dl(fname
, new_info
)
2346 success
= success
and partial_success
2347 info_dict
['__postprocessors'] = postprocessors
2348 info_dict
['__files_to_merge'] = downloaded
2349 # Even if there were no downloads, it is being merged only now
2350 info_dict
['__real_download'] = True
2352 # Just a single file
2353 dl_filename
= existing_file(full_filename
, temp_filename
)
2354 if dl_filename
is None:
2355 success
, real_download
= dl(temp_filename
, info_dict
)
2356 info_dict
['__real_download'] = real_download
2358 dl_filename
= dl_filename
or temp_filename
2359 info_dict
['__finaldir'] = os
.path
.dirname(os
.path
.abspath(encodeFilename(full_filename
)))
2361 except (compat_urllib_error
.URLError
, compat_http_client
.HTTPException
, socket
.error
) as err
:
2362 self
.report_error('unable to download video data: %s' % error_to_compat_str(err
))
2364 except (OSError, IOError) as err
:
2365 raise UnavailableVideoError(err
)
2366 except (ContentTooShortError
, ) as err
:
2367 self
.report_error('content too short (expected %s bytes and served %s)' % (err
.expected
, err
.downloaded
))
2370 if success
and full_filename
!= '-':
2372 fixup_policy
= self
.params
.get('fixup')
2373 if fixup_policy
is None:
2374 fixup_policy
= 'detect_or_warn'
2376 INSTALL_FFMPEG_MESSAGE
= 'Install ffmpeg to fix this automatically.'
2378 stretched_ratio
= info_dict
.get('stretched_ratio')
2379 if stretched_ratio
is not None and stretched_ratio
!= 1:
2380 if fixup_policy
== 'warn':
2381 self
.report_warning('%s: Non-uniform pixel ratio (%s)' % (
2382 info_dict
['id'], stretched_ratio
))
2383 elif fixup_policy
== 'detect_or_warn':
2384 stretched_pp
= FFmpegFixupStretchedPP(self
)
2385 if stretched_pp
.available
:
2386 info_dict
['__postprocessors'].append(stretched_pp
)
2388 self
.report_warning(
2389 '%s: Non-uniform pixel ratio (%s). %s'
2390 % (info_dict
['id'], stretched_ratio
, INSTALL_FFMPEG_MESSAGE
))
2392 assert fixup_policy
in ('ignore', 'never')
2394 if (info_dict
.get('requested_formats') is None
2395 and info_dict
.get('container') == 'm4a_dash'
2396 and info_dict
.get('ext') == 'm4a'):
2397 if fixup_policy
== 'warn':
2398 self
.report_warning(
2399 '%s: writing DASH m4a. '
2400 'Only some players support this container.'
2402 elif fixup_policy
== 'detect_or_warn':
2403 fixup_pp
= FFmpegFixupM4aPP(self
)
2404 if fixup_pp
.available
:
2405 info_dict
['__postprocessors'].append(fixup_pp
)
2407 self
.report_warning(
2408 '%s: writing DASH m4a. '
2409 'Only some players support this container. %s'
2410 % (info_dict
['id'], INSTALL_FFMPEG_MESSAGE
))
2412 assert fixup_policy
in ('ignore', 'never')
2414 if (info_dict
.get('protocol') == 'm3u8_native'
2415 or info_dict
.get('protocol') == 'm3u8'
2416 and self
.params
.get('hls_prefer_native')):
2417 if fixup_policy
== 'warn':
2418 self
.report_warning('%s: malformed AAC bitstream detected.' % (
2420 elif fixup_policy
== 'detect_or_warn':
2421 fixup_pp
= FFmpegFixupM3u8PP(self
)
2422 if fixup_pp
.available
:
2423 info_dict
['__postprocessors'].append(fixup_pp
)
2425 self
.report_warning(
2426 '%s: malformed AAC bitstream detected. %s'
2427 % (info_dict
['id'], INSTALL_FFMPEG_MESSAGE
))
2429 assert fixup_policy
in ('ignore', 'never')
2432 self
.post_process(dl_filename
, info_dict
, files_to_move
)
2433 except PostProcessingError
as err
:
2434 self
.report_error('Postprocessing: %s' % str(err
))
2437 for ph
in self
._post
_hooks
:
2439 except Exception as err
:
2440 self
.report_error('post hooks: %s' % str(err
))
2442 must_record_download_archive
= True
2444 if must_record_download_archive
or self
.params
.get('force_write_download_archive', False):
2445 self
.record_download_archive(info_dict
)
2446 max_downloads
= self
.params
.get('max_downloads')
2447 if max_downloads
is not None and self
._num
_downloads
>= int(max_downloads
):
2448 raise MaxDownloadsReached()
2450 def download(self
, url_list
):
2451 """Download a given list of URLs."""
2452 outtmpl
= self
.outtmpl_dict
['default']
2453 if (len(url_list
) > 1
2455 and '%' not in outtmpl
2456 and self
.params
.get('max_downloads') != 1):
2457 raise SameFileError(outtmpl
)
2459 for url
in url_list
:
2461 # It also downloads the videos
2462 res
= self
.extract_info(
2463 url
, force_generic_extractor
=self
.params
.get('force_generic_extractor', False))
2464 except UnavailableVideoError
:
2465 self
.report_error('unable to download video')
2466 except MaxDownloadsReached
:
2467 self
.to_screen('[info] Maximum number of downloaded files reached')
2469 except ExistingVideoReached
:
2470 self
.to_screen('[info] Encountered a file that is already in the archive, stopping due to --break-on-existing')
2472 except RejectedVideoReached
:
2473 self
.to_screen('[info] Encountered a file that did not match filter, stopping due to --break-on-reject')
2476 if self
.params
.get('dump_single_json', False):
2477 self
.to_stdout(json
.dumps(res
))
2479 return self
._download
_retcode
2481 def download_with_info_file(self
, info_filename
):
2482 with contextlib
.closing(fileinput
.FileInput(
2483 [info_filename
], mode
='r',
2484 openhook
=fileinput
.hook_encoded('utf-8'))) as f
:
2485 # FileInput doesn't have a read method, we can't call json.load
2486 info
= self
.filter_requested_info(json
.loads('\n'.join(f
)))
2488 self
.process_ie_result(info
, download
=True)
2489 except DownloadError
:
2490 webpage_url
= info
.get('webpage_url')
2491 if webpage_url
is not None:
2492 self
.report_warning('The info failed to download, trying with "%s"' % webpage_url
)
2493 return self
.download([webpage_url
])
2496 return self
._download
_retcode
2499 def filter_requested_info(info_dict
):
2500 fields_to_remove
= ('requested_formats', 'requested_subtitles')
2502 (k
, v
) for k
, v
in info_dict
.items()
2503 if (k
[0] != '_' or k
== '_type') and k
not in fields_to_remove
)
2505 def run_pp(self
, pp
, infodict
, files_to_move
={}):
2506 files_to_delete
= []
2507 files_to_delete
, infodict
= pp
.run(infodict
)
2508 if not files_to_delete
:
2509 return files_to_move
, infodict
2511 if self
.params
.get('keepvideo', False):
2512 for f
in files_to_delete
:
2513 files_to_move
.setdefault(f
, '')
2515 for old_filename
in set(files_to_delete
):
2516 self
.to_screen('Deleting original file %s (pass -k to keep)' % old_filename
)
2518 os
.remove(encodeFilename(old_filename
))
2519 except (IOError, OSError):
2520 self
.report_warning('Unable to remove downloaded original file')
2521 if old_filename
in files_to_move
:
2522 del files_to_move
[old_filename
]
2523 return files_to_move
, infodict
2525 def pre_process(self
, ie_info
):
2526 info
= dict(ie_info
)
2527 for pp
in self
._pps
['beforedl']:
2528 info
= self
.run_pp(pp
, info
)[1]
2531 def post_process(self
, filename
, ie_info
, files_to_move
={}):
2532 """Run all the postprocessors on the given file."""
2533 info
= dict(ie_info
)
2534 info
['filepath'] = filename
2535 info
['__files_to_move'] = {}
2537 for pp
in ie_info
.get('__postprocessors', []) + self
._pps
['normal']:
2538 files_to_move
, info
= self
.run_pp(pp
, info
, files_to_move
)
2539 info
= self
.run_pp(MoveFilesAfterDownloadPP(self
, files_to_move
), info
)[1]
2540 for pp
in self
._pps
['aftermove']:
2541 info
= self
.run_pp(pp
, info
, {})[1]
2543 def _make_archive_id(self
, info_dict
):
2544 video_id
= info_dict
.get('id')
2547 # Future-proof against any change in case
2548 # and backwards compatibility with prior versions
2549 extractor
= info_dict
.get('extractor_key') or info_dict
.get('ie_key') # key in a playlist
2550 if extractor
is None:
2551 url
= str_or_none(info_dict
.get('url'))
2554 # Try to find matching extractor for the URL and take its ie_key
2555 for ie
in self
._ies
:
2556 if ie
.suitable(url
):
2557 extractor
= ie
.ie_key()
2561 return '%s %s' % (extractor
.lower(), video_id
)
2563 def in_download_archive(self
, info_dict
):
2564 fn
= self
.params
.get('download_archive')
2568 vid_id
= self
._make
_archive
_id
(info_dict
)
2570 return False # Incomplete video information
2572 return vid_id
in self
.archive
2574 def record_download_archive(self
, info_dict
):
2575 fn
= self
.params
.get('download_archive')
2578 vid_id
= self
._make
_archive
_id
(info_dict
)
2580 with locked_file(fn
, 'a', encoding
='utf-8') as archive_file
:
2581 archive_file
.write(vid_id
+ '\n')
2582 self
.archive
.add(vid_id
)
2585 def format_resolution(format
, default
='unknown'):
2586 if format
.get('vcodec') == 'none':
2588 if format
.get('resolution') is not None:
2589 return format
['resolution']
2590 if format
.get('height') is not None:
2591 if format
.get('width') is not None:
2592 res
= '%sx%s' % (format
['width'], format
['height'])
2594 res
= '%sp' % format
['height']
2595 elif format
.get('width') is not None:
2596 res
= '%dx?' % format
['width']
2601 def _format_note(self
, fdict
):
2603 if fdict
.get('ext') in ['f4f', 'f4m']:
2604 res
+= '(unsupported) '
2605 if fdict
.get('language'):
2608 res
+= '[%s] ' % fdict
['language']
2609 if fdict
.get('format_note') is not None:
2610 res
+= fdict
['format_note'] + ' '
2611 if fdict
.get('tbr') is not None:
2612 res
+= '%4dk ' % fdict
['tbr']
2613 if fdict
.get('container') is not None:
2616 res
+= '%s container' % fdict
['container']
2617 if (fdict
.get('vcodec') is not None
2618 and fdict
.get('vcodec') != 'none'):
2621 res
+= fdict
['vcodec']
2622 if fdict
.get('vbr') is not None:
2624 elif fdict
.get('vbr') is not None and fdict
.get('abr') is not None:
2626 if fdict
.get('vbr') is not None:
2627 res
+= '%4dk' % fdict
['vbr']
2628 if fdict
.get('fps') is not None:
2631 res
+= '%sfps' % fdict
['fps']
2632 if fdict
.get('acodec') is not None:
2635 if fdict
['acodec'] == 'none':
2638 res
+= '%-5s' % fdict
['acodec']
2639 elif fdict
.get('abr') is not None:
2643 if fdict
.get('abr') is not None:
2644 res
+= '@%3dk' % fdict
['abr']
2645 if fdict
.get('asr') is not None:
2646 res
+= ' (%5dHz)' % fdict
['asr']
2647 if fdict
.get('filesize') is not None:
2650 res
+= format_bytes(fdict
['filesize'])
2651 elif fdict
.get('filesize_approx') is not None:
2654 res
+= '~' + format_bytes(fdict
['filesize_approx'])
2657 def _format_note_table(self
, f
):
2658 def join_fields(*vargs
):
2659 return ', '.join((val
for val
in vargs
if val
!= ''))
2662 'UNSUPPORTED' if f
.get('ext') in ('f4f', 'f4m') else '',
2663 format_field(f
, 'language', '[%s]'),
2664 format_field(f
, 'format_note'),
2665 format_field(f
, 'container', ignore
=(None, f
.get('ext'))),
2666 format_field(f
, 'asr', '%5dHz'))
2668 def list_formats(self
, info_dict
):
2669 formats
= info_dict
.get('formats', [info_dict
])
2670 new_format
= self
.params
.get('listformats_table', False)
2674 format_field(f
, 'format_id'),
2675 format_field(f
, 'ext'),
2676 self
.format_resolution(f
),
2677 format_field(f
, 'fps', '%d'),
2679 format_field(f
, 'filesize', ' %s', func
=format_bytes
) + format_field(f
, 'filesize_approx', '~%s', func
=format_bytes
),
2680 format_field(f
, 'tbr', '%4dk'),
2681 f
.get('protocol').replace('http_dash_segments', 'dash').replace("native", "n"),
2683 format_field(f
, 'vcodec', default
='unknown').replace('none', ''),
2684 format_field(f
, 'vbr', '%4dk'),
2685 format_field(f
, 'acodec', default
='unknown').replace('none', ''),
2686 format_field(f
, 'abr', '%3dk'),
2687 format_field(f
, 'asr', '%5dHz'),
2688 self
._format
_note
_table
(f
)]
2690 if f
.get('preference') is None or f
['preference'] >= -1000]
2691 header_line
= ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', ' TBR', 'PROTO',
2692 '|', 'VCODEC', ' VBR', 'ACODEC', ' ABR', ' ASR', 'NOTE']
2696 format_field(f
, 'format_id'),
2697 format_field(f
, 'ext'),
2698 self
.format_resolution(f
),
2699 self
._format
_note
(f
)]
2701 if f
.get('preference') is None or f
['preference'] >= -1000]
2702 header_line
= ['format code', 'extension', 'resolution', 'note']
2704 # if len(formats) > 1:
2705 # table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
2707 '[info] Available formats for %s:\n%s' % (info_dict
['id'], render_table(
2711 extraGap
=(0 if new_format
else 1),
2712 hideEmpty
=new_format
)))
2714 def list_thumbnails(self
, info_dict
):
2715 thumbnails
= info_dict
.get('thumbnails')
2717 self
.to_screen('[info] No thumbnails present for %s' % info_dict
['id'])
2721 '[info] Thumbnails for %s:' % info_dict
['id'])
2722 self
.to_screen(render_table(
2723 ['ID', 'width', 'height', 'URL'],
2724 [[t
['id'], t
.get('width', 'unknown'), t
.get('height', 'unknown'), t
['url']] for t
in thumbnails
]))
def list_subtitles(self, video_id, subtitles, name='subtitles'):
    """Print a table of available subtitle languages and their formats."""
    if not subtitles:
        self.to_screen('%s has no %s' % (video_id, name))
        return

    self.to_screen(
        'Available %s for %s:' % (name, video_id))
    rows = []
    for lang, formats in subtitles.items():
        # Formats are listed in reverse so the preferred one comes first.
        exts = ', '.join(f['ext'] for f in reversed(formats))
        rows.append([lang, exts])
    self.to_screen(render_table(['Language', 'formats'], rows))
def urlopen(self, req):
    """ Start an HTTP download """
    # A plain URL string is wrapped into a sanitized Request object first;
    # Request instances pass through unchanged.
    prepared = sanitized_Request(req) if isinstance(req, compat_basestring) else req
    return self._opener.open(prepared, timeout=self._socket_timeout)
def print_debug_header(self):
    """Write verbose debug information to the output: encodings, program
    version, git commit, Python/exe versions, proxy map and (optionally)
    the public IP address. No-op unless the 'verbose' option is set."""
    if not self.params.get('verbose'):
        return

    if type('') is not compat_str:
        # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
        self.report_warning(
            'Your Python is broken! Update to a newer and supported version')

    # sys.stdout may lack an encoding attribute (e.g. when redirected).
    stdout_encoding = getattr(
        sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
    encoding_str = (
        '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
            locale.getpreferredencoding(),
            sys.getfilesystemencoding(),
            stdout_encoding,
            self.get_encoding()))
    # encoding=None: this line is written before encoding is fully set up.
    write_string(encoding_str, encoding=None)

    self._write_string('[debug] yt-dlp version %s\n' % __version__)
    if _LAZY_LOADER:
        self._write_string('[debug] Lazy loading extractors enabled\n')
    if _PLUGIN_CLASSES:
        self._write_string(
            '[debug] Plugin Extractors: %s\n' % [ie.ie_key() for ie in _PLUGIN_CLASSES])
    try:
        # Best effort: report the git commit when running from a checkout.
        sp = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out, err = process_communicate_or_kill(sp)
        out = out.decode().strip()
        # Only print if the output actually looks like a commit hash.
        if re.match('[0-9a-f]+', out):
            self._write_string('[debug] Git HEAD: %s\n' % out)
    except Exception:
        # Not a git checkout, or git is unavailable -- skip silently.
        try:
            sys.exc_clear()
        except Exception:
            pass

    def python_implementation():
        # Append the PyPy version triplet when running under PyPy.
        impl_name = platform.python_implementation()
        if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
            return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
        return impl_name

    self._write_string('[debug] Python version %s (%s) - %s\n' % (
        platform.python_version(), python_implementation(),
        platform_name()))

    # Versions of external helper executables (ffmpeg, rtmpdump, phantomjs).
    exe_versions = FFmpegPostProcessor.get_versions(self)
    exe_versions['rtmpdump'] = rtmpdump_version()
    exe_versions['phantomjs'] = PhantomJSwrapper._version()
    exe_str = ', '.join(
        '%s %s' % (exe, v)
        for exe, v in sorted(exe_versions.items())
        if v
    )
    if not exe_str:
        exe_str = 'none'
    self._write_string('[debug] exe versions: %s\n' % exe_str)

    # Collect the effective proxy settings from every installed handler.
    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

    if self.params.get('call_home', False):
        # NOTE(review): performs network requests to yt-dl.org; only runs
        # when the user explicitly enabled call_home.
        ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
        self._write_string('[debug] Public IP address: %s\n' % ipaddr)
        latest_version = self.urlopen(
            'https://yt-dl.org/latest/version').read().decode('utf-8')
        if version_tuple(latest_version) > version_tuple(__version__):
            self.report_warning(
                'You are using an outdated version (newest version: %s)! '
                'See https://yt-dl.org/update if you need help updating.' %
                latest_version)
def _setup_opener(self):
    """Build the urllib opener used for all HTTP(S) requests: cookie jar,
    proxy handling, debug traffic printing and a disabled file:// scheme.
    Stores the result on self._opener and the timeout on self._socket_timeout."""
    timeout_val = self.params.get('socket_timeout')
    # Default socket timeout is 600 seconds when not configured.
    self._socket_timeout = 600 if timeout_val is None else float(timeout_val)

    opts_cookiefile = self.params.get('cookiefile')
    opts_proxy = self.params.get('proxy')

    if opts_cookiefile is None:
        # No cookie file: keep cookies in memory only.
        self.cookiejar = compat_cookiejar.CookieJar()
    else:
        opts_cookiefile = expand_path(opts_cookiefile)
        self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
        # Load existing cookies only when the file is readable.
        if os.access(opts_cookiefile, os.R_OK):
            self.cookiejar.load(ignore_discard=True, ignore_expires=True)

    cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
    if opts_proxy is not None:
        if opts_proxy == '':
            # Empty --proxy means: use no proxy at all.
            proxies = {}
        else:
            proxies = {'http': opts_proxy, 'https': opts_proxy}
    else:
        # Fall back to environment/system proxy settings.
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    proxy_handler = PerRequestProxyHandler(proxies)

    debuglevel = 1 if self.params.get('debug_printtraffic') else 0
    https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
    redirect_handler = YoutubeDLRedirectHandler()
    data_handler = compat_urllib_request_DataHandler()

    # When passing our own FileHandler instance, build_opener won't add the
    # default FileHandler and allows us to disable the file protocol, which
    # can be used for malicious purposes (see
    # https://github.com/ytdl-org/youtube-dl/issues/8227)
    file_handler = compat_urllib_request.FileHandler()

    def file_open(*args, **kwargs):
        raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dlc for security reasons')
    file_handler.file_open = file_open

    opener = compat_urllib_request.build_opener(
        proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)

    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener
def encode(self, s):
    """Return *s* as bytes, encoding text with the configured encoding."""
    if isinstance(s, bytes):
        # Nothing to do for data that is already encoded.
        return s

    try:
        encoded = s.encode(self.get_encoding())
    except UnicodeEncodeError as err:
        # Point the user at the --encoding option before propagating.
        err.reason += '. Check your system encoding configuration or use the --encoding option.'
        raise
    return encoded
def get_encoding(self):
    """Return the output encoding: the user-configured one, or the
    system's preferred encoding when none was configured."""
    configured = self.params.get('encoding')
    # Only None triggers the fallback; an explicit value is always honoured.
    if configured is not None:
        return configured
    return preferredencoding()
2892 def _write_thumbnails(self
, info_dict
, filename
): # return the extensions
2893 if self
.params
.get('writethumbnail', False):
2894 thumbnails
= info_dict
.get('thumbnails')
2896 thumbnails
= [thumbnails
[-1]]
2897 elif self
.params
.get('write_all_thumbnails', False):
2898 thumbnails
= info_dict
.get('thumbnails') or []
2903 for t
in thumbnails
:
2904 thumb_ext
= determine_ext(t
['url'], 'jpg')
2905 suffix
= '%s.' % t
['id'] if len(thumbnails
) > 1 else ''
2906 thumb_display_id
= '%s ' % t
['id'] if len(thumbnails
) > 1 else ''
2907 t
['filename'] = thumb_filename
= replace_extension(filename
, suffix
+ thumb_ext
, info_dict
.get('ext'))
2909 if not self
.params
.get('overwrites', True) and os
.path
.exists(encodeFilename(thumb_filename
)):
2910 ret
.append(suffix
+ thumb_ext
)
2911 self
.to_screen('[%s] %s: Thumbnail %sis already present' %
2912 (info_dict
['extractor'], info_dict
['id'], thumb_display_id
))
2914 self
.to_screen('[%s] %s: Downloading thumbnail %s...' %
2915 (info_dict
['extractor'], info_dict
['id'], thumb_display_id
))
2917 uf
= self
.urlopen(t
['url'])
2918 with open(encodeFilename(thumb_filename
), 'wb') as thumbf
:
2919 shutil
.copyfileobj(uf
, thumbf
)
2920 ret
.append(suffix
+ thumb_ext
)
2921 self
.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2922 (info_dict
['extractor'], info_dict
['id'], thumb_display_id
, thumb_filename
))
2923 except (compat_urllib_error
.URLError
, compat_http_client
.HTTPException
, socket
.error
) as err
:
2924 self
.report_warning('Unable to download thumbnail "%s": %s' %
2925 (t
['url'], error_to_compat_str(err
)))