4 from __future__
import absolute_import
, unicode_literals
29 from string
import ascii_letters
34 compat_get_terminal_size
,
40 compat_tokenize_tokenize
,
42 compat_urllib_request
,
43 compat_urllib_request_DataHandler
,
54 DOT_DESKTOP_LINK_TEMPLATE
,
55 DOT_URL_LINK_TEMPLATE
,
56 DOT_WEBLOC_LINK_TEMPLATE
,
76 PerRequestProxyHandler
,
81 register_socks_protocols
,
93 UnavailableVideoError
,
99 YoutubeDLCookieProcessor
,
101 YoutubeDLRedirectHandler
,
102 process_communicate_or_kill
,
104 from .cache
import Cache
105 from .extractor
import get_info_extractor
, gen_extractor_classes
, _LAZY_LOADER
106 from .extractor
.openload
import PhantomJSwrapper
107 from .downloader
import get_suitable_downloader
108 from .downloader
.rtmp
import rtmpdump_version
109 from .postprocessor
import (
112 FFmpegFixupStretchedPP
,
115 FFmpegSubtitlesConvertorPP
,
118 from .version
import __version__
120 if compat_os_name
== 'nt':
124 class YoutubeDL(object):
127 YoutubeDL objects are the ones responsible of downloading the
128 actual video file and writing it to disk if the user has requested
129 it, among some other tasks. In most cases there should be one per
130 program. As, given a video URL, the downloader doesn't know how to
131 extract all the needed information, task that InfoExtractors do, it
132 has to pass the URL to one of them.
134 For this, YoutubeDL objects have a method that allows
135 InfoExtractors to be registered in a given order. When it is passed
136 a URL, the YoutubeDL object handles it to the first InfoExtractor it
137 finds that reports being able to handle it. The InfoExtractor extracts
138 all the information about the video or videos the URL refers to, and
139 YoutubeDL process the extracted information, possibly using a File
140 Downloader to download the video.
142 YoutubeDL objects accept a lot of parameters. In order not to saturate
143 the object constructor with arguments, it receives a dictionary of
144 options instead. These options are available through the params
145 attribute for the InfoExtractors to use. The YoutubeDL also
146 registers itself as the downloader in charge for the InfoExtractors
147 that are added to it, so this is a "mutual registration".
151 username: Username for authentication purposes.
152 password: Password for authentication purposes.
153 videopassword: Password for accessing a video.
154 ap_mso: Adobe Pass multiple-system operator identifier.
155 ap_username: Multiple-system operator account username.
156 ap_password: Multiple-system operator account password.
157 usenetrc: Use netrc for authentication instead.
158 verbose: Print additional info to stdout.
159 quiet: Do not print messages to stdout.
160 no_warnings: Do not print out anything for warnings.
161 forceurl: Force printing final URL.
162 forcetitle: Force printing title.
163 forceid: Force printing ID.
164 forcethumbnail: Force printing thumbnail URL.
165 forcedescription: Force printing description.
166 forcefilename: Force printing final filename.
167 forceduration: Force printing duration.
168 forcejson: Force printing info_dict as JSON.
169 dump_single_json: Force printing the info_dict of the whole playlist
170 (or video) as a single JSON line.
171 force_write_download_archive: Force writing download archive regardless of
172 'skip_download' or 'simulate'.
173 simulate: Do not download the video files.
174 format: Video format code. see "FORMAT SELECTION" for more details.
175 format_sort: How to sort the video formats. see "Sorting Formats" for more details.
176 format_sort_force: Force the given format_sort. see "Sorting Formats" for more details.
177 allow_multiple_video_streams: Allow multiple video streams to be merged into a single file
178 allow_multiple_audio_streams: Allow multiple audio streams to be merged into a single file
179 outtmpl: Template for output names.
180 restrictfilenames: Do not allow "&" and spaces in file names.
181 trim_file_name: Limit length of filename (extension excluded).
182 ignoreerrors: Do not stop on download errors. (Default True when running youtube-dlc, but False when directly accessing YoutubeDL class)
183 force_generic_extractor: Force downloader to use the generic extractor
184 overwrites: Overwrite all video and metadata files if True,
185 overwrite only non-video files if None
186 and don't overwrite any file if False
187 playliststart: Playlist item to start at.
188 playlistend: Playlist item to end at.
189 playlist_items: Specific indices of playlist to download.
190 playlistreverse: Download playlist items in reverse order.
191 playlistrandom: Download playlist items in random order.
192 matchtitle: Download only matching titles.
193 rejecttitle: Reject downloads for matching titles.
194 logger: Log messages to a logging.Logger instance.
195 logtostderr: Log messages to stderr instead of stdout.
196 writedescription: Write the video description to a .description file
197 writeinfojson: Write the video description to a .info.json file
198 writeannotations: Write the video annotations to a .annotations.xml file
199 writethumbnail: Write the thumbnail image to a file
200 write_all_thumbnails: Write all thumbnail formats to files
201 writelink: Write an internet shortcut file, depending on the
202 current platform (.url/.webloc/.desktop)
203 writeurllink: Write a Windows internet shortcut file (.url)
204 writewebloclink: Write a macOS internet shortcut file (.webloc)
205 writedesktoplink: Write a Linux internet shortcut file (.desktop)
206 writesubtitles: Write the video subtitles to a file
207 writeautomaticsub: Write the automatically generated subtitles to a file
208 allsubtitles: Downloads all the subtitles of the video
209 (requires writesubtitles or writeautomaticsub)
210 listsubtitles: Lists all available subtitles for the video
211 subtitlesformat: The format code for subtitles
212 subtitleslangs: List of languages of the subtitles to download
213 keepvideo: Keep the video file after post-processing
214 daterange: A DateRange object, download only if the upload_date is in the range.
215 skip_download: Skip the actual download of the video file
216 cachedir: Location of the cache files in the filesystem.
217 False to disable filesystem cache.
218 noplaylist: Download single video instead of a playlist if in doubt.
219 age_limit: An integer representing the user's age in years.
220 Unsuitable videos for the given age are skipped.
221 min_views: An integer representing the minimum view count the video
222 must have in order to not be skipped.
223 Videos without view count information are always
224 downloaded. None for no limit.
225 max_views: An integer representing the maximum view count.
226 Videos that are more popular than that are not
228 Videos without view count information are always
229 downloaded. None for no limit.
230 download_archive: File name of a file where all downloads are recorded.
231 Videos already present in the file are not downloaded
233 break_on_existing: Stop the download process after attempting to download a file that's
235 cookiefile: File name where cookies should be read from and dumped to.
236 nocheckcertificate:Do not verify SSL certificates
237 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
238 At the moment, this is only supported by YouTube.
239 proxy: URL of the proxy server to use
240 geo_verification_proxy: URL of the proxy to use for IP address verification
241 on geo-restricted sites.
242 socket_timeout: Time to wait for unresponsive hosts, in seconds
243 bidi_workaround: Work around buggy terminals without bidirectional text
244 support, using fribidi
245 debug_printtraffic:Print out sent and received HTTP traffic
246 include_ads: Download ads as well
247 default_search: Prepend this string if an input url is not valid.
248 'auto' for elaborate guessing
249 encoding: Use this encoding instead of the system-specified.
250 extract_flat: Do not resolve URLs, return the immediate result.
251 Pass in 'in_playlist' to only show this behavior for
253 postprocessors: A list of dictionaries, each with an entry
254 * key: The name of the postprocessor. See
255 youtube_dlc/postprocessor/__init__.py for a list.
256 as well as any further keyword arguments for the
258 post_hooks: A list of functions that get called as the final step
259 for each video file, after all postprocessors have been
260 called. The filename will be passed as the only argument.
261 progress_hooks: A list of functions that get called on download
262 progress, with a dictionary with the entries
263 * status: One of "downloading", "error", or "finished".
264 Check this first and ignore unknown values.
266 If status is one of "downloading", or "finished", the
267 following properties may also be present:
268 * filename: The final filename (always present)
269 * tmpfilename: The filename we're currently writing to
270 * downloaded_bytes: Bytes on disk
271 * total_bytes: Size of the whole file, None if unknown
272 * total_bytes_estimate: Guess of the eventual file size,
274 * elapsed: The number of seconds since download started.
275 * eta: The estimated time in seconds, None if unknown
276 * speed: The download speed in bytes/second, None if
278 * fragment_index: The counter of the currently
279 downloaded video fragment.
280 * fragment_count: The number of fragments (= individual
281 files that will be merged)
283 Progress hooks are guaranteed to be called at least once
284 (with status "finished") if the download is successful.
285 merge_output_format: Extension to use when merging formats.
286 fixup: Automatically correct known faults of the file.
288 - "never": do nothing
289 - "warn": only emit a warning
290 - "detect_or_warn": check whether we can do anything
291 about it, warn otherwise (default)
292 source_address: Client-side IP address to bind to.
293 call_home: Boolean, true iff we are allowed to contact the
294 youtube-dlc servers for debugging.
295 sleep_interval: Number of seconds to sleep before each download when
296 used alone or a lower bound of a range for randomized
297 sleep before each download (minimum possible number
298 of seconds to sleep) when used along with
300 max_sleep_interval:Upper bound of a range for randomized sleep before each
301 download (maximum possible number of seconds to sleep).
302 Must only be used along with sleep_interval.
303 Actual sleep time will be a random float from range
304 [sleep_interval; max_sleep_interval].
305 listformats: Print an overview of available video formats and exit.
306 list_thumbnails: Print a table of all thumbnails and exit.
307 match_filter: A function that gets called with the info_dict of
309 If it returns a message, the video is ignored.
310 If it returns None, the video is downloaded.
311 match_filter_func in utils.py is one example for this.
312 no_color: Do not emit color codes in output.
313 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
316 Two-letter ISO 3166-2 country code that will be used for
317 explicit geographic restriction bypassing via faking
318 X-Forwarded-For HTTP header
320 IP range in CIDR notation that will be used similarly to
323 The following options determine which downloader is picked:
324 external_downloader: Executable of the external downloader to call.
325 None or unset for standard (built-in) downloader.
326 hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
327 if True, otherwise use ffmpeg/avconv if False, otherwise
328 use downloader suggested by extractor if None.
330 The following parameters are not used by YoutubeDL itself, they are used by
331 the downloader (see youtube_dlc/downloader/common.py):
332 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
333 noresizebuffer, retries, continuedl, noprogress, consoletitle,
334 xattr_set_filesize, external_downloader_args, hls_use_mpegts,
337 The following options are used by the post processors:
338 prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
339 otherwise prefer ffmpeg.
340 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
341 to the binary or its containing directory.
342 postprocessor_args: A dictionary of postprocessor names (in lower case) and a list
343 of additional command-line arguments for the postprocessor.
344 Use 'default' as the name for arguments to passed to all PP.
346 The following options are used by the Youtube extractor:
347 youtube_include_dash_manifest: If True (default), DASH manifests and related
348 data will be downloaded and processed by extractor.
349 You can reduce network I/O by disabling it if you don't
353 _NUMERIC_FIELDS
= set((
354 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
355 'timestamp', 'upload_year', 'upload_month', 'upload_day',
356 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
357 'average_rating', 'comment_count', 'age_limit',
358 'start_time', 'end_time',
359 'chapter_number', 'season_number', 'episode_number',
360 'track_number', 'disc_number', 'release_year',
367 _download_retcode
= None
368 _num_downloads
= None
371 def __init__(self
, params
=None, auto_init
=True):
372 """Create a FileDownloader object with the given options."""
376 self
._ies
_instances
= {}
378 self
._post
_hooks
= []
379 self
._progress
_hooks
= []
380 self
._download
_retcode
= 0
381 self
._num
_downloads
= 0
382 self
._screen
_file
= [sys
.stdout
, sys
.stderr
][params
.get('logtostderr', False)]
383 self
._err
_file
= sys
.stderr
386 'nocheckcertificate': False,
388 self
.params
.update(params
)
389 self
.cache
= Cache(self
)
392 """Preload the archive, if any is specified"""
393 def preload_download_archive(self
):
394 fn
= self
.params
.get('download_archive')
398 with locked_file(fn
, 'r', encoding
='utf-8') as archive_file
:
399 for line
in archive_file
:
400 self
.archive
.add(line
.strip())
401 except IOError as ioe
:
402 if ioe
.errno
!= errno
.ENOENT
:
407 def check_deprecated(param
, option
, suggestion
):
408 if self
.params
.get(param
) is not None:
410 '%s is deprecated. Use %s instead.' % (option
, suggestion
))
414 if self
.params
.get('verbose'):
415 self
.to_stdout('[debug] Loading archive file %r' % self
.params
.get('download_archive'))
417 preload_download_archive(self
)
419 if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
420 if self
.params
.get('geo_verification_proxy') is None:
421 self
.params
['geo_verification_proxy'] = self
.params
['cn_verification_proxy']
423 check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N in the number of digits')
424 check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
425 check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
427 if params
.get('bidi_workaround', False):
430 master
, slave
= pty
.openpty()
431 width
= compat_get_terminal_size().columns
435 width_args
= ['-w', str(width
)]
437 stdin
=subprocess
.PIPE
,
439 stderr
=self
._err
_file
)
441 self
._output
_process
= subprocess
.Popen(
442 ['bidiv'] + width_args
, **sp_kwargs
445 self
._output
_process
= subprocess
.Popen(
446 ['fribidi', '-c', 'UTF-8'] + width_args
, **sp_kwargs
)
447 self
._output
_channel
= os
.fdopen(master
, 'rb')
448 except OSError as ose
:
449 if ose
.errno
== errno
.ENOENT
:
450 self
.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
454 if (sys
.platform
!= 'win32'
455 and sys
.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
456 and not params
.get('restrictfilenames', False)):
457 # Unicode filesystem API will throw errors (#1474, #13027)
459 'Assuming --restrict-filenames since file system encoding '
460 'cannot encode all characters. '
461 'Set the LC_ALL environment variable to fix this.')
462 self
.params
['restrictfilenames'] = True
464 if isinstance(params
.get('outtmpl'), bytes):
466 'Parameter outtmpl is bytes, but should be a unicode string. '
467 'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
472 self
.print_debug_header()
473 self
.add_default_info_extractors()
475 for pp_def_raw
in self
.params
.get('postprocessors', []):
476 pp_class
= get_postprocessor(pp_def_raw
['key'])
477 pp_def
= dict(pp_def_raw
)
479 pp
= pp_class(self
, **compat_kwargs(pp_def
))
480 self
.add_post_processor(pp
)
482 for ph
in self
.params
.get('post_hooks', []):
483 self
.add_post_hook(ph
)
485 for ph
in self
.params
.get('progress_hooks', []):
486 self
.add_progress_hook(ph
)
488 register_socks_protocols()
490 def warn_if_short_id(self
, argv
):
491 # short YouTube ID starting with dash?
493 i
for i
, a
in enumerate(argv
)
494 if re
.match(r
'^-[0-9A-Za-z_-]{10}$', a
)]
498 + [a
for i
, a
in enumerate(argv
) if i
not in idxs
]
499 + ['--'] + [argv
[i
] for i
in idxs
]
502 'Long argument string detected. '
503 'Use -- to separate parameters and URLs, like this:\n%s\n' %
504 args_to_str(correct_argv
))
506 def add_info_extractor(self
, ie
):
507 """Add an InfoExtractor object to the end of the list."""
509 if not isinstance(ie
, type):
510 self
._ies
_instances
[ie
.ie_key()] = ie
511 ie
.set_downloader(self
)
513 def get_info_extractor(self
, ie_key
):
515 Get an instance of an IE with name ie_key, it will try to get one from
516 the _ies list, if there's no instance it will create a new one and add
517 it to the extractor list.
519 ie
= self
._ies
_instances
.get(ie_key
)
521 ie
= get_info_extractor(ie_key
)()
522 self
.add_info_extractor(ie
)
525 def add_default_info_extractors(self
):
527 Add the InfoExtractors returned by gen_extractors to the end of the list
529 for ie
in gen_extractor_classes():
530 self
.add_info_extractor(ie
)
532 def add_post_processor(self
, pp
):
533 """Add a PostProcessor object to the end of the chain."""
535 pp
.set_downloader(self
)
537 def add_post_hook(self
, ph
):
538 """Add the post hook"""
539 self
._post
_hooks
.append(ph
)
541 def add_progress_hook(self
, ph
):
542 """Add the progress hook (currently only for the file downloader)"""
543 self
._progress
_hooks
.append(ph
)
545 def _bidi_workaround(self
, message
):
546 if not hasattr(self
, '_output_channel'):
549 assert hasattr(self
, '_output_process')
550 assert isinstance(message
, compat_str
)
551 line_count
= message
.count('\n') + 1
552 self
._output
_process
.stdin
.write((message
+ '\n').encode('utf-8'))
553 self
._output
_process
.stdin
.flush()
554 res
= ''.join(self
._output
_channel
.readline().decode('utf-8')
555 for _
in range(line_count
))
556 return res
[:-len('\n')]
558 def to_screen(self
, message
, skip_eol
=False):
559 """Print message to stdout if not in quiet mode."""
560 return self
.to_stdout(message
, skip_eol
, check_quiet
=True)
562 def _write_string(self
, s
, out
=None):
563 write_string(s
, out
=out
, encoding
=self
.params
.get('encoding'))
565 def to_stdout(self
, message
, skip_eol
=False, check_quiet
=False):
566 """Print message to stdout if not in quiet mode."""
567 if self
.params
.get('logger'):
568 self
.params
['logger'].debug(message
)
569 elif not check_quiet
or not self
.params
.get('quiet', False):
570 message
= self
._bidi
_workaround
(message
)
571 terminator
= ['\n', ''][skip_eol
]
572 output
= message
+ terminator
574 self
._write
_string
(output
, self
._screen
_file
)
576 def to_stderr(self
, message
):
577 """Print message to stderr."""
578 assert isinstance(message
, compat_str
)
579 if self
.params
.get('logger'):
580 self
.params
['logger'].error(message
)
582 message
= self
._bidi
_workaround
(message
)
583 output
= message
+ '\n'
584 self
._write
_string
(output
, self
._err
_file
)
586 def to_console_title(self
, message
):
587 if not self
.params
.get('consoletitle', False):
589 if compat_os_name
== 'nt':
590 if ctypes
.windll
.kernel32
.GetConsoleWindow():
591 # c_wchar_p() might not be necessary if `message` is
592 # already of type unicode()
593 ctypes
.windll
.kernel32
.SetConsoleTitleW(ctypes
.c_wchar_p(message
))
594 elif 'TERM' in os
.environ
:
595 self
._write
_string
('\033[0;%s\007' % message
, self
._screen
_file
)
597 def save_console_title(self
):
598 if not self
.params
.get('consoletitle', False):
600 if self
.params
.get('simulate', False):
602 if compat_os_name
!= 'nt' and 'TERM' in os
.environ
:
603 # Save the title on stack
604 self
._write
_string
('\033[22;0t', self
._screen
_file
)
606 def restore_console_title(self
):
607 if not self
.params
.get('consoletitle', False):
609 if self
.params
.get('simulate', False):
611 if compat_os_name
!= 'nt' and 'TERM' in os
.environ
:
612 # Restore the title from stack
613 self
._write
_string
('\033[23;0t', self
._screen
_file
)
616 self
.save_console_title()
619 def __exit__(self
, *args
):
620 self
.restore_console_title()
622 if self
.params
.get('cookiefile') is not None:
623 self
.cookiejar
.save(ignore_discard
=True, ignore_expires
=True)
625 def trouble(self
, message
=None, tb
=None):
626 """Determine action to take when a download problem appears.
628 Depending on if the downloader has been configured to ignore
629 download errors or not, this method may throw an exception or
630 not when errors are found, after printing the message.
632 tb, if given, is additional traceback information.
634 if message
is not None:
635 self
.to_stderr(message
)
636 if self
.params
.get('verbose'):
638 if sys
.exc_info()[0]: # if .trouble has been called from an except block
640 if hasattr(sys
.exc_info()[1], 'exc_info') and sys
.exc_info()[1].exc_info
[0]:
641 tb
+= ''.join(traceback
.format_exception(*sys
.exc_info()[1].exc_info
))
642 tb
+= encode_compat_str(traceback
.format_exc())
644 tb_data
= traceback
.format_list(traceback
.extract_stack())
645 tb
= ''.join(tb_data
)
647 if not self
.params
.get('ignoreerrors', False):
648 if sys
.exc_info()[0] and hasattr(sys
.exc_info()[1], 'exc_info') and sys
.exc_info()[1].exc_info
[0]:
649 exc_info
= sys
.exc_info()[1].exc_info
651 exc_info
= sys
.exc_info()
652 raise DownloadError(message
, exc_info
)
653 self
._download
_retcode
= 1
655 def report_warning(self
, message
):
657 Print the message to stderr, it will be prefixed with 'WARNING:'
658 If stderr is a tty file the 'WARNING:' will be colored
660 if self
.params
.get('logger') is not None:
661 self
.params
['logger'].warning(message
)
663 if self
.params
.get('no_warnings'):
665 if not self
.params
.get('no_color') and self
._err
_file
.isatty() and compat_os_name
!= 'nt':
666 _msg_header
= '\033[0;33mWARNING:\033[0m'
668 _msg_header
= 'WARNING:'
669 warning_message
= '%s %s' % (_msg_header
, message
)
670 self
.to_stderr(warning_message
)
672 def report_error(self
, message
, tb
=None):
674 Do the same as trouble, but prefixes the message with 'ERROR:', colored
675 in red if stderr is a tty file.
677 if not self
.params
.get('no_color') and self
._err
_file
.isatty() and compat_os_name
!= 'nt':
678 _msg_header
= '\033[0;31mERROR:\033[0m'
680 _msg_header
= 'ERROR:'
681 error_message
= '%s %s' % (_msg_header
, message
)
682 self
.trouble(error_message
, tb
)
684 def report_file_already_downloaded(self
, file_name
):
685 """Report file has already been fully downloaded."""
687 self
.to_screen('[download] %s has already been downloaded' % file_name
)
688 except UnicodeEncodeError:
689 self
.to_screen('[download] The file has already been downloaded')
691 def report_file_delete(self
, file_name
):
692 """Report that existing file will be deleted."""
694 self
.to_screen('Deleting already existent file %s' % file_name
)
695 except UnicodeEncodeError:
696 self
.to_screen('Deleting already existent file')
698 def prepare_filename(self
, info_dict
):
699 """Generate the output filename."""
701 template_dict
= dict(info_dict
)
703 template_dict
['epoch'] = int(time
.time())
704 autonumber_size
= self
.params
.get('autonumber_size')
705 if autonumber_size
is None:
707 template_dict
['autonumber'] = self
.params
.get('autonumber_start', 1) - 1 + self
._num
_downloads
708 if template_dict
.get('resolution') is None:
709 if template_dict
.get('width') and template_dict
.get('height'):
710 template_dict
['resolution'] = '%dx%d' % (template_dict
['width'], template_dict
['height'])
711 elif template_dict
.get('height'):
712 template_dict
['resolution'] = '%sp' % template_dict
['height']
713 elif template_dict
.get('width'):
714 template_dict
['resolution'] = '%dx?' % template_dict
['width']
716 sanitize
= lambda k
, v
: sanitize_filename(
718 restricted
=self
.params
.get('restrictfilenames'),
719 is_id
=(k
== 'id' or k
.endswith('_id')))
720 template_dict
= dict((k
, v
if isinstance(v
, compat_numeric_types
) else sanitize(k
, v
))
721 for k
, v
in template_dict
.items()
722 if v
is not None and not isinstance(v
, (list, tuple, dict)))
723 template_dict
= collections
.defaultdict(lambda: 'NA', template_dict
)
725 outtmpl
= self
.params
.get('outtmpl', DEFAULT_OUTTMPL
)
727 # For fields playlist_index and autonumber convert all occurrences
728 # of %(field)s to %(field)0Nd for backward compatibility
729 field_size_compat_map
= {
730 'playlist_index': len(str(template_dict
['n_entries'])),
731 'autonumber': autonumber_size
,
733 FIELD_SIZE_COMPAT_RE
= r
'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
734 mobj
= re
.search(FIELD_SIZE_COMPAT_RE
, outtmpl
)
737 FIELD_SIZE_COMPAT_RE
,
738 r
'%%(\1)0%dd' % field_size_compat_map
[mobj
.group('field')],
741 # Missing numeric fields used together with integer presentation types
742 # in format specification will break the argument substitution since
743 # string 'NA' is returned for missing fields. We will patch output
744 # template for missing fields to meet string presentation type.
745 for numeric_field
in self
._NUMERIC
_FIELDS
:
746 if numeric_field
not in template_dict
:
747 # As of [1] format syntax is:
748 # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
749 # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
753 \({0}\) # mapping key
754 (?:[#0\-+ ]+)? # conversion flags (optional)
755 (?:\d+)? # minimum field width (optional)
756 (?:\.\d+)? # precision (optional)
757 [hlL]? # length modifier (optional)
758 [diouxXeEfFgGcrs%] # conversion type
761 FORMAT_RE
.format(numeric_field
),
762 r
'%({0})s'.format(numeric_field
), outtmpl
)
764 # expand_path translates '%%' into '%' and '$$' into '$'
765 # correspondingly that is not what we want since we need to keep
766 # '%%' intact for template dict substitution step. Working around
767 # with boundary-alike separator hack.
768 sep
= ''.join([random
.choice(ascii_letters
) for _
in range(32)])
769 outtmpl
= outtmpl
.replace('%%', '%{0}%'.format(sep
)).replace('$$', '${0}$'.format(sep
))
771 # outtmpl should be expand_path'ed before template dict substitution
772 # because meta fields may contain env variables we don't want to
773 # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
774 # title "Hello $PATH", we don't want `$PATH` to be expanded.
775 filename
= expand_path(outtmpl
).replace(sep
, '') % template_dict
777 # https://github.com/blackjack4494/youtube-dlc/issues/85
778 trim_file_name
= self
.params
.get('trim_file_name', False)
780 fn_groups
= filename
.rsplit('.')
783 if len(fn_groups
) > 2:
784 sub_ext
= fn_groups
[-2]
785 filename
= '.'.join(filter(None, [fn_groups
[0][:trim_file_name
], sub_ext
, ext
]))
787 # Temporary fix for #4787
788 # 'Treat' all problem characters by passing filename through preferredencoding
789 # to workaround encoding issues with subprocess on python2 @ Windows
790 if sys
.version_info
< (3, 0) and sys
.platform
== 'win32':
791 filename
= encodeFilename(filename
, True).decode(preferredencoding())
792 return sanitize_path(filename
)
793 except ValueError as err
:
794 self
.report_error('Error in output template: ' + str(err
) + ' (encoding: ' + repr(preferredencoding()) + ')')
797 def _match_entry(self
, info_dict
, incomplete
):
798 """ Returns None if the file should be downloaded """
800 video_title
= info_dict
.get('title', info_dict
.get('id', 'video'))
801 if 'title' in info_dict
:
802 # This can happen when we're just evaluating the playlist
803 title
= info_dict
['title']
804 matchtitle
= self
.params
.get('matchtitle', False)
806 if not re
.search(matchtitle
, title
, re
.IGNORECASE
):
807 return '"' + title
+ '" title did not match pattern "' + matchtitle
+ '"'
808 rejecttitle
= self
.params
.get('rejecttitle', False)
810 if re
.search(rejecttitle
, title
, re
.IGNORECASE
):
811 return '"' + title
+ '" title matched reject pattern "' + rejecttitle
+ '"'
812 date
= info_dict
.get('upload_date')
814 dateRange
= self
.params
.get('daterange', DateRange())
815 if date
not in dateRange
:
816 return '%s upload date is not in range %s' % (date_from_str(date
).isoformat(), dateRange
)
817 view_count
= info_dict
.get('view_count')
818 if view_count
is not None:
819 min_views
= self
.params
.get('min_views')
820 if min_views
is not None and view_count
< min_views
:
821 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title
, view_count
, min_views
)
822 max_views
= self
.params
.get('max_views')
823 if max_views
is not None and view_count
> max_views
:
824 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title
, view_count
, max_views
)
825 if age_restricted(info_dict
.get('age_limit'), self
.params
.get('age_limit')):
826 return 'Skipping "%s" because it is age restricted' % video_title
827 if self
.in_download_archive(info_dict
):
828 return '%s has already been recorded in archive' % video_title
831 match_filter
= self
.params
.get('match_filter')
832 if match_filter
is not None:
833 ret
= match_filter(info_dict
)
840 def add_extra_info(info_dict
, extra_info
):
841 '''Set the keys from extra_info in info dict if they are missing'''
842 for key
, value
in extra_info
.items():
843 info_dict
.setdefault(key
, value
)
845 def extract_info(self
, url
, download
=True, ie_key
=None, info_dict
=None, extra_info
={},
846 process
=True, force_generic_extractor
=False):
848 Returns a list with a dictionary for each video we find.
849 If 'download', also downloads the videos.
850 extra_info is a dict containing the extra values to add to each result
853 if not ie_key
and force_generic_extractor
:
857 ies
= [self
.get_info_extractor(ie_key
)]
862 if not ie
.suitable(url
):
866 ie
= self
.get_info_extractor(ie_key
)
868 self
.report_warning('The program functionality for this site has been marked as broken, '
869 'and will probably not work.')
872 temp_id
= ie
.extract_id(url
) if callable(getattr(ie
, 'extract_id', None)) else ie
._match
_id
(url
)
873 except (AssertionError, IndexError, AttributeError):
875 if temp_id
is not None and self
.in_download_archive({'id': temp_id, 'ie_key': ie_key}
):
876 self
.to_screen("[%s] %s: has already been recorded in archive" % (
880 return self
.__extract
_info
(url
, ie
, download
, extra_info
, process
, info_dict
)
883 self
.report_error('no suitable InfoExtractor for URL %s' % url
)
885 def __handle_extraction_exceptions(func
):
886 def wrapper(self
, *args
, **kwargs
):
888 return func(self
, *args
, **kwargs
)
889 except GeoRestrictedError
as e
:
892 msg
+= '\nThis video is available in %s.' % ', '.join(
893 map(ISO3166Utils
.short2full
, e
.countries
))
894 msg
+= '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
895 self
.report_error(msg
)
896 except ExtractorError
as e
: # An error we somewhat expected
897 self
.report_error(compat_str(e
), e
.format_traceback())
898 except MaxDownloadsReached
:
900 except Exception as e
:
901 if self
.params
.get('ignoreerrors', False):
902 self
.report_error(error_to_compat_str(e
), tb
=encode_compat_str(traceback
.format_exc()))
907 @__handle_extraction_exceptions
908 def __extract_info(self
, url
, ie
, download
, extra_info
, process
, info_dict
):
909 ie_result
= ie
.extract(url
)
910 if ie_result
is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
912 if isinstance(ie_result
, list):
913 # Backwards compatibility: old IE result format
915 '_type': 'compat_list',
916 'entries': ie_result
,
919 if info_dict
.get('id'):
920 ie_result
['id'] = info_dict
['id']
921 if info_dict
.get('title'):
922 ie_result
['title'] = info_dict
['title']
923 self
.add_default_extra_info(ie_result
, ie
, url
)
925 return self
.process_ie_result(ie_result
, download
, extra_info
)
929 def add_default_extra_info(self
, ie_result
, ie
, url
):
930 self
.add_extra_info(ie_result
, {
931 'extractor': ie
.IE_NAME
,
934 formatSeconds(ie_result
['duration'], '-')
935 if ie_result
.get('duration', None) is not None
937 'webpage_url_basename': url_basename(url
),
938 'extractor_key': ie
.ie_key(),
941 def process_ie_result(self
, ie_result
, download
=True, extra_info
={}):
943 Take the result of the ie(may be modified) and resolve all unresolved
944 references (URLs, playlist items).
946 It will also download the videos if 'download'.
947 Returns the resolved ie_result.
949 result_type
= ie_result
.get('_type', 'video')
951 if result_type
in ('url', 'url_transparent'):
952 ie_result
['url'] = sanitize_url(ie_result
['url'])
953 extract_flat
= self
.params
.get('extract_flat', False)
954 if ((extract_flat
== 'in_playlist' and 'playlist' in extra_info
)
955 or extract_flat
is True):
956 self
.__forced
_printings
(
957 ie_result
, self
.prepare_filename(ie_result
),
961 if result_type
== 'video':
962 self
.add_extra_info(ie_result
, extra_info
)
963 return self
.process_video_result(ie_result
, download
=download
)
964 elif result_type
== 'url':
965 # We have to add extra_info to the results because it may be
966 # contained in a playlist
967 return self
.extract_info(ie_result
['url'],
968 download
, info_dict
=ie_result
,
969 ie_key
=ie_result
.get('ie_key'),
970 extra_info
=extra_info
)
971 elif result_type
== 'url_transparent':
972 # Use the information from the embedding page
973 info
= self
.extract_info(
974 ie_result
['url'], ie_key
=ie_result
.get('ie_key'),
975 extra_info
=extra_info
, download
=False, process
=False)
977 # extract_info may return None when ignoreerrors is enabled and
978 # extraction failed with an error, don't crash and return early
983 force_properties
= dict(
984 (k
, v
) for k
, v
in ie_result
.items() if v
is not None)
985 for f
in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
986 if f
in force_properties
:
987 del force_properties
[f
]
988 new_result
= info
.copy()
989 new_result
.update(force_properties
)
991 # Extracted info may not be a video result (i.e.
992 # info.get('_type', 'video') != video) but rather an url or
993 # url_transparent. In such cases outer metadata (from ie_result)
994 # should be propagated to inner one (info). For this to happen
995 # _type of info should be overridden with url_transparent. This
996 # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
997 if new_result
.get('_type') == 'url':
998 new_result
['_type'] = 'url_transparent'
1000 return self
.process_ie_result(
1001 new_result
, download
=download
, extra_info
=extra_info
)
1002 elif result_type
in ('playlist', 'multi_video'):
1003 # We process each entry in the playlist
1004 playlist
= ie_result
.get('title') or ie_result
.get('id')
1005 self
.to_screen('[download] Downloading playlist: %s' % playlist
)
1007 playlist_results
= []
1009 playliststart
= self
.params
.get('playliststart', 1) - 1
1010 playlistend
= self
.params
.get('playlistend')
1011 # For backwards compatibility, interpret -1 as whole list
1012 if playlistend
== -1:
1015 playlistitems_str
= self
.params
.get('playlist_items')
1016 playlistitems
= None
1017 if playlistitems_str
is not None:
1018 def iter_playlistitems(format
):
1019 for string_segment
in format
.split(','):
1020 if '-' in string_segment
:
1021 start
, end
= string_segment
.split('-')
1022 for item
in range(int(start
), int(end
) + 1):
1025 yield int(string_segment
)
1026 playlistitems
= orderedSet(iter_playlistitems(playlistitems_str
))
1028 ie_entries
= ie_result
['entries']
1030 def make_playlistitems_entries(list_ie_entries
):
1031 num_entries
= len(list_ie_entries
)
1033 list_ie_entries
[i
- 1] for i
in playlistitems
1034 if -num_entries
<= i
- 1 < num_entries
]
1036 def report_download(num_entries
):
1038 '[%s] playlist %s: Downloading %d videos' %
1039 (ie_result
['extractor'], playlist
, num_entries
))
1041 if isinstance(ie_entries
, list):
1042 n_all_entries
= len(ie_entries
)
1044 entries
= make_playlistitems_entries(ie_entries
)
1046 entries
= ie_entries
[playliststart
:playlistend
]
1047 n_entries
= len(entries
)
1049 '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
1050 (ie_result
['extractor'], playlist
, n_all_entries
, n_entries
))
1051 elif isinstance(ie_entries
, PagedList
):
1054 for item
in playlistitems
:
1055 entries
.extend(ie_entries
.getslice(
1059 entries
= ie_entries
.getslice(
1060 playliststart
, playlistend
)
1061 n_entries
= len(entries
)
1062 report_download(n_entries
)
1065 entries
= make_playlistitems_entries(list(itertools
.islice(
1066 ie_entries
, 0, max(playlistitems
))))
1068 entries
= list(itertools
.islice(
1069 ie_entries
, playliststart
, playlistend
))
1070 n_entries
= len(entries
)
1071 report_download(n_entries
)
1073 if self
.params
.get('playlistreverse', False):
1074 entries
= entries
[::-1]
1076 if self
.params
.get('playlistrandom', False):
1077 random
.shuffle(entries
)
1079 x_forwarded_for
= ie_result
.get('__x_forwarded_for_ip')
1081 for i
, entry
in enumerate(entries
, 1):
1082 self
.to_screen('[download] Downloading video %s of %s' % (i
, n_entries
))
1083 # This __x_forwarded_for_ip thing is a bit ugly but requires
1086 entry
['__x_forwarded_for_ip'] = x_forwarded_for
1088 'n_entries': n_entries
,
1089 'playlist': playlist
,
1090 'playlist_id': ie_result
.get('id'),
1091 'playlist_title': ie_result
.get('title'),
1092 'playlist_uploader': ie_result
.get('uploader'),
1093 'playlist_uploader_id': ie_result
.get('uploader_id'),
1094 'playlist_index': playlistitems
[i
- 1] if playlistitems
else i
+ playliststart
,
1095 'extractor': ie_result
['extractor'],
1096 'webpage_url': ie_result
['webpage_url'],
1097 'webpage_url_basename': url_basename(ie_result
['webpage_url']),
1098 'extractor_key': ie_result
['extractor_key'],
1101 reason
= self
._match
_entry
(entry
, incomplete
=True)
1102 if reason
is not None:
1103 if reason
.endswith('has already been recorded in the archive') and self
.params
.get('break_on_existing'):
1104 print('[download] tried downloading a file that\'s already in the archive, stopping since --break-on-existing is set.')
1107 self
.to_screen('[download] ' + reason
)
1110 entry_result
= self
.__process
_iterable
_entry
(entry
, download
, extra
)
1111 # TODO: skip failed (empty) entries?
1112 playlist_results
.append(entry_result
)
1113 ie_result
['entries'] = playlist_results
1114 self
.to_screen('[download] Finished downloading playlist: %s' % playlist
)
1116 elif result_type
== 'compat_list':
1117 self
.report_warning(
1118 'Extractor %s returned a compat_list result. '
1119 'It needs to be updated.' % ie_result
.get('extractor'))
1122 self
.add_extra_info(
1125 'extractor': ie_result
['extractor'],
1126 'webpage_url': ie_result
['webpage_url'],
1127 'webpage_url_basename': url_basename(ie_result
['webpage_url']),
1128 'extractor_key': ie_result
['extractor_key'],
1132 ie_result
['entries'] = [
1133 self
.process_ie_result(_fixup(r
), download
, extra_info
)
1134 for r
in ie_result
['entries']
1138 raise Exception('Invalid result type: %s' % result_type
)
1140 @__handle_extraction_exceptions
1141 def __process_iterable_entry(self
, entry
, download
, extra_info
):
1142 return self
.process_ie_result(
1143 entry
, download
=download
, extra_info
=extra_info
)
1145 def _build_format_filter(self
, filter_spec
):
1146 " Returns a function to filter the formats according to the filter_spec "
1156 operator_rex
= re
.compile(r
'''(?x)\s*
1157 (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
1158 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1159 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
1161 ''' % '|'.join(map(re
.escape
, OPERATORS
.keys())))
1162 m
= operator_rex
.search(filter_spec
)
1165 comparison_value
= int(m
.group('value'))
1167 comparison_value
= parse_filesize(m
.group('value'))
1168 if comparison_value
is None:
1169 comparison_value
= parse_filesize(m
.group('value') + 'B')
1170 if comparison_value
is None:
1172 'Invalid value %r in format specification %r' % (
1173 m
.group('value'), filter_spec
))
1174 op
= OPERATORS
[m
.group('op')]
1179 '^=': lambda attr
, value
: attr
.startswith(value
),
1180 '$=': lambda attr
, value
: attr
.endswith(value
),
1181 '*=': lambda attr
, value
: value
in attr
,
1183 str_operator_rex
= re
.compile(r
'''(?x)
1184 \s*(?P<key>[a-zA-Z0-9._-]+)
1185 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
1186 \s*(?P<value>[a-zA-Z0-9._-]+)
1188 ''' % '|'.join(map(re
.escape
, STR_OPERATORS
.keys())))
1189 m
= str_operator_rex
.search(filter_spec
)
1191 comparison_value
= m
.group('value')
1192 str_op
= STR_OPERATORS
[m
.group('op')]
1193 if m
.group('negation'):
1194 op
= lambda attr
, value
: not str_op(attr
, value
)
1199 raise ValueError('Invalid filter specification %r' % filter_spec
)
1202 actual_value
= f
.get(m
.group('key'))
1203 if actual_value
is None:
1204 return m
.group('none_inclusive')
1205 return op(actual_value
, comparison_value
)
1208 def _default_format_spec(self
, info_dict
, download
=True):
1211 merger
= FFmpegMergerPP(self
)
1212 return merger
.available
and merger
.can_merge()
1215 not self
.params
.get('simulate', False)
1219 or info_dict
.get('is_live', False)
1220 or self
.params
.get('outtmpl', DEFAULT_OUTTMPL
) == '-'))
1223 'best/bestvideo+bestaudio'
1225 else 'bestvideo*+bestaudio/best'
1226 if not self
.params
.get('allow_multiple_audio_streams', False)
1227 else 'bestvideo+bestaudio/best')
1229 def build_format_selector(self
, format_spec
):
1230 def syntax_error(note
, start
):
1232 'Invalid format specification: '
1233 '{0}\n\t{1}\n\t{2}^'.format(note
, format_spec
, ' ' * start
[1]))
1234 return SyntaxError(message
)
1236 PICKFIRST
= 'PICKFIRST'
1240 FormatSelector
= collections
.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1242 allow_multiple_streams
= {'audio': self
.params
.get('allow_multiple_audio_streams', False),
1243 'video': self
.params
.get('allow_multiple_video_streams', False)}
1245 def _parse_filter(tokens
):
1247 for type, string
, start
, _
, _
in tokens
:
1248 if type == tokenize
.OP
and string
== ']':
1249 return ''.join(filter_parts
)
1251 filter_parts
.append(string
)
1253 def _remove_unused_ops(tokens
):
1254 # Remove operators that we don't use and join them with the surrounding strings
1255 # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1256 ALLOWED_OPS
= ('/', '+', ',', '(', ')')
1257 last_string
, last_start
, last_end
, last_line
= None, None, None, None
1258 for type, string
, start
, end
, line
in tokens
:
1259 if type == tokenize
.OP
and string
== '[':
1261 yield tokenize
.NAME
, last_string
, last_start
, last_end
, last_line
1263 yield type, string
, start
, end
, line
1264 # everything inside brackets will be handled by _parse_filter
1265 for type, string
, start
, end
, line
in tokens
:
1266 yield type, string
, start
, end
, line
1267 if type == tokenize
.OP
and string
== ']':
1269 elif type == tokenize
.OP
and string
in ALLOWED_OPS
:
1271 yield tokenize
.NAME
, last_string
, last_start
, last_end
, last_line
1273 yield type, string
, start
, end
, line
1274 elif type in [tokenize
.NAME
, tokenize
.NUMBER
, tokenize
.OP
]:
1276 last_string
= string
1280 last_string
+= string
1282 yield tokenize
.NAME
, last_string
, last_start
, last_end
, last_line
1284 def _parse_format_selection(tokens
, inside_merge
=False, inside_choice
=False, inside_group
=False):
1286 current_selector
= None
1287 for type, string
, start
, _
, _
in tokens
:
1288 # ENCODING is only defined in python 3.x
1289 if type == getattr(tokenize
, 'ENCODING', None):
1291 elif type in [tokenize
.NAME
, tokenize
.NUMBER
]:
1292 current_selector
= FormatSelector(SINGLE
, string
, [])
1293 elif type == tokenize
.OP
:
1295 if not inside_group
:
1296 # ')' will be handled by the parentheses group
1297 tokens
.restore_last_token()
1299 elif inside_merge
and string
in ['/', ',']:
1300 tokens
.restore_last_token()
1302 elif inside_choice
and string
== ',':
1303 tokens
.restore_last_token()
1306 if not current_selector
:
1307 raise syntax_error('"," must follow a format selector', start
)
1308 selectors
.append(current_selector
)
1309 current_selector
= None
1311 if not current_selector
:
1312 raise syntax_error('"/" must follow a format selector', start
)
1313 first_choice
= current_selector
1314 second_choice
= _parse_format_selection(tokens
, inside_choice
=True)
1315 current_selector
= FormatSelector(PICKFIRST
, (first_choice
, second_choice
), [])
1317 if not current_selector
:
1318 current_selector
= FormatSelector(SINGLE
, 'best', [])
1319 format_filter
= _parse_filter(tokens
)
1320 current_selector
.filters
.append(format_filter
)
1322 if current_selector
:
1323 raise syntax_error('Unexpected "("', start
)
1324 group
= _parse_format_selection(tokens
, inside_group
=True)
1325 current_selector
= FormatSelector(GROUP
, group
, [])
1327 if not current_selector
:
1328 raise syntax_error('Unexpected "+"', start
)
1329 selector_1
= current_selector
1330 selector_2
= _parse_format_selection(tokens
, inside_merge
=True)
1332 raise syntax_error('Expected a selector', start
)
1333 current_selector
= FormatSelector(MERGE
, (selector_1
, selector_2
), [])
1335 raise syntax_error('Operator not recognized: "{0}"'.format(string
), start
)
1336 elif type == tokenize
.ENDMARKER
:
1338 if current_selector
:
1339 selectors
.append(current_selector
)
1342 def _build_selector_function(selector
):
1343 if isinstance(selector
, list): # ,
1344 fs
= [_build_selector_function(s
) for s
in selector
]
1346 def selector_function(ctx
):
1348 for format
in f(ctx
):
1350 return selector_function
1352 elif selector
.type == GROUP
: # ()
1353 selector_function
= _build_selector_function(selector
.selector
)
1355 elif selector
.type == PICKFIRST
: # /
1356 fs
= [_build_selector_function(s
) for s
in selector
.selector
]
1358 def selector_function(ctx
):
1360 picked_formats
= list(f(ctx
))
1362 return picked_formats
1365 elif selector
.type == SINGLE
: # atom
1366 format_spec
= selector
.selector
if selector
.selector
is not None else 'best'
1368 if format_spec
== 'all':
1369 def selector_function(ctx
):
1370 formats
= list(ctx
['formats'])
1376 format_fallback
= False
1377 format_spec_obj
= re
.match(r
'(best|worst|b|w)(video|audio|v|a)?(\*)?$', format_spec
)
1378 if format_spec_obj
is not None:
1379 format_idx
= 0 if format_spec_obj
.group(1)[0] == 'w' else -1
1380 format_type
= format_spec_obj
.group(2)[0] if format_spec_obj
.group(2) else False
1381 not_format_type
= 'v' if format_type
== 'a' else 'a'
1382 format_modified
= format_spec_obj
.group(3) is not None
1384 format_fallback
= not format_type
and not format_modified
# for b, w
1385 filter_f
= ((lambda f
: f
.get(format_type
+ 'codec') != 'none')
1386 if format_type
and format_modified
# bv*, ba*, wv*, wa*
1387 else (lambda f
: f
.get(not_format_type
+ 'codec') == 'none')
1388 if format_type
# bv, ba, wv, wa
1389 else (lambda f
: f
.get('vcodec') != 'none' and f
.get('acodec') != 'none')
1390 if not format_modified
# b, w
1394 filter_f
= ((lambda f
: f
.get('ext') == format_spec
)
1395 if format_spec
in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav'] # extension
1396 else (lambda f
: f
.get('format_id') == format_spec
)) # id
1398 def selector_function(ctx
):
1399 formats
= list(ctx
['formats'])
1402 matches
= list(filter(filter_f
, formats
)) if filter_f
is not None else formats
1404 yield matches
[format_idx
]
1405 elif format_fallback
== 'force' or (format_fallback
and ctx
['incomplete_formats']):
1406 # for extractors with incomplete formats (audio only (soundcloud)
1407 # or video only (imgur)) best/worst will fallback to
1408 # best/worst {video,audio}-only format
1409 yield formats
[format_idx
]
1411 elif selector
.type == MERGE
: # +
1412 def _merge(formats_pair
):
1413 format_1
, format_2
= formats_pair
1416 formats_info
.extend(format_1
.get('requested_formats', (format_1
,)))
1417 formats_info
.extend(format_2
.get('requested_formats', (format_2
,)))
1419 if not allow_multiple_streams
['video'] or not allow_multiple_streams
['audio']:
1420 get_no_more
= {"video": False, "audio": False}
1421 for (i
, fmt_info
) in enumerate(formats_info
):
1422 for aud_vid
in ["audio", "video"]:
1423 if not allow_multiple_streams
[aud_vid
] and fmt_info
.get(aud_vid
[0] + 'codec') != 'none':
1424 if get_no_more
[aud_vid
]:
1426 get_no_more
[aud_vid
] = True
1428 if len(formats_info
) == 1:
1429 return formats_info
[0]
1431 video_fmts
= [fmt_info
for fmt_info
in formats_info
if fmt_info
.get('vcodec') != 'none']
1432 audio_fmts
= [fmt_info
for fmt_info
in formats_info
if fmt_info
.get('acodec') != 'none']
1434 the_only_video
= video_fmts
[0] if len(video_fmts
) == 1 else None
1435 the_only_audio
= audio_fmts
[0] if len(audio_fmts
) == 1 else None
1437 output_ext
= self
.params
.get('merge_output_format')
1440 output_ext
= the_only_video
['ext']
1441 elif the_only_audio
and not video_fmts
:
1442 output_ext
= the_only_audio
['ext']
1447 'requested_formats': formats_info
,
1448 'format': '+'.join(fmt_info
.get('format') for fmt_info
in formats_info
),
1449 'format_id': '+'.join(fmt_info
.get('format_id') for fmt_info
in formats_info
),
1455 'width': the_only_video
.get('width'),
1456 'height': the_only_video
.get('height'),
1457 'resolution': the_only_video
.get('resolution'),
1458 'fps': the_only_video
.get('fps'),
1459 'vcodec': the_only_video
.get('vcodec'),
1460 'vbr': the_only_video
.get('vbr'),
1461 'stretched_ratio': the_only_video
.get('stretched_ratio'),
1466 'acodec': the_only_audio
.get('acodec'),
1467 'abr': the_only_audio
.get('abr'),
1472 selector_1
, selector_2
= map(_build_selector_function
, selector
.selector
)
1474 def selector_function(ctx
):
1475 for pair
in itertools
.product(
1476 selector_1(copy
.deepcopy(ctx
)), selector_2(copy
.deepcopy(ctx
))):
1479 filters
= [self
._build
_format
_filter
(f
) for f
in selector
.filters
]
1481 def final_selector(ctx
):
1482 ctx_copy
= copy
.deepcopy(ctx
)
1483 for _filter
in filters
:
1484 ctx_copy
['formats'] = list(filter(_filter
, ctx_copy
['formats']))
1485 return selector_function(ctx_copy
)
1486 return final_selector
1488 stream
= io
.BytesIO(format_spec
.encode('utf-8'))
1490 tokens
= list(_remove_unused_ops(compat_tokenize_tokenize(stream
.readline
)))
1491 except tokenize
.TokenError
:
1492 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec
)))
1494 class TokenIterator(object):
1495 def __init__(self
, tokens
):
1496 self
.tokens
= tokens
1503 if self
.counter
>= len(self
.tokens
):
1504 raise StopIteration()
1505 value
= self
.tokens
[self
.counter
]
1511 def restore_last_token(self
):
1514 parsed_selector
= _parse_format_selection(iter(TokenIterator(tokens
)))
1515 return _build_selector_function(parsed_selector
)
1517 def _calc_headers(self
, info_dict
):
1518 res
= std_headers
.copy()
1520 add_headers
= info_dict
.get('http_headers')
1522 res
.update(add_headers
)
1524 cookies
= self
._calc
_cookies
(info_dict
)
1526 res
['Cookie'] = cookies
1528 if 'X-Forwarded-For' not in res
:
1529 x_forwarded_for_ip
= info_dict
.get('__x_forwarded_for_ip')
1530 if x_forwarded_for_ip
:
1531 res
['X-Forwarded-For'] = x_forwarded_for_ip
1535 def _calc_cookies(self
, info_dict
):
1536 pr
= sanitized_Request(info_dict
['url'])
1537 self
.cookiejar
.add_cookie_header(pr
)
1538 return pr
.get_header('Cookie')
1540 def process_video_result(self
, info_dict
, download
=True):
1541 assert info_dict
.get('_type', 'video') == 'video'
1543 if 'id' not in info_dict
:
1544 raise ExtractorError('Missing "id" field in extractor result')
1545 if 'title' not in info_dict
:
1546 raise ExtractorError('Missing "title" field in extractor result')
1548 def report_force_conversion(field
, field_not
, conversion
):
1549 self
.report_warning(
1550 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
1551 % (field
, field_not
, conversion
))
1553 def sanitize_string_field(info
, string_field
):
1554 field
= info
.get(string_field
)
1555 if field
is None or isinstance(field
, compat_str
):
1557 report_force_conversion(string_field
, 'a string', 'string')
1558 info
[string_field
] = compat_str(field
)
1560 def sanitize_numeric_fields(info
):
1561 for numeric_field
in self
._NUMERIC
_FIELDS
:
1562 field
= info
.get(numeric_field
)
1563 if field
is None or isinstance(field
, compat_numeric_types
):
1565 report_force_conversion(numeric_field
, 'numeric', 'int')
1566 info
[numeric_field
] = int_or_none(field
)
1568 sanitize_string_field(info_dict
, 'id')
1569 sanitize_numeric_fields(info_dict
)
1571 if 'playlist' not in info_dict
:
1572 # It isn't part of a playlist
1573 info_dict
['playlist'] = None
1574 info_dict
['playlist_index'] = None
1576 thumbnails
= info_dict
.get('thumbnails')
1577 if thumbnails
is None:
1578 thumbnail
= info_dict
.get('thumbnail')
1580 info_dict
['thumbnails'] = thumbnails
= [{'url': thumbnail}
]
1582 thumbnails
.sort(key
=lambda t
: (
1583 t
.get('preference') if t
.get('preference') is not None else -1,
1584 t
.get('width') if t
.get('width') is not None else -1,
1585 t
.get('height') if t
.get('height') is not None else -1,
1586 t
.get('id') if t
.get('id') is not None else '', t
.get('url')))
1587 for i
, t
in enumerate(thumbnails
):
1588 t
['url'] = sanitize_url(t
['url'])
1589 if t
.get('width') and t
.get('height'):
1590 t
['resolution'] = '%dx%d' % (t
['width'], t
['height'])
1591 if t
.get('id') is None:
1594 if self
.params
.get('list_thumbnails'):
1595 self
.list_thumbnails(info_dict
)
1598 thumbnail
= info_dict
.get('thumbnail')
1600 info_dict
['thumbnail'] = sanitize_url(thumbnail
)
1602 info_dict
['thumbnail'] = thumbnails
[-1]['url']
1604 if 'display_id' not in info_dict
and 'id' in info_dict
:
1605 info_dict
['display_id'] = info_dict
['id']
1607 if info_dict
.get('upload_date') is None and info_dict
.get('timestamp') is not None:
1608 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
1609 # see http://bugs.python.org/issue1646728)
1611 upload_date
= datetime
.datetime
.utcfromtimestamp(info_dict
['timestamp'])
1612 info_dict
['upload_date'] = upload_date
.strftime('%Y%m%d')
1613 except (ValueError, OverflowError, OSError):
1616 # Auto generate title fields corresponding to the *_number fields when missing
1617 # in order to always have clean titles. This is very common for TV series.
1618 for field
in ('chapter', 'season', 'episode'):
1619 if info_dict
.get('%s_number' % field
) is not None and not info_dict
.get(field
):
1620 info_dict
[field
] = '%s %d' % (field
.capitalize(), info_dict
['%s_number' % field
])
1622 for cc_kind
in ('subtitles', 'automatic_captions'):
1623 cc
= info_dict
.get(cc_kind
)
1625 for _
, subtitle
in cc
.items():
1626 for subtitle_format
in subtitle
:
1627 if subtitle_format
.get('url'):
1628 subtitle_format
['url'] = sanitize_url(subtitle_format
['url'])
1629 if subtitle_format
.get('ext') is None:
1630 subtitle_format
['ext'] = determine_ext(subtitle_format
['url']).lower()
1632 automatic_captions
= info_dict
.get('automatic_captions')
1633 subtitles
= info_dict
.get('subtitles')
1635 if self
.params
.get('listsubtitles', False):
1636 if 'automatic_captions' in info_dict
:
1637 self
.list_subtitles(
1638 info_dict
['id'], automatic_captions
, 'automatic captions')
1639 self
.list_subtitles(info_dict
['id'], subtitles
, 'subtitles')
1642 info_dict
['requested_subtitles'] = self
.process_subtitles(
1643 info_dict
['id'], subtitles
, automatic_captions
)
1645 # We now pick which formats have to be downloaded
1646 if info_dict
.get('formats') is None:
1647 # There's only one format available
1648 formats
= [info_dict
]
1650 formats
= info_dict
['formats']
1653 raise ExtractorError('No video formats found!')
1655 def is_wellformed(f
):
1658 self
.report_warning(
1659 '"url" field is missing or empty - skipping format, '
1660 'there is an error in extractor')
1662 if isinstance(url
, bytes):
1663 sanitize_string_field(f
, 'url')
1666 # Filter out malformed formats for better extraction robustness
1667 formats
= list(filter(is_wellformed
, formats
))
1671 # We check that all the formats have the format and format_id fields
1672 for i
, format
in enumerate(formats
):
1673 sanitize_string_field(format
, 'format_id')
1674 sanitize_numeric_fields(format
)
1675 format
['url'] = sanitize_url(format
['url'])
1676 if not format
.get('format_id'):
1677 format
['format_id'] = compat_str(i
)
1679 # Sanitize format_id from characters used in format selector expression
1680 format
['format_id'] = re
.sub(r
'[\s,/+\[\]()]', '_', format
['format_id'])
1681 format_id
= format
['format_id']
1682 if format_id
not in formats_dict
:
1683 formats_dict
[format_id
] = []
1684 formats_dict
[format_id
].append(format
)
1686 # Make sure all formats have unique format_id
1687 for format_id
, ambiguous_formats
in formats_dict
.items():
1688 if len(ambiguous_formats
) > 1:
1689 for i
, format
in enumerate(ambiguous_formats
):
1690 format
['format_id'] = '%s-%d' % (format_id
, i
)
1692 for i
, format
in enumerate(formats
):
1693 if format
.get('format') is None:
1694 format
['format'] = '{id} - {res}{note}'.format(
1695 id=format
['format_id'],
1696 res
=self
.format_resolution(format
),
1697 note
=' ({0})'.format(format
['format_note']) if format
.get('format_note') is not None else '',
1699 # Automatically determine file extension if missing
1700 if format
.get('ext') is None:
1701 format
['ext'] = determine_ext(format
['url']).lower()
1702 # Automatically determine protocol if missing (useful for format
1703 # selection purposes)
1704 if format
.get('protocol') is None:
1705 format
['protocol'] = determine_protocol(format
)
1706 # Add HTTP headers, so that external programs can use them from the
1708 full_format_info
= info_dict
.copy()
1709 full_format_info
.update(format
)
1710 format
['http_headers'] = self
._calc
_headers
(full_format_info
)
1711 # Remove private housekeeping stuff
1712 if '__x_forwarded_for_ip' in info_dict
:
1713 del info_dict
['__x_forwarded_for_ip']
1715 # TODO Central sorting goes here
1717 if formats
[0] is not info_dict
:
1718 # only set the 'formats' fields if the original info_dict list them
1719 # otherwise we end up with a circular reference, the first (and unique)
1720 # element in the 'formats' field in info_dict is info_dict itself,
1721 # which can't be exported to json
1722 info_dict
['formats'] = formats
1723 if self
.params
.get('listformats'):
1724 self
.list_formats(info_dict
)
1727 req_format
= self
.params
.get('format')
1728 if req_format
is None:
1729 req_format
= self
._default
_format
_spec
(info_dict
, download
=download
)
1730 if self
.params
.get('verbose'):
1731 self
._write
_string
('[debug] Default format spec: %s\n' % req_format
)
1733 format_selector
= self
.build_format_selector(req_format
)
1735 # While in format selection we may need to have an access to the original
1736 # format set in order to calculate some metrics or do some processing.
1737 # For now we need to be able to guess whether original formats provided
1738 # by extractor are incomplete or not (i.e. whether extractor provides only
1739 # video-only or audio-only formats) for proper formats selection for
1740 # extractors with such incomplete formats (see
1741 # https://github.com/ytdl-org/youtube-dl/pull/5556).
1742 # Since formats may be filtered during format selection and may not match
1743 # the original formats the results may be incorrect. Thus original formats
1744 # or pre-calculated metrics should be passed to format selection routines
1746 # We will pass a context object containing all necessary additional data
1747 # instead of just formats.
1748 # This fixes incorrect format selection issue (see
1749 # https://github.com/ytdl-org/youtube-dl/issues/10083).
1750 incomplete_formats
= (
1751 # All formats are video-only or
1752 all(f
.get('vcodec') != 'none' and f
.get('acodec') == 'none' for f
in formats
)
1753 # all formats are audio-only
1754 or all(f
.get('vcodec') == 'none' and f
.get('acodec') != 'none' for f
in formats
))
1758 'incomplete_formats': incomplete_formats
,
1761 formats_to_download
= list(format_selector(ctx
))
1762 if not formats_to_download
:
1763 raise ExtractorError('requested format not available',
1767 self
.to_screen('[info] Downloading format(s) %s' % ", ".join([f
['format_id'] for f
in formats_to_download
]))
1768 if len(formats_to_download
) > 1:
1769 self
.to_screen('[info] %s: downloading video in %s formats' % (info_dict
['id'], len(formats_to_download
)))
1770 for format
in formats_to_download
:
1771 new_info
= dict(info_dict
)
1772 new_info
.update(format
)
1773 self
.process_info(new_info
)
1774 # We update the info dict with the best quality format (backwards compatibility)
1775 info_dict
.update(formats_to_download
[-1])
1778 def process_subtitles(self
, video_id
, normal_subtitles
, automatic_captions
):
1779 """Select the requested subtitles and their format"""
1781 if normal_subtitles
and self
.params
.get('writesubtitles'):
1782 available_subs
.update(normal_subtitles
)
1783 if automatic_captions
and self
.params
.get('writeautomaticsub'):
1784 for lang
, cap_info
in automatic_captions
.items():
1785 if lang
not in available_subs
:
1786 available_subs
[lang
] = cap_info
1788 if (not self
.params
.get('writesubtitles') and not
1789 self
.params
.get('writeautomaticsub') or not
1793 if self
.params
.get('allsubtitles', False):
1794 requested_langs
= available_subs
.keys()
1796 if self
.params
.get('subtitleslangs', False):
1797 requested_langs
= self
.params
.get('subtitleslangs')
1798 elif 'en' in available_subs
:
1799 requested_langs
= ['en']
1801 requested_langs
= [list(available_subs
.keys())[0]]
1803 formats_query
= self
.params
.get('subtitlesformat', 'best')
1804 formats_preference
= formats_query
.split('/') if formats_query
else []
1806 for lang
in requested_langs
:
1807 formats
= available_subs
.get(lang
)
1809 self
.report_warning('%s subtitles not available for %s' % (lang
, video_id
))
1811 for ext
in formats_preference
:
1815 matches
= list(filter(lambda f
: f
['ext'] == ext
, formats
))
1821 self
.report_warning(
1822 'No subtitle format found matching "%s" for language %s, '
1823 'using %s' % (formats_query
, lang
, f
['ext']))
    def __forced_printings(self, info_dict, filename, incomplete):
        """Print selected info_dict fields to stdout per the 'force*' params.

        filename may be None; incomplete signals that info_dict may still be
        missing fields (mandatory fields are then skipped when absent).
        """
        def print_mandatory(field):
            # Printed even if the value is falsy, unless the info is known
            # to be incomplete and the field is missing.
            if (self.params.get('force%s' % field, False)
                    and (not incomplete or info_dict.get(field) is not None)):
                self.to_stdout(info_dict[field])

        def print_optional(field):
            # Printed only when the field is actually present and non-None.
            if (self.params.get('force%s' % field, False)
                    and info_dict.get(field) is not None):
                self.to_stdout(info_dict[field])

        print_mandatory('title')
        print_mandatory('id')
        if self.params.get('forceurl', False) and not incomplete:
            if info_dict.get('requested_formats') is not None:
                # One URL per requested format when merging several formats.
                for f in info_dict['requested_formats']:
                    self.to_stdout(f['url'] + f.get('play_path', ''))
            else:
                # For RTMP URLs, also include the playpath
                self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
        print_optional('thumbnail')
        print_optional('description')
        if self.params.get('forcefilename', False) and filename is not None:
            self.to_stdout(filename)
        if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
            self.to_stdout(formatSeconds(info_dict['duration']))
        print_mandatory('format')
        if self.params.get('forcejson', False):
            self.to_stdout(json.dumps(info_dict))
1857 def process_info(self
, info_dict
):
1858 """Process a single resolved IE result."""
1860 assert info_dict
.get('_type', 'video') == 'video'
1862 max_downloads
= self
.params
.get('max_downloads')
1863 if max_downloads
is not None:
1864 if self
._num
_downloads
>= int(max_downloads
):
1865 raise MaxDownloadsReached()
1867 # TODO: backward compatibility, to be removed
1868 info_dict
['fulltitle'] = info_dict
['title']
1870 if 'format' not in info_dict
:
1871 info_dict
['format'] = info_dict
['ext']
1873 reason
= self
._match
_entry
(info_dict
, incomplete
=False)
1874 if reason
is not None:
1875 self
.to_screen('[download] ' + reason
)
1878 self
._num
_downloads
+= 1
1880 info_dict
['_filename'] = filename
= self
.prepare_filename(info_dict
)
1883 self
.__forced
_printings
(info_dict
, filename
, incomplete
=False)
1885 if self
.params
.get('simulate', False):
1886 if self
.params
.get('force_write_download_archive', False):
1887 self
.record_download_archive(info_dict
)
1889 # Do nothing else if in simulate mode
1892 if filename
is None:
1895 def ensure_dir_exists(path
):
1897 dn
= os
.path
.dirname(path
)
1898 if dn
and not os
.path
.exists(dn
):
1901 except (OSError, IOError) as err
:
1902 self
.report_error('unable to create directory ' + error_to_compat_str(err
))
1905 if not ensure_dir_exists(sanitize_path(encodeFilename(filename
))):
1908 if self
.params
.get('writedescription', False):
1909 descfn
= replace_extension(filename
, 'description', info_dict
.get('ext'))
1910 if not self
.params
.get('overwrites', True) and os
.path
.exists(encodeFilename(descfn
)):
1911 self
.to_screen('[info] Video description is already present')
1912 elif info_dict
.get('description') is None:
1913 self
.report_warning('There\'s no description to write.')
1916 self
.to_screen('[info] Writing video description to: ' + descfn
)
1917 with io
.open(encodeFilename(descfn
), 'w', encoding
='utf-8') as descfile
:
1918 descfile
.write(info_dict
['description'])
1919 except (OSError, IOError):
1920 self
.report_error('Cannot write description file ' + descfn
)
1923 if self
.params
.get('writeannotations', False):
1924 annofn
= replace_extension(filename
, 'annotations.xml', info_dict
.get('ext'))
1925 if not self
.params
.get('overwrites', True) and os
.path
.exists(encodeFilename(annofn
)):
1926 self
.to_screen('[info] Video annotations are already present')
1927 elif not info_dict
.get('annotations'):
1928 self
.report_warning('There are no annotations to write.')
1931 self
.to_screen('[info] Writing video annotations to: ' + annofn
)
1932 with io
.open(encodeFilename(annofn
), 'w', encoding
='utf-8') as annofile
:
1933 annofile
.write(info_dict
['annotations'])
1934 except (KeyError, TypeError):
1935 self
.report_warning('There are no annotations to write.')
1936 except (OSError, IOError):
1937 self
.report_error('Cannot write annotations file: ' + annofn
)
        def dl(name, info, subtitle=False):
            """Download `info` to file `name` using a suitable downloader,
            forwarding this YoutubeDL's progress hooks and params.

            Returns the downloader's download() result — callers below
            unpack it as a (success, real_download) pair.
            """
            fd = get_suitable_downloader(info, self.params)(self, self.params)
            for ph in self._progress_hooks:
                fd.add_progress_hook(ph)
            if self.params.get('verbose'):
                self.to_screen('[debug] Invoking downloader on %r' % info.get('url'))
            return fd.download(name, info, subtitle)
1948 subtitles_are_requested
= any([self
.params
.get('writesubtitles', False),
1949 self
.params
.get('writeautomaticsub')])
1951 if subtitles_are_requested
and info_dict
.get('requested_subtitles'):
1952 # subtitles download errors are already managed as troubles in relevant IE
1953 # that way it will silently go on when used with unsupporting IE
1954 subtitles
= info_dict
['requested_subtitles']
1955 # ie = self.get_info_extractor(info_dict['extractor_key'])
1956 for sub_lang
, sub_info
in subtitles
.items():
1957 sub_format
= sub_info
['ext']
1958 sub_filename
= subtitles_filename(filename
, sub_lang
, sub_format
, info_dict
.get('ext'))
1959 if not self
.params
.get('overwrites', True) and os
.path
.exists(encodeFilename(sub_filename
)):
1960 self
.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang
, sub_format
))
1962 self
.to_screen('[info] Writing video subtitles to: ' + sub_filename
)
1963 if sub_info
.get('data') is not None:
1965 # Use newline='' to prevent conversion of newline characters
1966 # See https://github.com/ytdl-org/youtube-dl/issues/10268
1967 with io
.open(encodeFilename(sub_filename
), 'w', encoding
='utf-8', newline
='') as subfile
:
1968 subfile
.write(sub_info
['data'])
1969 except (OSError, IOError):
1970 self
.report_error('Cannot write subtitles file ' + sub_filename
)
1974 dl(sub_filename
, sub_info
, subtitle
=True)
1976 if self.params.get('sleep_interval_subtitles', False):
1977 dl(sub_filename, sub_info)
1979 sub_data = ie._request_webpage(
1980 sub_info['url'], info_dict['id'], note=False).read()
1981 with io.open(encodeFilename(sub_filename), 'wb') as subfile:
1982 subfile.write(sub_data)
1984 except (ExtractorError
, IOError, OSError, ValueError, compat_urllib_error
.URLError
, compat_http_client
.HTTPException
, socket
.error
) as err
:
1985 self
.report_warning('Unable to download subtitle for "%s": %s' %
1986 (sub_lang
, error_to_compat_str(err
)))
1989 if self
.params
.get('skip_download', False):
1990 if self
.params
.get('convertsubtitles', False):
1991 subconv
= FFmpegSubtitlesConvertorPP(self
, format
=self
.params
.get('convertsubtitles'))
1992 filename_real_ext
= os
.path
.splitext(filename
)[1][1:]
1994 os
.path
.splitext(filename
)[0]
1995 if filename_real_ext
== info_dict
['ext']
1997 afilename
= '%s.%s' % (filename_wo_ext
, self
.params
.get('convertsubtitles'))
1998 if subconv
.available
:
1999 info_dict
.setdefault('__postprocessors', [])
2000 # info_dict['__postprocessors'].append(subconv)
2001 if os
.path
.exists(encodeFilename(afilename
)):
2003 '[download] %s has already been downloaded and '
2004 'converted' % afilename
)
2007 self
.post_process(filename
, info_dict
)
2008 except (PostProcessingError
) as err
:
2009 self
.report_error('postprocessing: %s' % str(err
))
2012 if self
.params
.get('writeinfojson', False):
2013 infofn
= replace_extension(filename
, 'info.json', info_dict
.get('ext'))
2014 if not self
.params
.get('overwrites', True) and os
.path
.exists(encodeFilename(infofn
)):
2015 self
.to_screen('[info] Video description metadata is already present')
2017 self
.to_screen('[info] Writing video description metadata as JSON to: ' + infofn
)
2019 write_json_file(self
.filter_requested_info(info_dict
), infofn
)
2020 except (OSError, IOError):
2021 self
.report_error('Cannot write metadata to JSON file ' + infofn
)
2024 self
._write
_thumbnails
(info_dict
, filename
)
2026 # Write internet shortcut files
2027 url_link
= webloc_link
= desktop_link
= False
2028 if self
.params
.get('writelink', False):
2029 if sys
.platform
== "darwin": # macOS.
2031 elif sys
.platform
.startswith("linux"):
2033 else: # if sys.platform in ['win32', 'cygwin']:
2035 if self
.params
.get('writeurllink', False):
2037 if self
.params
.get('writewebloclink', False):
2039 if self
.params
.get('writedesktoplink', False):
2042 if url_link
or webloc_link
or desktop_link
:
2043 if 'webpage_url' not in info_dict
:
2044 self
.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2046 ascii_url
= iri_to_uri(info_dict
['webpage_url'])
2048 def _write_link_file(extension
, template
, newline
, embed_filename
):
2049 linkfn
= replace_extension(filename
, extension
, info_dict
.get('ext'))
2050 if self
.params
.get('nooverwrites', False) and os
.path
.exists(encodeFilename(linkfn
)):
2051 self
.to_screen('[info] Internet shortcut is already present')
2054 self
.to_screen('[info] Writing internet shortcut to: ' + linkfn
)
2055 with io
.open(encodeFilename(to_high_limit_path(linkfn
)), 'w', encoding
='utf-8', newline
=newline
) as linkfile
:
2056 template_vars
= {'url': ascii_url}
2058 template_vars
['filename'] = linkfn
[:-(len(extension
) + 1)]
2059 linkfile
.write(template
% template_vars
)
2060 except (OSError, IOError):
2061 self
.report_error('Cannot write internet shortcut ' + linkfn
)
2066 if not _write_link_file('url', DOT_URL_LINK_TEMPLATE
, '\r\n', embed_filename
=False):
2069 if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE
, '\n', embed_filename
=False):
2072 if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE
, '\n', embed_filename
=True):
2076 must_record_download_archive
= False
2077 if not self
.params
.get('skip_download', False):
2079 if info_dict
.get('requested_formats') is not None:
2082 merger
= FFmpegMergerPP(self
)
2083 if not merger
.available
:
2085 self
.report_warning('You have requested multiple '
2086 'formats but ffmpeg or avconv are not installed.'
2087 ' The formats won\'t be merged.')
2089 postprocessors
= [merger
]
2091 def compatible_formats(formats
):
2092 # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2093 video_formats
= [format
for format
in formats
if format
.get('vcodec') != 'none']
2094 audio_formats
= [format
for format
in formats
if format
.get('acodec') != 'none']
2095 if len(video_formats
) > 2 or len(audio_formats
) > 2:
2099 exts
= set(format
.get('ext') for format
in formats
)
2101 set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2104 for ext_sets
in COMPATIBLE_EXTS
:
2105 if ext_sets
.issuperset(exts
):
2107 # TODO: Check acodec/vcodec
2110 filename_real_ext
= os
.path
.splitext(filename
)[1][1:]
2112 os
.path
.splitext(filename
)[0]
2113 if filename_real_ext
== info_dict
['ext']
2115 requested_formats
= info_dict
['requested_formats']
2116 if self
.params
.get('merge_output_format') is None and not compatible_formats(requested_formats
):
2117 info_dict
['ext'] = 'mkv'
2118 self
.report_warning(
2119 'Requested formats are incompatible for merge and will be merged into mkv.')
2120 # Ensure filename always has a correct extension for successful merge
2121 filename
= '%s.%s' % (filename_wo_ext
, info_dict
['ext'])
2122 file_exists
= os
.path
.exists(encodeFilename(filename
))
2123 if not self
.params
.get('overwrites', False) and file_exists
:
2125 '[download] %s has already been downloaded and '
2126 'merged' % filename
)
2129 self
.report_file_delete(filename
)
2130 os
.remove(encodeFilename(filename
))
2131 for f
in requested_formats
:
2132 new_info
= dict(info_dict
)
2134 fname
= prepend_extension(
2135 self
.prepare_filename(new_info
),
2136 'f%s' % f
['format_id'], new_info
['ext'])
2137 if not ensure_dir_exists(fname
):
2139 downloaded
.append(fname
)
2140 partial_success
, real_download
= dl(fname
, new_info
)
2141 success
= success
and partial_success
2142 info_dict
['__postprocessors'] = postprocessors
2143 info_dict
['__files_to_merge'] = downloaded
2144 # Even if there were no downloads, it is being merged only now
2145 info_dict
['__real_download'] = True
2147 # Delete existing file with --yes-overwrites
2148 if self
.params
.get('overwrites', False):
2149 if os
.path
.exists(encodeFilename(filename
)):
2150 self
.report_file_delete(filename
)
2151 os
.remove(encodeFilename(filename
))
2152 # Just a single file
2153 success
, real_download
= dl(filename
, info_dict
)
2154 info_dict
['__real_download'] = real_download
2155 except (compat_urllib_error
.URLError
, compat_http_client
.HTTPException
, socket
.error
) as err
:
2156 self
.report_error('unable to download video data: %s' % error_to_compat_str(err
))
2158 except (OSError, IOError) as err
:
2159 raise UnavailableVideoError(err
)
2160 except (ContentTooShortError
, ) as err
:
2161 self
.report_error('content too short (expected %s bytes and served %s)' % (err
.expected
, err
.downloaded
))
2164 if success
and filename
!= '-':
2166 fixup_policy
= self
.params
.get('fixup')
2167 if fixup_policy
is None:
2168 fixup_policy
= 'detect_or_warn'
2170 INSTALL_FFMPEG_MESSAGE
= 'Install ffmpeg or avconv to fix this automatically.'
2172 stretched_ratio
= info_dict
.get('stretched_ratio')
2173 if stretched_ratio
is not None and stretched_ratio
!= 1:
2174 if fixup_policy
== 'warn':
2175 self
.report_warning('%s: Non-uniform pixel ratio (%s)' % (
2176 info_dict
['id'], stretched_ratio
))
2177 elif fixup_policy
== 'detect_or_warn':
2178 stretched_pp
= FFmpegFixupStretchedPP(self
)
2179 if stretched_pp
.available
:
2180 info_dict
.setdefault('__postprocessors', [])
2181 info_dict
['__postprocessors'].append(stretched_pp
)
2183 self
.report_warning(
2184 '%s: Non-uniform pixel ratio (%s). %s'
2185 % (info_dict
['id'], stretched_ratio
, INSTALL_FFMPEG_MESSAGE
))
2187 assert fixup_policy
in ('ignore', 'never')
2189 if (info_dict
.get('requested_formats') is None
2190 and info_dict
.get('container') == 'm4a_dash'):
2191 if fixup_policy
== 'warn':
2192 self
.report_warning(
2193 '%s: writing DASH m4a. '
2194 'Only some players support this container.'
2196 elif fixup_policy
== 'detect_or_warn':
2197 fixup_pp
= FFmpegFixupM4aPP(self
)
2198 if fixup_pp
.available
:
2199 info_dict
.setdefault('__postprocessors', [])
2200 info_dict
['__postprocessors'].append(fixup_pp
)
2202 self
.report_warning(
2203 '%s: writing DASH m4a. '
2204 'Only some players support this container. %s'
2205 % (info_dict
['id'], INSTALL_FFMPEG_MESSAGE
))
2207 assert fixup_policy
in ('ignore', 'never')
2209 if (info_dict
.get('protocol') == 'm3u8_native'
2210 or info_dict
.get('protocol') == 'm3u8'
2211 and self
.params
.get('hls_prefer_native')):
2212 if fixup_policy
== 'warn':
2213 self
.report_warning('%s: malformed AAC bitstream detected.' % (
2215 elif fixup_policy
== 'detect_or_warn':
2216 fixup_pp
= FFmpegFixupM3u8PP(self
)
2217 if fixup_pp
.available
:
2218 info_dict
.setdefault('__postprocessors', [])
2219 info_dict
['__postprocessors'].append(fixup_pp
)
2221 self
.report_warning(
2222 '%s: malformed AAC bitstream detected. %s'
2223 % (info_dict
['id'], INSTALL_FFMPEG_MESSAGE
))
2225 assert fixup_policy
in ('ignore', 'never')
2228 self
.post_process(filename
, info_dict
)
2229 except (PostProcessingError
) as err
:
2230 self
.report_error('postprocessing: %s' % str(err
))
2233 for ph
in self
._post
_hooks
:
2235 except Exception as err
:
2236 self
.report_error('post hooks: %s' % str(err
))
2238 must_record_download_archive
= True
2240 if must_record_download_archive
or self
.params
.get('force_write_download_archive', False):
2241 self
.record_download_archive(info_dict
)
2242 max_downloads
= self
.params
.get('max_downloads')
2243 if max_downloads
is not None and self
._num
_downloads
>= int(max_downloads
):
2244 raise MaxDownloadsReached()
    def download(self, url_list):
        """Download a given list of URLs."""
        # Refuse to write several videos into one fixed (non-template)
        # output file; '-' (stdout) and single downloads are exempt.
        outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
        if (len(url_list) > 1
                and outtmpl != '-'
                and '%' not in outtmpl
                and self.params.get('max_downloads') != 1):
            raise SameFileError(outtmpl)

        for url in url_list:
            try:
                # It also downloads the videos
                res = self.extract_info(
                    url, force_generic_extractor=self.params.get('force_generic_extractor', False))
            except UnavailableVideoError:
                self.report_error('unable to download video')
            except MaxDownloadsReached:
                self.to_screen('[info] Maximum number of downloaded files reached.')
                raise
            else:
                if self.params.get('dump_single_json', False):
                    self.to_stdout(json.dumps(res))

        return self._download_retcode
    def download_with_info_file(self, info_filename):
        """Download using a previously dumped .info.json file instead of
        re-extracting; on DownloadError, retry from the info's webpage_url.
        """
        with contextlib.closing(fileinput.FileInput(
                [info_filename], mode='r',
                openhook=fileinput.hook_encoded('utf-8'))) as f:
            # FileInput doesn't have a read method, we can't call json.load
            info = self.filter_requested_info(json.loads('\n'.join(f)))
        try:
            self.process_ie_result(info, download=True)
        except DownloadError:
            webpage_url = info.get('webpage_url')
            if webpage_url is not None:
                self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
                return self.download([webpage_url])
            else:
                raise
        return self._download_retcode
2289 def filter_requested_info(info_dict
):
2291 (k
, v
) for k
, v
in info_dict
.items()
2292 if k
not in ['requested_formats', 'requested_subtitles'])
    def post_process(self, filename, ie_info):
        """Run all the postprocessors on the given file."""
        # Work on a copy so the caller's dict is not mutated.
        info = dict(ie_info)
        info['filepath'] = filename
        # Per-download postprocessors (attached under '__postprocessors',
        # e.g. the format merger) run before the globally registered ones.
        pps_chain = []
        if ie_info.get('__postprocessors') is not None:
            pps_chain.extend(ie_info['__postprocessors'])
        pps_chain.extend(self._pps)
        for pp in pps_chain:
            files_to_delete = []
            try:
                files_to_delete, info = pp.run(info)
            except PostProcessingError as e:
                # Report and keep going with the remaining postprocessors.
                self.report_error(e.msg)
            if files_to_delete and not self.params.get('keepvideo', False):
                for old_filename in set(files_to_delete):
                    self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
                    try:
                        os.remove(encodeFilename(old_filename))
                    except (IOError, OSError):
                        self.report_warning('Unable to remove downloaded original file')
    def _make_archive_id(self, info_dict):
        """Build the '<extractor> <video_id>' key used in the download
        archive, or None if the info is too incomplete to identify.
        """
        video_id = info_dict.get('id')
        if not video_id:
            return
        # Future-proof against any change in case
        # and backwards compatibility with prior versions
        extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
        if extractor is None:
            url = str_or_none(info_dict.get('url'))
            if not url:
                return
            # Try to find matching extractor for the URL and take its ie_key
            for ie in self._ies:
                if ie.suitable(url):
                    extractor = ie.ie_key()
                    break
            else:
                return
        return extractor.lower() + ' ' + video_id
    def in_download_archive(self, info_dict):
        """Return True if info_dict's archive id is already recorded in the
        download archive (False when no archive is configured).
        """
        fn = self.params.get('download_archive')
        if fn is None:
            return False

        vid_id = self._make_archive_id(info_dict)
        if vid_id is None:
            return False  # Incomplete video information

        return vid_id in self.archive
    def record_download_archive(self, info_dict):
        """Append info_dict's archive id to the download-archive file and
        to the in-memory archive set; no-op when no archive is configured.
        """
        fn = self.params.get('download_archive')
        if fn is None:
            return
        vid_id = self._make_archive_id(info_dict)
        assert vid_id
        # locked_file serializes concurrent appends to the archive file.
        with locked_file(fn, 'a', encoding='utf-8') as archive_file:
            archive_file.write(vid_id + '\n')
        self.archive.add(vid_id)
2358 def format_resolution(format
, default
='unknown'):
2359 if format
.get('vcodec') == 'none':
2361 if format
.get('resolution') is not None:
2362 return format
['resolution']
2363 if format
.get('height') is not None:
2364 if format
.get('width') is not None:
2365 res
= '%sx%s' % (format
['width'], format
['height'])
2367 res
= '%sp' % format
['height']
2368 elif format
.get('width') is not None:
2369 res
= '%dx?' % format
['width']
    def _format_note(self, fdict):
        """Build the free-form 'note' string shown in the legacy
        --list-formats output for one format dict.
        """
        res = ''
        if fdict.get('ext') in ['f4f', 'f4m']:
            res += '(unsupported) '
        if fdict.get('language'):
            if res:
                res += ' '
            res += '[%s] ' % fdict['language']
        if fdict.get('format_note') is not None:
            res += fdict['format_note'] + ' '
        if fdict.get('tbr') is not None:
            res += '%4dk ' % fdict['tbr']
        if fdict.get('container') is not None:
            if res:
                res += ', '
            res += '%s container' % fdict['container']
        if (fdict.get('vcodec') is not None
                and fdict.get('vcodec') != 'none'):
            if res:
                res += ', '
            res += fdict['vcodec']
            if fdict.get('vbr') is not None:
                res += '@'
        elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
            # Bitrates known but codec unknown: label the video bitrate.
            res += 'video@'
        if fdict.get('vbr') is not None:
            res += '%4dk' % fdict['vbr']
        if fdict.get('fps') is not None:
            if res:
                res += ', '
            res += '%sfps' % fdict['fps']
        if fdict.get('acodec') is not None:
            if res:
                res += ', '
            if fdict['acodec'] == 'none':
                res += 'video only'
            else:
                res += '%-5s' % fdict['acodec']
        elif fdict.get('abr') is not None:
            if res:
                res += ', '
            res += 'audio'
        if fdict.get('abr') is not None:
            res += '@%3dk' % fdict['abr']
        if fdict.get('asr') is not None:
            res += ' (%5dHz)' % fdict['asr']
        if fdict.get('filesize') is not None:
            if res:
                res += ', '
            res += format_bytes(fdict['filesize'])
        elif fdict.get('filesize_approx') is not None:
            if res:
                res += ', '
            res += '~' + format_bytes(fdict['filesize_approx'])
        return res
    def _format_note_table(self, f):
        """Return the NOTE column text for the tabular --list-formats
        output, joining only the non-empty fields with ', '.
        """
        def join_fields(*vargs):
            return ', '.join((val for val in vargs if val != ''))

        return join_fields(
            'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
            format_field(f, 'language', '[%s]'),
            format_field(f, 'format_note'),
            # Don't repeat the container when it equals the extension.
            format_field(f, 'container', ignore=(None, f.get('ext'))),
            format_field(f, 'asr', '%5dHz'))
2441 def list_formats(self
, info_dict
):
2442 formats
= info_dict
.get('formats', [info_dict
])
2443 new_format
= self
.params
.get('listformats_table', False)
2447 format_field(f
, 'format_id'),
2448 format_field(f
, 'ext'),
2449 self
.format_resolution(f
),
2450 format_field(f
, 'fps', '%d'),
2452 format_field(f
, 'filesize', ' %s', func
=format_bytes
) + format_field(f
, 'filesize_approx', '~%s', func
=format_bytes
),
2453 format_field(f
, 'tbr', '%4dk'),
2454 f
.get('protocol').replace('http_dash_segments', 'dash').replace("native", "n"),
2456 format_field(f
, 'vcodec', default
='unknown').replace('none', ''),
2457 format_field(f
, 'vbr', '%4dk'),
2458 format_field(f
, 'acodec', default
='unknown').replace('none', ''),
2459 format_field(f
, 'abr', '%3dk'),
2460 format_field(f
, 'asr', '%5dHz'),
2461 self
._format
_note
_table
(f
)]
2463 if f
.get('preference') is None or f
['preference'] >= -1000]
2464 header_line
= ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', ' TBR', 'PROTO',
2465 '|', 'VCODEC', ' VBR', 'ACODEC', ' ABR', ' ASR', 'NOTE']
2469 format_field(f
, 'format_id'),
2470 format_field(f
, 'ext'),
2471 self
.format_resolution(f
),
2472 self
._format
_note
(f
)]
2474 if f
.get('preference') is None or f
['preference'] >= -1000]
2475 header_line
= ['format code', 'extension', 'resolution', 'note']
2477 # if len(formats) > 1:
2478 # table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
2480 '[info] Available formats for %s:\n%s' % (info_dict
['id'], render_table(
2484 extraGap
=(0 if new_format
else 1),
2485 hideEmpty
=new_format
)))
    def list_thumbnails(self, info_dict):
        """Print the available thumbnails for info_dict as a table."""
        thumbnails = info_dict.get('thumbnails')
        if not thumbnails:
            self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
            return

        self.to_screen(
            '[info] Thumbnails for %s:' % info_dict['id'])
        self.to_screen(render_table(
            ['ID', 'width', 'height', 'URL'],
            [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
    def list_subtitles(self, video_id, subtitles, name='subtitles'):
        """Print available subtitles (or, via `name`, automatic captions)
        for video_id as a language/formats table.
        """
        if not subtitles:
            self.to_screen('%s has no %s' % (video_id, name))
            return
        self.to_screen(
            'Available %s for %s:' % (name, video_id))
        self.to_screen(render_table(
            ['Language', 'formats'],
            [[lang, ', '.join(f['ext'] for f in reversed(formats))]
                for lang, formats in subtitles.items()]))
    def urlopen(self, req):
        """ Start an HTTP download """
        # Accept plain URL strings as well as prepared Request objects.
        if isinstance(req, compat_basestring):
            req = sanitized_Request(req)
        return self._opener.open(req, timeout=self._socket_timeout)
2516 def print_debug_header(self
):
2517 if not self
.params
.get('verbose'):
2520 if type('') is not compat_str
:
2521 # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
2522 self
.report_warning(
2523 'Your Python is broken! Update to a newer and supported version')
2525 stdout_encoding
= getattr(
2526 sys
.stdout
, 'encoding', 'missing (%s)' % type(sys
.stdout
).__name
__)
2528 '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
2529 locale
.getpreferredencoding(),
2530 sys
.getfilesystemencoding(),
2532 self
.get_encoding()))
2533 write_string(encoding_str
, encoding
=None)
2535 self
._write
_string
('[debug] youtube-dlc version ' + __version__
+ '\n')
2537 self
._write
_string
('[debug] Lazy loading extractors enabled' + '\n')
2539 sp
= subprocess
.Popen(
2540 ['git', 'rev-parse', '--short', 'HEAD'],
2541 stdout
=subprocess
.PIPE
, stderr
=subprocess
.PIPE
,
2542 cwd
=os
.path
.dirname(os
.path
.abspath(__file__
)))
2543 out
, err
= process_communicate_or_kill(sp
)
2544 out
= out
.decode().strip()
2545 if re
.match('[0-9a-f]+', out
):
2546 self
._write
_string
('[debug] Git HEAD: ' + out
+ '\n')
2553 def python_implementation():
2554 impl_name
= platform
.python_implementation()
2555 if impl_name
== 'PyPy' and hasattr(sys
, 'pypy_version_info'):
2556 return impl_name
+ ' version %d.%d.%d' % sys
.pypy_version_info
[:3]
2559 self
._write
_string
('[debug] Python version %s (%s) - %s\n' % (
2560 platform
.python_version(), python_implementation(),
2563 exe_versions
= FFmpegPostProcessor
.get_versions(self
)
2564 exe_versions
['rtmpdump'] = rtmpdump_version()
2565 exe_versions
['phantomjs'] = PhantomJSwrapper
._version
()
2566 exe_str
= ', '.join(
2568 for exe
, v
in sorted(exe_versions
.items())
2573 self
._write
_string
('[debug] exe versions: %s\n' % exe_str
)
2576 for handler
in self
._opener
.handlers
:
2577 if hasattr(handler
, 'proxies'):
2578 proxy_map
.update(handler
.proxies
)
2579 self
._write
_string
('[debug] Proxy map: ' + compat_str(proxy_map
) + '\n')
2581 if self
.params
.get('call_home', False):
2582 ipaddr
= self
.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
2583 self
._write
_string
('[debug] Public IP address: %s\n' % ipaddr
)
2585 latest_version
= self
.urlopen(
2586 'https://yt-dl.org/latest/version').read().decode('utf-8')
2587 if version_tuple(latest_version
) > version_tuple(__version__
):
2588 self
.report_warning(
2589 'You are using an outdated version (newest version: %s)! '
2590 'See https://yt-dl.org/update if you need help updating.' %
    def _setup_opener(self):
        """Build self._opener (the urllib opener used by urlopen) from the
        params: socket timeout, cookie jar/file, proxies and the custom
        handlers; also disables the file:// scheme for security.
        """
        timeout_val = self.params.get('socket_timeout')
        self._socket_timeout = 600 if timeout_val is None else float(timeout_val)

        opts_cookiefile = self.params.get('cookiefile')
        opts_proxy = self.params.get('proxy')

        if opts_cookiefile is None:
            # In-memory cookies only.
            self.cookiejar = compat_cookiejar.CookieJar()
        else:
            opts_cookiefile = expand_path(opts_cookiefile)
            self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
            if os.access(opts_cookiefile, os.R_OK):
                self.cookiejar.load(ignore_discard=True, ignore_expires=True)

        cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
        if opts_proxy is not None:
            if opts_proxy == '':
                # Explicit empty --proxy disables all proxying.
                proxies = {}
            else:
                proxies = {'http': opts_proxy, 'https': opts_proxy}
        else:
            proxies = compat_urllib_request.getproxies()
            # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
            if 'http' in proxies and 'https' not in proxies:
                proxies['https'] = proxies['http']
        proxy_handler = PerRequestProxyHandler(proxies)

        debuglevel = 1 if self.params.get('debug_printtraffic') else 0
        https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
        ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
        redirect_handler = YoutubeDLRedirectHandler()
        data_handler = compat_urllib_request_DataHandler()

        # When passing our own FileHandler instance, build_opener won't add the
        # default FileHandler and allows us to disable the file protocol, which
        # can be used for malicious purposes (see
        # https://github.com/ytdl-org/youtube-dl/issues/8227)
        file_handler = compat_urllib_request.FileHandler()

        def file_open(*args, **kwargs):
            raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dlc for security reasons')
        file_handler.file_open = file_open

        opener = compat_urllib_request.build_opener(
            proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)

        # Delete the default user-agent header, which would otherwise apply in
        # cases where our custom HTTP handler doesn't come into play
        # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener
    def encode(self, s):
        """Encode text `s` with the configured output encoding; bytes are
        passed through unchanged. Re-raises UnicodeEncodeError with a hint
        appended to its reason.
        """
        if isinstance(s, bytes):
            return s  # Already encoded

        try:
            return s.encode(self.get_encoding())
        except UnicodeEncodeError as err:
            err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
            raise
2656 def get_encoding(self
):
2657 encoding
= self
.params
.get('encoding')
2658 if encoding
is None:
2659 encoding
= preferredencoding()
    def _write_thumbnails(self, info_dict, filename):
        """Download and save thumbnail(s) next to `filename` when the
        'writethumbnail' (last/best one only) or 'write_all_thumbnails'
        params are set; records each path in t['filename'].
        """
        if self.params.get('writethumbnail', False):
            thumbnails = info_dict.get('thumbnails')
            if thumbnails:
                # Only the last (presumably best) thumbnail.
                thumbnails = [thumbnails[-1]]
        elif self.params.get('write_all_thumbnails', False):
            thumbnails = info_dict.get('thumbnails')
        else:
            return

        if not thumbnails:
            # No thumbnails present, so return immediately
            return

        for t in thumbnails:
            thumb_ext = determine_ext(t['url'], 'jpg')
            # Disambiguate file names only when writing several thumbnails.
            suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
            thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
            t['filename'] = thumb_filename = replace_extension(filename + suffix, thumb_ext, info_dict.get('ext'))

            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(thumb_filename)):
                self.to_screen('[%s] %s: Thumbnail %sis already present' %
                               (info_dict['extractor'], info_dict['id'], thumb_display_id))
            else:
                self.to_screen('[%s] %s: Downloading thumbnail %s...' %
                               (info_dict['extractor'], info_dict['id'], thumb_display_id))
                try:
                    uf = self.urlopen(t['url'])
                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                    self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
                                   (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                    # Best-effort: a failed thumbnail must not abort the download.
                    self.report_warning('Unable to download thumbnail "%s": %s' %
                                        (t['url'], error_to_compat_str(err)))