4 from __future__
import absolute_import
, unicode_literals
28 from string
import ascii_letters
29 from zipimport
import zipimporter
34 compat_get_terminal_size
,
39 compat_tokenize_tokenize
,
41 compat_urllib_request
,
42 compat_urllib_request_DataHandler
,
53 DOT_DESKTOP_LINK_TEMPLATE
,
54 DOT_URL_LINK_TEMPLATE
,
55 DOT_WEBLOC_LINK_TEMPLATE
,
83 PerRequestProxyHandler
,
88 process_communicate_or_kill
,
90 register_socks_protocols
,
105 UnavailableVideoError
,
111 YoutubeDLCookieProcessor
,
113 YoutubeDLRedirectHandler
,
115 from .cache
import Cache
116 from .extractor
import (
117 gen_extractor_classes
,
122 from .extractor
.openload
import PhantomJSwrapper
123 from .downloader
import (
124 get_suitable_downloader
,
125 shorten_protocol_name
127 from .downloader
.rtmp
import rtmpdump_version
128 from .postprocessor
import (
131 FFmpegFixupStretchedPP
,
134 # FFmpegSubtitlesConvertorPP,
136 MoveFilesAfterDownloadPP
,
138 from .version
import __version__
140 if compat_os_name
== 'nt':
144 class YoutubeDL(object):
147 YoutubeDL objects are the ones responsible of downloading the
148 actual video file and writing it to disk if the user has requested
149 it, among some other tasks. In most cases there should be one per
150 program. As, given a video URL, the downloader doesn't know how to
151 extract all the needed information, task that InfoExtractors do, it
152 has to pass the URL to one of them.
154 For this, YoutubeDL objects have a method that allows
155 InfoExtractors to be registered in a given order. When it is passed
156 a URL, the YoutubeDL object handles it to the first InfoExtractor it
157 finds that reports being able to handle it. The InfoExtractor extracts
158 all the information about the video or videos the URL refers to, and
159 YoutubeDL process the extracted information, possibly using a File
160 Downloader to download the video.
162 YoutubeDL objects accept a lot of parameters. In order not to saturate
163 the object constructor with arguments, it receives a dictionary of
164 options instead. These options are available through the params
165 attribute for the InfoExtractors to use. The YoutubeDL also
166 registers itself as the downloader in charge for the InfoExtractors
167 that are added to it, so this is a "mutual registration".
171 username: Username for authentication purposes.
172 password: Password for authentication purposes.
173 videopassword: Password for accessing a video.
174 ap_mso: Adobe Pass multiple-system operator identifier.
175 ap_username: Multiple-system operator account username.
176 ap_password: Multiple-system operator account password.
177 usenetrc: Use netrc for authentication instead.
178 verbose: Print additional info to stdout.
179 quiet: Do not print messages to stdout.
180 no_warnings: Do not print out anything for warnings.
181 forceprint: A list of templates to force print
182 forceurl: Force printing final URL. (Deprecated)
183 forcetitle: Force printing title. (Deprecated)
184 forceid: Force printing ID. (Deprecated)
185 forcethumbnail: Force printing thumbnail URL. (Deprecated)
186 forcedescription: Force printing description. (Deprecated)
187 forcefilename: Force printing final filename. (Deprecated)
188 forceduration: Force printing duration. (Deprecated)
189 forcejson: Force printing info_dict as JSON.
190 dump_single_json: Force printing the info_dict of the whole playlist
191 (or video) as a single JSON line.
192 force_write_download_archive: Force writing download archive regardless
193 of 'skip_download' or 'simulate'.
194 simulate: Do not download the video files.
195 format: Video format code. see "FORMAT SELECTION" for more details.
196 allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
197 ignore_no_formats_error: Ignore "No video formats" error. Usefull for
198 extracting metadata even if the video is not actually
199 available for download (experimental)
200 format_sort: How to sort the video formats. see "Sorting Formats"
202 format_sort_force: Force the given format_sort. see "Sorting Formats"
204 allow_multiple_video_streams: Allow multiple video streams to be merged
206 allow_multiple_audio_streams: Allow multiple audio streams to be merged
208 paths: Dictionary of output paths. The allowed keys are 'home'
209 'temp' and the keys of OUTTMPL_TYPES (in utils.py)
210 outtmpl: Dictionary of templates for output names. Allowed keys
211 are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
212 A string a also accepted for backward compatibility
213 outtmpl_na_placeholder: Placeholder for unavailable meta fields.
214 restrictfilenames: Do not allow "&" and spaces in file names
215 trim_file_name: Limit length of filename (extension excluded)
216 windowsfilenames: Force the filenames to be windows compatible
217 ignoreerrors: Do not stop on download errors
218 (Default True when running yt-dlp,
219 but False when directly accessing YoutubeDL class)
220 skip_playlist_after_errors: Number of allowed failures until the rest of
221 the playlist is skipped
222 force_generic_extractor: Force downloader to use the generic extractor
223 overwrites: Overwrite all video and metadata files if True,
224 overwrite only non-video files if None
225 and don't overwrite any file if False
226 playliststart: Playlist item to start at.
227 playlistend: Playlist item to end at.
228 playlist_items: Specific indices of playlist to download.
229 playlistreverse: Download playlist items in reverse order.
230 playlistrandom: Download playlist items in random order.
231 matchtitle: Download only matching titles.
232 rejecttitle: Reject downloads for matching titles.
233 logger: Log messages to a logging.Logger instance.
234 logtostderr: Log messages to stderr instead of stdout.
235 writedescription: Write the video description to a .description file
236 writeinfojson: Write the video description to a .info.json file
237 clean_infojson: Remove private fields from the infojson
238 writecomments: Extract video comments. This will not be written to disk
239 unless writeinfojson is also given
240 writeannotations: Write the video annotations to a .annotations.xml file
241 writethumbnail: Write the thumbnail image to a file
242 allow_playlist_files: Whether to write playlists' description, infojson etc
243 also to disk when using the 'write*' options
244 write_all_thumbnails: Write all thumbnail formats to files
245 writelink: Write an internet shortcut file, depending on the
246 current platform (.url/.webloc/.desktop)
247 writeurllink: Write a Windows internet shortcut file (.url)
248 writewebloclink: Write a macOS internet shortcut file (.webloc)
249 writedesktoplink: Write a Linux internet shortcut file (.desktop)
250 writesubtitles: Write the video subtitles to a file
251 writeautomaticsub: Write the automatically generated subtitles to a file
252 allsubtitles: Deprecated - Use subtitlelangs = ['all']
253 Downloads all the subtitles of the video
254 (requires writesubtitles or writeautomaticsub)
255 listsubtitles: Lists all available subtitles for the video
256 subtitlesformat: The format code for subtitles
257 subtitleslangs: List of languages of the subtitles to download (can be regex).
258 The list may contain "all" to refer to all the available
259 subtitles. The language can be prefixed with a "-" to
260 exclude it from the requested languages. Eg: ['all', '-live_chat']
261 keepvideo: Keep the video file after post-processing
262 daterange: A DateRange object, download only if the upload_date is in the range.
263 skip_download: Skip the actual download of the video file
264 cachedir: Location of the cache files in the filesystem.
265 False to disable filesystem cache.
266 noplaylist: Download single video instead of a playlist if in doubt.
267 age_limit: An integer representing the user's age in years.
268 Unsuitable videos for the given age are skipped.
269 min_views: An integer representing the minimum view count the video
270 must have in order to not be skipped.
271 Videos without view count information are always
272 downloaded. None for no limit.
273 max_views: An integer representing the maximum view count.
274 Videos that are more popular than that are not
276 Videos without view count information are always
277 downloaded. None for no limit.
278 download_archive: File name of a file where all downloads are recorded.
279 Videos already present in the file are not downloaded
281 break_on_existing: Stop the download process after attempting to download a
282 file that is in the archive.
283 break_on_reject: Stop the download process when encountering a video that
284 has been filtered out.
285 cookiefile: File name where cookies should be read from and dumped to
286 nocheckcertificate:Do not verify SSL certificates
287 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
288 At the moment, this is only supported by YouTube.
289 proxy: URL of the proxy server to use
290 geo_verification_proxy: URL of the proxy to use for IP address verification
291 on geo-restricted sites.
292 socket_timeout: Time to wait for unresponsive hosts, in seconds
293 bidi_workaround: Work around buggy terminals without bidirectional text
294 support, using fridibi
295 debug_printtraffic:Print out sent and received HTTP traffic
296 include_ads: Download ads as well
297 default_search: Prepend this string if an input url is not valid.
298 'auto' for elaborate guessing
299 encoding: Use this encoding instead of the system-specified.
300 extract_flat: Do not resolve URLs, return the immediate result.
301 Pass in 'in_playlist' to only show this behavior for
303 postprocessors: A list of dictionaries, each with an entry
304 * key: The name of the postprocessor. See
305 yt_dlp/postprocessor/__init__.py for a list.
306 * when: When to run the postprocessor. Can be one of
307 pre_process|before_dl|post_process|after_move.
308 Assumed to be 'post_process' if not given
309 post_hooks: A list of functions that get called as the final step
310 for each video file, after all postprocessors have been
311 called. The filename will be passed as the only argument.
312 progress_hooks: A list of functions that get called on download
313 progress, with a dictionary with the entries
314 * status: One of "downloading", "error", or "finished".
315 Check this first and ignore unknown values.
317 If status is one of "downloading", or "finished", the
318 following properties may also be present:
319 * filename: The final filename (always present)
320 * tmpfilename: The filename we're currently writing to
321 * downloaded_bytes: Bytes on disk
322 * total_bytes: Size of the whole file, None if unknown
323 * total_bytes_estimate: Guess of the eventual file size,
325 * elapsed: The number of seconds since download started.
326 * eta: The estimated time in seconds, None if unknown
327 * speed: The download speed in bytes/second, None if
329 * fragment_index: The counter of the currently
330 downloaded video fragment.
331 * fragment_count: The number of fragments (= individual
332 files that will be merged)
334 Progress hooks are guaranteed to be called at least once
335 (with status "finished") if the download is successful.
336 merge_output_format: Extension to use when merging formats.
337 final_ext: Expected final extension; used to detect when the file was
338 already downloaded and converted. "merge_output_format" is
339 replaced by this extension when given
340 fixup: Automatically correct known faults of the file.
342 - "never": do nothing
343 - "warn": only emit a warning
344 - "detect_or_warn": check whether we can do anything
345 about it, warn otherwise (default)
346 source_address: Client-side IP address to bind to.
347 call_home: Boolean, true iff we are allowed to contact the
348 yt-dlp servers for debugging. (BROKEN)
349 sleep_interval_requests: Number of seconds to sleep between requests
351 sleep_interval: Number of seconds to sleep before each download when
352 used alone or a lower bound of a range for randomized
353 sleep before each download (minimum possible number
354 of seconds to sleep) when used along with
356 max_sleep_interval:Upper bound of a range for randomized sleep before each
357 download (maximum possible number of seconds to sleep).
358 Must only be used along with sleep_interval.
359 Actual sleep time will be a random float from range
360 [sleep_interval; max_sleep_interval].
361 sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
362 listformats: Print an overview of available video formats and exit.
363 list_thumbnails: Print a table of all thumbnails and exit.
364 match_filter: A function that gets called with the info_dict of
366 If it returns a message, the video is ignored.
367 If it returns None, the video is downloaded.
368 match_filter_func in utils.py is one example for this.
369 no_color: Do not emit color codes in output.
370 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
373 Two-letter ISO 3166-2 country code that will be used for
374 explicit geographic restriction bypassing via faking
375 X-Forwarded-For HTTP header
377 IP range in CIDR notation that will be used similarly to
380 The following options determine which downloader is picked:
381 external_downloader: A dictionary of protocol keys and the executable of the
382 external downloader to use for it. The allowed protocols
383 are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
384 Set the value to 'native' to use the native downloader
385 hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
386 or {'m3u8': 'ffmpeg'} instead.
387 Use the native HLS downloader instead of ffmpeg/avconv
388 if True, otherwise use ffmpeg/avconv if False, otherwise
389 use downloader suggested by extractor if None.
390 compat_opts: Compatibility options. See "Differences in default behavior".
391 Note that only format-sort, format-spec, no-live-chat,
392 no-attach-info-json, playlist-index, list-formats,
393 no-direct-merge, no-youtube-channel-redirect,
394 and no-youtube-unavailable-videos works when used via the API
396 The following parameters are not used by YoutubeDL itself, they are used by
397 the downloader (see yt_dlp/downloader/common.py):
398 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
399 noresizebuffer, retries, continuedl, noprogress, consoletitle,
400 xattr_set_filesize, external_downloader_args, hls_use_mpegts,
403 The following options are used by the post processors:
404 prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
405 otherwise prefer ffmpeg. (avconv support is deprecated)
406 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
407 to the binary or its containing directory.
408 postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
409 and a list of additional command-line arguments for the
410 postprocessor/executable. The dict can also have "PP+EXE" keys
411 which are used when the given exe is used by the given PP.
412 Use 'default' as the name for arguments to passed to all PP
414 The following options are used by the extractors:
415 extractor_retries: Number of times to retry for known errors
416 dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
417 hls_split_discontinuity: Split HLS playlists to different formats at
418 discontinuities such as ad breaks (default: False)
419 youtube_include_dash_manifest: If True (default), DASH manifests and related
420 data will be downloaded and processed by extractor.
421 You can reduce network I/O by disabling it if you don't
422 care about DASH. (only for youtube)
423 youtube_include_hls_manifest: If True (default), HLS manifests and related
424 data will be downloaded and processed by extractor.
425 You can reduce network I/O by disabling it if you don't
426 care about HLS. (only for youtube)
429 _NUMERIC_FIELDS
= set((
430 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
431 'timestamp', 'upload_year', 'upload_month', 'upload_day',
432 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
433 'average_rating', 'comment_count', 'age_limit',
434 'start_time', 'end_time',
435 'chapter_number', 'season_number', 'episode_number',
436 'track_number', 'disc_number', 'release_year',
442 _pps
= {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
443 __prepare_filename_warned
= False
444 _first_webpage_request
= True
445 _download_retcode
= None
446 _num_downloads
= None
448 _playlist_urls
= set()
451 def __init__(self
, params
=None, auto_init
=True):
452 """Create a FileDownloader object with the given options."""
456 self
._ies
_instances
= {}
457 self
._pps
= {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
458 self
.__prepare
_filename
_warned
= False
459 self
._first
_webpage
_request
= True
460 self
._post
_hooks
= []
461 self
._progress
_hooks
= []
462 self
._download
_retcode
= 0
463 self
._num
_downloads
= 0
464 self
._screen
_file
= [sys
.stdout
, sys
.stderr
][params
.get('logtostderr', False)]
465 self
._err
_file
= sys
.stderr
468 'nocheckcertificate': False,
470 self
.params
.update(params
)
471 self
.cache
= Cache(self
)
473 if sys
.version_info
< (3, 6):
475 'Support for Python version %d.%d have been deprecated and will break in future versions of yt-dlp! '
476 'Update to Python 3.6 or above' % sys
.version_info
[:2])
478 def check_deprecated(param
, option
, suggestion
):
479 if self
.params
.get(param
) is not None:
480 self
.report_warning('%s is deprecated. Use %s instead' % (option
, suggestion
))
484 if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
485 if self
.params
.get('geo_verification_proxy') is None:
486 self
.params
['geo_verification_proxy'] = self
.params
['cn_verification_proxy']
488 check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
489 check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
490 check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
492 for msg
in self
.params
.get('warnings', []):
493 self
.report_warning(msg
)
495 if self
.params
.get('final_ext'):
496 if self
.params
.get('merge_output_format'):
497 self
.report_warning('--merge-output-format will be ignored since --remux-video or --recode-video is given')
498 self
.params
['merge_output_format'] = self
.params
['final_ext']
500 if 'overwrites' in self
.params
and self
.params
['overwrites'] is None:
501 del self
.params
['overwrites']
503 if params
.get('bidi_workaround', False):
506 master
, slave
= pty
.openpty()
507 width
= compat_get_terminal_size().columns
511 width_args
= ['-w', str(width
)]
513 stdin
=subprocess
.PIPE
,
515 stderr
=self
._err
_file
)
517 self
._output
_process
= subprocess
.Popen(
518 ['bidiv'] + width_args
, **sp_kwargs
521 self
._output
_process
= subprocess
.Popen(
522 ['fribidi', '-c', 'UTF-8'] + width_args
, **sp_kwargs
)
523 self
._output
_channel
= os
.fdopen(master
, 'rb')
524 except OSError as ose
:
525 if ose
.errno
== errno
.ENOENT
:
526 self
.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
530 if (sys
.platform
!= 'win32'
531 and sys
.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
532 and not params
.get('restrictfilenames', False)):
533 # Unicode filesystem API will throw errors (#1474, #13027)
535 'Assuming --restrict-filenames since file system encoding '
536 'cannot encode all characters. '
537 'Set the LC_ALL environment variable to fix this.')
538 self
.params
['restrictfilenames'] = True
540 self
.outtmpl_dict
= self
.parse_outtmpl()
544 """Preload the archive, if any is specified"""
545 def preload_download_archive(fn
):
548 self
.write_debug('Loading archive file %r\n' % fn
)
550 with locked_file(fn
, 'r', encoding
='utf-8') as archive_file
:
551 for line
in archive_file
:
552 self
.archive
.add(line
.strip())
553 except IOError as ioe
:
554 if ioe
.errno
!= errno
.ENOENT
:
560 preload_download_archive(self
.params
.get('download_archive'))
563 self
.print_debug_header()
564 self
.add_default_info_extractors()
566 for pp_def_raw
in self
.params
.get('postprocessors', []):
567 pp_class
= get_postprocessor(pp_def_raw
['key'])
568 pp_def
= dict(pp_def_raw
)
571 when
= pp_def
['when']
574 when
= 'post_process'
575 pp
= pp_class(self
, **compat_kwargs(pp_def
))
576 self
.add_post_processor(pp
, when
=when
)
578 for ph
in self
.params
.get('post_hooks', []):
579 self
.add_post_hook(ph
)
581 for ph
in self
.params
.get('progress_hooks', []):
582 self
.add_progress_hook(ph
)
584 register_socks_protocols()
586 def warn_if_short_id(self
, argv
):
587 # short YouTube ID starting with dash?
589 i
for i
, a
in enumerate(argv
)
590 if re
.match(r
'^-[0-9A-Za-z_-]{10}$', a
)]
594 + [a
for i
, a
in enumerate(argv
) if i
not in idxs
]
595 + ['--'] + [argv
[i
] for i
in idxs
]
598 'Long argument string detected. '
599 'Use -- to separate parameters and URLs, like this:\n%s\n' %
600 args_to_str(correct_argv
))
602 def add_info_extractor(self
, ie
):
603 """Add an InfoExtractor object to the end of the list."""
605 if not isinstance(ie
, type):
606 self
._ies
_instances
[ie
.ie_key()] = ie
607 ie
.set_downloader(self
)
609 def get_info_extractor(self
, ie_key
):
611 Get an instance of an IE with name ie_key, it will try to get one from
612 the _ies list, if there's no instance it will create a new one and add
613 it to the extractor list.
615 ie
= self
._ies
_instances
.get(ie_key
)
617 ie
= get_info_extractor(ie_key
)()
618 self
.add_info_extractor(ie
)
621 def add_default_info_extractors(self
):
623 Add the InfoExtractors returned by gen_extractors to the end of the list
625 for ie
in gen_extractor_classes():
626 self
.add_info_extractor(ie
)
def add_post_processor(self, pp, when='post_process'):
    """Append *pp* to the post-processor chain that runs at stage *when*."""
    chain = self._pps[when]
    chain.append(pp)
    pp.set_downloader(self)
def add_post_hook(self, ph):
    """Register a hook to be called after each video file is finalized."""
    self._post_hooks += [ph]
def add_progress_hook(self, ph):
    """Add the progress hook (currently only for the file downloader)"""
    hooks = self._progress_hooks
    hooks.append(ph)
641 def _bidi_workaround(self
, message
):
642 if not hasattr(self
, '_output_channel'):
645 assert hasattr(self
, '_output_process')
646 assert isinstance(message
, compat_str
)
647 line_count
= message
.count('\n') + 1
648 self
._output
_process
.stdin
.write((message
+ '\n').encode('utf-8'))
649 self
._output
_process
.stdin
.flush()
650 res
= ''.join(self
._output
_channel
.readline().decode('utf-8')
651 for _
in range(line_count
))
652 return res
[:-len('\n')]
def _write_string(self, s, out=None):
    """Write *s* to *out* via write_string, honouring the configured 'encoding' param."""
    encoding = self.params.get('encoding')
    write_string(s, out=out, encoding=encoding)
657 def to_stdout(self
, message
, skip_eol
=False, quiet
=False):
658 """Print message to stdout"""
659 if self
.params
.get('logger'):
660 self
.params
['logger'].debug(message
)
661 elif not quiet
or self
.params
.get('verbose'):
663 '%s%s' % (self
._bidi
_workaround
(message
), ('' if skip_eol
else '\n')),
664 self
._err
_file
if quiet
else self
._screen
_file
)
666 def to_stderr(self
, message
):
667 """Print message to stderr"""
668 assert isinstance(message
, compat_str
)
669 if self
.params
.get('logger'):
670 self
.params
['logger'].error(message
)
672 self
._write
_string
('%s\n' % self
._bidi
_workaround
(message
), self
._err
_file
)
674 def to_console_title(self
, message
):
675 if not self
.params
.get('consoletitle', False):
677 if compat_os_name
== 'nt':
678 if ctypes
.windll
.kernel32
.GetConsoleWindow():
679 # c_wchar_p() might not be necessary if `message` is
680 # already of type unicode()
681 ctypes
.windll
.kernel32
.SetConsoleTitleW(ctypes
.c_wchar_p(message
))
682 elif 'TERM' in os
.environ
:
683 self
._write
_string
('\033]0;%s\007' % message
, self
._screen
_file
)
685 def save_console_title(self
):
686 if not self
.params
.get('consoletitle', False):
688 if self
.params
.get('simulate', False):
690 if compat_os_name
!= 'nt' and 'TERM' in os
.environ
:
691 # Save the title on stack
692 self
._write
_string
('\033[22;0t', self
._screen
_file
)
694 def restore_console_title(self
):
695 if not self
.params
.get('consoletitle', False):
697 if self
.params
.get('simulate', False):
699 if compat_os_name
!= 'nt' and 'TERM' in os
.environ
:
700 # Restore the title from stack
701 self
._write
_string
('\033[23;0t', self
._screen
_file
)
704 self
.save_console_title()
707 def __exit__(self
, *args
):
708 self
.restore_console_title()
710 if self
.params
.get('cookiefile') is not None:
711 self
.cookiejar
.save(ignore_discard
=True, ignore_expires
=True)
713 def trouble(self
, message
=None, tb
=None):
714 """Determine action to take when a download problem appears.
716 Depending on if the downloader has been configured to ignore
717 download errors or not, this method may throw an exception or
718 not when errors are found, after printing the message.
720 tb, if given, is additional traceback information.
722 if message
is not None:
723 self
.to_stderr(message
)
724 if self
.params
.get('verbose'):
726 if sys
.exc_info()[0]: # if .trouble has been called from an except block
728 if hasattr(sys
.exc_info()[1], 'exc_info') and sys
.exc_info()[1].exc_info
[0]:
729 tb
+= ''.join(traceback
.format_exception(*sys
.exc_info()[1].exc_info
))
730 tb
+= encode_compat_str(traceback
.format_exc())
732 tb_data
= traceback
.format_list(traceback
.extract_stack())
733 tb
= ''.join(tb_data
)
736 if not self
.params
.get('ignoreerrors', False):
737 if sys
.exc_info()[0] and hasattr(sys
.exc_info()[1], 'exc_info') and sys
.exc_info()[1].exc_info
[0]:
738 exc_info
= sys
.exc_info()[1].exc_info
740 exc_info
= sys
.exc_info()
741 raise DownloadError(message
, exc_info
)
742 self
._download
_retcode
= 1
744 def to_screen(self
, message
, skip_eol
=False):
745 """Print message to stdout if not in quiet mode"""
747 message
, skip_eol
, quiet
=self
.params
.get('quiet', False))
749 def report_warning(self
, message
):
751 Print the message to stderr, it will be prefixed with 'WARNING:'
752 If stderr is a tty file the 'WARNING:' will be colored
754 if self
.params
.get('logger') is not None:
755 self
.params
['logger'].warning(message
)
757 if self
.params
.get('no_warnings'):
759 if not self
.params
.get('no_color') and self
._err
_file
.isatty() and compat_os_name
!= 'nt':
760 _msg_header
= '\033[0;33mWARNING:\033[0m'
762 _msg_header
= 'WARNING:'
763 warning_message
= '%s %s' % (_msg_header
, message
)
764 self
.to_stderr(warning_message
)
766 def report_error(self
, message
, tb
=None):
768 Do the same as trouble, but prefixes the message with 'ERROR:', colored
769 in red if stderr is a tty file.
771 if not self
.params
.get('no_color') and self
._err
_file
.isatty() and compat_os_name
!= 'nt':
772 _msg_header
= '\033[0;31mERROR:\033[0m'
774 _msg_header
= 'ERROR:'
775 error_message
= '%s %s' % (_msg_header
, message
)
776 self
.trouble(error_message
, tb
)
778 def write_debug(self
, message
):
779 '''Log debug message or Print message to stderr'''
780 if not self
.params
.get('verbose', False):
782 message
= '[debug] %s' % message
783 if self
.params
.get('logger'):
784 self
.params
['logger'].debug(message
)
786 self
._write
_string
('%s\n' % message
)
788 def report_file_already_downloaded(self
, file_name
):
789 """Report file has already been fully downloaded."""
791 self
.to_screen('[download] %s has already been downloaded' % file_name
)
792 except UnicodeEncodeError:
793 self
.to_screen('[download] The file has already been downloaded')
795 def report_file_delete(self
, file_name
):
796 """Report that existing file will be deleted."""
798 self
.to_screen('Deleting existing file %s' % file_name
)
799 except UnicodeEncodeError:
800 self
.to_screen('Deleting existing file')
802 def parse_outtmpl(self
):
803 outtmpl_dict
= self
.params
.get('outtmpl', {})
804 if not isinstance(outtmpl_dict
, dict):
805 outtmpl_dict
= {'default': outtmpl_dict}
806 outtmpl_dict
.update({
807 k
: v
for k
, v
in DEFAULT_OUTTMPL
.items()
808 if not outtmpl_dict
.get(k
)})
809 for key
, val
in outtmpl_dict
.items():
810 if isinstance(val
, bytes):
812 'Parameter outtmpl is bytes, but should be a unicode string. '
813 'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
817 def validate_outtmpl(tmpl
):
818 ''' @return None or Exception object '''
821 STR_FORMAT_RE
.format(''),
822 lambda mobj
: ('%' if not mobj
.group('has_key') else '') + mobj
.group(0),
824 ) % collections
.defaultdict(int)
826 except ValueError as err
:
829 def prepare_outtmpl(self
, outtmpl
, info_dict
, sanitize
=None):
830 """ Make the template and info_dict suitable for substitution (outtmpl % info_dict)"""
831 info_dict
= dict(info_dict
)
832 na
= self
.params
.get('outtmpl_na_placeholder', 'NA')
834 info_dict
['duration_string'] = ( # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
835 formatSeconds(info_dict
['duration'], '-' if sanitize
else ':')
836 if info_dict
.get('duration', None) is not None
838 info_dict
['epoch'] = int(time
.time())
839 info_dict
['autonumber'] = self
.params
.get('autonumber_start', 1) - 1 + self
._num
_downloads
840 if info_dict
.get('resolution') is None:
841 info_dict
['resolution'] = self
.format_resolution(info_dict
, default
=None)
843 # For fields playlist_index and autonumber convert all occurrences
844 # of %(field)s to %(field)0Nd for backward compatibility
845 field_size_compat_map
= {
846 'playlist_index': len(str(info_dict
.get('_last_playlist_index') or '')),
847 'autonumber': self
.params
.get('autonumber_size') or 5,
851 EXTERNAL_FORMAT_RE
= re
.compile(STR_FORMAT_RE
.format('[^)]*'))
856 # Field is of the form key1.key2...
857 # where keys (except first) can be string, int or slice
858 FIELD_RE
= r
'\w+(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num
=r
'(?:-?\d+)')
859 MATH_FIELD_RE
= r
'''{field}|{num}'''.format(field
=FIELD_RE
, num
=r
'-?\d+(?:.\d+)?')
860 MATH_OPERATORS_RE
= r
'(?:%s)' % '|'.join(map(re
.escape
, MATH_FUNCTIONS
.keys()))
861 INTERNAL_FORMAT_RE
= re
.compile(r
'''(?x)
864 (?P<maths>(?:{math_op}{math_field})*)
865 (?:>(?P<strf_format>.+?))?
866 (?:\|(?P<default>.*?))?
867 $'''.format(field
=FIELD_RE
, math_op
=MATH_OPERATORS_RE
, math_field
=MATH_FIELD_RE
))
869 get_key
= lambda k
: traverse_obj(
870 info_dict
, k
.split('.'), is_user_input
=True, traverse_string
=True)
872 def get_value(mdict
):
874 value
= get_key(mdict
['fields'])
877 value
= float_or_none(value
)
878 if value
is not None:
881 offset_key
= mdict
['maths']
883 value
= float_or_none(value
)
887 MATH_FIELD_RE
if operator
else MATH_OPERATORS_RE
,
889 offset_key
= offset_key
[len(item
):]
891 operator
= MATH_FUNCTIONS
[item
]
893 item
, multiplier
= (item
[1:], -1) if item
[0] == '-' else (item
, 1)
894 offset
= float_or_none(item
)
896 offset
= float_or_none(get_key(item
))
898 value
= operator(value
, multiplier
* offset
)
899 except (TypeError, ZeroDivisionError):
902 # Datetime formatting
903 if mdict
['strf_format']:
904 value
= strftime_or_none(value
, mdict
['strf_format'])
908 def create_key(outer_mobj
):
909 if not outer_mobj
.group('has_key'):
910 return '%{}'.format(outer_mobj
.group(0))
912 key
= outer_mobj
.group('key')
913 fmt
= outer_mobj
.group('format')
914 mobj
= re
.match(INTERNAL_FORMAT_RE
, key
)
916 value
, default
= None, na
918 mobj
= mobj
.groupdict()
919 default
= mobj
['default'] if mobj
['default'] is not None else na
920 value
= get_value(mobj
)
922 if fmt
== 's' and value
is not None and key
in field_size_compat_map
.keys():
923 fmt
= '0{:d}d'.format(field_size_compat_map
[key
])
925 value
= default
if value
is None else value
929 value
= compat_str(value
)
931 value
, fmt
= default
, 's'
934 elif fmt
[-1] not in 'rs': # numeric
935 value
= float_or_none(value
)
937 value
, fmt
= default
, 's'
940 # If value is an object, sanitize might convert it to a string
941 # So we convert it to repr first
942 value
, fmt
= repr(value
), '%ss' % fmt
[:-1]
944 value
= sanitize(key
, value
)
945 TMPL_DICT
[key
] = value
946 return '%({key}){fmt}'.format(key
=key
, fmt
=fmt
)
948 return EXTERNAL_FORMAT_RE
.sub(create_key
, outtmpl
), TMPL_DICT
950 def _prepare_filename(self
, info_dict
, tmpl_type
='default'):
952 sanitize
= lambda k
, v
: sanitize_filename(
954 restricted
=self
.params
.get('restrictfilenames'),
955 is_id
=(k
== 'id' or k
.endswith('_id')))
956 outtmpl
= self
.outtmpl_dict
.get(tmpl_type
, self
.outtmpl_dict
['default'])
957 outtmpl
, template_dict
= self
.prepare_outtmpl(outtmpl
, info_dict
, sanitize
)
959 # expand_path translates '%%' into '%' and '$$' into '$'
960 # correspondingly that is not what we want since we need to keep
961 # '%%' intact for template dict substitution step. Working around
962 # with boundary-alike separator hack.
963 sep
= ''.join([random
.choice(ascii_letters
) for _
in range(32)])
964 outtmpl
= outtmpl
.replace('%%', '%{0}%'.format(sep
)).replace('$$', '${0}$'.format(sep
))
966 # outtmpl should be expand_path'ed before template dict substitution
967 # because meta fields may contain env variables we don't want to
968 # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
969 # title "Hello $PATH", we don't want `$PATH` to be expanded.
970 filename
= expand_path(outtmpl
).replace(sep
, '') % template_dict
972 force_ext
= OUTTMPL_TYPES
.get(tmpl_type
)
973 if force_ext
is not None:
974 filename
= replace_extension(filename
, force_ext
, info_dict
.get('ext'))
976 # https://github.com/blackjack4494/youtube-dlc/issues/85
977 trim_file_name
= self
.params
.get('trim_file_name', False)
979 fn_groups
= filename
.rsplit('.')
982 if len(fn_groups
) > 2:
983 sub_ext
= fn_groups
[-2]
984 filename
= '.'.join(filter(None, [fn_groups
[0][:trim_file_name
], sub_ext
, ext
]))
987 except ValueError as err
:
988 self
.report_error('Error in output template: ' + str(err
) + ' (encoding: ' + repr(preferredencoding()) + ')')
991 def prepare_filename(self
, info_dict
, dir_type
='', warn
=False):
992 """Generate the output filename."""
993 paths
= self
.params
.get('paths', {})
994 assert isinstance(paths
, dict)
995 filename
= self
._prepare
_filename
(info_dict
, dir_type
or 'default')
997 if warn
and not self
.__prepare
_filename
_warned
:
1000 elif filename
== '-':
1001 self
.report_warning('--paths is ignored when an outputting to stdout')
1002 elif os
.path
.isabs(filename
):
1003 self
.report_warning('--paths is ignored since an absolute path is given in output template')
1004 self
.__prepare
_filename
_warned
= True
1005 if filename
== '-' or not filename
:
1008 homepath
= expand_path(paths
.get('home', '').strip())
1009 assert isinstance(homepath
, compat_str
)
1010 subdir
= expand_path(paths
.get(dir_type
, '').strip()) if dir_type
else ''
1011 assert isinstance(subdir
, compat_str
)
1012 path
= os
.path
.join(homepath
, subdir
, filename
)
1014 # Temporary fix for #4787
1015 # 'Treat' all problem characters by passing filename through preferredencoding
1016 # to workaround encoding issues with subprocess on python2 @ Windows
1017 if sys
.version_info
< (3, 0) and sys
.platform
== 'win32':
1018 path
= encodeFilename(path
, True).decode(preferredencoding())
1019 return sanitize_path(path
, force
=self
.params
.get('windowsfilenames'))
1021 def _match_entry(self
, info_dict
, incomplete
=False, silent
=False):
1022 """ Returns None if the file should be downloaded """
1024 video_title
= info_dict
.get('title', info_dict
.get('id', 'video'))
1027 if 'title' in info_dict
:
1028 # This can happen when we're just evaluating the playlist
1029 title
= info_dict
['title']
1030 matchtitle
= self
.params
.get('matchtitle', False)
1032 if not re
.search(matchtitle
, title
, re
.IGNORECASE
):
1033 return '"' + title
+ '" title did not match pattern "' + matchtitle
+ '"'
1034 rejecttitle
= self
.params
.get('rejecttitle', False)
1036 if re
.search(rejecttitle
, title
, re
.IGNORECASE
):
1037 return '"' + title
+ '" title matched reject pattern "' + rejecttitle
+ '"'
1038 date
= info_dict
.get('upload_date')
1039 if date
is not None:
1040 dateRange
= self
.params
.get('daterange', DateRange())
1041 if date
not in dateRange
:
1042 return '%s upload date is not in range %s' % (date_from_str(date
).isoformat(), dateRange
)
1043 view_count
= info_dict
.get('view_count')
1044 if view_count
is not None:
1045 min_views
= self
.params
.get('min_views')
1046 if min_views
is not None and view_count
< min_views
:
1047 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title
, view_count
, min_views
)
1048 max_views
= self
.params
.get('max_views')
1049 if max_views
is not None and view_count
> max_views
:
1050 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title
, view_count
, max_views
)
1051 if age_restricted(info_dict
.get('age_limit'), self
.params
.get('age_limit')):
1052 return 'Skipping "%s" because it is age restricted' % video_title
1055 match_filter
= self
.params
.get('match_filter')
1056 if match_filter
is not None:
1057 ret
= match_filter(info_dict
)
1062 if self
.in_download_archive(info_dict
):
1063 reason
= '%s has already been recorded in the archive' % video_title
1064 break_opt
, break_err
= 'break_on_existing', ExistingVideoReached
1066 reason
= check_filter()
1067 break_opt
, break_err
= 'break_on_reject', RejectedVideoReached
1068 if reason
is not None:
1070 self
.to_screen('[download] ' + reason
)
1071 if self
.params
.get(break_opt
, False):
def add_extra_info(info_dict, extra_info):
    """Copy every key of *extra_info* into *info_dict* unless it is already set.

    Existing entries (even ones whose value is None) are never overwritten,
    so extractor-provided data always wins over the injected extras.
    """
    for extra_key, extra_value in extra_info.items():
        # Equivalent to dict.setdefault: only fill in missing keys.
        if extra_key not in info_dict:
            info_dict[extra_key] = extra_value
1081 def extract_info(self
, url
, download
=True, ie_key
=None, extra_info
={},
1082 process
=True, force_generic_extractor
=False):
1084 Return a list with a dictionary for each video extracted.
1087 url -- URL to extract
1090 download -- whether to download videos during extraction
1091 ie_key -- extractor key hint
1092 extra_info -- dictionary containing the extra values to add to each result
1093 process -- whether to resolve all unresolved references (URLs, playlist items),
1094 must be True for download to work.
1095 force_generic_extractor -- force using the generic extractor
1098 if not ie_key
and force_generic_extractor
:
1102 ies
= [self
.get_info_extractor(ie_key
)]
1107 if not ie
.suitable(url
):
1110 ie_key
= ie
.ie_key()
1111 ie
= self
.get_info_extractor(ie_key
)
1112 if not ie
.working():
1113 self
.report_warning('The program functionality for this site has been marked as broken, '
1114 'and will probably not work.')
1117 temp_id
= str_or_none(
1118 ie
.extract_id(url
) if callable(getattr(ie
, 'extract_id', None))
1119 else ie
._match
_id
(url
))
1120 except (AssertionError, IndexError, AttributeError):
1122 if temp_id
is not None and self
.in_download_archive({'id': temp_id, 'ie_key': ie_key}
):
1123 self
.to_screen("[%s] %s: has already been recorded in archive" % (
1126 return self
.__extract
_info
(url
, ie
, download
, extra_info
, process
)
1128 self
.report_error('no suitable InfoExtractor for URL %s' % url
)
1130 def __handle_extraction_exceptions(func
):
1131 def wrapper(self
, *args
, **kwargs
):
1133 return func(self
, *args
, **kwargs
)
1134 except GeoRestrictedError
as e
:
1137 msg
+= '\nThis video is available in %s.' % ', '.join(
1138 map(ISO3166Utils
.short2full
, e
.countries
))
1139 msg
+= '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1140 self
.report_error(msg
)
1141 except ExtractorError
as e
: # An error we somewhat expected
1142 self
.report_error(compat_str(e
), e
.format_traceback())
1143 except (MaxDownloadsReached
, ExistingVideoReached
, RejectedVideoReached
):
1145 except Exception as e
:
1146 if self
.params
.get('ignoreerrors', False):
1147 self
.report_error(error_to_compat_str(e
), tb
=encode_compat_str(traceback
.format_exc()))
1152 @__handle_extraction_exceptions
1153 def __extract_info(self
, url
, ie
, download
, extra_info
, process
):
1154 ie_result
= ie
.extract(url
)
1155 if ie_result
is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
1157 if isinstance(ie_result
, list):
1158 # Backwards compatibility: old IE result format
1160 '_type': 'compat_list',
1161 'entries': ie_result
,
1163 self
.add_default_extra_info(ie_result
, ie
, url
)
1165 return self
.process_ie_result(ie_result
, download
, extra_info
)
1169 def add_default_extra_info(self
, ie_result
, ie
, url
):
1170 self
.add_extra_info(ie_result
, {
1171 'extractor': ie
.IE_NAME
,
1173 'original_url': url
,
1174 'webpage_url_basename': url_basename(url
),
1175 'extractor_key': ie
.ie_key(),
1178 def process_ie_result(self
, ie_result
, download
=True, extra_info
={}):
1180 Take the result of the ie(may be modified) and resolve all unresolved
1181 references (URLs, playlist items).
1183 It will also download the videos if 'download'.
1184 Returns the resolved ie_result.
1186 result_type
= ie_result
.get('_type', 'video')
1188 if result_type
in ('url', 'url_transparent'):
1189 ie_result
['url'] = sanitize_url(ie_result
['url'])
1190 extract_flat
= self
.params
.get('extract_flat', False)
1191 if ((extract_flat
== 'in_playlist' and 'playlist' in extra_info
)
1192 or extract_flat
is True):
1193 info_copy
= ie_result
.copy()
1194 self
.add_extra_info(info_copy
, extra_info
)
1195 self
.add_default_extra_info(
1196 info_copy
, self
.get_info_extractor(ie_result
.get('ie_key')), ie_result
['url'])
1197 self
.__forced
_printings
(info_copy
, self
.prepare_filename(info_copy
), incomplete
=True)
1200 if result_type
== 'video':
1201 self
.add_extra_info(ie_result
, extra_info
)
1202 ie_result
= self
.process_video_result(ie_result
, download
=download
)
1203 additional_urls
= (ie_result
or {}).get('additional_urls')
1205 # TODO: Improve MetadataFromFieldPP to allow setting a list
1206 if isinstance(additional_urls
, compat_str
):
1207 additional_urls
= [additional_urls
]
1209 '[info] %s: %d additional URL(s) requested' % (ie_result
['id'], len(additional_urls
)))
1210 self
.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls
))
1211 ie_result
['additional_entries'] = [
1213 url
, download
, extra_info
,
1214 force_generic_extractor
=self
.params
.get('force_generic_extractor'))
1215 for url
in additional_urls
1218 elif result_type
== 'url':
1219 # We have to add extra_info to the results because it may be
1220 # contained in a playlist
1221 return self
.extract_info(
1222 ie_result
['url'], download
,
1223 ie_key
=ie_result
.get('ie_key'),
1224 extra_info
=extra_info
)
1225 elif result_type
== 'url_transparent':
1226 # Use the information from the embedding page
1227 info
= self
.extract_info(
1228 ie_result
['url'], ie_key
=ie_result
.get('ie_key'),
1229 extra_info
=extra_info
, download
=False, process
=False)
1231 # extract_info may return None when ignoreerrors is enabled and
1232 # extraction failed with an error, don't crash and return early
1237 force_properties
= dict(
1238 (k
, v
) for k
, v
in ie_result
.items() if v
is not None)
1239 for f
in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1240 if f
in force_properties
:
1241 del force_properties
[f
]
1242 new_result
= info
.copy()
1243 new_result
.update(force_properties
)
1245 # Extracted info may not be a video result (i.e.
1246 # info.get('_type', 'video') != video) but rather an url or
1247 # url_transparent. In such cases outer metadata (from ie_result)
1248 # should be propagated to inner one (info). For this to happen
1249 # _type of info should be overridden with url_transparent. This
1250 # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1251 if new_result
.get('_type') == 'url':
1252 new_result
['_type'] = 'url_transparent'
1254 return self
.process_ie_result(
1255 new_result
, download
=download
, extra_info
=extra_info
)
1256 elif result_type
in ('playlist', 'multi_video'):
1257 # Protect from infinite recursion due to recursively nested playlists
1258 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1259 webpage_url
= ie_result
['webpage_url']
1260 if webpage_url
in self
._playlist
_urls
:
1262 '[download] Skipping already downloaded playlist: %s'
1263 % ie_result
.get('title') or ie_result
.get('id'))
1266 self
._playlist
_level
+= 1
1267 self
._playlist
_urls
.add(webpage_url
)
1268 self
._sanitize
_thumbnails
(ie_result
)
1270 return self
.__process
_playlist
(ie_result
, download
)
1272 self
._playlist
_level
-= 1
1273 if not self
._playlist
_level
:
1274 self
._playlist
_urls
.clear()
1275 elif result_type
== 'compat_list':
1276 self
.report_warning(
1277 'Extractor %s returned a compat_list result. '
1278 'It needs to be updated.' % ie_result
.get('extractor'))
1281 self
.add_extra_info(
1284 'extractor': ie_result
['extractor'],
1285 'webpage_url': ie_result
['webpage_url'],
1286 'webpage_url_basename': url_basename(ie_result
['webpage_url']),
1287 'extractor_key': ie_result
['extractor_key'],
1291 ie_result
['entries'] = [
1292 self
.process_ie_result(_fixup(r
), download
, extra_info
)
1293 for r
in ie_result
['entries']
1297 raise Exception('Invalid result type: %s' % result_type
)
def _ensure_dir_exists(self, path):
    """Create the directory needed for *path* (via make_dir), reporting any
    failure through self.report_error; returns make_dir's result."""
    error_callback = self.report_error
    return make_dir(path, error_callback)
1302 def __process_playlist(self
, ie_result
, download
):
1303 # We process each entry in the playlist
1304 playlist
= ie_result
.get('title') or ie_result
.get('id')
1305 self
.to_screen('[download] Downloading playlist: %s' % playlist
)
1307 if 'entries' not in ie_result
:
1308 raise EntryNotInPlaylist()
1309 incomplete_entries
= bool(ie_result
.get('requested_entries'))
1310 if incomplete_entries
:
1311 def fill_missing_entries(entries
, indexes
):
1312 ret
= [None] * max(*indexes
)
1313 for i
, entry
in zip(indexes
, entries
):
1316 ie_result
['entries'] = fill_missing_entries(ie_result
['entries'], ie_result
['requested_entries'])
1318 playlist_results
= []
1320 playliststart
= self
.params
.get('playliststart', 1)
1321 playlistend
= self
.params
.get('playlistend')
1322 # For backwards compatibility, interpret -1 as whole list
1323 if playlistend
== -1:
1326 playlistitems_str
= self
.params
.get('playlist_items')
1327 playlistitems
= None
1328 if playlistitems_str
is not None:
1329 def iter_playlistitems(format
):
1330 for string_segment
in format
.split(','):
1331 if '-' in string_segment
:
1332 start
, end
= string_segment
.split('-')
1333 for item
in range(int(start
), int(end
) + 1):
1336 yield int(string_segment
)
1337 playlistitems
= orderedSet(iter_playlistitems(playlistitems_str
))
1339 ie_entries
= ie_result
['entries']
1341 'Downloading %d videos' if not isinstance(ie_entries
, list)
1342 else 'Collected %d videos; downloading %%d of them' % len(ie_entries
))
1343 if not isinstance(ie_entries
, (list, PagedList
)):
1344 ie_entries
= LazyList(ie_entries
)
1347 for i
in playlistitems
or itertools
.count(playliststart
):
1348 if playlistitems
is None and playlistend
is not None and playlistend
< i
:
1352 entry
= ie_entries
[i
- 1]
1354 raise EntryNotInPlaylist()
1355 except (IndexError, EntryNotInPlaylist
):
1356 if incomplete_entries
:
1357 raise EntryNotInPlaylist()
1358 elif not playlistitems
:
1360 entries
.append(entry
)
1362 if entry
is not None:
1363 self
._match
_entry
(entry
, incomplete
=True, silent
=True)
1364 except (ExistingVideoReached
, RejectedVideoReached
):
1366 ie_result
['entries'] = entries
1368 # Save playlist_index before re-ordering
1370 ((playlistitems
[i
- 1] if playlistitems
else i
), entry
)
1371 for i
, entry
in enumerate(entries
, 1)
1372 if entry
is not None]
1373 n_entries
= len(entries
)
1375 if not playlistitems
and (playliststart
or playlistend
):
1376 playlistitems
= list(range(playliststart
, playliststart
+ n_entries
))
1377 ie_result
['requested_entries'] = playlistitems
1379 if self
.params
.get('allow_playlist_files', True):
1381 'playlist': playlist
,
1382 'playlist_id': ie_result
.get('id'),
1383 'playlist_title': ie_result
.get('title'),
1384 'playlist_uploader': ie_result
.get('uploader'),
1385 'playlist_uploader_id': ie_result
.get('uploader_id'),
1386 'playlist_index': 0,
1388 ie_copy
.update(dict(ie_result
))
1390 if self
.params
.get('writeinfojson', False):
1391 infofn
= self
.prepare_filename(ie_copy
, 'pl_infojson')
1392 if not self
._ensure
_dir
_exists
(encodeFilename(infofn
)):
1394 if not self
.params
.get('overwrites', True) and os
.path
.exists(encodeFilename(infofn
)):
1395 self
.to_screen('[info] Playlist metadata is already present')
1397 self
.to_screen('[info] Writing playlist metadata as JSON to: ' + infofn
)
1399 write_json_file(self
.filter_requested_info(ie_result
, self
.params
.get('clean_infojson', True)), infofn
)
1400 except (OSError, IOError):
1401 self
.report_error('Cannot write playlist metadata to JSON file ' + infofn
)
1403 # TODO: This should be passed to ThumbnailsConvertor if necessary
1404 self
._write
_thumbnails
(ie_copy
, self
.prepare_filename(ie_copy
, 'pl_thumbnail'))
1406 if self
.params
.get('writedescription', False):
1407 descfn
= self
.prepare_filename(ie_copy
, 'pl_description')
1408 if not self
._ensure
_dir
_exists
(encodeFilename(descfn
)):
1410 if not self
.params
.get('overwrites', True) and os
.path
.exists(encodeFilename(descfn
)):
1411 self
.to_screen('[info] Playlist description is already present')
1412 elif ie_result
.get('description') is None:
1413 self
.report_warning('There\'s no playlist description to write.')
1416 self
.to_screen('[info] Writing playlist description to: ' + descfn
)
1417 with io
.open(encodeFilename(descfn
), 'w', encoding
='utf-8') as descfile
:
1418 descfile
.write(ie_result
['description'])
1419 except (OSError, IOError):
1420 self
.report_error('Cannot write playlist description file ' + descfn
)
1423 if self
.params
.get('playlistreverse', False):
1424 entries
= entries
[::-1]
1425 if self
.params
.get('playlistrandom', False):
1426 random
.shuffle(entries
)
1428 x_forwarded_for
= ie_result
.get('__x_forwarded_for_ip')
1430 self
.to_screen('[%s] playlist %s: %s' % (ie_result
['extractor'], playlist
, msg
% n_entries
))
1432 max_failures
= self
.params
.get('skip_playlist_after_errors') or float('inf')
1433 for i
, entry_tuple
in enumerate(entries
, 1):
1434 playlist_index
, entry
= entry_tuple
1435 if 'playlist_index' in self
.params
.get('compat_options', []):
1436 playlist_index
= playlistitems
[i
- 1] if playlistitems
else i
1437 self
.to_screen('[download] Downloading video %s of %s' % (i
, n_entries
))
1438 # This __x_forwarded_for_ip thing is a bit ugly but requires
1441 entry
['__x_forwarded_for_ip'] = x_forwarded_for
1443 'n_entries': n_entries
,
1444 '_last_playlist_index': max(playlistitems
) if playlistitems
else (playlistend
or n_entries
),
1445 'playlist_index': playlist_index
,
1446 'playlist_autonumber': i
,
1447 'playlist': playlist
,
1448 'playlist_id': ie_result
.get('id'),
1449 'playlist_title': ie_result
.get('title'),
1450 'playlist_uploader': ie_result
.get('uploader'),
1451 'playlist_uploader_id': ie_result
.get('uploader_id'),
1452 'extractor': ie_result
['extractor'],
1453 'webpage_url': ie_result
['webpage_url'],
1454 'webpage_url_basename': url_basename(ie_result
['webpage_url']),
1455 'extractor_key': ie_result
['extractor_key'],
1458 if self
._match
_entry
(entry
, incomplete
=True) is not None:
1461 entry_result
= self
.__process
_iterable
_entry
(entry
, download
, extra
)
1462 if not entry_result
:
1464 if failures
>= max_failures
:
1466 'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist
, failures
))
1468 # TODO: skip failed (empty) entries?
1469 playlist_results
.append(entry_result
)
1470 ie_result
['entries'] = playlist_results
1471 self
.to_screen('[download] Finished downloading playlist: %s' % playlist
)
@__handle_extraction_exceptions
def __process_iterable_entry(self, entry, download, extra_info):
    """Process a single playlist entry.

    Thin wrapper around process_ie_result; the decorator takes care of
    reporting/propagating extraction errors consistently.
    """
    result = self.process_ie_result(entry, download=download, extra_info=extra_info)
    return result
1479 def _build_format_filter(self
, filter_spec
):
1480 " Returns a function to filter the formats according to the filter_spec "
1490 operator_rex
= re
.compile(r
'''(?x)\s*
1491 (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
1492 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1493 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
1495 ''' % '|'.join(map(re
.escape
, OPERATORS
.keys())))
1496 m
= operator_rex
.search(filter_spec
)
1499 comparison_value
= int(m
.group('value'))
1501 comparison_value
= parse_filesize(m
.group('value'))
1502 if comparison_value
is None:
1503 comparison_value
= parse_filesize(m
.group('value') + 'B')
1504 if comparison_value
is None:
1506 'Invalid value %r in format specification %r' % (
1507 m
.group('value'), filter_spec
))
1508 op
= OPERATORS
[m
.group('op')]
1513 '^=': lambda attr
, value
: attr
.startswith(value
),
1514 '$=': lambda attr
, value
: attr
.endswith(value
),
1515 '*=': lambda attr
, value
: value
in attr
,
1517 str_operator_rex
= re
.compile(r
'''(?x)
1518 \s*(?P<key>[a-zA-Z0-9._-]+)
1519 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
1520 \s*(?P<value>[a-zA-Z0-9._-]+)
1522 ''' % '|'.join(map(re
.escape
, STR_OPERATORS
.keys())))
1523 m
= str_operator_rex
.search(filter_spec
)
1525 comparison_value
= m
.group('value')
1526 str_op
= STR_OPERATORS
[m
.group('op')]
1527 if m
.group('negation'):
1528 op
= lambda attr
, value
: not str_op(attr
, value
)
1533 raise ValueError('Invalid filter specification %r' % filter_spec
)
1536 actual_value
= f
.get(m
.group('key'))
1537 if actual_value
is None:
1538 return m
.group('none_inclusive')
1539 return op(actual_value
, comparison_value
)
1542 def _default_format_spec(self
, info_dict
, download
=True):
1545 merger
= FFmpegMergerPP(self
)
1546 return merger
.available
and merger
.can_merge()
1549 not self
.params
.get('simulate', False)
1553 or info_dict
.get('is_live', False)
1554 or self
.outtmpl_dict
['default'] == '-'))
1557 or self
.params
.get('allow_multiple_audio_streams', False)
1558 or 'format-spec' in self
.params
.get('compat_opts', []))
1561 'best/bestvideo+bestaudio' if prefer_best
1562 else 'bestvideo*+bestaudio/best' if not compat
1563 else 'bestvideo+bestaudio/best')
1565 def build_format_selector(self
, format_spec
):
1566 def syntax_error(note
, start
):
1568 'Invalid format specification: '
1569 '{0}\n\t{1}\n\t{2}^'.format(note
, format_spec
, ' ' * start
[1]))
1570 return SyntaxError(message
)
1572 PICKFIRST
= 'PICKFIRST'
1576 FormatSelector
= collections
.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1578 allow_multiple_streams
= {'audio': self
.params
.get('allow_multiple_audio_streams', False),
1579 'video': self
.params
.get('allow_multiple_video_streams', False)}
1581 check_formats
= self
.params
.get('check_formats')
1583 def _parse_filter(tokens
):
1585 for type, string
, start
, _
, _
in tokens
:
1586 if type == tokenize
.OP
and string
== ']':
1587 return ''.join(filter_parts
)
1589 filter_parts
.append(string
)
1591 def _remove_unused_ops(tokens
):
1592 # Remove operators that we don't use and join them with the surrounding strings
1593 # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1594 ALLOWED_OPS
= ('/', '+', ',', '(', ')')
1595 last_string
, last_start
, last_end
, last_line
= None, None, None, None
1596 for type, string
, start
, end
, line
in tokens
:
1597 if type == tokenize
.OP
and string
== '[':
1599 yield tokenize
.NAME
, last_string
, last_start
, last_end
, last_line
1601 yield type, string
, start
, end
, line
1602 # everything inside brackets will be handled by _parse_filter
1603 for type, string
, start
, end
, line
in tokens
:
1604 yield type, string
, start
, end
, line
1605 if type == tokenize
.OP
and string
== ']':
1607 elif type == tokenize
.OP
and string
in ALLOWED_OPS
:
1609 yield tokenize
.NAME
, last_string
, last_start
, last_end
, last_line
1611 yield type, string
, start
, end
, line
1612 elif type in [tokenize
.NAME
, tokenize
.NUMBER
, tokenize
.OP
]:
1614 last_string
= string
1618 last_string
+= string
1620 yield tokenize
.NAME
, last_string
, last_start
, last_end
, last_line
1622 def _parse_format_selection(tokens
, inside_merge
=False, inside_choice
=False, inside_group
=False):
1624 current_selector
= None
1625 for type, string
, start
, _
, _
in tokens
:
1626 # ENCODING is only defined in python 3.x
1627 if type == getattr(tokenize
, 'ENCODING', None):
1629 elif type in [tokenize
.NAME
, tokenize
.NUMBER
]:
1630 current_selector
= FormatSelector(SINGLE
, string
, [])
1631 elif type == tokenize
.OP
:
1633 if not inside_group
:
1634 # ')' will be handled by the parentheses group
1635 tokens
.restore_last_token()
1637 elif inside_merge
and string
in ['/', ',']:
1638 tokens
.restore_last_token()
1640 elif inside_choice
and string
== ',':
1641 tokens
.restore_last_token()
1644 if not current_selector
:
1645 raise syntax_error('"," must follow a format selector', start
)
1646 selectors
.append(current_selector
)
1647 current_selector
= None
1649 if not current_selector
:
1650 raise syntax_error('"/" must follow a format selector', start
)
1651 first_choice
= current_selector
1652 second_choice
= _parse_format_selection(tokens
, inside_choice
=True)
1653 current_selector
= FormatSelector(PICKFIRST
, (first_choice
, second_choice
), [])
1655 if not current_selector
:
1656 current_selector
= FormatSelector(SINGLE
, 'best', [])
1657 format_filter
= _parse_filter(tokens
)
1658 current_selector
.filters
.append(format_filter
)
1660 if current_selector
:
1661 raise syntax_error('Unexpected "("', start
)
1662 group
= _parse_format_selection(tokens
, inside_group
=True)
1663 current_selector
= FormatSelector(GROUP
, group
, [])
1665 if not current_selector
:
1666 raise syntax_error('Unexpected "+"', start
)
1667 selector_1
= current_selector
1668 selector_2
= _parse_format_selection(tokens
, inside_merge
=True)
1670 raise syntax_error('Expected a selector', start
)
1671 current_selector
= FormatSelector(MERGE
, (selector_1
, selector_2
), [])
1673 raise syntax_error('Operator not recognized: "{0}"'.format(string
), start
)
1674 elif type == tokenize
.ENDMARKER
:
1676 if current_selector
:
1677 selectors
.append(current_selector
)
1680 def _merge(formats_pair
):
1681 format_1
, format_2
= formats_pair
1684 formats_info
.extend(format_1
.get('requested_formats', (format_1
,)))
1685 formats_info
.extend(format_2
.get('requested_formats', (format_2
,)))
1687 if not allow_multiple_streams
['video'] or not allow_multiple_streams
['audio']:
1688 get_no_more
= {"video": False, "audio": False}
1689 for (i
, fmt_info
) in enumerate(formats_info
):
1690 for aud_vid
in ["audio", "video"]:
1691 if not allow_multiple_streams
[aud_vid
] and fmt_info
.get(aud_vid
[0] + 'codec') != 'none':
1692 if get_no_more
[aud_vid
]:
1694 get_no_more
[aud_vid
] = True
1696 if len(formats_info
) == 1:
1697 return formats_info
[0]
1699 video_fmts
= [fmt_info
for fmt_info
in formats_info
if fmt_info
.get('vcodec') != 'none']
1700 audio_fmts
= [fmt_info
for fmt_info
in formats_info
if fmt_info
.get('acodec') != 'none']
1702 the_only_video
= video_fmts
[0] if len(video_fmts
) == 1 else None
1703 the_only_audio
= audio_fmts
[0] if len(audio_fmts
) == 1 else None
1705 output_ext
= self
.params
.get('merge_output_format')
1708 output_ext
= the_only_video
['ext']
1709 elif the_only_audio
and not video_fmts
:
1710 output_ext
= the_only_audio
['ext']
1715 'requested_formats': formats_info
,
1716 'format': '+'.join(fmt_info
.get('format') for fmt_info
in formats_info
),
1717 'format_id': '+'.join(fmt_info
.get('format_id') for fmt_info
in formats_info
),
1723 'width': the_only_video
.get('width'),
1724 'height': the_only_video
.get('height'),
1725 'resolution': the_only_video
.get('resolution') or self
.format_resolution(the_only_video
),
1726 'fps': the_only_video
.get('fps'),
1727 'vcodec': the_only_video
.get('vcodec'),
1728 'vbr': the_only_video
.get('vbr'),
1729 'stretched_ratio': the_only_video
.get('stretched_ratio'),
1734 'acodec': the_only_audio
.get('acodec'),
1735 'abr': the_only_audio
.get('abr'),
1740 def _check_formats(formats
):
1742 self
.to_screen('[info] Testing format %s' % f
['format_id'])
1743 paths
= self
.params
.get('paths', {})
1744 temp_file
= os
.path
.join(
1745 expand_path(paths
.get('home', '').strip()),
1746 expand_path(paths
.get('temp', '').strip()),
1747 'ytdl.%s.f%s.check-format' % (random_uuidv4(), f
['format_id']))
1749 dl
, _
= self
.dl(temp_file
, f
, test
=True)
1750 except (ExtractorError
, IOError, OSError, ValueError) + network_exceptions
:
1753 if os
.path
.exists(temp_file
):
1754 os
.remove(temp_file
)
1758 self
.to_screen('[info] Unable to download format %s. Skipping...' % f
['format_id'])
1760 def _build_selector_function(selector
):
1761 if isinstance(selector
, list): # ,
1762 fs
= [_build_selector_function(s
) for s
in selector
]
1764 def selector_function(ctx
):
1766 for format
in f(ctx
):
1768 return selector_function
1770 elif selector
.type == GROUP
: # ()
1771 selector_function
= _build_selector_function(selector
.selector
)
1773 elif selector
.type == PICKFIRST
: # /
1774 fs
= [_build_selector_function(s
) for s
in selector
.selector
]
1776 def selector_function(ctx
):
1778 picked_formats
= list(f(ctx
))
1780 return picked_formats
1783 elif selector
.type == SINGLE
: # atom
1784 format_spec
= selector
.selector
or 'best'
1786 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
1787 if format_spec
== 'all':
1788 def selector_function(ctx
):
1789 formats
= list(ctx
['formats'])
1791 formats
= _check_formats(formats
)
1794 elif format_spec
== 'mergeall':
1795 def selector_function(ctx
):
1796 formats
= list(_check_formats(ctx
['formats']))
1799 merged_format
= formats
[-1]
1800 for f
in formats
[-2::-1]:
1801 merged_format
= _merge((merged_format
, f
))
1805 format_fallback
, format_reverse
, format_idx
= False, True, 1
1807 r
'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
1809 if mobj
is not None:
1810 format_idx
= int_or_none(mobj
.group('n'), default
=1)
1811 format_reverse
= mobj
.group('bw')[0] == 'b'
1812 format_type
= (mobj
.group('type') or [None])[0]
1813 not_format_type
= {'v': 'a', 'a': 'v'}
.get(format_type
)
1814 format_modified
= mobj
.group('mod') is not None
1816 format_fallback
= not format_type
and not format_modified
# for b, w
1818 (lambda f
: f
.get('%scodec' % format_type
) != 'none')
1819 if format_type
and format_modified
# bv*, ba*, wv*, wa*
1820 else (lambda f
: f
.get('%scodec' % not_format_type
) == 'none')
1821 if format_type
# bv, ba, wv, wa
1822 else (lambda f
: f
.get('vcodec') != 'none' and f
.get('acodec') != 'none')
1823 if not format_modified
# b, w
1826 filter_f
= ((lambda f
: f
.get('ext') == format_spec
)
1827 if format_spec
in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav'] # extension
1828 else (lambda f
: f
.get('format_id') == format_spec
)) # id
1830 def selector_function(ctx
):
1831 formats
= list(ctx
['formats'])
1834 matches
= list(filter(filter_f
, formats
)) if filter_f
is not None else formats
1835 if format_fallback
and ctx
['incomplete_formats'] and not matches
:
1836 # for extractors with incomplete formats (audio only (soundcloud)
1837 # or video only (imgur)) best/worst will fallback to
1838 # best/worst {video,audio}-only format
1841 matches
= matches
[::-1]
1843 matches
= list(itertools
.islice(_check_formats(matches
), format_idx
))
1845 if -n
<= format_idx
- 1 < n
:
1846 yield matches
[format_idx
- 1]
1848 elif selector
.type == MERGE
: # +
1849 selector_1
, selector_2
= map(_build_selector_function
, selector
.selector
)
1851 def selector_function(ctx
):
1852 for pair
in itertools
.product(
1853 selector_1(copy
.deepcopy(ctx
)), selector_2(copy
.deepcopy(ctx
))):
1856 filters
= [self
._build
_format
_filter
(f
) for f
in selector
.filters
]
def final_selector(ctx):
    # Apply every registered format filter, then hand the narrowed
    # context to the wrapped selector. Works on a deep copy so the
    # caller's ctx (and its 'formats' list) is never mutated.
    filtered_ctx = copy.deepcopy(ctx)
    remaining = filtered_ctx['formats']
    for predicate in filters:
        remaining = [fmt for fmt in remaining if predicate(fmt)]
    filtered_ctx['formats'] = remaining
    return selector_function(filtered_ctx)
1863 return final_selector
1865 stream
= io
.BytesIO(format_spec
.encode('utf-8'))
1867 tokens
= list(_remove_unused_ops(compat_tokenize_tokenize(stream
.readline
)))
1868 except tokenize
.TokenError
:
1869 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec
)))
1871 class TokenIterator(object):
1872 def __init__(self
, tokens
):
1873 self
.tokens
= tokens
1880 if self
.counter
>= len(self
.tokens
):
1881 raise StopIteration()
1882 value
= self
.tokens
[self
.counter
]
1888 def restore_last_token(self
):
1891 parsed_selector
= _parse_format_selection(iter(TokenIterator(tokens
)))
1892 return _build_selector_function(parsed_selector
)
1894 def _calc_headers(self
, info_dict
):
1895 res
= std_headers
.copy()
1897 add_headers
= info_dict
.get('http_headers')
1899 res
.update(add_headers
)
1901 cookies
= self
._calc
_cookies
(info_dict
)
1903 res
['Cookie'] = cookies
1905 if 'X-Forwarded-For' not in res
:
1906 x_forwarded_for_ip
= info_dict
.get('__x_forwarded_for_ip')
1907 if x_forwarded_for_ip
:
1908 res
['X-Forwarded-For'] = x_forwarded_for_ip
def _calc_cookies(self, info_dict):
    """Compute the 'Cookie' header value for this video's URL.

    Builds a throwaway request for info_dict['url'], lets the cookiejar
    attach whatever cookies match, and returns the resulting header
    value (presumably None when no cookies apply — per the underlying
    request object's get_header default).
    """
    probe_request = sanitized_Request(info_dict['url'])
    self.cookiejar.add_cookie_header(probe_request)
    return probe_request.get_header('Cookie')
1918 def _sanitize_thumbnails(info_dict
):
1919 thumbnails
= info_dict
.get('thumbnails')
1920 if thumbnails
is None:
1921 thumbnail
= info_dict
.get('thumbnail')
1923 info_dict
['thumbnails'] = thumbnails
= [{'url': thumbnail}
]
1925 thumbnails
.sort(key
=lambda t
: (
1926 t
.get('preference') if t
.get('preference') is not None else -1,
1927 t
.get('width') if t
.get('width') is not None else -1,
1928 t
.get('height') if t
.get('height') is not None else -1,
1929 t
.get('id') if t
.get('id') is not None else '',
1931 for i
, t
in enumerate(thumbnails
):
1932 t
['url'] = sanitize_url(t
['url'])
1933 if t
.get('width') and t
.get('height'):
1934 t
['resolution'] = '%dx%d' % (t
['width'], t
['height'])
1935 if t
.get('id') is None:
1938 def process_video_result(self
, info_dict
, download
=True):
1939 assert info_dict
.get('_type', 'video') == 'video'
1941 if 'id' not in info_dict
:
1942 raise ExtractorError('Missing "id" field in extractor result')
1943 if 'title' not in info_dict
:
1944 raise ExtractorError('Missing "title" field in extractor result')
def report_force_conversion(field, field_not, conversion):
    # Warn that an extractor emitted a field of the wrong type and
    # that it is being coerced. `self` is captured from the enclosing
    # method's scope.
    message = (
        '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
        % (field, field_not, conversion))
    self.report_warning(message)
1951 def sanitize_string_field(info
, string_field
):
1952 field
= info
.get(string_field
)
1953 if field
is None or isinstance(field
, compat_str
):
1955 report_force_conversion(string_field
, 'a string', 'string')
1956 info
[string_field
] = compat_str(field
)
1958 def sanitize_numeric_fields(info
):
1959 for numeric_field
in self
._NUMERIC
_FIELDS
:
1960 field
= info
.get(numeric_field
)
1961 if field
is None or isinstance(field
, compat_numeric_types
):
1963 report_force_conversion(numeric_field
, 'numeric', 'int')
1964 info
[numeric_field
] = int_or_none(field
)
1966 sanitize_string_field(info_dict
, 'id')
1967 sanitize_numeric_fields(info_dict
)
1969 if 'playlist' not in info_dict
:
1970 # It isn't part of a playlist
1971 info_dict
['playlist'] = None
1972 info_dict
['playlist_index'] = None
1974 self
._sanitize
_thumbnails
(info_dict
)
1976 if self
.params
.get('list_thumbnails'):
1977 self
.list_thumbnails(info_dict
)
1980 thumbnail
= info_dict
.get('thumbnail')
1981 thumbnails
= info_dict
.get('thumbnails')
1983 info_dict
['thumbnail'] = sanitize_url(thumbnail
)
1985 info_dict
['thumbnail'] = thumbnails
[-1]['url']
1987 if 'display_id' not in info_dict
and 'id' in info_dict
:
1988 info_dict
['display_id'] = info_dict
['id']
1990 for ts_key
, date_key
in (
1991 ('timestamp', 'upload_date'),
1992 ('release_timestamp', 'release_date'),
1994 if info_dict
.get(date_key
) is None and info_dict
.get(ts_key
) is not None:
1995 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
1996 # see http://bugs.python.org/issue1646728)
1998 upload_date
= datetime
.datetime
.utcfromtimestamp(info_dict
[ts_key
])
1999 info_dict
[date_key
] = upload_date
.strftime('%Y%m%d')
2000 except (ValueError, OverflowError, OSError):
2003 # Auto generate title fields corresponding to the *_number fields when missing
2004 # in order to always have clean titles. This is very common for TV series.
2005 for field
in ('chapter', 'season', 'episode'):
2006 if info_dict
.get('%s_number' % field
) is not None and not info_dict
.get(field
):
2007 info_dict
[field
] = '%s %d' % (field
.capitalize(), info_dict
['%s_number' % field
])
2009 for cc_kind
in ('subtitles', 'automatic_captions'):
2010 cc
= info_dict
.get(cc_kind
)
2012 for _
, subtitle
in cc
.items():
2013 for subtitle_format
in subtitle
:
2014 if subtitle_format
.get('url'):
2015 subtitle_format
['url'] = sanitize_url(subtitle_format
['url'])
2016 if subtitle_format
.get('ext') is None:
2017 subtitle_format
['ext'] = determine_ext(subtitle_format
['url']).lower()
2019 automatic_captions
= info_dict
.get('automatic_captions')
2020 subtitles
= info_dict
.get('subtitles')
2022 if self
.params
.get('listsubtitles', False):
2023 if 'automatic_captions' in info_dict
:
2024 self
.list_subtitles(
2025 info_dict
['id'], automatic_captions
, 'automatic captions')
2026 self
.list_subtitles(info_dict
['id'], subtitles
, 'subtitles')
2029 info_dict
['requested_subtitles'] = self
.process_subtitles(
2030 info_dict
['id'], subtitles
, automatic_captions
)
2032 # We now pick which formats have to be downloaded
2033 if info_dict
.get('formats') is None:
2034 # There's only one format available
2035 formats
= [info_dict
]
2037 formats
= info_dict
['formats']
2040 if not self
.params
.get('ignore_no_formats_error'):
2041 raise ExtractorError('No video formats found!')
2043 self
.report_warning('No video formats found!')
2045 def is_wellformed(f
):
2048 self
.report_warning(
2049 '"url" field is missing or empty - skipping format, '
2050 'there is an error in extractor')
2052 if isinstance(url
, bytes):
2053 sanitize_string_field(f
, 'url')
2056 # Filter out malformed formats for better extraction robustness
2057 formats
= list(filter(is_wellformed
, formats
))
2061 # We check that all the formats have the format and format_id fields
2062 for i
, format
in enumerate(formats
):
2063 sanitize_string_field(format
, 'format_id')
2064 sanitize_numeric_fields(format
)
2065 format
['url'] = sanitize_url(format
['url'])
2066 if not format
.get('format_id'):
2067 format
['format_id'] = compat_str(i
)
2069 # Sanitize format_id from characters used in format selector expression
2070 format
['format_id'] = re
.sub(r
'[\s,/+\[\]()]', '_', format
['format_id'])
2071 format_id
= format
['format_id']
2072 if format_id
not in formats_dict
:
2073 formats_dict
[format_id
] = []
2074 formats_dict
[format_id
].append(format
)
2076 # Make sure all formats have unique format_id
2077 for format_id
, ambiguous_formats
in formats_dict
.items():
2078 if len(ambiguous_formats
) > 1:
2079 for i
, format
in enumerate(ambiguous_formats
):
2080 format
['format_id'] = '%s-%d' % (format_id
, i
)
2082 for i
, format
in enumerate(formats
):
2083 if format
.get('format') is None:
2084 format
['format'] = '{id} - {res}{note}'.format(
2085 id=format
['format_id'],
2086 res
=self
.format_resolution(format
),
2087 note
=' ({0})'.format(format
['format_note']) if format
.get('format_note') is not None else '',
2089 # Automatically determine file extension if missing
2090 if format
.get('ext') is None:
2091 format
['ext'] = determine_ext(format
['url']).lower()
2092 # Automatically determine protocol if missing (useful for format
2093 # selection purposes)
2094 if format
.get('protocol') is None:
2095 format
['protocol'] = determine_protocol(format
)
2096 # Add HTTP headers, so that external programs can use them from the
2098 full_format_info
= info_dict
.copy()
2099 full_format_info
.update(format
)
2100 format
['http_headers'] = self
._calc
_headers
(full_format_info
)
2101 # Remove private housekeeping stuff
2102 if '__x_forwarded_for_ip' in info_dict
:
2103 del info_dict
['__x_forwarded_for_ip']
2105 # TODO Central sorting goes here
2107 if formats
and formats
[0] is not info_dict
:
2108 # only set the 'formats' fields if the original info_dict list them
2109 # otherwise we end up with a circular reference, the first (and unique)
2110 # element in the 'formats' field in info_dict is info_dict itself,
2111 # which can't be exported to json
2112 info_dict
['formats'] = formats
2114 info_dict
, _
= self
.pre_process(info_dict
)
2116 if self
.params
.get('listformats'):
2117 if not info_dict
.get('formats'):
2118 raise ExtractorError('No video formats found', expected
=True)
2119 self
.list_formats(info_dict
)
2122 req_format
= self
.params
.get('format')
2123 if req_format
is None:
2124 req_format
= self
._default
_format
_spec
(info_dict
, download
=download
)
2125 self
.write_debug('Default format spec: %s' % req_format
)
2127 format_selector
= self
.build_format_selector(req_format
)
2129 # While in format selection we may need to have an access to the original
2130 # format set in order to calculate some metrics or do some processing.
2131 # For now we need to be able to guess whether original formats provided
2132 # by extractor are incomplete or not (i.e. whether extractor provides only
2133 # video-only or audio-only formats) for proper formats selection for
2134 # extractors with such incomplete formats (see
2135 # https://github.com/ytdl-org/youtube-dl/pull/5556).
2136 # Since formats may be filtered during format selection and may not match
2137 # the original formats the results may be incorrect. Thus original formats
2138 # or pre-calculated metrics should be passed to format selection routines
2140 # We will pass a context object containing all necessary additional data
2141 # instead of just formats.
2142 # This fixes incorrect format selection issue (see
2143 # https://github.com/ytdl-org/youtube-dl/issues/10083).
2144 incomplete_formats
= (
2145 # All formats are video-only or
2146 all(f
.get('vcodec') != 'none' and f
.get('acodec') == 'none' for f
in formats
)
2147 # all formats are audio-only
2148 or all(f
.get('vcodec') == 'none' and f
.get('acodec') != 'none' for f
in formats
))
2152 'incomplete_formats': incomplete_formats
,
2155 formats_to_download
= list(format_selector(ctx
))
2156 if not formats_to_download
:
2157 if not self
.params
.get('ignore_no_formats_error'):
2158 raise ExtractorError('Requested format is not available', expected
=True)
2160 self
.report_warning('Requested format is not available')
2163 '[info] %s: Downloading %d format(s): %s' % (
2164 info_dict
['id'], len(formats_to_download
),
2165 ", ".join([f
['format_id'] for f
in formats_to_download
])))
2166 for fmt
in formats_to_download
:
2167 new_info
= dict(info_dict
)
2168 # Save a reference to the original info_dict so that it can be modified in process_info if needed
2169 new_info
['__original_infodict'] = info_dict
2170 new_info
.update(fmt
)
2171 self
.process_info(new_info
)
2172 # We update the info dict with the best quality format (backwards compatibility)
2173 if formats_to_download
:
2174 info_dict
.update(formats_to_download
[-1])
2177 def process_subtitles(self
, video_id
, normal_subtitles
, automatic_captions
):
2178 """Select the requested subtitles and their format"""
2180 if normal_subtitles
and self
.params
.get('writesubtitles'):
2181 available_subs
.update(normal_subtitles
)
2182 if automatic_captions
and self
.params
.get('writeautomaticsub'):
2183 for lang
, cap_info
in automatic_captions
.items():
2184 if lang
not in available_subs
:
2185 available_subs
[lang
] = cap_info
2187 if (not self
.params
.get('writesubtitles') and not
2188 self
.params
.get('writeautomaticsub') or not
2192 all_sub_langs
= available_subs
.keys()
2193 if self
.params
.get('allsubtitles', False):
2194 requested_langs
= all_sub_langs
2195 elif self
.params
.get('subtitleslangs', False):
2196 requested_langs
= set()
2197 for lang
in self
.params
.get('subtitleslangs'):
2199 requested_langs
.update(all_sub_langs
)
2201 discard
= lang
[0] == '-'
2204 current_langs
= filter(re
.compile(lang
+ '$').match
, all_sub_langs
)
2206 for lang
in current_langs
:
2207 requested_langs
.discard(lang
)
2209 requested_langs
.update(current_langs
)
2210 elif 'en' in available_subs
:
2211 requested_langs
= ['en']
2213 requested_langs
= [list(all_sub_langs
)[0]]
2214 self
.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs
))
2216 formats_query
= self
.params
.get('subtitlesformat', 'best')
2217 formats_preference
= formats_query
.split('/') if formats_query
else []
2219 for lang
in requested_langs
:
2220 formats
= available_subs
.get(lang
)
2222 self
.report_warning('%s subtitles not available for %s' % (lang
, video_id
))
2224 for ext
in formats_preference
:
2228 matches
= list(filter(lambda f
: f
['ext'] == ext
, formats
))
2234 self
.report_warning(
2235 'No subtitle format found matching "%s" for language %s, '
2236 'using %s' % (formats_query
, lang
, f
['ext']))
2240 def __forced_printings(self
, info_dict
, filename
, incomplete
):
def print_mandatory(field, actual_field=None):
    # Emit info_dict[actual_field] to stdout when the corresponding
    # force<field> option is set. For incomplete results the value is
    # only printed if it is actually present. `self`, `info_dict` and
    # `incomplete` are closed over from the enclosing method.
    if actual_field is None:
        actual_field = field
    forced = self.params.get('force%s' % field, False)
    if not forced:
        return
    if incomplete and info_dict.get(actual_field) is None:
        return
    self.to_stdout(info_dict[actual_field])
def print_optional(field):
    # Emit info_dict[field] to stdout only when the corresponding
    # force<field> option is set AND the field has a value; optional
    # fields are silently skipped otherwise. `self` and `info_dict`
    # are closed over from the enclosing method.
    wanted = self.params.get('force%s' % field, False)
    value = info_dict.get(field)
    if wanted and value is not None:
        self.to_stdout(value)
2253 info_dict
= info_dict
.copy()
2254 if filename
is not None:
2255 info_dict
['filename'] = filename
2256 if info_dict
.get('requested_formats') is not None:
2257 # For RTMP URLs, also include the playpath
2258 info_dict
['urls'] = '\n'.join(f
['url'] + f
.get('play_path', '') for f
in info_dict
['requested_formats'])
2259 elif 'url' in info_dict
:
2260 info_dict
['urls'] = info_dict
['url'] + info_dict
.get('play_path', '')
2262 for tmpl
in self
.params
.get('forceprint', []):
2263 if re
.match(r
'\w+$', tmpl
):
2264 tmpl
= '%({})s'.format(tmpl
)
2265 tmpl
, info_copy
= self
.prepare_outtmpl(tmpl
, info_dict
)
2266 self
.to_stdout(tmpl
% info_copy
)
2268 print_mandatory('title')
2269 print_mandatory('id')
2270 print_mandatory('url', 'urls')
2271 print_optional('thumbnail')
2272 print_optional('description')
2273 print_optional('filename')
2274 if self
.params
.get('forceduration', False) and info_dict
.get('duration') is not None:
2275 self
.to_stdout(formatSeconds(info_dict
['duration']))
2276 print_mandatory('format')
2278 if self
.params
.get('forcejson', False):
2279 self
.post_extract(info_dict
)
2280 self
.to_stdout(json
.dumps(info_dict
, default
=repr))
2282 def dl(self
, name
, info
, subtitle
=False, test
=False):
2285 verbose
= self
.params
.get('verbose')
2288 'quiet': not verbose
,
2290 'noprogress': not verbose
,
2292 'skip_unavailable_fragments': False,
2293 'keep_fragments': False,
2295 '_no_ytdl_file': True,
2298 params
= self
.params
2299 fd
= get_suitable_downloader(info
, params
)(self
, params
)
2301 for ph
in self
._progress
_hooks
:
2302 fd
.add_progress_hook(ph
)
2303 urls
= '", "'.join([f
['url'] for f
in info
.get('requested_formats', [])] or [info
['url']])
2304 self
.write_debug('Invoking downloader on "%s"' % urls
)
2305 new_info
= dict(info
)
2306 if new_info
.get('http_headers') is None:
2307 new_info
['http_headers'] = self
._calc
_headers
(new_info
)
2308 return fd
.download(name
, new_info
, subtitle
)
2310 def process_info(self
, info_dict
):
2311 """Process a single resolved IE result."""
2313 assert info_dict
.get('_type', 'video') == 'video'
2315 info_dict
.setdefault('__postprocessors', [])
2317 max_downloads
= self
.params
.get('max_downloads')
2318 if max_downloads
is not None:
2319 if self
._num
_downloads
>= int(max_downloads
):
2320 raise MaxDownloadsReached()
2322 # TODO: backward compatibility, to be removed
2323 info_dict
['fulltitle'] = info_dict
['title']
2325 if 'format' not in info_dict
:
2326 info_dict
['format'] = info_dict
['ext']
2328 if self
._match
_entry
(info_dict
) is not None:
2331 self
.post_extract(info_dict
)
2332 self
._num
_downloads
+= 1
2334 # info_dict['_filename'] needs to be set for backward compatibility
2335 info_dict
['_filename'] = full_filename
= self
.prepare_filename(info_dict
, warn
=True)
2336 temp_filename
= self
.prepare_filename(info_dict
, 'temp')
2340 self
.__forced
_printings
(info_dict
, full_filename
, incomplete
=False)
2342 if self
.params
.get('simulate', False):
2343 if self
.params
.get('force_write_download_archive', False):
2344 self
.record_download_archive(info_dict
)
2346 # Do nothing else if in simulate mode
2349 if full_filename
is None:
2352 if not self
._ensure
_dir
_exists
(encodeFilename(full_filename
)):
2354 if not self
._ensure
_dir
_exists
(encodeFilename(temp_filename
)):
2357 if self
.params
.get('writedescription', False):
2358 descfn
= self
.prepare_filename(info_dict
, 'description')
2359 if not self
._ensure
_dir
_exists
(encodeFilename(descfn
)):
2361 if not self
.params
.get('overwrites', True) and os
.path
.exists(encodeFilename(descfn
)):
2362 self
.to_screen('[info] Video description is already present')
2363 elif info_dict
.get('description') is None:
2364 self
.report_warning('There\'s no description to write.')
2367 self
.to_screen('[info] Writing video description to: ' + descfn
)
2368 with io
.open(encodeFilename(descfn
), 'w', encoding
='utf-8') as descfile
:
2369 descfile
.write(info_dict
['description'])
2370 except (OSError, IOError):
2371 self
.report_error('Cannot write description file ' + descfn
)
2374 if self
.params
.get('writeannotations', False):
2375 annofn
= self
.prepare_filename(info_dict
, 'annotation')
2376 if not self
._ensure
_dir
_exists
(encodeFilename(annofn
)):
2378 if not self
.params
.get('overwrites', True) and os
.path
.exists(encodeFilename(annofn
)):
2379 self
.to_screen('[info] Video annotations are already present')
2380 elif not info_dict
.get('annotations'):
2381 self
.report_warning('There are no annotations to write.')
2384 self
.to_screen('[info] Writing video annotations to: ' + annofn
)
2385 with io
.open(encodeFilename(annofn
), 'w', encoding
='utf-8') as annofile
:
2386 annofile
.write(info_dict
['annotations'])
2387 except (KeyError, TypeError):
2388 self
.report_warning('There are no annotations to write.')
2389 except (OSError, IOError):
2390 self
.report_error('Cannot write annotations file: ' + annofn
)
2393 subtitles_are_requested
= any([self
.params
.get('writesubtitles', False),
2394 self
.params
.get('writeautomaticsub')])
2396 if subtitles_are_requested
and info_dict
.get('requested_subtitles'):
2397 # subtitles download errors are already managed as troubles in relevant IE
2398 # that way it will silently go on when used with unsupporting IE
2399 subtitles
= info_dict
['requested_subtitles']
2400 # ie = self.get_info_extractor(info_dict['extractor_key'])
2401 for sub_lang
, sub_info
in subtitles
.items():
2402 sub_format
= sub_info
['ext']
2403 sub_filename
= subtitles_filename(temp_filename
, sub_lang
, sub_format
, info_dict
.get('ext'))
2404 sub_filename_final
= subtitles_filename(
2405 self
.prepare_filename(info_dict
, 'subtitle'), sub_lang
, sub_format
, info_dict
.get('ext'))
2406 if not self
.params
.get('overwrites', True) and os
.path
.exists(encodeFilename(sub_filename
)):
2407 self
.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang
, sub_format
))
2408 sub_info
['filepath'] = sub_filename
2409 files_to_move
[sub_filename
] = sub_filename_final
2411 self
.to_screen('[info] Writing video subtitles to: ' + sub_filename
)
2412 if sub_info
.get('data') is not None:
2414 # Use newline='' to prevent conversion of newline characters
2415 # See https://github.com/ytdl-org/youtube-dl/issues/10268
2416 with io
.open(encodeFilename(sub_filename
), 'w', encoding
='utf-8', newline
='') as subfile
:
2417 subfile
.write(sub_info
['data'])
2418 sub_info
['filepath'] = sub_filename
2419 files_to_move
[sub_filename
] = sub_filename_final
2420 except (OSError, IOError):
2421 self
.report_error('Cannot write subtitles file ' + sub_filename
)
2425 self
.dl(sub_filename
, sub_info
.copy(), subtitle
=True)
2426 sub_info
['filepath'] = sub_filename
2427 files_to_move
[sub_filename
] = sub_filename_final
2428 except (ExtractorError
, IOError, OSError, ValueError) + network_exceptions
as err
:
2429 self
.report_warning('Unable to download subtitle for "%s": %s' %
2430 (sub_lang
, error_to_compat_str(err
)))
2433 if self
.params
.get('writeinfojson', False):
2434 infofn
= self
.prepare_filename(info_dict
, 'infojson')
2435 if not self
._ensure
_dir
_exists
(encodeFilename(infofn
)):
2437 if not self
.params
.get('overwrites', True) and os
.path
.exists(encodeFilename(infofn
)):
2438 self
.to_screen('[info] Video metadata is already present')
2440 self
.to_screen('[info] Writing video metadata as JSON to: ' + infofn
)
2442 write_json_file(self
.filter_requested_info(info_dict
, self
.params
.get('clean_infojson', True)), infofn
)
2443 except (OSError, IOError):
2444 self
.report_error('Cannot write video metadata to JSON file ' + infofn
)
2446 info_dict
['__infojson_filename'] = infofn
2448 for thumb_ext
in self
._write
_thumbnails
(info_dict
, temp_filename
):
2449 thumb_filename_temp
= replace_extension(temp_filename
, thumb_ext
, info_dict
.get('ext'))
2450 thumb_filename
= replace_extension(
2451 self
.prepare_filename(info_dict
, 'thumbnail'), thumb_ext
, info_dict
.get('ext'))
2452 files_to_move
[thumb_filename_temp
] = thumb_filename
2454 # Write internet shortcut files
2455 url_link
= webloc_link
= desktop_link
= False
2456 if self
.params
.get('writelink', False):
2457 if sys
.platform
== "darwin": # macOS.
2459 elif sys
.platform
.startswith("linux"):
2461 else: # if sys.platform in ['win32', 'cygwin']:
2463 if self
.params
.get('writeurllink', False):
2465 if self
.params
.get('writewebloclink', False):
2467 if self
.params
.get('writedesktoplink', False):
2470 if url_link
or webloc_link
or desktop_link
:
2471 if 'webpage_url' not in info_dict
:
2472 self
.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2474 ascii_url
= iri_to_uri(info_dict
['webpage_url'])
2476 def _write_link_file(extension
, template
, newline
, embed_filename
):
2477 linkfn
= replace_extension(full_filename
, extension
, info_dict
.get('ext'))
2478 if self
.params
.get('overwrites', True) and os
.path
.exists(encodeFilename(linkfn
)):
2479 self
.to_screen('[info] Internet shortcut is already present')
2482 self
.to_screen('[info] Writing internet shortcut to: ' + linkfn
)
2483 with io
.open(encodeFilename(to_high_limit_path(linkfn
)), 'w', encoding
='utf-8', newline
=newline
) as linkfile
:
2484 template_vars
= {'url': ascii_url}
2486 template_vars
['filename'] = linkfn
[:-(len(extension
) + 1)]
2487 linkfile
.write(template
% template_vars
)
2488 except (OSError, IOError):
2489 self
.report_error('Cannot write internet shortcut ' + linkfn
)
2494 if not _write_link_file('url', DOT_URL_LINK_TEMPLATE
, '\r\n', embed_filename
=False):
2497 if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE
, '\n', embed_filename
=False):
2500 if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE
, '\n', embed_filename
=True):
2504 info_dict
, files_to_move
= self
.pre_process(info_dict
, 'before_dl', files_to_move
)
2505 except PostProcessingError
as err
:
2506 self
.report_error('Preprocessing: %s' % str(err
))
2509 must_record_download_archive
= False
2510 if self
.params
.get('skip_download', False):
2511 info_dict
['filepath'] = temp_filename
2512 info_dict
['__finaldir'] = os
.path
.dirname(os
.path
.abspath(encodeFilename(full_filename
)))
2513 info_dict
['__files_to_move'] = files_to_move
2514 info_dict
= self
.run_pp(MoveFilesAfterDownloadPP(self
, False), info_dict
)
2519 def existing_file(*filepaths
):
2520 ext
= info_dict
.get('ext')
2521 final_ext
= self
.params
.get('final_ext', ext
)
2523 for file in orderedSet(filepaths
):
2524 if final_ext
!= ext
:
2525 converted
= replace_extension(file, final_ext
, ext
)
2526 if os
.path
.exists(encodeFilename(converted
)):
2527 existing_files
.append(converted
)
2528 if os
.path
.exists(encodeFilename(file)):
2529 existing_files
.append(file)
2531 if not existing_files
or self
.params
.get('overwrites', False):
2532 for file in orderedSet(existing_files
):
2533 self
.report_file_delete(file)
2534 os
.remove(encodeFilename(file))
2537 self
.report_file_already_downloaded(existing_files
[0])
2538 info_dict
['ext'] = os
.path
.splitext(existing_files
[0])[1][1:]
2539 return existing_files
[0]
2542 if info_dict
.get('requested_formats') is not None:
2544 def compatible_formats(formats
):
2545 # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2546 video_formats
= [format
for format
in formats
if format
.get('vcodec') != 'none']
2547 audio_formats
= [format
for format
in formats
if format
.get('acodec') != 'none']
2548 if len(video_formats
) > 2 or len(audio_formats
) > 2:
2552 exts
= set(format
.get('ext') for format
in formats
)
2554 set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2557 for ext_sets
in COMPATIBLE_EXTS
:
2558 if ext_sets
.issuperset(exts
):
2560 # TODO: Check acodec/vcodec
2563 requested_formats
= info_dict
['requested_formats']
2564 old_ext
= info_dict
['ext']
2565 if self
.params
.get('merge_output_format') is None:
2566 if not compatible_formats(requested_formats
):
2567 info_dict
['ext'] = 'mkv'
2568 self
.report_warning(
2569 'Requested formats are incompatible for merge and will be merged into mkv.')
2570 if (info_dict
['ext'] == 'webm'
2571 and self
.params
.get('writethumbnail', False)
2572 and info_dict
.get('thumbnails')):
2573 info_dict
['ext'] = 'mkv'
2574 self
.report_warning(
2575 'webm doesn\'t support embedding a thumbnail, mkv will be used.')
2577 def correct_ext(filename
):
2578 filename_real_ext
= os
.path
.splitext(filename
)[1][1:]
2580 os
.path
.splitext(filename
)[0]
2581 if filename_real_ext
== old_ext
2583 return '%s.%s' % (filename_wo_ext
, info_dict
['ext'])
2585 # Ensure filename always has a correct extension for successful merge
2586 full_filename
= correct_ext(full_filename
)
2587 temp_filename
= correct_ext(temp_filename
)
2588 dl_filename
= existing_file(full_filename
, temp_filename
)
2589 info_dict
['__real_download'] = False
2591 _protocols
= set(determine_protocol(f
) for f
in requested_formats
)
2592 if len(_protocols
) == 1:
2593 info_dict
['protocol'] = _protocols
.pop()
2594 directly_mergable
= (
2595 'no-direct-merge' not in self
.params
.get('compat_opts', [])
2596 and info_dict
.get('protocol') is not None # All requested formats have same protocol
2597 and not self
.params
.get('allow_unplayable_formats')
2598 and get_suitable_downloader(info_dict
, self
.params
).__name
__ == 'FFmpegFD')
2599 if directly_mergable
:
2600 info_dict
['url'] = requested_formats
[0]['url']
2601 # Treat it as a single download
2602 dl_filename
= existing_file(full_filename
, temp_filename
)
2603 if dl_filename
is None:
2604 success
, real_download
= self
.dl(temp_filename
, info_dict
)
2605 info_dict
['__real_download'] = real_download
2608 merger
= FFmpegMergerPP(self
)
2609 if self
.params
.get('allow_unplayable_formats'):
2610 self
.report_warning(
2611 'You have requested merging of multiple formats '
2612 'while also allowing unplayable formats to be downloaded. '
2613 'The formats won\'t be merged to prevent data corruption.')
2614 elif not merger
.available
:
2615 self
.report_warning(
2616 'You have requested merging of multiple formats but ffmpeg is not installed. '
2617 'The formats won\'t be merged.')
2619 if dl_filename
is None:
2620 for f
in requested_formats
:
2621 new_info
= dict(info_dict
)
2622 del new_info
['requested_formats']
2624 fname
= prepend_extension(
2625 self
.prepare_filename(new_info
, 'temp'),
2626 'f%s' % f
['format_id'], new_info
['ext'])
2627 if not self
._ensure
_dir
_exists
(fname
):
2629 downloaded
.append(fname
)
2630 partial_success
, real_download
= self
.dl(fname
, new_info
)
2631 info_dict
['__real_download'] = info_dict
['__real_download'] or real_download
2632 success
= success
and partial_success
2633 if merger
.available
and not self
.params
.get('allow_unplayable_formats'):
2634 info_dict
['__postprocessors'].append(merger
)
2635 info_dict
['__files_to_merge'] = downloaded
2636 # Even if there were no downloads, it is being merged only now
2637 info_dict
['__real_download'] = True
2639 for file in downloaded
:
2640 files_to_move
[file] = None
2642 # Just a single file
2643 dl_filename
= existing_file(full_filename
, temp_filename
)
2644 if dl_filename
is None:
2645 success
, real_download
= self
.dl(temp_filename
, info_dict
)
2646 info_dict
['__real_download'] = real_download
2648 dl_filename
= dl_filename
or temp_filename
2649 info_dict
['__finaldir'] = os
.path
.dirname(os
.path
.abspath(encodeFilename(full_filename
)))
2651 except network_exceptions
as err
:
2652 self
.report_error('unable to download video data: %s' % error_to_compat_str(err
))
2654 except (OSError, IOError) as err
:
2655 raise UnavailableVideoError(err
)
2656 except (ContentTooShortError
, ) as err
:
2657 self
.report_error('content too short (expected %s bytes and served %s)' % (err
.expected
, err
.downloaded
))
2660 if success
and full_filename
!= '-':
2662 fixup_policy
= self
.params
.get('fixup')
2663 if fixup_policy
is None:
2664 fixup_policy
= 'detect_or_warn'
2666 INSTALL_FFMPEG_MESSAGE
= 'Install ffmpeg to fix this automatically.'
2668 stretched_ratio
= info_dict
.get('stretched_ratio')
2669 if stretched_ratio
is not None and stretched_ratio
!= 1:
2670 if fixup_policy
== 'warn':
2671 self
.report_warning('%s: Non-uniform pixel ratio (%s)' % (
2672 info_dict
['id'], stretched_ratio
))
2673 elif fixup_policy
== 'detect_or_warn':
2674 stretched_pp
= FFmpegFixupStretchedPP(self
)
2675 if stretched_pp
.available
:
2676 info_dict
['__postprocessors'].append(stretched_pp
)
2678 self
.report_warning(
2679 '%s: Non-uniform pixel ratio (%s). %s'
2680 % (info_dict
['id'], stretched_ratio
, INSTALL_FFMPEG_MESSAGE
))
2682 assert fixup_policy
in ('ignore', 'never')
2684 if (info_dict
.get('requested_formats') is None
2685 and info_dict
.get('container') == 'm4a_dash'
2686 and info_dict
.get('ext') == 'm4a'):
2687 if fixup_policy
== 'warn':
2688 self
.report_warning(
2689 '%s: writing DASH m4a. '
2690 'Only some players support this container.'
2692 elif fixup_policy
== 'detect_or_warn':
2693 fixup_pp
= FFmpegFixupM4aPP(self
)
2694 if fixup_pp
.available
:
2695 info_dict
['__postprocessors'].append(fixup_pp
)
2697 self
.report_warning(
2698 '%s: writing DASH m4a. '
2699 'Only some players support this container. %s'
2700 % (info_dict
['id'], INSTALL_FFMPEG_MESSAGE
))
2702 assert fixup_policy
in ('ignore', 'never')
2704 if ('protocol' in info_dict
2705 and get_suitable_downloader(info_dict
, self
.params
).__name
__ == 'HlsFD'):
2706 if fixup_policy
== 'warn':
2707 self
.report_warning('%s: malformed AAC bitstream detected.' % (
2709 elif fixup_policy
== 'detect_or_warn':
2710 fixup_pp
= FFmpegFixupM3u8PP(self
)
2711 if fixup_pp
.available
:
2712 info_dict
['__postprocessors'].append(fixup_pp
)
2714 self
.report_warning(
2715 '%s: malformed AAC bitstream detected. %s'
2716 % (info_dict
['id'], INSTALL_FFMPEG_MESSAGE
))
2718 assert fixup_policy
in ('ignore', 'never')
2721 info_dict
= self
.post_process(dl_filename
, info_dict
, files_to_move
)
2722 except PostProcessingError
as err
:
2723 self
.report_error('Postprocessing: %s' % str(err
))
2726 for ph
in self
._post
_hooks
:
2727 ph(info_dict
['filepath'])
2728 except Exception as err
:
2729 self
.report_error('post hooks: %s' % str(err
))
2731 must_record_download_archive
= True
2733 if must_record_download_archive
or self
.params
.get('force_write_download_archive', False):
2734 self
.record_download_archive(info_dict
)
2735 max_downloads
= self
.params
.get('max_downloads')
2736 if max_downloads
is not None and self
._num
_downloads
>= int(max_downloads
):
2737 raise MaxDownloadsReached()
2739 def download(self
, url_list
):
2740 """Download a given list of URLs."""
2741 outtmpl
= self
.outtmpl_dict
['default']
2742 if (len(url_list
) > 1
2744 and '%' not in outtmpl
2745 and self
.params
.get('max_downloads') != 1):
2746 raise SameFileError(outtmpl
)
2748 for url
in url_list
:
2750 # It also downloads the videos
2751 res
= self
.extract_info(
2752 url
, force_generic_extractor
=self
.params
.get('force_generic_extractor', False))
2753 except UnavailableVideoError
:
2754 self
.report_error('unable to download video')
2755 except MaxDownloadsReached
:
2756 self
.to_screen('[info] Maximum number of downloaded files reached')
2758 except ExistingVideoReached
:
2759 self
.to_screen('[info] Encountered a file that is already in the archive, stopping due to --break-on-existing')
2761 except RejectedVideoReached
:
2762 self
.to_screen('[info] Encountered a file that did not match filter, stopping due to --break-on-reject')
2765 if self
.params
.get('dump_single_json', False):
2766 self
.post_extract(res
)
2767 self
.to_stdout(json
.dumps(res
, default
=repr))
2769 return self
._download
_retcode
2771 def download_with_info_file(self
, info_filename
):
2772 with contextlib
.closing(fileinput
.FileInput(
2773 [info_filename
], mode
='r',
2774 openhook
=fileinput
.hook_encoded('utf-8'))) as f
:
2775 # FileInput doesn't have a read method, we can't call json.load
2776 info
= self
.filter_requested_info(json
.loads('\n'.join(f
)), self
.params
.get('clean_infojson', True))
2778 self
.process_ie_result(info
, download
=True)
2779 except (DownloadError
, EntryNotInPlaylist
):
2780 webpage_url
= info
.get('webpage_url')
2781 if webpage_url
is not None:
2782 self
.report_warning('The info failed to download, trying with "%s"' % webpage_url
)
2783 return self
.download([webpage_url
])
2786 return self
._download
_retcode
2789 def filter_requested_info(info_dict
, actually_filter
=True):
2790 remove_keys
= ['__original_infodict'] # Always remove this since this may contain a copy of the entire dict
2791 keep_keys
= ['_type'], # Always keep this to facilitate load-info-json
2793 remove_keys
+= ('requested_formats', 'requested_subtitles', 'requested_entries', 'filepath', 'entries', 'original_url')
2794 empty_values
= (None, {}, [], set(), tuple())
2795 reject
= lambda k
, v
: k
not in keep_keys
and (
2796 k
.startswith('_') or k
in remove_keys
or v
in empty_values
)
2798 info_dict
['epoch'] = int(time
.time())
2799 reject
= lambda k
, v
: k
in remove_keys
2800 filter_fn
= lambda obj
: (
2801 list(map(filter_fn
, obj
)) if isinstance(obj
, (list, tuple, set))
2802 else obj
if not isinstance(obj
, dict)
2803 else dict((k
, filter_fn(v
)) for k
, v
in obj
.items() if not reject(k
, v
)))
2804 return filter_fn(info_dict
)
2806 def run_pp(self
, pp
, infodict
):
2807 files_to_delete
= []
2808 if '__files_to_move' not in infodict
:
2809 infodict
['__files_to_move'] = {}
2810 files_to_delete
, infodict
= pp
.run(infodict
)
2811 if not files_to_delete
:
2814 if self
.params
.get('keepvideo', False):
2815 for f
in files_to_delete
:
2816 infodict
['__files_to_move'].setdefault(f
, '')
2818 for old_filename
in set(files_to_delete
):
2819 self
.to_screen('Deleting original file %s (pass -k to keep)' % old_filename
)
2821 os
.remove(encodeFilename(old_filename
))
2822 except (IOError, OSError):
2823 self
.report_warning('Unable to remove downloaded original file')
2824 if old_filename
in infodict
['__files_to_move']:
2825 del infodict
['__files_to_move'][old_filename
]
2829 def post_extract(info_dict
):
2830 def actual_post_extract(info_dict
):
2831 if info_dict
.get('_type') in ('playlist', 'multi_video'):
2832 for video_dict
in info_dict
.get('entries', {}):
2833 actual_post_extract(video_dict
or {})
2836 post_extractor
= info_dict
.get('__post_extractor') or (lambda: {})
2837 extra
= post_extractor().items()
2838 info_dict
.update(extra
)
2839 info_dict
.pop('__post_extractor', None)
2841 original_infodict
= info_dict
.get('__original_infodict') or {}
2842 original_infodict
.update(extra
)
2843 original_infodict
.pop('__post_extractor', None)
2845 actual_post_extract(info_dict
or {})
2847 def pre_process(self
, ie_info
, key
='pre_process', files_to_move
=None):
2848 info
= dict(ie_info
)
2849 info
['__files_to_move'] = files_to_move
or {}
2850 for pp
in self
._pps
[key
]:
2851 info
= self
.run_pp(pp
, info
)
2852 return info
, info
.pop('__files_to_move', None)
2854 def post_process(self
, filename
, ie_info
, files_to_move
=None):
2855 """Run all the postprocessors on the given file."""
2856 info
= dict(ie_info
)
2857 info
['filepath'] = filename
2858 info
['__files_to_move'] = files_to_move
or {}
2860 for pp
in ie_info
.get('__postprocessors', []) + self
._pps
['post_process']:
2861 info
= self
.run_pp(pp
, info
)
2862 info
= self
.run_pp(MoveFilesAfterDownloadPP(self
), info
)
2863 del info
['__files_to_move']
2864 for pp
in self
._pps
['after_move']:
2865 info
= self
.run_pp(pp
, info
)
2868 def _make_archive_id(self
, info_dict
):
2869 video_id
= info_dict
.get('id')
2872 # Future-proof against any change in case
2873 # and backwards compatibility with prior versions
2874 extractor
= info_dict
.get('extractor_key') or info_dict
.get('ie_key') # key in a playlist
2875 if extractor
is None:
2876 url
= str_or_none(info_dict
.get('url'))
2879 # Try to find matching extractor for the URL and take its ie_key
2880 for ie
in self
._ies
:
2881 if ie
.suitable(url
):
2882 extractor
= ie
.ie_key()
2886 return '%s %s' % (extractor
.lower(), video_id
)
2888 def in_download_archive(self
, info_dict
):
2889 fn
= self
.params
.get('download_archive')
2893 vid_id
= self
._make
_archive
_id
(info_dict
)
2895 return False # Incomplete video information
2897 return vid_id
in self
.archive
2899 def record_download_archive(self
, info_dict
):
2900 fn
= self
.params
.get('download_archive')
2903 vid_id
= self
._make
_archive
_id
(info_dict
)
2905 with locked_file(fn
, 'a', encoding
='utf-8') as archive_file
:
2906 archive_file
.write(vid_id
+ '\n')
2907 self
.archive
.add(vid_id
)
2910 def format_resolution(format
, default
='unknown'):
2911 if format
.get('vcodec') == 'none':
2913 if format
.get('resolution') is not None:
2914 return format
['resolution']
2915 if format
.get('width') and format
.get('height'):
2916 res
= '%dx%d' % (format
['width'], format
['height'])
2917 elif format
.get('height'):
2918 res
= '%sp' % format
['height']
2919 elif format
.get('width'):
2920 res
= '%dx?' % format
['width']
2925 def _format_note(self
, fdict
):
2927 if fdict
.get('ext') in ['f4f', 'f4m']:
2928 res
+= '(unsupported) '
2929 if fdict
.get('language'):
2932 res
+= '[%s] ' % fdict
['language']
2933 if fdict
.get('format_note') is not None:
2934 res
+= fdict
['format_note'] + ' '
2935 if fdict
.get('tbr') is not None:
2936 res
+= '%4dk ' % fdict
['tbr']
2937 if fdict
.get('container') is not None:
2940 res
+= '%s container' % fdict
['container']
2941 if (fdict
.get('vcodec') is not None
2942 and fdict
.get('vcodec') != 'none'):
2945 res
+= fdict
['vcodec']
2946 if fdict
.get('vbr') is not None:
2948 elif fdict
.get('vbr') is not None and fdict
.get('abr') is not None:
2950 if fdict
.get('vbr') is not None:
2951 res
+= '%4dk' % fdict
['vbr']
2952 if fdict
.get('fps') is not None:
2955 res
+= '%sfps' % fdict
['fps']
2956 if fdict
.get('acodec') is not None:
2959 if fdict
['acodec'] == 'none':
2962 res
+= '%-5s' % fdict
['acodec']
2963 elif fdict
.get('abr') is not None:
2967 if fdict
.get('abr') is not None:
2968 res
+= '@%3dk' % fdict
['abr']
2969 if fdict
.get('asr') is not None:
2970 res
+= ' (%5dHz)' % fdict
['asr']
2971 if fdict
.get('filesize') is not None:
2974 res
+= format_bytes(fdict
['filesize'])
2975 elif fdict
.get('filesize_approx') is not None:
2978 res
+= '~' + format_bytes(fdict
['filesize_approx'])
2981 def _format_note_table(self
, f
):
2982 def join_fields(*vargs
):
2983 return ', '.join((val
for val
in vargs
if val
!= ''))
2986 'UNSUPPORTED' if f
.get('ext') in ('f4f', 'f4m') else '',
2987 format_field(f
, 'language', '[%s]'),
2988 format_field(f
, 'format_note'),
2989 format_field(f
, 'container', ignore
=(None, f
.get('ext'))),
2990 format_field(f
, 'asr', '%5dHz'))
2992 def list_formats(self
, info_dict
):
2993 formats
= info_dict
.get('formats', [info_dict
])
2995 'list-formats' not in self
.params
.get('compat_opts', [])
2996 and self
.params
.get('list_formats_as_table', True) is not False)
3000 format_field(f
, 'format_id'),
3001 format_field(f
, 'ext'),
3002 self
.format_resolution(f
),
3003 format_field(f
, 'fps', '%d'),
3005 format_field(f
, 'filesize', ' %s', func
=format_bytes
) + format_field(f
, 'filesize_approx', '~%s', func
=format_bytes
),
3006 format_field(f
, 'tbr', '%4dk'),
3007 shorten_protocol_name(f
.get('protocol', '').replace("native", "n")),
3009 format_field(f
, 'vcodec', default
='unknown').replace('none', ''),
3010 format_field(f
, 'vbr', '%4dk'),
3011 format_field(f
, 'acodec', default
='unknown').replace('none', ''),
3012 format_field(f
, 'abr', '%3dk'),
3013 format_field(f
, 'asr', '%5dHz'),
3014 self
._format
_note
_table
(f
)]
3016 if f
.get('preference') is None or f
['preference'] >= -1000]
3017 header_line
= ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', ' TBR', 'PROTO',
3018 '|', 'VCODEC', ' VBR', 'ACODEC', ' ABR', ' ASR', 'NOTE']
3022 format_field(f
, 'format_id'),
3023 format_field(f
, 'ext'),
3024 self
.format_resolution(f
),
3025 self
._format
_note
(f
)]
3027 if f
.get('preference') is None or f
['preference'] >= -1000]
3028 header_line
= ['format code', 'extension', 'resolution', 'note']
3031 '[info] Available formats for %s:\n%s' % (info_dict
['id'], render_table(
3035 extraGap
=(0 if new_format
else 1),
3036 hideEmpty
=new_format
)))
3038 def list_thumbnails(self
, info_dict
):
3039 thumbnails
= info_dict
.get('thumbnails')
3041 self
.to_screen('[info] No thumbnails present for %s' % info_dict
['id'])
3045 '[info] Thumbnails for %s:' % info_dict
['id'])
3046 self
.to_screen(render_table(
3047 ['ID', 'width', 'height', 'URL'],
3048 [[t
['id'], t
.get('width', 'unknown'), t
.get('height', 'unknown'), t
['url']] for t
in thumbnails
]))
3050 def list_subtitles(self
, video_id
, subtitles
, name
='subtitles'):
3052 self
.to_screen('%s has no %s' % (video_id
, name
))
3055 'Available %s for %s:' % (name
, video_id
))
3057 def _row(lang
, formats
):
3058 exts
, names
= zip(*((f
['ext'], f
.get('name', 'unknown')) for f
in reversed(formats
)))
3059 if len(set(names
)) == 1:
3060 names
= [] if names
[0] == 'unknown' else names
[:1]
3061 return [lang
, ', '.join(names
), ', '.join(exts
)]
3063 self
.to_screen(render_table(
3064 ['Language', 'Name', 'Formats'],
3065 [_row(lang
, formats
) for lang
, formats
in subtitles
.items()],
3068 def urlopen(self
, req
):
3069 """ Start an HTTP download """
3070 if isinstance(req
, compat_basestring
):
3071 req
= sanitized_Request(req
)
3072 return self
._opener
.open(req
, timeout
=self
._socket
_timeout
)
3074 def print_debug_header(self
):
3075 if not self
.params
.get('verbose'):
3078 if type('') is not compat_str
:
3079 # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
3080 self
.report_warning(
3081 'Your Python is broken! Update to a newer and supported version')
3083 stdout_encoding
= getattr(
3084 sys
.stdout
, 'encoding', 'missing (%s)' % type(sys
.stdout
).__name
__)
3086 '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
3087 locale
.getpreferredencoding(),
3088 sys
.getfilesystemencoding(),
3090 self
.get_encoding()))
3091 write_string(encoding_str
, encoding
=None)
3094 '(exe)' if hasattr(sys
, 'frozen')
3095 else '(zip)' if isinstance(globals().get('__loader__'), zipimporter
)
3096 else '(source)' if os
.path
.basename(sys
.argv
[0]) == '__main__.py'
3098 self
._write
_string
('[debug] yt-dlp version %s %s\n' % (__version__
, source
))
3100 self
._write
_string
('[debug] Lazy loading extractors enabled\n')
3103 '[debug] Plugin Extractors: %s\n' % [ie
.ie_key() for ie
in _PLUGIN_CLASSES
])
3104 if self
.params
.get('compat_opts'):
3106 '[debug] Compatibility options: %s\n' % ', '.join(self
.params
.get('compat_opts')))
3108 sp
= subprocess
.Popen(
3109 ['git', 'rev-parse', '--short', 'HEAD'],
3110 stdout
=subprocess
.PIPE
, stderr
=subprocess
.PIPE
,
3111 cwd
=os
.path
.dirname(os
.path
.abspath(__file__
)))
3112 out
, err
= process_communicate_or_kill(sp
)
3113 out
= out
.decode().strip()
3114 if re
.match('[0-9a-f]+', out
):
3115 self
._write
_string
('[debug] Git HEAD: %s\n' % out
)
3122 def python_implementation():
3123 impl_name
= platform
.python_implementation()
3124 if impl_name
== 'PyPy' and hasattr(sys
, 'pypy_version_info'):
3125 return impl_name
+ ' version %d.%d.%d' % sys
.pypy_version_info
[:3]
3128 self
._write
_string
('[debug] Python version %s (%s %s) - %s\n' % (
3129 platform
.python_version(),
3130 python_implementation(),
3131 platform
.architecture()[0],
3134 exe_versions
= FFmpegPostProcessor
.get_versions(self
)
3135 exe_versions
['rtmpdump'] = rtmpdump_version()
3136 exe_versions
['phantomjs'] = PhantomJSwrapper
._version
()
3137 exe_str
= ', '.join(
3139 for exe
, v
in sorted(exe_versions
.items())
3144 self
._write
_string
('[debug] exe versions: %s\n' % exe_str
)
3147 for handler
in self
._opener
.handlers
:
3148 if hasattr(handler
, 'proxies'):
3149 proxy_map
.update(handler
.proxies
)
3150 self
._write
_string
('[debug] Proxy map: ' + compat_str(proxy_map
) + '\n')
3152 if self
.params
.get('call_home', False):
3153 ipaddr
= self
.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
3154 self
._write
_string
('[debug] Public IP address: %s\n' % ipaddr
)
3156 latest_version
= self
.urlopen(
3157 'https://yt-dl.org/latest/version').read().decode('utf-8')
3158 if version_tuple(latest_version
) > version_tuple(__version__
):
3159 self
.report_warning(
3160 'You are using an outdated version (newest version: %s)! '
3161 'See https://yt-dl.org/update if you need help updating.' %
3164 def _setup_opener(self
):
3165 timeout_val
= self
.params
.get('socket_timeout')
3166 self
._socket
_timeout
= 600 if timeout_val
is None else float(timeout_val
)
3168 opts_cookiefile
= self
.params
.get('cookiefile')
3169 opts_proxy
= self
.params
.get('proxy')
3171 if opts_cookiefile
is None:
3172 self
.cookiejar
= compat_cookiejar
.CookieJar()
3174 opts_cookiefile
= expand_path(opts_cookiefile
)
3175 self
.cookiejar
= YoutubeDLCookieJar(opts_cookiefile
)
3176 if os
.access(opts_cookiefile
, os
.R_OK
):
3177 self
.cookiejar
.load(ignore_discard
=True, ignore_expires
=True)
3179 cookie_processor
= YoutubeDLCookieProcessor(self
.cookiejar
)
3180 if opts_proxy
is not None:
3181 if opts_proxy
== '':
3184 proxies
= {'http': opts_proxy, 'https': opts_proxy}
3186 proxies
= compat_urllib_request
.getproxies()
3187 # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
3188 if 'http' in proxies
and 'https' not in proxies
:
3189 proxies
['https'] = proxies
['http']
3190 proxy_handler
= PerRequestProxyHandler(proxies
)
3192 debuglevel
= 1 if self
.params
.get('debug_printtraffic') else 0
3193 https_handler
= make_HTTPS_handler(self
.params
, debuglevel
=debuglevel
)
3194 ydlh
= YoutubeDLHandler(self
.params
, debuglevel
=debuglevel
)
3195 redirect_handler
= YoutubeDLRedirectHandler()
3196 data_handler
= compat_urllib_request_DataHandler()
3198 # When passing our own FileHandler instance, build_opener won't add the
3199 # default FileHandler and allows us to disable the file protocol, which
3200 # can be used for malicious purposes (see
3201 # https://github.com/ytdl-org/youtube-dl/issues/8227)
3202 file_handler
= compat_urllib_request
.FileHandler()
3204 def file_open(*args
, **kwargs
):
3205 raise compat_urllib_error
.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
3206 file_handler
.file_open
= file_open
3208 opener
= compat_urllib_request
.build_opener(
3209 proxy_handler
, https_handler
, cookie_processor
, ydlh
, redirect_handler
, data_handler
, file_handler
)
3211 # Delete the default user-agent header, which would otherwise apply in
3212 # cases where our custom HTTP handler doesn't come into play
3213 # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
3214 opener
.addheaders
= []
3215 self
._opener
= opener
3217 def encode(self
, s
):
3218 if isinstance(s
, bytes):
3219 return s
# Already encoded
3222 return s
.encode(self
.get_encoding())
3223 except UnicodeEncodeError as err
:
3224 err
.reason
= err
.reason
+ '. Check your system encoding configuration or use the --encoding option.'
3227 def get_encoding(self
):
3228 encoding
= self
.params
.get('encoding')
3229 if encoding
is None:
3230 encoding
= preferredencoding()
3233 def _write_thumbnails(self
, info_dict
, filename
): # return the extensions
3234 write_all
= self
.params
.get('write_all_thumbnails', False)
3236 if write_all
or self
.params
.get('writethumbnail', False):
3237 thumbnails
= info_dict
.get('thumbnails') or []
3238 multiple
= write_all
and len(thumbnails
) > 1
3241 for t
in thumbnails
[::1 if write_all
else -1]:
3242 thumb_ext
= determine_ext(t
['url'], 'jpg')
3243 suffix
= '%s.' % t
['id'] if multiple
else ''
3244 thumb_display_id
= '%s ' % t
['id'] if multiple
else ''
3245 thumb_filename
= replace_extension(filename
, suffix
+ thumb_ext
, info_dict
.get('ext'))
3247 if not self
.params
.get('overwrites', True) and os
.path
.exists(encodeFilename(thumb_filename
)):
3248 ret
.append(suffix
+ thumb_ext
)
3249 self
.to_screen('[%s] %s: Thumbnail %sis already present' %
3250 (info_dict
['extractor'], info_dict
['id'], thumb_display_id
))
3252 self
.to_screen('[%s] %s: Downloading thumbnail %s ...' %
3253 (info_dict
['extractor'], info_dict
['id'], thumb_display_id
))
3255 uf
= self
.urlopen(t
['url'])
3256 with open(encodeFilename(thumb_filename
), 'wb') as thumbf
:
3257 shutil
.copyfileobj(uf
, thumbf
)
3258 ret
.append(suffix
+ thumb_ext
)
3259 self
.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
3260 (info_dict
['extractor'], info_dict
['id'], thumb_display_id
, thumb_filename
))
3261 t
['filepath'] = thumb_filename
3262 except network_exceptions
as err
:
3263 self
.report_warning('Unable to download thumbnail "%s": %s' %
3264 (t
['url'], error_to_compat_str(err
)))
3265 if ret
and not write_all
: