4 from __future__
import absolute_import
, unicode_literals
29 from string
import ascii_letters
30 from zipimport
import zipimporter
34 compat_get_terminal_size
,
39 compat_tokenize_tokenize
,
41 compat_urllib_request
,
42 compat_urllib_request_DataHandler
,
44 from .cookies
import load_cookies
54 DOT_DESKTOP_LINK_TEMPLATE
,
55 DOT_URL_LINK_TEMPLATE
,
56 DOT_WEBLOC_LINK_TEMPLATE
,
85 PerRequestProxyHandler
,
90 process_communicate_or_kill
,
91 register_socks_protocols
,
108 UnavailableVideoError
,
113 YoutubeDLCookieProcessor
,
115 YoutubeDLRedirectHandler
,
117 from .cache
import Cache
118 from .extractor
import (
119 gen_extractor_classes
,
124 from .extractor
.openload
import PhantomJSwrapper
125 from .downloader
import (
126 get_suitable_downloader
,
127 shorten_protocol_name
129 from .downloader
.rtmp
import rtmpdump_version
130 from .postprocessor
import (
132 FFmpegFixupDurationPP
,
135 FFmpegFixupStretchedPP
,
136 FFmpegFixupTimestampPP
,
139 MoveFilesAfterDownloadPP
,
141 from .version
import __version__
143 if compat_os_name
== 'nt':
147 class YoutubeDL(object):
150 YoutubeDL objects are the ones responsible of downloading the
151 actual video file and writing it to disk if the user has requested
152 it, among some other tasks. In most cases there should be one per
153 program. As, given a video URL, the downloader doesn't know how to
154 extract all the needed information, task that InfoExtractors do, it
155 has to pass the URL to one of them.
157 For this, YoutubeDL objects have a method that allows
158 InfoExtractors to be registered in a given order. When it is passed
159 a URL, the YoutubeDL object handles it to the first InfoExtractor it
160 finds that reports being able to handle it. The InfoExtractor extracts
161 all the information about the video or videos the URL refers to, and
162 YoutubeDL process the extracted information, possibly using a File
163 Downloader to download the video.
165 YoutubeDL objects accept a lot of parameters. In order not to saturate
166 the object constructor with arguments, it receives a dictionary of
167 options instead. These options are available through the params
168 attribute for the InfoExtractors to use. The YoutubeDL also
169 registers itself as the downloader in charge for the InfoExtractors
170 that are added to it, so this is a "mutual registration".
174 username: Username for authentication purposes.
175 password: Password for authentication purposes.
176 videopassword: Password for accessing a video.
177 ap_mso: Adobe Pass multiple-system operator identifier.
178 ap_username: Multiple-system operator account username.
179 ap_password: Multiple-system operator account password.
180 usenetrc: Use netrc for authentication instead.
181 verbose: Print additional info to stdout.
182 quiet: Do not print messages to stdout.
183 no_warnings: Do not print out anything for warnings.
184 forceprint: A list of templates to force print
185 forceurl: Force printing final URL. (Deprecated)
186 forcetitle: Force printing title. (Deprecated)
187 forceid: Force printing ID. (Deprecated)
188 forcethumbnail: Force printing thumbnail URL. (Deprecated)
189 forcedescription: Force printing description. (Deprecated)
190 forcefilename: Force printing final filename. (Deprecated)
191 forceduration: Force printing duration. (Deprecated)
192 forcejson: Force printing info_dict as JSON.
193 dump_single_json: Force printing the info_dict of the whole playlist
194 (or video) as a single JSON line.
195 force_write_download_archive: Force writing download archive regardless
196 of 'skip_download' or 'simulate'.
197 simulate: Do not download the video files.
198 format: Video format code. see "FORMAT SELECTION" for more details.
199 allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
200 ignore_no_formats_error: Ignore "No video formats" error. Useful for
201 extracting metadata even if the video is not actually
202 available for download (experimental)
203 format_sort: How to sort the video formats. see "Sorting Formats"
205 format_sort_force: Force the given format_sort. see "Sorting Formats"
207 allow_multiple_video_streams: Allow multiple video streams to be merged
209 allow_multiple_audio_streams: Allow multiple audio streams to be merged
211 check_formats Whether to test if the formats are downloadable.
212 Can be True (check all), False (check none)
213 or None (check only if requested by extractor)
214 paths: Dictionary of output paths. The allowed keys are 'home'
215 'temp' and the keys of OUTTMPL_TYPES (in utils.py)
216 outtmpl: Dictionary of templates for output names. Allowed keys
217 are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
218 A string is also accepted for backward compatibility
219 outtmpl_na_placeholder: Placeholder for unavailable meta fields.
220 restrictfilenames: Do not allow "&" and spaces in file names
221 trim_file_name: Limit length of filename (extension excluded)
222 windowsfilenames: Force the filenames to be windows compatible
223 ignoreerrors: Do not stop on download errors
224 (Default True when running yt-dlp,
225 but False when directly accessing YoutubeDL class)
226 skip_playlist_after_errors: Number of allowed failures until the rest of
227 the playlist is skipped
228 force_generic_extractor: Force downloader to use the generic extractor
229 overwrites: Overwrite all video and metadata files if True,
230 overwrite only non-video files if None
231 and don't overwrite any file if False
232 playliststart: Playlist item to start at.
233 playlistend: Playlist item to end at.
234 playlist_items: Specific indices of playlist to download.
235 playlistreverse: Download playlist items in reverse order.
236 playlistrandom: Download playlist items in random order.
237 matchtitle: Download only matching titles.
238 rejecttitle: Reject downloads for matching titles.
239 logger: Log messages to a logging.Logger instance.
240 logtostderr: Log messages to stderr instead of stdout.
241 writedescription: Write the video description to a .description file
242 writeinfojson: Write the video description to a .info.json file
243 clean_infojson: Remove private fields from the infojson
244 writecomments: Extract video comments. This will not be written to disk
245 unless writeinfojson is also given
246 writeannotations: Write the video annotations to a .annotations.xml file
247 writethumbnail: Write the thumbnail image to a file
248 allow_playlist_files: Whether to write playlists' description, infojson etc
249 also to disk when using the 'write*' options
250 write_all_thumbnails: Write all thumbnail formats to files
251 writelink: Write an internet shortcut file, depending on the
252 current platform (.url/.webloc/.desktop)
253 writeurllink: Write a Windows internet shortcut file (.url)
254 writewebloclink: Write a macOS internet shortcut file (.webloc)
255 writedesktoplink: Write a Linux internet shortcut file (.desktop)
256 writesubtitles: Write the video subtitles to a file
257 writeautomaticsub: Write the automatically generated subtitles to a file
258 allsubtitles: Deprecated - Use subtitleslangs = ['all']
259 Downloads all the subtitles of the video
260 (requires writesubtitles or writeautomaticsub)
261 listsubtitles: Lists all available subtitles for the video
262 subtitlesformat: The format code for subtitles
263 subtitleslangs: List of languages of the subtitles to download (can be regex).
264 The list may contain "all" to refer to all the available
265 subtitles. The language can be prefixed with a "-" to
266 exclude it from the requested languages. Eg: ['all', '-live_chat']
267 keepvideo: Keep the video file after post-processing
268 daterange: A DateRange object, download only if the upload_date is in the range.
269 skip_download: Skip the actual download of the video file
270 cachedir: Location of the cache files in the filesystem.
271 False to disable filesystem cache.
272 noplaylist: Download single video instead of a playlist if in doubt.
273 age_limit: An integer representing the user's age in years.
274 Unsuitable videos for the given age are skipped.
275 min_views: An integer representing the minimum view count the video
276 must have in order to not be skipped.
277 Videos without view count information are always
278 downloaded. None for no limit.
279 max_views: An integer representing the maximum view count.
280 Videos that are more popular than that are not
282 Videos without view count information are always
283 downloaded. None for no limit.
284 download_archive: File name of a file where all downloads are recorded.
285 Videos already present in the file are not downloaded
287 break_on_existing: Stop the download process after attempting to download a
288 file that is in the archive.
289 break_on_reject: Stop the download process when encountering a video that
290 has been filtered out.
291 cookiefile: File name where cookies should be read from and dumped to
292 cookiesfrombrowser: A tuple containing the name of the browser and the profile
293 name/path from where cookies are loaded.
294 Eg: ('chrome', ) or ('vivaldi', 'default')
295 nocheckcertificate:Do not verify SSL certificates
296 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
297 At the moment, this is only supported by YouTube.
298 proxy: URL of the proxy server to use
299 geo_verification_proxy: URL of the proxy to use for IP address verification
300 on geo-restricted sites.
301 socket_timeout: Time to wait for unresponsive hosts, in seconds
302 bidi_workaround: Work around buggy terminals without bidirectional text
303 support, using fribidi
304 debug_printtraffic:Print out sent and received HTTP traffic
305 include_ads: Download ads as well
306 default_search: Prepend this string if an input url is not valid.
307 'auto' for elaborate guessing
308 encoding: Use this encoding instead of the system-specified.
309 extract_flat: Do not resolve URLs, return the immediate result.
310 Pass in 'in_playlist' to only show this behavior for
312 postprocessors: A list of dictionaries, each with an entry
313 * key: The name of the postprocessor. See
314 yt_dlp/postprocessor/__init__.py for a list.
315 * when: When to run the postprocessor. Can be one of
316 pre_process|before_dl|post_process|after_move.
317 Assumed to be 'post_process' if not given
318 post_hooks: A list of functions that get called as the final step
319 for each video file, after all postprocessors have been
320 called. The filename will be passed as the only argument.
321 progress_hooks: A list of functions that get called on download
322 progress, with a dictionary with the entries
323 * status: One of "downloading", "error", or "finished".
324 Check this first and ignore unknown values.
326 If status is one of "downloading", or "finished", the
327 following properties may also be present:
328 * filename: The final filename (always present)
329 * tmpfilename: The filename we're currently writing to
330 * downloaded_bytes: Bytes on disk
331 * total_bytes: Size of the whole file, None if unknown
332 * total_bytes_estimate: Guess of the eventual file size,
334 * elapsed: The number of seconds since download started.
335 * eta: The estimated time in seconds, None if unknown
336 * speed: The download speed in bytes/second, None if
338 * fragment_index: The counter of the currently
339 downloaded video fragment.
340 * fragment_count: The number of fragments (= individual
341 files that will be merged)
343 Progress hooks are guaranteed to be called at least once
344 (with status "finished") if the download is successful.
345 merge_output_format: Extension to use when merging formats.
346 final_ext: Expected final extension; used to detect when the file was
347 already downloaded and converted. "merge_output_format" is
348 replaced by this extension when given
349 fixup: Automatically correct known faults of the file.
351 - "never": do nothing
352 - "warn": only emit a warning
353 - "detect_or_warn": check whether we can do anything
354 about it, warn otherwise (default)
355 source_address: Client-side IP address to bind to.
356 call_home: Boolean, true iff we are allowed to contact the
357 yt-dlp servers for debugging. (BROKEN)
358 sleep_interval_requests: Number of seconds to sleep between requests
360 sleep_interval: Number of seconds to sleep before each download when
361 used alone or a lower bound of a range for randomized
362 sleep before each download (minimum possible number
363 of seconds to sleep) when used along with
365 max_sleep_interval:Upper bound of a range for randomized sleep before each
366 download (maximum possible number of seconds to sleep).
367 Must only be used along with sleep_interval.
368 Actual sleep time will be a random float from range
369 [sleep_interval; max_sleep_interval].
370 sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
371 listformats: Print an overview of available video formats and exit.
372 list_thumbnails: Print a table of all thumbnails and exit.
373 match_filter: A function that gets called with the info_dict of
375 If it returns a message, the video is ignored.
376 If it returns None, the video is downloaded.
377 match_filter_func in utils.py is one example for this.
378 no_color: Do not emit color codes in output.
379 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
382 Two-letter ISO 3166-2 country code that will be used for
383 explicit geographic restriction bypassing via faking
384 X-Forwarded-For HTTP header
386 IP range in CIDR notation that will be used similarly to
389 The following options determine which downloader is picked:
390 external_downloader: A dictionary of protocol keys and the executable of the
391 external downloader to use for it. The allowed protocols
392 are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
393 Set the value to 'native' to use the native downloader
394 hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
395 or {'m3u8': 'ffmpeg'} instead.
396 Use the native HLS downloader instead of ffmpeg/avconv
397 if True, otherwise use ffmpeg/avconv if False, otherwise
398 use downloader suggested by extractor if None.
399 compat_opts: Compatibility options. See "Differences in default behavior".
400 The following options do not work when used through the API:
401 filename, abort-on-error, multistreams, no-live-chat,
402 no-playlist-metafiles. Refer __init__.py for their implementation
404 The following parameters are not used by YoutubeDL itself, they are used by
405 the downloader (see yt_dlp/downloader/common.py):
406 nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
407 max_filesize, test, noresizebuffer, retries, continuedl, noprogress, consoletitle,
408 xattr_set_filesize, external_downloader_args, hls_use_mpegts, http_chunk_size.
410 The following options are used by the post processors:
411 prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
412 otherwise prefer ffmpeg. (avconv support is deprecated)
413 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
414 to the binary or its containing directory.
415 postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
416 and a list of additional command-line arguments for the
417 postprocessor/executable. The dict can also have "PP+EXE" keys
418 which are used when the given exe is used by the given PP.
419 Use 'default' as the name for arguments to passed to all PP
421 The following options are used by the extractors:
422 extractor_retries: Number of times to retry for known errors
423 dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
424 hls_split_discontinuity: Split HLS playlists to different formats at
425 discontinuities such as ad breaks (default: False)
426 extractor_args: A dictionary of arguments to be passed to the extractors.
427 See "EXTRACTOR ARGUMENTS" for details.
428 Eg: {'youtube': {'skip': ['dash', 'hls']}}
429 youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
430 If True (default), DASH manifests and related
431 data will be downloaded and processed by extractor.
432 You can reduce network I/O by disabling it if you don't
433 care about DASH. (only for youtube)
434 youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
435 If True (default), HLS manifests and related
436 data will be downloaded and processed by extractor.
437 You can reduce network I/O by disabling it if you don't
438 care about HLS. (only for youtube)
441 _NUMERIC_FIELDS
= set((
442 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
443 'timestamp', 'upload_year', 'upload_month', 'upload_day',
444 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
445 'average_rating', 'comment_count', 'age_limit',
446 'start_time', 'end_time',
447 'chapter_number', 'season_number', 'episode_number',
448 'track_number', 'disc_number', 'release_year',
454 _pps
= {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
455 _printed_messages
= set()
456 _first_webpage_request
= True
457 _download_retcode
= None
458 _num_downloads
= None
460 _playlist_urls
= set()
463 def __init__(self
, params
=None, auto_init
=True):
464 """Create a FileDownloader object with the given options."""
468 self
._ies
_instances
= {}
469 self
._pps
= {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
470 self
._printed
_messages
= set()
471 self
._first
_webpage
_request
= True
472 self
._post
_hooks
= []
473 self
._progress
_hooks
= []
474 self
._download
_retcode
= 0
475 self
._num
_downloads
= 0
476 self
._screen
_file
= [sys
.stdout
, sys
.stderr
][params
.get('logtostderr', False)]
477 self
._err
_file
= sys
.stderr
480 'nocheckcertificate': False,
482 self
.params
.update(params
)
483 self
.cache
= Cache(self
)
485 if sys
.version_info
< (3, 6):
487 'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys
.version_info
[:2])
489 def check_deprecated(param
, option
, suggestion
):
490 if self
.params
.get(param
) is not None:
491 self
.report_warning('%s is deprecated. Use %s instead' % (option
, suggestion
))
495 if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
496 if self
.params
.get('geo_verification_proxy') is None:
497 self
.params
['geo_verification_proxy'] = self
.params
['cn_verification_proxy']
499 check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
500 check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
501 check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
503 for msg
in self
.params
.get('warnings', []):
504 self
.report_warning(msg
)
506 if self
.params
.get('final_ext'):
507 if self
.params
.get('merge_output_format'):
508 self
.report_warning('--merge-output-format will be ignored since --remux-video or --recode-video is given')
509 self
.params
['merge_output_format'] = self
.params
['final_ext']
511 if 'overwrites' in self
.params
and self
.params
['overwrites'] is None:
512 del self
.params
['overwrites']
514 if params
.get('bidi_workaround', False):
517 master
, slave
= pty
.openpty()
518 width
= compat_get_terminal_size().columns
522 width_args
= ['-w', str(width
)]
524 stdin
=subprocess
.PIPE
,
526 stderr
=self
._err
_file
)
528 self
._output
_process
= subprocess
.Popen(
529 ['bidiv'] + width_args
, **sp_kwargs
532 self
._output
_process
= subprocess
.Popen(
533 ['fribidi', '-c', 'UTF-8'] + width_args
, **sp_kwargs
)
534 self
._output
_channel
= os
.fdopen(master
, 'rb')
535 except OSError as ose
:
536 if ose
.errno
== errno
.ENOENT
:
537 self
.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
541 if (sys
.platform
!= 'win32'
542 and sys
.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
543 and not params
.get('restrictfilenames', False)):
544 # Unicode filesystem API will throw errors (#1474, #13027)
546 'Assuming --restrict-filenames since file system encoding '
547 'cannot encode all characters. '
548 'Set the LC_ALL environment variable to fix this.')
549 self
.params
['restrictfilenames'] = True
551 self
.outtmpl_dict
= self
.parse_outtmpl()
553 # Creating format selector here allows us to catch syntax errors before the extraction
554 self
.format_selector
= (
555 None if self
.params
.get('format') is None
556 else self
.build_format_selector(self
.params
['format']))
560 """Preload the archive, if any is specified"""
561 def preload_download_archive(fn
):
564 self
.write_debug('Loading archive file %r\n' % fn
)
566 with locked_file(fn
, 'r', encoding
='utf-8') as archive_file
:
567 for line
in archive_file
:
568 self
.archive
.add(line
.strip())
569 except IOError as ioe
:
570 if ioe
.errno
!= errno
.ENOENT
:
576 preload_download_archive(self
.params
.get('download_archive'))
579 self
.print_debug_header()
580 self
.add_default_info_extractors()
582 for pp_def_raw
in self
.params
.get('postprocessors', []):
583 pp_def
= dict(pp_def_raw
)
584 when
= pp_def
.pop('when', 'post_process')
585 pp_class
= get_postprocessor(pp_def
.pop('key'))
586 pp
= pp_class(self
, **compat_kwargs(pp_def
))
587 self
.add_post_processor(pp
, when
=when
)
589 for ph
in self
.params
.get('post_hooks', []):
590 self
.add_post_hook(ph
)
592 for ph
in self
.params
.get('progress_hooks', []):
593 self
.add_progress_hook(ph
)
595 register_socks_protocols()
597 def warn_if_short_id(self
, argv
):
598 # short YouTube ID starting with dash?
600 i
for i
, a
in enumerate(argv
)
601 if re
.match(r
'^-[0-9A-Za-z_-]{10}$', a
)]
605 + [a
for i
, a
in enumerate(argv
) if i
not in idxs
]
606 + ['--'] + [argv
[i
] for i
in idxs
]
609 'Long argument string detected. '
610 'Use -- to separate parameters and URLs, like this:\n%s\n' %
611 args_to_str(correct_argv
))
613 def add_info_extractor(self
, ie
):
614 """Add an InfoExtractor object to the end of the list."""
616 if not isinstance(ie
, type):
617 self
._ies
_instances
[ie
.ie_key()] = ie
618 ie
.set_downloader(self
)
620 def get_info_extractor(self
, ie_key
):
622 Get an instance of an IE with name ie_key, it will try to get one from
623 the _ies list, if there's no instance it will create a new one and add
624 it to the extractor list.
626 ie
= self
._ies
_instances
.get(ie_key
)
628 ie
= get_info_extractor(ie_key
)()
629 self
.add_info_extractor(ie
)
632 def add_default_info_extractors(self
):
634 Add the InfoExtractors returned by gen_extractors to the end of the list
636 for ie
in gen_extractor_classes():
637 self
.add_info_extractor(ie
)
639 def add_post_processor(self
, pp
, when
='post_process'):
640 """Add a PostProcessor object to the end of the chain."""
641 self
._pps
[when
].append(pp
)
642 pp
.set_downloader(self
)
644 def add_post_hook(self
, ph
):
645 """Add the post hook"""
646 self
._post
_hooks
.append(ph
)
648 def add_progress_hook(self
, ph
):
649 """Add the progress hook (currently only for the file downloader)"""
650 self
._progress
_hooks
.append(ph
)
652 def _bidi_workaround(self
, message
):
653 if not hasattr(self
, '_output_channel'):
656 assert hasattr(self
, '_output_process')
657 assert isinstance(message
, compat_str
)
658 line_count
= message
.count('\n') + 1
659 self
._output
_process
.stdin
.write((message
+ '\n').encode('utf-8'))
660 self
._output
_process
.stdin
.flush()
661 res
= ''.join(self
._output
_channel
.readline().decode('utf-8')
662 for _
in range(line_count
))
663 return res
[:-len('\n')]
665 def _write_string(self
, message
, out
=None, only_once
=False):
667 if message
in self
._printed
_messages
:
669 self
._printed
_messages
.add(message
)
670 write_string(message
, out
=out
, encoding
=self
.params
.get('encoding'))
672 def to_stdout(self
, message
, skip_eol
=False, quiet
=False):
673 """Print message to stdout"""
674 if self
.params
.get('logger'):
675 self
.params
['logger'].debug(message
)
676 elif not quiet
or self
.params
.get('verbose'):
678 '%s%s' % (self
._bidi
_workaround
(message
), ('' if skip_eol
else '\n')),
679 self
._err
_file
if quiet
else self
._screen
_file
)
681 def to_stderr(self
, message
, only_once
=False):
682 """Print message to stderr"""
683 assert isinstance(message
, compat_str
)
684 if self
.params
.get('logger'):
685 self
.params
['logger'].error(message
)
687 self
._write
_string
('%s\n' % self
._bidi
_workaround
(message
), self
._err
_file
, only_once
=only_once
)
689 def to_console_title(self
, message
):
690 if not self
.params
.get('consoletitle', False):
692 if compat_os_name
== 'nt':
693 if ctypes
.windll
.kernel32
.GetConsoleWindow():
694 # c_wchar_p() might not be necessary if `message` is
695 # already of type unicode()
696 ctypes
.windll
.kernel32
.SetConsoleTitleW(ctypes
.c_wchar_p(message
))
697 elif 'TERM' in os
.environ
:
698 self
._write
_string
('\033]0;%s\007' % message
, self
._screen
_file
)
700 def save_console_title(self
):
701 if not self
.params
.get('consoletitle', False):
703 if self
.params
.get('simulate', False):
705 if compat_os_name
!= 'nt' and 'TERM' in os
.environ
:
706 # Save the title on stack
707 self
._write
_string
('\033[22;0t', self
._screen
_file
)
709 def restore_console_title(self
):
710 if not self
.params
.get('consoletitle', False):
712 if self
.params
.get('simulate', False):
714 if compat_os_name
!= 'nt' and 'TERM' in os
.environ
:
715 # Restore the title from stack
716 self
._write
_string
('\033[23;0t', self
._screen
_file
)
719 self
.save_console_title()
722 def __exit__(self
, *args
):
723 self
.restore_console_title()
725 if self
.params
.get('cookiefile') is not None:
726 self
.cookiejar
.save(ignore_discard
=True, ignore_expires
=True)
728 def trouble(self
, message
=None, tb
=None):
729 """Determine action to take when a download problem appears.
731 Depending on if the downloader has been configured to ignore
732 download errors or not, this method may throw an exception or
733 not when errors are found, after printing the message.
735 tb, if given, is additional traceback information.
737 if message
is not None:
738 self
.to_stderr(message
)
739 if self
.params
.get('verbose'):
741 if sys
.exc_info()[0]: # if .trouble has been called from an except block
743 if hasattr(sys
.exc_info()[1], 'exc_info') and sys
.exc_info()[1].exc_info
[0]:
744 tb
+= ''.join(traceback
.format_exception(*sys
.exc_info()[1].exc_info
))
745 tb
+= encode_compat_str(traceback
.format_exc())
747 tb_data
= traceback
.format_list(traceback
.extract_stack())
748 tb
= ''.join(tb_data
)
751 if not self
.params
.get('ignoreerrors', False):
752 if sys
.exc_info()[0] and hasattr(sys
.exc_info()[1], 'exc_info') and sys
.exc_info()[1].exc_info
[0]:
753 exc_info
= sys
.exc_info()[1].exc_info
755 exc_info
= sys
.exc_info()
756 raise DownloadError(message
, exc_info
)
757 self
._download
_retcode
= 1
759 def to_screen(self
, message
, skip_eol
=False):
760 """Print message to stdout if not in quiet mode"""
762 message
, skip_eol
, quiet
=self
.params
.get('quiet', False))
764 def report_warning(self
, message
, only_once
=False):
766 Print the message to stderr, it will be prefixed with 'WARNING:'
767 If stderr is a tty file the 'WARNING:' will be colored
769 if self
.params
.get('logger') is not None:
770 self
.params
['logger'].warning(message
)
772 if self
.params
.get('no_warnings'):
774 if not self
.params
.get('no_color') and self
._err
_file
.isatty() and compat_os_name
!= 'nt':
775 _msg_header
= '\033[0;33mWARNING:\033[0m'
777 _msg_header
= 'WARNING:'
778 warning_message
= '%s %s' % (_msg_header
, message
)
779 self
.to_stderr(warning_message
, only_once
)
781 def report_error(self
, message
, tb
=None):
783 Do the same as trouble, but prefixes the message with 'ERROR:', colored
784 in red if stderr is a tty file.
786 if not self
.params
.get('no_color') and self
._err
_file
.isatty() and compat_os_name
!= 'nt':
787 _msg_header
= '\033[0;31mERROR:\033[0m'
789 _msg_header
= 'ERROR:'
790 error_message
= '%s %s' % (_msg_header
, message
)
791 self
.trouble(error_message
, tb
)
793 def write_debug(self
, message
, only_once
=False):
794 '''Log debug message or Print message to stderr'''
795 if not self
.params
.get('verbose', False):
797 message
= '[debug] %s' % message
798 if self
.params
.get('logger'):
799 self
.params
['logger'].debug(message
)
801 self
.to_stderr(message
, only_once
)
803 def report_file_already_downloaded(self
, file_name
):
804 """Report file has already been fully downloaded."""
806 self
.to_screen('[download] %s has already been downloaded' % file_name
)
807 except UnicodeEncodeError:
808 self
.to_screen('[download] The file has already been downloaded')
810 def report_file_delete(self
, file_name
):
811 """Report that existing file will be deleted."""
813 self
.to_screen('Deleting existing file %s' % file_name
)
814 except UnicodeEncodeError:
815 self
.to_screen('Deleting existing file')
817 def parse_outtmpl(self
):
818 outtmpl_dict
= self
.params
.get('outtmpl', {})
819 if not isinstance(outtmpl_dict
, dict):
820 outtmpl_dict
= {'default': outtmpl_dict}
821 outtmpl_dict
.update({
822 k
: v
for k
, v
in DEFAULT_OUTTMPL
.items()
823 if not outtmpl_dict
.get(k
)})
824 for key
, val
in outtmpl_dict
.items():
825 if isinstance(val
, bytes):
827 'Parameter outtmpl is bytes, but should be a unicode string. '
828 'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
831 def get_output_path(self
, dir_type
='', filename
=None):
832 paths
= self
.params
.get('paths', {})
833 assert isinstance(paths
, dict)
835 expand_path(paths
.get('home', '').strip()),
836 expand_path(paths
.get(dir_type
, '').strip()) if dir_type
else '',
839 # Temporary fix for #4787
840 # 'Treat' all problem characters by passing filename through preferredencoding
841 # to workaround encoding issues with subprocess on python2 @ Windows
842 if sys
.version_info
< (3, 0) and sys
.platform
== 'win32':
843 path
= encodeFilename(path
, True).decode(preferredencoding())
844 return sanitize_path(path
, force
=self
.params
.get('windowsfilenames'))
847 def validate_outtmpl(tmpl
):
848 ''' @return None or Exception object '''
851 STR_FORMAT_RE
.format(''),
852 lambda mobj
: ('%' if not mobj
.group('has_key') else '') + mobj
.group(0),
854 ) % collections
.defaultdict(int)
856 except ValueError as err
:
859 def prepare_outtmpl(self
, outtmpl
, info_dict
, sanitize
=None):
860 """ Make the template and info_dict suitable for substitution (outtmpl % info_dict)"""
861 info_dict
= dict(info_dict
)
862 na
= self
.params
.get('outtmpl_na_placeholder', 'NA')
864 info_dict
['duration_string'] = ( # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
865 formatSeconds(info_dict
['duration'], '-' if sanitize
else ':')
866 if info_dict
.get('duration', None) is not None
868 info_dict
['epoch'] = int(time
.time())
869 info_dict
['autonumber'] = self
.params
.get('autonumber_start', 1) - 1 + self
._num
_downloads
870 if info_dict
.get('resolution') is None:
871 info_dict
['resolution'] = self
.format_resolution(info_dict
, default
=None)
873 # For fields playlist_index and autonumber convert all occurrences
874 # of %(field)s to %(field)0Nd for backward compatibility
875 field_size_compat_map
= {
876 'playlist_index': len(str(info_dict
.get('_last_playlist_index') or '')),
877 'autonumber': self
.params
.get('autonumber_size') or 5,
881 EXTERNAL_FORMAT_RE
= re
.compile(STR_FORMAT_RE
.format('[^)]*'))
886 # Field is of the form key1.key2...
887 # where keys (except first) can be string, int or slice
888 FIELD_RE
= r
'\w+(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num
=r
'(?:-?\d+)')
889 MATH_FIELD_RE
= r
'''{field}|{num}'''.format(field
=FIELD_RE
, num
=r
'-?\d+(?:.\d+)?')
890 MATH_OPERATORS_RE
= r
'(?:%s)' % '|'.join(map(re
.escape
, MATH_FUNCTIONS
.keys()))
891 INTERNAL_FORMAT_RE
= re
.compile(r
'''(?x)
894 (?P<maths>(?:{math_op}{math_field})*)
895 (?:>(?P<strf_format>.+?))?
896 (?:\|(?P<default>.*?))?
897 $'''.format(field
=FIELD_RE
, math_op
=MATH_OPERATORS_RE
, math_field
=MATH_FIELD_RE
))
899 get_key
= lambda k
: traverse_obj(
900 info_dict
, k
.split('.'), is_user_input
=True, traverse_string
=True)
902 def get_value(mdict
):
904 value
= get_key(mdict
['fields'])
907 value
= float_or_none(value
)
908 if value
is not None:
911 offset_key
= mdict
['maths']
913 value
= float_or_none(value
)
917 MATH_FIELD_RE
if operator
else MATH_OPERATORS_RE
,
919 offset_key
= offset_key
[len(item
):]
921 operator
= MATH_FUNCTIONS
[item
]
923 item
, multiplier
= (item
[1:], -1) if item
[0] == '-' else (item
, 1)
924 offset
= float_or_none(item
)
926 offset
= float_or_none(get_key(item
))
928 value
= operator(value
, multiplier
* offset
)
929 except (TypeError, ZeroDivisionError):
932 # Datetime formatting
933 if mdict
['strf_format']:
934 value
= strftime_or_none(value
, mdict
['strf_format'])
938 def create_key(outer_mobj
):
939 if not outer_mobj
.group('has_key'):
940 return '%{}'.format(outer_mobj
.group(0))
942 key
= outer_mobj
.group('key')
943 fmt
= outer_mobj
.group('format')
944 mobj
= re
.match(INTERNAL_FORMAT_RE
, key
)
946 value
, default
, mobj
= None, na
, {'fields': ''}
948 mobj
= mobj
.groupdict()
949 default
= mobj
['default'] if mobj
['default'] is not None else na
950 value
= get_value(mobj
)
952 if fmt
== 's' and value
is not None and key
in field_size_compat_map
.keys():
953 fmt
= '0{:d}d'.format(field_size_compat_map
[key
])
955 value
= default
if value
is None else value
958 value
= compat_str(value
)
960 value
, fmt
= default
, 's'
963 elif fmt
[-1] not in 'rs': # numeric
964 value
= float_or_none(value
)
966 value
, fmt
= default
, 's'
969 # If value is an object, sanitize might convert it to a string
970 # So we convert it to repr first
971 value
, fmt
= repr(value
), '%ss' % fmt
[:-1]
973 value
= sanitize(mobj
['fields'].split('.')[-1], value
)
975 TMPL_DICT
[key
] = value
976 return '%({key}){fmt}'.format(key
=key
, fmt
=fmt
)
978 return EXTERNAL_FORMAT_RE
.sub(create_key
, outtmpl
), TMPL_DICT
980 def _prepare_filename(self
, info_dict
, tmpl_type
='default'):
982 sanitize
= lambda k
, v
: sanitize_filename(
984 restricted
=self
.params
.get('restrictfilenames'),
985 is_id
=(k
== 'id' or k
.endswith('_id')))
986 outtmpl
= self
.outtmpl_dict
.get(tmpl_type
, self
.outtmpl_dict
['default'])
987 outtmpl
, template_dict
= self
.prepare_outtmpl(outtmpl
, info_dict
, sanitize
)
989 # expand_path translates '%%' into '%' and '$$' into '$'
990 # correspondingly that is not what we want since we need to keep
991 # '%%' intact for template dict substitution step. Working around
992 # with boundary-alike separator hack.
993 sep
= ''.join([random
.choice(ascii_letters
) for _
in range(32)])
994 outtmpl
= outtmpl
.replace('%%', '%{0}%'.format(sep
)).replace('$$', '${0}$'.format(sep
))
996 # outtmpl should be expand_path'ed before template dict substitution
997 # because meta fields may contain env variables we don't want to
998 # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
999 # title "Hello $PATH", we don't want `$PATH` to be expanded.
1000 filename
= expand_path(outtmpl
).replace(sep
, '') % template_dict
1002 force_ext
= OUTTMPL_TYPES
.get(tmpl_type
)
1003 if force_ext
is not None:
1004 filename
= replace_extension(filename
, force_ext
, info_dict
.get('ext'))
1006 # https://github.com/blackjack4494/youtube-dlc/issues/85
1007 trim_file_name
= self
.params
.get('trim_file_name', False)
1009 fn_groups
= filename
.rsplit('.')
1012 if len(fn_groups
) > 2:
1013 sub_ext
= fn_groups
[-2]
1014 filename
= '.'.join(filter(None, [fn_groups
[0][:trim_file_name
], sub_ext
, ext
]))
1017 except ValueError as err
:
1018 self
.report_error('Error in output template: ' + str(err
) + ' (encoding: ' + repr(preferredencoding()) + ')')
1021 def prepare_filename(self
, info_dict
, dir_type
='', warn
=False):
1022 """Generate the output filename."""
1024 filename
= self
._prepare
_filename
(info_dict
, dir_type
or 'default')
1027 if not self
.params
.get('paths'):
1029 elif filename
== '-':
1030 self
.report_warning('--paths is ignored when an outputting to stdout', only_once
=True)
1031 elif os
.path
.isabs(filename
):
1032 self
.report_warning('--paths is ignored since an absolute path is given in output template', only_once
=True)
1033 self
.__prepare
_filename
_warned
= True
1034 if filename
== '-' or not filename
:
1037 return self
.get_output_path(dir_type
, filename
)
1039 def _match_entry(self
, info_dict
, incomplete
=False, silent
=False):
1040 """ Returns None if the file should be downloaded """
1042 video_title
= info_dict
.get('title', info_dict
.get('id', 'video'))
1045 if 'title' in info_dict
:
1046 # This can happen when we're just evaluating the playlist
1047 title
= info_dict
['title']
1048 matchtitle
= self
.params
.get('matchtitle', False)
1050 if not re
.search(matchtitle
, title
, re
.IGNORECASE
):
1051 return '"' + title
+ '" title did not match pattern "' + matchtitle
+ '"'
1052 rejecttitle
= self
.params
.get('rejecttitle', False)
1054 if re
.search(rejecttitle
, title
, re
.IGNORECASE
):
1055 return '"' + title
+ '" title matched reject pattern "' + rejecttitle
+ '"'
1056 date
= info_dict
.get('upload_date')
1057 if date
is not None:
1058 dateRange
= self
.params
.get('daterange', DateRange())
1059 if date
not in dateRange
:
1060 return '%s upload date is not in range %s' % (date_from_str(date
).isoformat(), dateRange
)
1061 view_count
= info_dict
.get('view_count')
1062 if view_count
is not None:
1063 min_views
= self
.params
.get('min_views')
1064 if min_views
is not None and view_count
< min_views
:
1065 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title
, view_count
, min_views
)
1066 max_views
= self
.params
.get('max_views')
1067 if max_views
is not None and view_count
> max_views
:
1068 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title
, view_count
, max_views
)
1069 if age_restricted(info_dict
.get('age_limit'), self
.params
.get('age_limit')):
1070 return 'Skipping "%s" because it is age restricted' % video_title
1073 match_filter
= self
.params
.get('match_filter')
1074 if match_filter
is not None:
1075 ret
= match_filter(info_dict
)
1080 if self
.in_download_archive(info_dict
):
1081 reason
= '%s has already been recorded in the archive' % video_title
1082 break_opt
, break_err
= 'break_on_existing', ExistingVideoReached
1084 reason
= check_filter()
1085 break_opt
, break_err
= 'break_on_reject', RejectedVideoReached
1086 if reason
is not None:
1088 self
.to_screen('[download] ' + reason
)
1089 if self
.params
.get(break_opt
, False):
1094 def add_extra_info(info_dict
, extra_info
):
1095 '''Set the keys from extra_info in info dict if they are missing'''
1096 for key
, value
in extra_info
.items():
1097 info_dict
.setdefault(key
, value
)
1099 def extract_info(self
, url
, download
=True, ie_key
=None, extra_info
={},
1100 process
=True, force_generic_extractor
=False):
1102 Return a list with a dictionary for each video extracted.
1105 url -- URL to extract
1108 download -- whether to download videos during extraction
1109 ie_key -- extractor key hint
1110 extra_info -- dictionary containing the extra values to add to each result
1111 process -- whether to resolve all unresolved references (URLs, playlist items),
1112 must be True for download to work.
1113 force_generic_extractor -- force using the generic extractor
1116 if not ie_key
and force_generic_extractor
:
1120 ies
= [self
.get_info_extractor(ie_key
)]
1125 if not ie
.suitable(url
):
1128 ie_key
= ie
.ie_key()
1129 ie
= self
.get_info_extractor(ie_key
)
1130 if not ie
.working():
1131 self
.report_warning('The program functionality for this site has been marked as broken, '
1132 'and will probably not work.')
1135 temp_id
= str_or_none(
1136 ie
.extract_id(url
) if callable(getattr(ie
, 'extract_id', None))
1137 else ie
._match
_id
(url
))
1138 except (AssertionError, IndexError, AttributeError):
1140 if temp_id
is not None and self
.in_download_archive({'id': temp_id, 'ie_key': ie_key}
):
1141 self
.to_screen("[%s] %s: has already been recorded in archive" % (
1144 return self
.__extract
_info
(url
, ie
, download
, extra_info
, process
)
1146 self
.report_error('no suitable InfoExtractor for URL %s' % url
)
1148 def __handle_extraction_exceptions(func
, handle_all_errors
=True):
1149 def wrapper(self
, *args
, **kwargs
):
1151 return func(self
, *args
, **kwargs
)
1152 except GeoRestrictedError
as e
:
1155 msg
+= '\nThis video is available in %s.' % ', '.join(
1156 map(ISO3166Utils
.short2full
, e
.countries
))
1157 msg
+= '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1158 self
.report_error(msg
)
1159 except ExtractorError
as e
: # An error we somewhat expected
1160 self
.report_error(compat_str(e
), e
.format_traceback())
1161 except ThrottledDownload
:
1162 self
.to_stderr('\r')
1163 self
.report_warning('The download speed is below throttle limit. Re-extracting data')
1164 return wrapper(self
, *args
, **kwargs
)
1165 except (MaxDownloadsReached
, ExistingVideoReached
, RejectedVideoReached
):
1167 except Exception as e
:
1168 if handle_all_errors
and self
.params
.get('ignoreerrors', False):
1169 self
.report_error(error_to_compat_str(e
), tb
=encode_compat_str(traceback
.format_exc()))
1174 @__handle_extraction_exceptions
1175 def __extract_info(self
, url
, ie
, download
, extra_info
, process
):
1176 ie_result
= ie
.extract(url
)
1177 if ie_result
is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
1179 if isinstance(ie_result
, list):
1180 # Backwards compatibility: old IE result format
1182 '_type': 'compat_list',
1183 'entries': ie_result
,
1185 if extra_info
.get('original_url'):
1186 ie_result
.setdefault('original_url', extra_info
['original_url'])
1187 self
.add_default_extra_info(ie_result
, ie
, url
)
1189 return self
.process_ie_result(ie_result
, download
, extra_info
)
1193 def add_default_extra_info(self
, ie_result
, ie
, url
):
1195 self
.add_extra_info(ie_result
, {
1197 'original_url': url
,
1198 'webpage_url_basename': url_basename(url
),
1201 self
.add_extra_info(ie_result
, {
1202 'extractor': ie
.IE_NAME
,
1203 'extractor_key': ie
.ie_key(),
1206 def process_ie_result(self
, ie_result
, download
=True, extra_info
={}):
1208 Take the result of the ie(may be modified) and resolve all unresolved
1209 references (URLs, playlist items).
1211 It will also download the videos if 'download'.
1212 Returns the resolved ie_result.
1214 result_type
= ie_result
.get('_type', 'video')
1216 if result_type
in ('url', 'url_transparent'):
1217 ie_result
['url'] = sanitize_url(ie_result
['url'])
1218 if ie_result
.get('original_url'):
1219 extra_info
.setdefault('original_url', ie_result
['original_url'])
1221 extract_flat
= self
.params
.get('extract_flat', False)
1222 if ((extract_flat
== 'in_playlist' and 'playlist' in extra_info
)
1223 or extract_flat
is True):
1224 info_copy
= ie_result
.copy()
1225 self
.add_extra_info(info_copy
, extra_info
)
1226 ie
= try_get(ie_result
.get('ie_key'), self
.get_info_extractor
)
1227 self
.add_default_extra_info(info_copy
, ie
, ie_result
['url'])
1228 self
.__forced
_printings
(info_copy
, self
.prepare_filename(info_copy
), incomplete
=True)
1231 if result_type
== 'video':
1232 self
.add_extra_info(ie_result
, extra_info
)
1233 ie_result
= self
.process_video_result(ie_result
, download
=download
)
1234 additional_urls
= (ie_result
or {}).get('additional_urls')
1236 # TODO: Improve MetadataFromFieldPP to allow setting a list
1237 if isinstance(additional_urls
, compat_str
):
1238 additional_urls
= [additional_urls
]
1240 '[info] %s: %d additional URL(s) requested' % (ie_result
['id'], len(additional_urls
)))
1241 self
.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls
))
1242 ie_result
['additional_entries'] = [
1244 url
, download
, extra_info
,
1245 force_generic_extractor
=self
.params
.get('force_generic_extractor'))
1246 for url
in additional_urls
1249 elif result_type
== 'url':
1250 # We have to add extra_info to the results because it may be
1251 # contained in a playlist
1252 return self
.extract_info(
1253 ie_result
['url'], download
,
1254 ie_key
=ie_result
.get('ie_key'),
1255 extra_info
=extra_info
)
1256 elif result_type
== 'url_transparent':
1257 # Use the information from the embedding page
1258 info
= self
.extract_info(
1259 ie_result
['url'], ie_key
=ie_result
.get('ie_key'),
1260 extra_info
=extra_info
, download
=False, process
=False)
1262 # extract_info may return None when ignoreerrors is enabled and
1263 # extraction failed with an error, don't crash and return early
1268 force_properties
= dict(
1269 (k
, v
) for k
, v
in ie_result
.items() if v
is not None)
1270 for f
in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1271 if f
in force_properties
:
1272 del force_properties
[f
]
1273 new_result
= info
.copy()
1274 new_result
.update(force_properties
)
1276 # Extracted info may not be a video result (i.e.
1277 # info.get('_type', 'video') != video) but rather an url or
1278 # url_transparent. In such cases outer metadata (from ie_result)
1279 # should be propagated to inner one (info). For this to happen
1280 # _type of info should be overridden with url_transparent. This
1281 # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1282 if new_result
.get('_type') == 'url':
1283 new_result
['_type'] = 'url_transparent'
1285 return self
.process_ie_result(
1286 new_result
, download
=download
, extra_info
=extra_info
)
1287 elif result_type
in ('playlist', 'multi_video'):
1288 # Protect from infinite recursion due to recursively nested playlists
1289 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1290 webpage_url
= ie_result
['webpage_url']
1291 if webpage_url
in self
._playlist
_urls
:
1293 '[download] Skipping already downloaded playlist: %s'
1294 % ie_result
.get('title') or ie_result
.get('id'))
1297 self
._playlist
_level
+= 1
1298 self
._playlist
_urls
.add(webpage_url
)
1299 self
._sanitize
_thumbnails
(ie_result
)
1301 return self
.__process
_playlist
(ie_result
, download
)
1303 self
._playlist
_level
-= 1
1304 if not self
._playlist
_level
:
1305 self
._playlist
_urls
.clear()
1306 elif result_type
== 'compat_list':
1307 self
.report_warning(
1308 'Extractor %s returned a compat_list result. '
1309 'It needs to be updated.' % ie_result
.get('extractor'))
1312 self
.add_extra_info(
1315 'extractor': ie_result
['extractor'],
1316 'webpage_url': ie_result
['webpage_url'],
1317 'webpage_url_basename': url_basename(ie_result
['webpage_url']),
1318 'extractor_key': ie_result
['extractor_key'],
1322 ie_result
['entries'] = [
1323 self
.process_ie_result(_fixup(r
), download
, extra_info
)
1324 for r
in ie_result
['entries']
1328 raise Exception('Invalid result type: %s' % result_type
)
1330 def _ensure_dir_exists(self
, path
):
1331 return make_dir(path
, self
.report_error
)
1333 def __process_playlist(self
, ie_result
, download
):
1334 # We process each entry in the playlist
1335 playlist
= ie_result
.get('title') or ie_result
.get('id')
1336 self
.to_screen('[download] Downloading playlist: %s' % playlist
)
1338 if 'entries' not in ie_result
:
1339 raise EntryNotInPlaylist()
1340 incomplete_entries
= bool(ie_result
.get('requested_entries'))
1341 if incomplete_entries
:
1342 def fill_missing_entries(entries
, indexes
):
1343 ret
= [None] * max(*indexes
)
1344 for i
, entry
in zip(indexes
, entries
):
1347 ie_result
['entries'] = fill_missing_entries(ie_result
['entries'], ie_result
['requested_entries'])
1349 playlist_results
= []
1351 playliststart
= self
.params
.get('playliststart', 1)
1352 playlistend
= self
.params
.get('playlistend')
1353 # For backwards compatibility, interpret -1 as whole list
1354 if playlistend
== -1:
1357 playlistitems_str
= self
.params
.get('playlist_items')
1358 playlistitems
= None
1359 if playlistitems_str
is not None:
1360 def iter_playlistitems(format
):
1361 for string_segment
in format
.split(','):
1362 if '-' in string_segment
:
1363 start
, end
= string_segment
.split('-')
1364 for item
in range(int(start
), int(end
) + 1):
1367 yield int(string_segment
)
1368 playlistitems
= orderedSet(iter_playlistitems(playlistitems_str
))
1370 ie_entries
= ie_result
['entries']
1372 'Downloading %d videos' if not isinstance(ie_entries
, list)
1373 else 'Collected %d videos; downloading %%d of them' % len(ie_entries
))
1374 if not isinstance(ie_entries
, (list, PagedList
)):
1375 ie_entries
= LazyList(ie_entries
)
1378 return YoutubeDL
.__handle
_extraction
_exceptions
(
1379 lambda self
, i
: ie_entries
[i
- 1],
1384 for i
in playlistitems
or itertools
.count(playliststart
):
1385 if playlistitems
is None and playlistend
is not None and playlistend
< i
:
1389 entry
= get_entry(i
)
1391 raise EntryNotInPlaylist()
1392 except (IndexError, EntryNotInPlaylist
):
1393 if incomplete_entries
:
1394 raise EntryNotInPlaylist()
1395 elif not playlistitems
:
1397 entries
.append(entry
)
1399 if entry
is not None:
1400 self
._match
_entry
(entry
, incomplete
=True, silent
=True)
1401 except (ExistingVideoReached
, RejectedVideoReached
):
1403 ie_result
['entries'] = entries
1405 # Save playlist_index before re-ordering
1407 ((playlistitems
[i
- 1] if playlistitems
else i
), entry
)
1408 for i
, entry
in enumerate(entries
, 1)
1409 if entry
is not None]
1410 n_entries
= len(entries
)
1412 if not playlistitems
and (playliststart
or playlistend
):
1413 playlistitems
= list(range(playliststart
, playliststart
+ n_entries
))
1414 ie_result
['requested_entries'] = playlistitems
1416 if self
.params
.get('allow_playlist_files', True):
1418 'playlist': playlist
,
1419 'playlist_id': ie_result
.get('id'),
1420 'playlist_title': ie_result
.get('title'),
1421 'playlist_uploader': ie_result
.get('uploader'),
1422 'playlist_uploader_id': ie_result
.get('uploader_id'),
1423 'playlist_index': 0,
1425 ie_copy
.update(dict(ie_result
))
1427 if self
.params
.get('writeinfojson', False):
1428 infofn
= self
.prepare_filename(ie_copy
, 'pl_infojson')
1429 if not self
._ensure
_dir
_exists
(encodeFilename(infofn
)):
1431 if not self
.params
.get('overwrites', True) and os
.path
.exists(encodeFilename(infofn
)):
1432 self
.to_screen('[info] Playlist metadata is already present')
1434 self
.to_screen('[info] Writing playlist metadata as JSON to: ' + infofn
)
1436 write_json_file(self
.filter_requested_info(ie_result
, self
.params
.get('clean_infojson', True)), infofn
)
1437 except (OSError, IOError):
1438 self
.report_error('Cannot write playlist metadata to JSON file ' + infofn
)
1440 # TODO: This should be passed to ThumbnailsConvertor if necessary
1441 self
._write
_thumbnails
(ie_copy
, self
.prepare_filename(ie_copy
, 'pl_thumbnail'))
1443 if self
.params
.get('writedescription', False):
1444 descfn
= self
.prepare_filename(ie_copy
, 'pl_description')
1445 if not self
._ensure
_dir
_exists
(encodeFilename(descfn
)):
1447 if not self
.params
.get('overwrites', True) and os
.path
.exists(encodeFilename(descfn
)):
1448 self
.to_screen('[info] Playlist description is already present')
1449 elif ie_result
.get('description') is None:
1450 self
.report_warning('There\'s no playlist description to write.')
1453 self
.to_screen('[info] Writing playlist description to: ' + descfn
)
1454 with io
.open(encodeFilename(descfn
), 'w', encoding
='utf-8') as descfile
:
1455 descfile
.write(ie_result
['description'])
1456 except (OSError, IOError):
1457 self
.report_error('Cannot write playlist description file ' + descfn
)
1460 if self
.params
.get('playlistreverse', False):
1461 entries
= entries
[::-1]
1462 if self
.params
.get('playlistrandom', False):
1463 random
.shuffle(entries
)
1465 x_forwarded_for
= ie_result
.get('__x_forwarded_for_ip')
1467 self
.to_screen('[%s] playlist %s: %s' % (ie_result
['extractor'], playlist
, msg
% n_entries
))
1469 max_failures
= self
.params
.get('skip_playlist_after_errors') or float('inf')
1470 for i
, entry_tuple
in enumerate(entries
, 1):
1471 playlist_index
, entry
= entry_tuple
1472 if 'playlist_index' in self
.params
.get('compat_options', []):
1473 playlist_index
= playlistitems
[i
- 1] if playlistitems
else i
1474 self
.to_screen('[download] Downloading video %s of %s' % (i
, n_entries
))
1475 # This __x_forwarded_for_ip thing is a bit ugly but requires
1478 entry
['__x_forwarded_for_ip'] = x_forwarded_for
1480 'n_entries': n_entries
,
1481 '_last_playlist_index': max(playlistitems
) if playlistitems
else (playlistend
or n_entries
),
1482 'playlist_index': playlist_index
,
1483 'playlist_autonumber': i
,
1484 'playlist': playlist
,
1485 'playlist_id': ie_result
.get('id'),
1486 'playlist_title': ie_result
.get('title'),
1487 'playlist_uploader': ie_result
.get('uploader'),
1488 'playlist_uploader_id': ie_result
.get('uploader_id'),
1489 'extractor': ie_result
['extractor'],
1490 'webpage_url': ie_result
['webpage_url'],
1491 'webpage_url_basename': url_basename(ie_result
['webpage_url']),
1492 'extractor_key': ie_result
['extractor_key'],
1495 if self
._match
_entry
(entry
, incomplete
=True) is not None:
1498 entry_result
= self
.__process
_iterable
_entry
(entry
, download
, extra
)
1499 if not entry_result
:
1501 if failures
>= max_failures
:
1503 'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist
, failures
))
1505 # TODO: skip failed (empty) entries?
1506 playlist_results
.append(entry_result
)
1507 ie_result
['entries'] = playlist_results
1508 self
.to_screen('[download] Finished downloading playlist: %s' % playlist
)
1511 @__handle_extraction_exceptions
1512 def __process_iterable_entry(self
, entry
, download
, extra_info
):
1513 return self
.process_ie_result(
1514 entry
, download
=download
, extra_info
=extra_info
)
1516 def _build_format_filter(self
, filter_spec
):
1517 " Returns a function to filter the formats according to the filter_spec "
1527 operator_rex
= re
.compile(r
'''(?x)\s*
1528 (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1529 (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1530 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
1531 ''' % '|'.join(map(re
.escape
, OPERATORS
.keys())))
1532 m
= operator_rex
.fullmatch(filter_spec
)
1535 comparison_value
= int(m
.group('value'))
1537 comparison_value
= parse_filesize(m
.group('value'))
1538 if comparison_value
is None:
1539 comparison_value
= parse_filesize(m
.group('value') + 'B')
1540 if comparison_value
is None:
1542 'Invalid value %r in format specification %r' % (
1543 m
.group('value'), filter_spec
))
1544 op
= OPERATORS
[m
.group('op')]
1549 '^=': lambda attr
, value
: attr
.startswith(value
),
1550 '$=': lambda attr
, value
: attr
.endswith(value
),
1551 '*=': lambda attr
, value
: value
in attr
,
1553 str_operator_rex
= re
.compile(r
'''(?x)\s*
1554 (?P<key>[a-zA-Z0-9._-]+)\s*
1555 (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1556 (?P<value>[a-zA-Z0-9._-]+)\s*
1557 ''' % '|'.join(map(re
.escape
, STR_OPERATORS
.keys())))
1558 m
= str_operator_rex
.fullmatch(filter_spec
)
1560 comparison_value
= m
.group('value')
1561 str_op
= STR_OPERATORS
[m
.group('op')]
1562 if m
.group('negation'):
1563 op
= lambda attr
, value
: not str_op(attr
, value
)
1568 raise SyntaxError('Invalid filter specification %r' % filter_spec
)
1571 actual_value
= f
.get(m
.group('key'))
1572 if actual_value
is None:
1573 return m
.group('none_inclusive')
1574 return op(actual_value
, comparison_value
)
1577 def _default_format_spec(self
, info_dict
, download
=True):
1580 merger
= FFmpegMergerPP(self
)
1581 return merger
.available
and merger
.can_merge()
1584 not self
.params
.get('simulate', False)
1588 or info_dict
.get('is_live', False)
1589 or self
.outtmpl_dict
['default'] == '-'))
1592 or self
.params
.get('allow_multiple_audio_streams', False)
1593 or 'format-spec' in self
.params
.get('compat_opts', []))
1596 'best/bestvideo+bestaudio' if prefer_best
1597 else 'bestvideo*+bestaudio/best' if not compat
1598 else 'bestvideo+bestaudio/best')
1600 def build_format_selector(self
, format_spec
):
1601 def syntax_error(note
, start
):
1603 'Invalid format specification: '
1604 '{0}\n\t{1}\n\t{2}^'.format(note
, format_spec
, ' ' * start
[1]))
1605 return SyntaxError(message
)
1607 PICKFIRST
= 'PICKFIRST'
1611 FormatSelector
= collections
.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1613 allow_multiple_streams
= {'audio': self
.params
.get('allow_multiple_audio_streams', False),
1614 'video': self
.params
.get('allow_multiple_video_streams', False)}
1616 check_formats
= self
.params
.get('check_formats')
1618 def _parse_filter(tokens
):
1620 for type, string
, start
, _
, _
in tokens
:
1621 if type == tokenize
.OP
and string
== ']':
1622 return ''.join(filter_parts
)
1624 filter_parts
.append(string
)
1626 def _remove_unused_ops(tokens
):
1627 # Remove operators that we don't use and join them with the surrounding strings
1628 # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1629 ALLOWED_OPS
= ('/', '+', ',', '(', ')')
1630 last_string
, last_start
, last_end
, last_line
= None, None, None, None
1631 for type, string
, start
, end
, line
in tokens
:
1632 if type == tokenize
.OP
and string
== '[':
1634 yield tokenize
.NAME
, last_string
, last_start
, last_end
, last_line
1636 yield type, string
, start
, end
, line
1637 # everything inside brackets will be handled by _parse_filter
1638 for type, string
, start
, end
, line
in tokens
:
1639 yield type, string
, start
, end
, line
1640 if type == tokenize
.OP
and string
== ']':
1642 elif type == tokenize
.OP
and string
in ALLOWED_OPS
:
1644 yield tokenize
.NAME
, last_string
, last_start
, last_end
, last_line
1646 yield type, string
, start
, end
, line
1647 elif type in [tokenize
.NAME
, tokenize
.NUMBER
, tokenize
.OP
]:
1649 last_string
= string
1653 last_string
+= string
1655 yield tokenize
.NAME
, last_string
, last_start
, last_end
, last_line
1657 def _parse_format_selection(tokens
, inside_merge
=False, inside_choice
=False, inside_group
=False):
1659 current_selector
= None
1660 for type, string
, start
, _
, _
in tokens
:
1661 # ENCODING is only defined in python 3.x
1662 if type == getattr(tokenize
, 'ENCODING', None):
1664 elif type in [tokenize
.NAME
, tokenize
.NUMBER
]:
1665 current_selector
= FormatSelector(SINGLE
, string
, [])
1666 elif type == tokenize
.OP
:
1668 if not inside_group
:
1669 # ')' will be handled by the parentheses group
1670 tokens
.restore_last_token()
1672 elif inside_merge
and string
in ['/', ',']:
1673 tokens
.restore_last_token()
1675 elif inside_choice
and string
== ',':
1676 tokens
.restore_last_token()
1679 if not current_selector
:
1680 raise syntax_error('"," must follow a format selector', start
)
1681 selectors
.append(current_selector
)
1682 current_selector
= None
1684 if not current_selector
:
1685 raise syntax_error('"/" must follow a format selector', start
)
1686 first_choice
= current_selector
1687 second_choice
= _parse_format_selection(tokens
, inside_choice
=True)
1688 current_selector
= FormatSelector(PICKFIRST
, (first_choice
, second_choice
), [])
1690 if not current_selector
:
1691 current_selector
= FormatSelector(SINGLE
, 'best', [])
1692 format_filter
= _parse_filter(tokens
)
1693 current_selector
.filters
.append(format_filter
)
1695 if current_selector
:
1696 raise syntax_error('Unexpected "("', start
)
1697 group
= _parse_format_selection(tokens
, inside_group
=True)
1698 current_selector
= FormatSelector(GROUP
, group
, [])
1700 if not current_selector
:
1701 raise syntax_error('Unexpected "+"', start
)
1702 selector_1
= current_selector
1703 selector_2
= _parse_format_selection(tokens
, inside_merge
=True)
1705 raise syntax_error('Expected a selector', start
)
1706 current_selector
= FormatSelector(MERGE
, (selector_1
, selector_2
), [])
1708 raise syntax_error('Operator not recognized: "{0}"'.format(string
), start
)
1709 elif type == tokenize
.ENDMARKER
:
1711 if current_selector
:
1712 selectors
.append(current_selector
)
1715 def _merge(formats_pair
):
1716 format_1
, format_2
= formats_pair
1719 formats_info
.extend(format_1
.get('requested_formats', (format_1
,)))
1720 formats_info
.extend(format_2
.get('requested_formats', (format_2
,)))
1722 if not allow_multiple_streams
['video'] or not allow_multiple_streams
['audio']:
1723 get_no_more
= {'video': False, 'audio': False}
1724 for (i
, fmt_info
) in enumerate(formats_info
):
1725 if fmt_info
.get('acodec') == fmt_info
.get('vcodec') == 'none':
1728 for aud_vid
in ['audio', 'video']:
1729 if not allow_multiple_streams
[aud_vid
] and fmt_info
.get(aud_vid
[0] + 'codec') != 'none':
1730 if get_no_more
[aud_vid
]:
1732 get_no_more
[aud_vid
] = True
1734 if len(formats_info
) == 1:
1735 return formats_info
[0]
1737 video_fmts
= [fmt_info
for fmt_info
in formats_info
if fmt_info
.get('vcodec') != 'none']
1738 audio_fmts
= [fmt_info
for fmt_info
in formats_info
if fmt_info
.get('acodec') != 'none']
1740 the_only_video
= video_fmts
[0] if len(video_fmts
) == 1 else None
1741 the_only_audio
= audio_fmts
[0] if len(audio_fmts
) == 1 else None
1743 output_ext
= self
.params
.get('merge_output_format')
1746 output_ext
= the_only_video
['ext']
1747 elif the_only_audio
and not video_fmts
:
1748 output_ext
= the_only_audio
['ext']
1753 'requested_formats': formats_info
,
1754 'format': '+'.join(fmt_info
.get('format') for fmt_info
in formats_info
),
1755 'format_id': '+'.join(fmt_info
.get('format_id') for fmt_info
in formats_info
),
1761 'width': the_only_video
.get('width'),
1762 'height': the_only_video
.get('height'),
1763 'resolution': the_only_video
.get('resolution') or self
.format_resolution(the_only_video
),
1764 'fps': the_only_video
.get('fps'),
1765 'vcodec': the_only_video
.get('vcodec'),
1766 'vbr': the_only_video
.get('vbr'),
1767 'stretched_ratio': the_only_video
.get('stretched_ratio'),
1772 'acodec': the_only_audio
.get('acodec'),
1773 'abr': the_only_audio
.get('abr'),
1778 def _check_formats(formats
):
1779 if not check_formats
:
1783 self
.to_screen('[info] Testing format %s' % f
['format_id'])
1784 temp_file
= tempfile
.NamedTemporaryFile(
1785 suffix
='.tmp', delete
=False,
1786 dir=self
.get_output_path('temp') or None)
1789 success
, _
= self
.dl(temp_file
.name
, f
, test
=True)
1790 except (DownloadError
, IOError, OSError, ValueError) + network_exceptions
:
1793 if os
.path
.exists(temp_file
.name
):
1795 os
.remove(temp_file
.name
)
1797 self
.report_warning('Unable to delete temporary file "%s"' % temp_file
.name
)
1801 self
.to_screen('[info] Unable to download format %s. Skipping...' % f
['format_id'])
1803 def _build_selector_function(selector
):
1804 if isinstance(selector
, list): # ,
1805 fs
= [_build_selector_function(s
) for s
in selector
]
1807 def selector_function(ctx
):
1810 return selector_function
1812 elif selector
.type == GROUP
: # ()
1813 selector_function
= _build_selector_function(selector
.selector
)
1815 elif selector
.type == PICKFIRST
: # /
1816 fs
= [_build_selector_function(s
) for s
in selector
.selector
]
1818 def selector_function(ctx
):
1820 picked_formats
= list(f(ctx
))
1822 return picked_formats
1825 elif selector
.type == MERGE
: # +
1826 selector_1
, selector_2
= map(_build_selector_function
, selector
.selector
)
1828 def selector_function(ctx
):
1829 for pair
in itertools
.product(
1830 selector_1(copy
.deepcopy(ctx
)), selector_2(copy
.deepcopy(ctx
))):
1833 elif selector
.type == SINGLE
: # atom
1834 format_spec
= selector
.selector
or 'best'
1836 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
1837 if format_spec
== 'all':
1838 def selector_function(ctx
):
1839 yield from _check_formats(ctx
['formats'])
1840 elif format_spec
== 'mergeall':
1841 def selector_function(ctx
):
1842 formats
= list(_check_formats(ctx
['formats']))
1845 merged_format
= formats
[-1]
1846 for f
in formats
[-2::-1]:
1847 merged_format
= _merge((merged_format
, f
))
1851 format_fallback
, format_reverse
, format_idx
= False, True, 1
1853 r
'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
1855 if mobj
is not None:
1856 format_idx
= int_or_none(mobj
.group('n'), default
=1)
1857 format_reverse
= mobj
.group('bw')[0] == 'b'
1858 format_type
= (mobj
.group('type') or [None])[0]
1859 not_format_type
= {'v': 'a', 'a': 'v'}
.get(format_type
)
1860 format_modified
= mobj
.group('mod') is not None
1862 format_fallback
= not format_type
and not format_modified
# for b, w
1864 (lambda f
: f
.get('%scodec' % format_type
) != 'none')
1865 if format_type
and format_modified
# bv*, ba*, wv*, wa*
1866 else (lambda f
: f
.get('%scodec' % not_format_type
) == 'none')
1867 if format_type
# bv, ba, wv, wa
1868 else (lambda f
: f
.get('vcodec') != 'none' and f
.get('acodec') != 'none')
1869 if not format_modified
# b, w
1870 else lambda f
: True) # b*, w*
1871 filter_f
= lambda f
: _filter_f(f
) and (
1872 f
.get('vcodec') != 'none' or f
.get('acodec') != 'none')
1874 filter_f
= ((lambda f
: f
.get('ext') == format_spec
)
1875 if format_spec
in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav'] # extension
1876 else (lambda f
: f
.get('format_id') == format_spec
)) # id
1878 def selector_function(ctx
):
1879 formats
= list(ctx
['formats'])
1880 matches
= list(filter(filter_f
, formats
)) if filter_f
is not None else formats
1881 if format_fallback
and ctx
['incomplete_formats'] and not matches
:
1882 # for extractors with incomplete formats (audio only (soundcloud)
1883 # or video only (imgur)) best/worst will fallback to
1884 # best/worst {video,audio}-only format
1886 matches
= LazyList(_check_formats(matches
[::-1 if format_reverse
else 1]))
1888 yield matches
[format_idx
- 1]
1892 filters
= [self
._build
_format
_filter
(f
) for f
in selector
.filters
]
1894 def final_selector(ctx
):
1895 ctx_copy
= copy
.deepcopy(ctx
)
1896 for _filter
in filters
:
1897 ctx_copy
['formats'] = list(filter(_filter
, ctx_copy
['formats']))
1898 return selector_function(ctx_copy
)
1899 return final_selector
1901 stream
= io
.BytesIO(format_spec
.encode('utf-8'))
1903 tokens
= list(_remove_unused_ops(compat_tokenize_tokenize(stream
.readline
)))
1904 except tokenize
.TokenError
:
1905 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec
)))
1907 class TokenIterator(object):
1908 def __init__(self
, tokens
):
1909 self
.tokens
= tokens
1916 if self
.counter
>= len(self
.tokens
):
1917 raise StopIteration()
1918 value
= self
.tokens
[self
.counter
]
1924 def restore_last_token(self
):
1927 parsed_selector
= _parse_format_selection(iter(TokenIterator(tokens
)))
1928 return _build_selector_function(parsed_selector
)
1930 def _calc_headers(self
, info_dict
):
1931 res
= std_headers
.copy()
1933 add_headers
= info_dict
.get('http_headers')
1935 res
.update(add_headers
)
1937 cookies
= self
._calc
_cookies
(info_dict
)
1939 res
['Cookie'] = cookies
1941 if 'X-Forwarded-For' not in res
:
1942 x_forwarded_for_ip
= info_dict
.get('__x_forwarded_for_ip')
1943 if x_forwarded_for_ip
:
1944 res
['X-Forwarded-For'] = x_forwarded_for_ip
1948 def _calc_cookies(self
, info_dict
):
1949 pr
= sanitized_Request(info_dict
['url'])
1950 self
.cookiejar
.add_cookie_header(pr
)
1951 return pr
.get_header('Cookie')
1953 def _sanitize_thumbnails(self
, info_dict
):
1954 thumbnails
= info_dict
.get('thumbnails')
1955 if thumbnails
is None:
1956 thumbnail
= info_dict
.get('thumbnail')
1958 info_dict
['thumbnails'] = thumbnails
= [{'url': thumbnail}
]
1960 thumbnails
.sort(key
=lambda t
: (
1961 t
.get('preference') if t
.get('preference') is not None else -1,
1962 t
.get('width') if t
.get('width') is not None else -1,
1963 t
.get('height') if t
.get('height') is not None else -1,
1964 t
.get('id') if t
.get('id') is not None else '',
1967 def thumbnail_tester():
1968 if self
.params
.get('check_formats'):
1970 to_screen
= lambda msg
: self
.to_screen(f
'[info] {msg}')
1973 to_screen
= self
.write_debug
1975 def test_thumbnail(t
):
1976 if not test_all
and not t
.get('_test_url'):
1978 to_screen('Testing thumbnail %s' % t
['id'])
1980 self
.urlopen(HEADRequest(t
['url']))
1981 except network_exceptions
as err
:
1982 to_screen('Unable to connect to thumbnail %s URL "%s" - %s. Skipping...' % (
1983 t
['id'], t
['url'], error_to_compat_str(err
)))
1987 return test_thumbnail
1989 for i
, t
in enumerate(thumbnails
):
1990 if t
.get('id') is None:
1992 if t
.get('width') and t
.get('height'):
1993 t
['resolution'] = '%dx%d' % (t
['width'], t
['height'])
1994 t
['url'] = sanitize_url(t
['url'])
1996 if self
.params
.get('check_formats') is not False:
1997 info_dict
['thumbnails'] = LazyList(filter(thumbnail_tester(), thumbnails
[::-1])).reverse()
1999 info_dict
['thumbnails'] = thumbnails
2001 def process_video_result(self
, info_dict
, download
=True):
2002 assert info_dict
.get('_type', 'video') == 'video'
2004 if 'id' not in info_dict
:
2005 raise ExtractorError('Missing "id" field in extractor result')
2006 if 'title' not in info_dict
:
2007 raise ExtractorError('Missing "title" field in extractor result')
2009 def report_force_conversion(field
, field_not
, conversion
):
2010 self
.report_warning(
2011 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2012 % (field
, field_not
, conversion
))
2014 def sanitize_string_field(info
, string_field
):
2015 field
= info
.get(string_field
)
2016 if field
is None or isinstance(field
, compat_str
):
2018 report_force_conversion(string_field
, 'a string', 'string')
2019 info
[string_field
] = compat_str(field
)
2021 def sanitize_numeric_fields(info
):
2022 for numeric_field
in self
._NUMERIC
_FIELDS
:
2023 field
= info
.get(numeric_field
)
2024 if field
is None or isinstance(field
, compat_numeric_types
):
2026 report_force_conversion(numeric_field
, 'numeric', 'int')
2027 info
[numeric_field
] = int_or_none(field
)
2029 sanitize_string_field(info_dict
, 'id')
2030 sanitize_numeric_fields(info_dict
)
2032 if 'playlist' not in info_dict
:
2033 # It isn't part of a playlist
2034 info_dict
['playlist'] = None
2035 info_dict
['playlist_index'] = None
2037 self
._sanitize
_thumbnails
(info_dict
)
2039 thumbnail
= info_dict
.get('thumbnail')
2040 thumbnails
= info_dict
.get('thumbnails')
2042 info_dict
['thumbnail'] = sanitize_url(thumbnail
)
2044 info_dict
['thumbnail'] = thumbnails
[-1]['url']
2046 if info_dict
.get('display_id') is None and 'id' in info_dict
:
2047 info_dict
['display_id'] = info_dict
['id']
2049 for ts_key
, date_key
in (
2050 ('timestamp', 'upload_date'),
2051 ('release_timestamp', 'release_date'),
2053 if info_dict
.get(date_key
) is None and info_dict
.get(ts_key
) is not None:
2054 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2055 # see http://bugs.python.org/issue1646728)
2057 upload_date
= datetime
.datetime
.utcfromtimestamp(info_dict
[ts_key
])
2058 info_dict
[date_key
] = upload_date
.strftime('%Y%m%d')
2059 except (ValueError, OverflowError, OSError):
2062 live_keys
= ('is_live', 'was_live')
2063 live_status
= info_dict
.get('live_status')
2064 if live_status
is None:
2065 for key
in live_keys
:
2066 if info_dict
.get(key
) is False:
2068 if info_dict
.get(key
):
2071 if all(info_dict
.get(key
) is False for key
in live_keys
):
2072 live_status
= 'not_live'
2074 info_dict
['live_status'] = live_status
2075 for key
in live_keys
:
2076 if info_dict
.get(key
) is None:
2077 info_dict
[key
] = (live_status
== key
)
2079 # Auto generate title fields corresponding to the *_number fields when missing
2080 # in order to always have clean titles. This is very common for TV series.
2081 for field
in ('chapter', 'season', 'episode'):
2082 if info_dict
.get('%s_number' % field
) is not None and not info_dict
.get(field
):
2083 info_dict
[field
] = '%s %d' % (field
.capitalize(), info_dict
['%s_number' % field
])
2085 for cc_kind
in ('subtitles', 'automatic_captions'):
2086 cc
= info_dict
.get(cc_kind
)
2088 for _
, subtitle
in cc
.items():
2089 for subtitle_format
in subtitle
:
2090 if subtitle_format
.get('url'):
2091 subtitle_format
['url'] = sanitize_url(subtitle_format
['url'])
2092 if subtitle_format
.get('ext') is None:
2093 subtitle_format
['ext'] = determine_ext(subtitle_format
['url']).lower()
2095 automatic_captions
= info_dict
.get('automatic_captions')
2096 subtitles
= info_dict
.get('subtitles')
2098 info_dict
['requested_subtitles'] = self
.process_subtitles(
2099 info_dict
['id'], subtitles
, automatic_captions
)
2101 # We now pick which formats have to be downloaded
2102 if info_dict
.get('formats') is None:
2103 # There's only one format available
2104 formats
= [info_dict
]
2106 formats
= info_dict
['formats']
2109 if not self
.params
.get('ignore_no_formats_error'):
2110 raise ExtractorError('No video formats found!')
2112 self
.report_warning('No video formats found!')
2114 def is_wellformed(f
):
2117 self
.report_warning(
2118 '"url" field is missing or empty - skipping format, '
2119 'there is an error in extractor')
2121 if isinstance(url
, bytes):
2122 sanitize_string_field(f
, 'url')
2125 # Filter out malformed formats for better extraction robustness
2126 formats
= list(filter(is_wellformed
, formats
))
2130 # We check that all the formats have the format and format_id fields
2131 for i
, format
in enumerate(formats
):
2132 sanitize_string_field(format
, 'format_id')
2133 sanitize_numeric_fields(format
)
2134 format
['url'] = sanitize_url(format
['url'])
2135 if not format
.get('format_id'):
2136 format
['format_id'] = compat_str(i
)
2138 # Sanitize format_id from characters used in format selector expression
2139 format
['format_id'] = re
.sub(r
'[\s,/+\[\]()]', '_', format
['format_id'])
2140 format_id
= format
['format_id']
2141 if format_id
not in formats_dict
:
2142 formats_dict
[format_id
] = []
2143 formats_dict
[format_id
].append(format
)
2145 # Make sure all formats have unique format_id
2146 for format_id
, ambiguous_formats
in formats_dict
.items():
2147 if len(ambiguous_formats
) > 1:
2148 for i
, format
in enumerate(ambiguous_formats
):
2149 format
['format_id'] = '%s-%d' % (format_id
, i
)
2151 for i
, format
in enumerate(formats
):
2152 if format
.get('format') is None:
2153 format
['format'] = '{id} - {res}{note}'.format(
2154 id=format
['format_id'],
2155 res
=self
.format_resolution(format
),
2156 note
=' ({0})'.format(format
['format_note']) if format
.get('format_note') is not None else '',
2158 # Automatically determine file extension if missing
2159 if format
.get('ext') is None:
2160 format
['ext'] = determine_ext(format
['url']).lower()
2161 # Automatically determine protocol if missing (useful for format
2162 # selection purposes)
2163 if format
.get('protocol') is None:
2164 format
['protocol'] = determine_protocol(format
)
2165 # Add HTTP headers, so that external programs can use them from the
2167 full_format_info
= info_dict
.copy()
2168 full_format_info
.update(format
)
2169 format
['http_headers'] = self
._calc
_headers
(full_format_info
)
2170 # Remove private housekeeping stuff
2171 if '__x_forwarded_for_ip' in info_dict
:
2172 del info_dict
['__x_forwarded_for_ip']
2174 # TODO Central sorting goes here
2176 if formats
and formats
[0] is not info_dict
:
2177 # only set the 'formats' fields if the original info_dict list them
2178 # otherwise we end up with a circular reference, the first (and unique)
2179 # element in the 'formats' field in info_dict is info_dict itself,
2180 # which can't be exported to json
2181 info_dict
['formats'] = formats
2183 info_dict
, _
= self
.pre_process(info_dict
)
2185 list_only
= self
.params
.get('list_thumbnails') or self
.params
.get('listformats') or self
.params
.get('listsubtitles')
2187 self
.__forced
_printings
(info_dict
, self
.prepare_filename(info_dict
), incomplete
=True)
2188 if self
.params
.get('list_thumbnails'):
2189 self
.list_thumbnails(info_dict
)
2190 if self
.params
.get('listformats'):
2191 if not info_dict
.get('formats'):
2192 raise ExtractorError('No video formats found', expected
=True)
2193 self
.list_formats(info_dict
)
2194 if self
.params
.get('listsubtitles'):
2195 if 'automatic_captions' in info_dict
:
2196 self
.list_subtitles(
2197 info_dict
['id'], automatic_captions
, 'automatic captions')
2198 self
.list_subtitles(info_dict
['id'], subtitles
, 'subtitles')
2201 format_selector
= self
.format_selector
2202 if format_selector
is None:
2203 req_format
= self
._default
_format
_spec
(info_dict
, download
=download
)
2204 self
.write_debug('Default format spec: %s' % req_format
)
2205 format_selector
= self
.build_format_selector(req_format
)
2207 # While in format selection we may need to have an access to the original
2208 # format set in order to calculate some metrics or do some processing.
2209 # For now we need to be able to guess whether original formats provided
2210 # by extractor are incomplete or not (i.e. whether extractor provides only
2211 # video-only or audio-only formats) for proper formats selection for
2212 # extractors with such incomplete formats (see
2213 # https://github.com/ytdl-org/youtube-dl/pull/5556).
2214 # Since formats may be filtered during format selection and may not match
2215 # the original formats the results may be incorrect. Thus original formats
2216 # or pre-calculated metrics should be passed to format selection routines
2218 # We will pass a context object containing all necessary additional data
2219 # instead of just formats.
2220 # This fixes incorrect format selection issue (see
2221 # https://github.com/ytdl-org/youtube-dl/issues/10083).
2222 incomplete_formats
= (
2223 # All formats are video-only or
2224 all(f
.get('vcodec') != 'none' and f
.get('acodec') == 'none' for f
in formats
)
2225 # all formats are audio-only
2226 or all(f
.get('vcodec') == 'none' and f
.get('acodec') != 'none' for f
in formats
))
2230 'incomplete_formats': incomplete_formats
,
2233 formats_to_download
= list(format_selector(ctx
))
2234 if not formats_to_download
:
2235 if not self
.params
.get('ignore_no_formats_error'):
2236 raise ExtractorError('Requested format is not available', expected
=True)
2238 self
.report_warning('Requested format is not available')
2239 # Process what we can, even without any available formats.
2240 self
.process_info(dict(info_dict
))
2243 '[info] %s: Downloading %d format(s): %s' % (
2244 info_dict
['id'], len(formats_to_download
),
2245 ", ".join([f
['format_id'] for f
in formats_to_download
])))
2246 for fmt
in formats_to_download
:
2247 new_info
= dict(info_dict
)
2248 # Save a reference to the original info_dict so that it can be modified in process_info if needed
2249 new_info
['__original_infodict'] = info_dict
2250 new_info
.update(fmt
)
2251 self
.process_info(new_info
)
2252 # We update the info dict with the best quality format (backwards compatibility)
2253 if formats_to_download
:
2254 info_dict
.update(formats_to_download
[-1])
2257 def process_subtitles(self
, video_id
, normal_subtitles
, automatic_captions
):
2258 """Select the requested subtitles and their format"""
2260 if normal_subtitles
and self
.params
.get('writesubtitles'):
2261 available_subs
.update(normal_subtitles
)
2262 if automatic_captions
and self
.params
.get('writeautomaticsub'):
2263 for lang
, cap_info
in automatic_captions
.items():
2264 if lang
not in available_subs
:
2265 available_subs
[lang
] = cap_info
2267 if (not self
.params
.get('writesubtitles') and not
2268 self
.params
.get('writeautomaticsub') or not
2272 all_sub_langs
= available_subs
.keys()
2273 if self
.params
.get('allsubtitles', False):
2274 requested_langs
= all_sub_langs
2275 elif self
.params
.get('subtitleslangs', False):
2276 requested_langs
= set()
2277 for lang
in self
.params
.get('subtitleslangs'):
2279 requested_langs
.update(all_sub_langs
)
2281 discard
= lang
[0] == '-'
2284 current_langs
= filter(re
.compile(lang
+ '$').match
, all_sub_langs
)
2286 for lang
in current_langs
:
2287 requested_langs
.discard(lang
)
2289 requested_langs
.update(current_langs
)
2290 elif 'en' in available_subs
:
2291 requested_langs
= ['en']
2293 requested_langs
= [list(all_sub_langs
)[0]]
2294 self
.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs
))
2296 formats_query
= self
.params
.get('subtitlesformat', 'best')
2297 formats_preference
= formats_query
.split('/') if formats_query
else []
2299 for lang
in requested_langs
:
2300 formats
= available_subs
.get(lang
)
2302 self
.report_warning('%s subtitles not available for %s' % (lang
, video_id
))
2304 for ext
in formats_preference
:
2308 matches
= list(filter(lambda f
: f
['ext'] == ext
, formats
))
2314 self
.report_warning(
2315 'No subtitle format found matching "%s" for language %s, '
2316 'using %s' % (formats_query
, lang
, f
['ext']))
    def __forced_printings(self, info_dict, filename, incomplete):
        """Write fields of info_dict to stdout as requested by the force* params.

        `filename` is merged into the dict (when not None) so it can be printed;
        `incomplete` relaxes the mandatory fields, tolerating missing values.
        """
        def print_mandatory(field, actual_field=None):
            # Fields expected to exist after full extraction; when `incomplete`,
            # a missing value is silently skipped instead of raising KeyError.
            if actual_field is None:
                actual_field = field
            if (self.params.get('force%s' % field, False)
                    and (not incomplete or info_dict.get(actual_field) is not None)):
                self.to_stdout(info_dict[actual_field])

        def print_optional(field):
            # Fields that may legitimately be absent; print only when present.
            if (self.params.get('force%s' % field, False)
                    and info_dict.get(field) is not None):
                self.to_stdout(info_dict[field])

        # Work on a copy so the synthesized 'filename'/'urls' keys do not
        # leak back into the caller's info_dict.
        info_dict = info_dict.copy()
        if filename is not None:
            info_dict['filename'] = filename
        if info_dict.get('requested_formats') is not None:
            # For RTMP URLs, also include the playpath
            info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
        elif 'url' in info_dict:
            info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')

        for tmpl in self.params.get('forceprint', []):
            # A bare field name is shorthand for the '%(name)s' output template.
            if re.match(r'\w+$', tmpl):
                tmpl = '%({})s'.format(tmpl)
            tmpl, info_copy = self.prepare_outtmpl(tmpl, info_dict)
            self.to_stdout(tmpl % info_copy)

        print_mandatory('title')
        print_mandatory('id')
        print_mandatory('url', 'urls')
        print_optional('thumbnail')
        print_optional('description')
        print_optional('filename')
        if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
            self.to_stdout(formatSeconds(info_dict['duration']))
        print_mandatory('format')

        if self.params.get('forcejson', False):
            self.post_extract(info_dict)
            self.to_stdout(json.dumps(info_dict, default=repr))
2362 def dl(self
, name
, info
, subtitle
=False, test
=False):
2365 verbose
= self
.params
.get('verbose')
2368 'quiet': not verbose
,
2370 'noprogress': not verbose
,
2372 'skip_unavailable_fragments': False,
2373 'keep_fragments': False,
2375 '_no_ytdl_file': True,
2378 params
= self
.params
2379 fd
= get_suitable_downloader(info
, params
)(self
, params
)
2381 for ph
in self
._progress
_hooks
:
2382 fd
.add_progress_hook(ph
)
2383 urls
= '", "'.join([f
['url'] for f
in info
.get('requested_formats', [])] or [info
['url']])
2384 self
.write_debug('Invoking downloader on "%s"' % urls
)
2385 new_info
= dict(info
)
2386 if new_info
.get('http_headers') is None:
2387 new_info
['http_headers'] = self
._calc
_headers
(new_info
)
2388 return fd
.download(name
, new_info
, subtitle
)
2390 def process_info(self
, info_dict
):
2391 """Process a single resolved IE result."""
2393 assert info_dict
.get('_type', 'video') == 'video'
2395 info_dict
.setdefault('__postprocessors', [])
2397 max_downloads
= self
.params
.get('max_downloads')
2398 if max_downloads
is not None:
2399 if self
._num
_downloads
>= int(max_downloads
):
2400 raise MaxDownloadsReached()
2402 # TODO: backward compatibility, to be removed
2403 info_dict
['fulltitle'] = info_dict
['title']
2405 if 'format' not in info_dict
and 'ext' in info_dict
:
2406 info_dict
['format'] = info_dict
['ext']
2408 if self
._match
_entry
(info_dict
) is not None:
2411 self
.post_extract(info_dict
)
2412 self
._num
_downloads
+= 1
2414 # info_dict['_filename'] needs to be set for backward compatibility
2415 info_dict
['_filename'] = full_filename
= self
.prepare_filename(info_dict
, warn
=True)
2416 temp_filename
= self
.prepare_filename(info_dict
, 'temp')
2420 self
.__forced
_printings
(info_dict
, full_filename
, incomplete
=('format' not in info_dict
))
2422 if self
.params
.get('simulate', False):
2423 if self
.params
.get('force_write_download_archive', False):
2424 self
.record_download_archive(info_dict
)
2426 # Do nothing else if in simulate mode
2429 if full_filename
is None:
2432 if not self
._ensure
_dir
_exists
(encodeFilename(full_filename
)):
2434 if not self
._ensure
_dir
_exists
(encodeFilename(temp_filename
)):
2437 if self
.params
.get('writedescription', False):
2438 descfn
= self
.prepare_filename(info_dict
, 'description')
2439 if not self
._ensure
_dir
_exists
(encodeFilename(descfn
)):
2441 if not self
.params
.get('overwrites', True) and os
.path
.exists(encodeFilename(descfn
)):
2442 self
.to_screen('[info] Video description is already present')
2443 elif info_dict
.get('description') is None:
2444 self
.report_warning('There\'s no description to write.')
2447 self
.to_screen('[info] Writing video description to: ' + descfn
)
2448 with io
.open(encodeFilename(descfn
), 'w', encoding
='utf-8') as descfile
:
2449 descfile
.write(info_dict
['description'])
2450 except (OSError, IOError):
2451 self
.report_error('Cannot write description file ' + descfn
)
2454 if self
.params
.get('writeannotations', False):
2455 annofn
= self
.prepare_filename(info_dict
, 'annotation')
2456 if not self
._ensure
_dir
_exists
(encodeFilename(annofn
)):
2458 if not self
.params
.get('overwrites', True) and os
.path
.exists(encodeFilename(annofn
)):
2459 self
.to_screen('[info] Video annotations are already present')
2460 elif not info_dict
.get('annotations'):
2461 self
.report_warning('There are no annotations to write.')
2464 self
.to_screen('[info] Writing video annotations to: ' + annofn
)
2465 with io
.open(encodeFilename(annofn
), 'w', encoding
='utf-8') as annofile
:
2466 annofile
.write(info_dict
['annotations'])
2467 except (KeyError, TypeError):
2468 self
.report_warning('There are no annotations to write.')
2469 except (OSError, IOError):
2470 self
.report_error('Cannot write annotations file: ' + annofn
)
2473 subtitles_are_requested
= any([self
.params
.get('writesubtitles', False),
2474 self
.params
.get('writeautomaticsub')])
2476 if subtitles_are_requested
and info_dict
.get('requested_subtitles'):
2477 # subtitles download errors are already managed as troubles in relevant IE
2478 # that way it will silently go on when used with unsupporting IE
2479 subtitles
= info_dict
['requested_subtitles']
2480 # ie = self.get_info_extractor(info_dict['extractor_key'])
2481 for sub_lang
, sub_info
in subtitles
.items():
2482 sub_format
= sub_info
['ext']
2483 sub_filename
= subtitles_filename(temp_filename
, sub_lang
, sub_format
, info_dict
.get('ext'))
2484 sub_filename_final
= subtitles_filename(
2485 self
.prepare_filename(info_dict
, 'subtitle'), sub_lang
, sub_format
, info_dict
.get('ext'))
2486 if not self
.params
.get('overwrites', True) and os
.path
.exists(encodeFilename(sub_filename
)):
2487 self
.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang
, sub_format
))
2488 sub_info
['filepath'] = sub_filename
2489 files_to_move
[sub_filename
] = sub_filename_final
2491 self
.to_screen('[info] Writing video subtitles to: ' + sub_filename
)
2492 if sub_info
.get('data') is not None:
2494 # Use newline='' to prevent conversion of newline characters
2495 # See https://github.com/ytdl-org/youtube-dl/issues/10268
2496 with io
.open(encodeFilename(sub_filename
), 'w', encoding
='utf-8', newline
='') as subfile
:
2497 subfile
.write(sub_info
['data'])
2498 sub_info
['filepath'] = sub_filename
2499 files_to_move
[sub_filename
] = sub_filename_final
2500 except (OSError, IOError):
2501 self
.report_error('Cannot write subtitles file ' + sub_filename
)
2505 self
.dl(sub_filename
, sub_info
.copy(), subtitle
=True)
2506 sub_info
['filepath'] = sub_filename
2507 files_to_move
[sub_filename
] = sub_filename_final
2508 except (ExtractorError
, IOError, OSError, ValueError) + network_exceptions
as err
:
2509 self
.report_warning('Unable to download subtitle for "%s": %s' %
2510 (sub_lang
, error_to_compat_str(err
)))
2513 if self
.params
.get('writeinfojson', False):
2514 infofn
= self
.prepare_filename(info_dict
, 'infojson')
2515 if not self
._ensure
_dir
_exists
(encodeFilename(infofn
)):
2517 if not self
.params
.get('overwrites', True) and os
.path
.exists(encodeFilename(infofn
)):
2518 self
.to_screen('[info] Video metadata is already present')
2520 self
.to_screen('[info] Writing video metadata as JSON to: ' + infofn
)
2522 write_json_file(self
.filter_requested_info(info_dict
, self
.params
.get('clean_infojson', True)), infofn
)
2523 except (OSError, IOError):
2524 self
.report_error('Cannot write video metadata to JSON file ' + infofn
)
2526 info_dict
['__infojson_filename'] = infofn
2528 for thumb_ext
in self
._write
_thumbnails
(info_dict
, temp_filename
):
2529 thumb_filename_temp
= replace_extension(temp_filename
, thumb_ext
, info_dict
.get('ext'))
2530 thumb_filename
= replace_extension(
2531 self
.prepare_filename(info_dict
, 'thumbnail'), thumb_ext
, info_dict
.get('ext'))
2532 files_to_move
[thumb_filename_temp
] = thumb_filename
2534 # Write internet shortcut files
2535 url_link
= webloc_link
= desktop_link
= False
2536 if self
.params
.get('writelink', False):
2537 if sys
.platform
== "darwin": # macOS.
2539 elif sys
.platform
.startswith("linux"):
2541 else: # if sys.platform in ['win32', 'cygwin']:
2543 if self
.params
.get('writeurllink', False):
2545 if self
.params
.get('writewebloclink', False):
2547 if self
.params
.get('writedesktoplink', False):
2550 if url_link
or webloc_link
or desktop_link
:
2551 if 'webpage_url' not in info_dict
:
2552 self
.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2554 ascii_url
= iri_to_uri(info_dict
['webpage_url'])
2556 def _write_link_file(extension
, template
, newline
, embed_filename
):
2557 linkfn
= replace_extension(full_filename
, extension
, info_dict
.get('ext'))
2558 if self
.params
.get('overwrites', True) and os
.path
.exists(encodeFilename(linkfn
)):
2559 self
.to_screen('[info] Internet shortcut is already present')
2562 self
.to_screen('[info] Writing internet shortcut to: ' + linkfn
)
2563 with io
.open(encodeFilename(to_high_limit_path(linkfn
)), 'w', encoding
='utf-8', newline
=newline
) as linkfile
:
2564 template_vars
= {'url': ascii_url}
2566 template_vars
['filename'] = linkfn
[:-(len(extension
) + 1)]
2567 linkfile
.write(template
% template_vars
)
2568 except (OSError, IOError):
2569 self
.report_error('Cannot write internet shortcut ' + linkfn
)
2574 if not _write_link_file('url', DOT_URL_LINK_TEMPLATE
, '\r\n', embed_filename
=False):
2577 if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE
, '\n', embed_filename
=False):
2580 if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE
, '\n', embed_filename
=True):
2584 info_dict
, files_to_move
= self
.pre_process(info_dict
, 'before_dl', files_to_move
)
2585 except PostProcessingError
as err
:
2586 self
.report_error('Preprocessing: %s' % str(err
))
2589 must_record_download_archive
= False
2590 if self
.params
.get('skip_download', False):
2591 info_dict
['filepath'] = temp_filename
2592 info_dict
['__finaldir'] = os
.path
.dirname(os
.path
.abspath(encodeFilename(full_filename
)))
2593 info_dict
['__files_to_move'] = files_to_move
2594 info_dict
= self
.run_pp(MoveFilesAfterDownloadPP(self
, False), info_dict
)
2599 def existing_file(*filepaths
):
2600 ext
= info_dict
.get('ext')
2601 final_ext
= self
.params
.get('final_ext', ext
)
2603 for file in orderedSet(filepaths
):
2604 if final_ext
!= ext
:
2605 converted
= replace_extension(file, final_ext
, ext
)
2606 if os
.path
.exists(encodeFilename(converted
)):
2607 existing_files
.append(converted
)
2608 if os
.path
.exists(encodeFilename(file)):
2609 existing_files
.append(file)
2611 if not existing_files
or self
.params
.get('overwrites', False):
2612 for file in orderedSet(existing_files
):
2613 self
.report_file_delete(file)
2614 os
.remove(encodeFilename(file))
2617 self
.report_file_already_downloaded(existing_files
[0])
2618 info_dict
['ext'] = os
.path
.splitext(existing_files
[0])[1][1:]
2619 return existing_files
[0]
2622 if info_dict
.get('requested_formats') is not None:
2624 def compatible_formats(formats
):
2625 # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2626 video_formats
= [format
for format
in formats
if format
.get('vcodec') != 'none']
2627 audio_formats
= [format
for format
in formats
if format
.get('acodec') != 'none']
2628 if len(video_formats
) > 2 or len(audio_formats
) > 2:
2632 exts
= set(format
.get('ext') for format
in formats
)
2634 set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2637 for ext_sets
in COMPATIBLE_EXTS
:
2638 if ext_sets
.issuperset(exts
):
2640 # TODO: Check acodec/vcodec
2643 requested_formats
= info_dict
['requested_formats']
2644 old_ext
= info_dict
['ext']
2645 if self
.params
.get('merge_output_format') is None and not compatible_formats(requested_formats
):
2646 info_dict
['ext'] = 'mkv'
2647 self
.report_warning(
2648 'Requested formats are incompatible for merge and will be merged into mkv.')
2650 def correct_ext(filename
):
2651 filename_real_ext
= os
.path
.splitext(filename
)[1][1:]
2653 os
.path
.splitext(filename
)[0]
2654 if filename_real_ext
== old_ext
2656 return '%s.%s' % (filename_wo_ext
, info_dict
['ext'])
2658 # Ensure filename always has a correct extension for successful merge
2659 full_filename
= correct_ext(full_filename
)
2660 temp_filename
= correct_ext(temp_filename
)
2661 dl_filename
= existing_file(full_filename
, temp_filename
)
2662 info_dict
['__real_download'] = False
2664 _protocols
= set(determine_protocol(f
) for f
in requested_formats
)
2665 if len(_protocols
) == 1:
2666 info_dict
['protocol'] = _protocols
.pop()
2667 directly_mergable
= (
2668 'no-direct-merge' not in self
.params
.get('compat_opts', [])
2669 and info_dict
.get('protocol') is not None # All requested formats have same protocol
2670 and not self
.params
.get('allow_unplayable_formats')
2671 and get_suitable_downloader(info_dict
, self
.params
).__name
__ == 'FFmpegFD')
2672 if directly_mergable
:
2673 info_dict
['url'] = requested_formats
[0]['url']
2674 # Treat it as a single download
2675 dl_filename
= existing_file(full_filename
, temp_filename
)
2676 if dl_filename
is None:
2677 success
, real_download
= self
.dl(temp_filename
, info_dict
)
2678 info_dict
['__real_download'] = real_download
2681 merger
= FFmpegMergerPP(self
)
2682 if self
.params
.get('allow_unplayable_formats'):
2683 self
.report_warning(
2684 'You have requested merging of multiple formats '
2685 'while also allowing unplayable formats to be downloaded. '
2686 'The formats won\'t be merged to prevent data corruption.')
2687 elif not merger
.available
:
2688 self
.report_warning(
2689 'You have requested merging of multiple formats but ffmpeg is not installed. '
2690 'The formats won\'t be merged.')
2692 if dl_filename
is None:
2693 for f
in requested_formats
:
2694 new_info
= dict(info_dict
)
2695 del new_info
['requested_formats']
2697 fname
= prepend_extension(
2698 self
.prepare_filename(new_info
, 'temp'),
2699 'f%s' % f
['format_id'], new_info
['ext'])
2700 if not self
._ensure
_dir
_exists
(fname
):
2702 downloaded
.append(fname
)
2703 partial_success
, real_download
= self
.dl(fname
, new_info
)
2704 info_dict
['__real_download'] = info_dict
['__real_download'] or real_download
2705 success
= success
and partial_success
2706 if merger
.available
and not self
.params
.get('allow_unplayable_formats'):
2707 info_dict
['__postprocessors'].append(merger
)
2708 info_dict
['__files_to_merge'] = downloaded
2709 # Even if there were no downloads, it is being merged only now
2710 info_dict
['__real_download'] = True
2712 for file in downloaded
:
2713 files_to_move
[file] = None
2715 # Just a single file
2716 dl_filename
= existing_file(full_filename
, temp_filename
)
2717 if dl_filename
is None:
2718 success
, real_download
= self
.dl(temp_filename
, info_dict
)
2719 info_dict
['__real_download'] = real_download
2721 dl_filename
= dl_filename
or temp_filename
2722 info_dict
['__finaldir'] = os
.path
.dirname(os
.path
.abspath(encodeFilename(full_filename
)))
2724 except network_exceptions
as err
:
2725 self
.report_error('unable to download video data: %s' % error_to_compat_str(err
))
2727 except (OSError, IOError) as err
:
2728 raise UnavailableVideoError(err
)
2729 except (ContentTooShortError
, ) as err
:
2730 self
.report_error('content too short (expected %s bytes and served %s)' % (err
.expected
, err
.downloaded
))
2733 if success
and full_filename
!= '-':
2737 fixup_policy
= self
.params
.get('fixup')
2738 vid
= info_dict
['id']
2740 if fixup_policy
in ('ignore', 'never'):
2742 elif fixup_policy
== 'warn':
2744 elif fixup_policy
!= 'force':
2745 assert fixup_policy
in ('detect_or_warn', None)
2746 if not info_dict
.get('__real_download'):
2749 def ffmpeg_fixup(cndn
, msg
, cls
):
2753 self
.report_warning(f
'{vid}: {msg}')
2757 info_dict
['__postprocessors'].append(pp
)
2759 self
.report_warning(f
'{vid}: {msg}. Install ffmpeg to fix this automatically')
2761 stretched_ratio
= info_dict
.get('stretched_ratio')
2763 stretched_ratio
not in (1, None),
2764 f
'Non-uniform pixel ratio {stretched_ratio}',
2765 FFmpegFixupStretchedPP
)
2768 (info_dict
.get('requested_formats') is None
2769 and info_dict
.get('container') == 'm4a_dash'
2770 and info_dict
.get('ext') == 'm4a'),
2771 'writing DASH m4a. Only some players support this container',
2774 downloader
= (get_suitable_downloader(info_dict
, self
.params
).__name
__
2775 if 'protocol' in info_dict
else None)
2776 ffmpeg_fixup(downloader
== 'HlsFD', 'malformed AAC bitstream detected', FFmpegFixupM3u8PP
)
2777 ffmpeg_fixup(downloader
== 'WebSocketFragmentFD', 'malformed timestamps detected', FFmpegFixupTimestampPP
)
2778 ffmpeg_fixup(downloader
== 'WebSocketFragmentFD', 'malformed duration detected', FFmpegFixupDurationPP
)
2782 info_dict
= self
.post_process(dl_filename
, info_dict
, files_to_move
)
2783 except PostProcessingError
as err
:
2784 self
.report_error('Postprocessing: %s' % str(err
))
2787 for ph
in self
._post
_hooks
:
2788 ph(info_dict
['filepath'])
2789 except Exception as err
:
2790 self
.report_error('post hooks: %s' % str(err
))
2792 must_record_download_archive
= True
2794 if must_record_download_archive
or self
.params
.get('force_write_download_archive', False):
2795 self
.record_download_archive(info_dict
)
2796 max_downloads
= self
.params
.get('max_downloads')
2797 if max_downloads
is not None and self
._num
_downloads
>= int(max_downloads
):
2798 raise MaxDownloadsReached()
def download(self, url_list):
    """Download a given list of URLs; returns the accumulated retcode."""
    outtmpl = self.outtmpl_dict['default']
    # Refuse to write several videos into a single fixed filename.
    if (len(url_list) > 1
            and outtmpl != '-'
            and '%' not in outtmpl
            and self.params.get('max_downloads') != 1):
        raise SameFileError(outtmpl)

    for url in url_list:
        try:
            # It also downloads the videos
            res = self.extract_info(
                url, force_generic_extractor=self.params.get('force_generic_extractor', False))
        except UnavailableVideoError:
            self.report_error('unable to download video')
        except MaxDownloadsReached:
            self.to_screen('[info] Maximum number of downloaded files reached')
            raise
        except ExistingVideoReached:
            self.to_screen('[info] Encountered a file that is already in the archive, stopping due to --break-on-existing')
            raise
        except RejectedVideoReached:
            self.to_screen('[info] Encountered a file that did not match filter, stopping due to --break-on-reject')
            raise
        else:
            if self.params.get('dump_single_json', False):
                self.post_extract(res)
                self.to_stdout(json.dumps(res, default=repr))

    return self._download_retcode
def download_with_info_file(self, info_filename):
    """Re-run a download from a previously written info-json file."""
    with contextlib.closing(fileinput.FileInput(
            [info_filename], mode='r',
            openhook=fileinput.hook_encoded('utf-8'))) as f:
        # FileInput doesn't have a read method, we can't call json.load
        info = self.filter_requested_info(
            json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
    try:
        self.process_ie_result(info, download=True)
    except (DownloadError, EntryNotInPlaylist, ThrottledDownload):
        webpage_url = info.get('webpage_url')
        if webpage_url is None:
            raise
        # The stored info failed; retry with a fresh extraction of the page.
        self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
        return self.download([webpage_url])
    return self._download_retcode
def filter_requested_info(info_dict, actually_filter=True):
    """Return a filtered copy of *info_dict* suitable for an info-json.

    When *actually_filter* is true, internal keys, underscore-prefixed keys
    and empty values are dropped (recursively); otherwise only the known
    internal keys are removed and an 'epoch' timestamp is stamped in.
    """
    remove_keys = ['__original_infodict']  # Always remove this since this may contain a copy of the entire dict
    # BUG FIX: the original read `keep_keys = ['_type'],` — the trailing comma
    # made it a 1-tuple containing a list, so `'_type' not in keep_keys` was
    # always True and '_type' was stripped despite the comment below.
    keep_keys = ['_type']  # Always keep this to facilitate load-info-json
    if actually_filter:
        remove_keys += ('requested_formats', 'requested_subtitles', 'requested_entries', 'filepath', 'entries', 'original_url')
        empty_values = (None, {}, [], set(), tuple())
        reject = lambda k, v: k not in keep_keys and (
            k.startswith('_') or k in remove_keys or v in empty_values)
    else:
        info_dict['epoch'] = int(time.time())
        reject = lambda k, v: k in remove_keys
    # Recursively rebuild containers, filtering dict items through reject().
    filter_fn = lambda obj: (
        list(map(filter_fn, obj)) if isinstance(obj, (LazyList, list, tuple, set))
        else obj if not isinstance(obj, dict)
        else dict((k, filter_fn(v)) for k, v in obj.items() if not reject(k, v)))
    return filter_fn(info_dict)
def run_pp(self, pp, infodict):
    """Run a single postprocessor and dispose of the files it marks for deletion."""
    files_to_delete = []
    if '__files_to_move' not in infodict:
        infodict['__files_to_move'] = {}
    files_to_delete, infodict = pp.run(infodict)
    if not files_to_delete:
        return infodict

    if self.params.get('keepvideo', False):
        # -k: keep the originals, but make sure they still get moved later.
        for fname in files_to_delete:
            infodict['__files_to_move'].setdefault(fname, '')
    else:
        for old_filename in set(files_to_delete):
            self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
            try:
                os.remove(encodeFilename(old_filename))
            except (IOError, OSError):
                self.report_warning('Unable to remove downloaded original file')
            if old_filename in infodict['__files_to_move']:
                del infodict['__files_to_move'][old_filename]
    return infodict
2890 def post_extract(info_dict
):
2891 def actual_post_extract(info_dict
):
2892 if info_dict
.get('_type') in ('playlist', 'multi_video'):
2893 for video_dict
in info_dict
.get('entries', {}):
2894 actual_post_extract(video_dict
or {})
2897 post_extractor
= info_dict
.get('__post_extractor') or (lambda: {})
2898 extra
= post_extractor().items()
2899 info_dict
.update(extra
)
2900 info_dict
.pop('__post_extractor', None)
2902 original_infodict
= info_dict
.get('__original_infodict') or {}
2903 original_infodict
.update(extra
)
2904 original_infodict
.pop('__post_extractor', None)
2906 actual_post_extract(info_dict
or {})
def pre_process(self, ie_info, key='pre_process', files_to_move=None):
    """Run the postprocessors registered under *key* on a copy of *ie_info*.

    Returns a (processed info dict, pending files-to-move mapping) tuple.
    """
    info = dict(ie_info)
    info['__files_to_move'] = files_to_move or {}
    for processor in self._pps[key]:
        info = self.run_pp(processor, info)
    return info, info.pop('__files_to_move', None)
def post_process(self, filename, ie_info, files_to_move=None):
    """Run all the postprocessors on the given file."""
    info = dict(ie_info)
    info['filepath'] = filename
    info['__files_to_move'] = files_to_move or {}

    # Per-video postprocessors attached by the extractor run first,
    # then the globally registered 'post_process' ones.
    for pp in ie_info.get('__postprocessors', []) + self._pps['post_process']:
        info = self.run_pp(pp, info)
    info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
    del info['__files_to_move']
    for pp in self._pps['after_move']:
        info = self.run_pp(pp, info)
    return info
2929 def _make_archive_id(self
, info_dict
):
2930 video_id
= info_dict
.get('id')
2933 # Future-proof against any change in case
2934 # and backwards compatibility with prior versions
2935 extractor
= info_dict
.get('extractor_key') or info_dict
.get('ie_key') # key in a playlist
2936 if extractor
is None:
2937 url
= str_or_none(info_dict
.get('url'))
2940 # Try to find matching extractor for the URL and take its ie_key
2941 for ie
in self
._ies
:
2942 if ie
.suitable(url
):
2943 extractor
= ie
.ie_key()
2947 return '%s %s' % (extractor
.lower(), video_id
)
def in_download_archive(self, info_dict):
    """Return True if this video's archive id is already recorded."""
    fn = self.params.get('download_archive')
    if fn is None:
        return False

    vid_id = self._make_archive_id(info_dict)
    if not vid_id:
        return False  # Incomplete video information
    return vid_id in self.archive
def record_download_archive(self, info_dict):
    """Append this video's archive id to the archive file and in-memory set."""
    fn = self.params.get('download_archive')
    if fn is None:
        return
    vid_id = self._make_archive_id(info_dict)
    assert vid_id
    # locked_file guards against concurrent writers appending to the archive.
    with locked_file(fn, 'a', encoding='utf-8') as archive_file:
        archive_file.write(vid_id + '\n')
    self.archive.add(vid_id)
def format_resolution(format, default='unknown'):
    """Return a human-readable resolution string for a format dict."""
    if format.get('vcodec') == 'none':
        # No video stream: either a storyboard/images track or audio only.
        return 'images' if format.get('acodec') == 'none' else 'audio only'
    if format.get('resolution') is not None:
        return format['resolution']
    if format.get('width') and format.get('height'):
        return '%dx%d' % (format['width'], format['height'])
    if format.get('height'):
        return '%sp' % format['height']
    if format.get('width'):
        return '%dx?' % format['width']
    return default
2988 def _format_note(self
, fdict
):
2990 if fdict
.get('ext') in ['f4f', 'f4m']:
2991 res
+= '(unsupported) '
2992 if fdict
.get('language'):
2995 res
+= '[%s] ' % fdict
['language']
2996 if fdict
.get('format_note') is not None:
2997 res
+= fdict
['format_note'] + ' '
2998 if fdict
.get('tbr') is not None:
2999 res
+= '%4dk ' % fdict
['tbr']
3000 if fdict
.get('container') is not None:
3003 res
+= '%s container' % fdict
['container']
3004 if (fdict
.get('vcodec') is not None
3005 and fdict
.get('vcodec') != 'none'):
3008 res
+= fdict
['vcodec']
3009 if fdict
.get('vbr') is not None:
3011 elif fdict
.get('vbr') is not None and fdict
.get('abr') is not None:
3013 if fdict
.get('vbr') is not None:
3014 res
+= '%4dk' % fdict
['vbr']
3015 if fdict
.get('fps') is not None:
3018 res
+= '%sfps' % fdict
['fps']
3019 if fdict
.get('acodec') is not None:
3022 if fdict
['acodec'] == 'none':
3025 res
+= '%-5s' % fdict
['acodec']
3026 elif fdict
.get('abr') is not None:
3030 if fdict
.get('abr') is not None:
3031 res
+= '@%3dk' % fdict
['abr']
3032 if fdict
.get('asr') is not None:
3033 res
+= ' (%5dHz)' % fdict
['asr']
3034 if fdict
.get('filesize') is not None:
3037 res
+= format_bytes(fdict
['filesize'])
3038 elif fdict
.get('filesize_approx') is not None:
3041 res
+= '~' + format_bytes(fdict
['filesize_approx'])
def list_formats(self, info_dict):
    """Print the available formats of a video as a table."""
    formats = info_dict.get('formats', [info_dict])
    # New-style (wide) table unless disabled via compat-opts or params.
    new_format = (
        'list-formats' not in self.params.get('compat_opts', [])
        and self.params.get('listformats_table', True) is not False)
    if new_format:
        table = [
            [
                format_field(f, 'format_id'),
                format_field(f, 'ext'),
                self.format_resolution(f),
                format_field(f, 'fps', '%d'),
                '|',
                format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
                format_field(f, 'tbr', '%4dk'),
                shorten_protocol_name(f.get('protocol', '').replace("native", "n")),
                '|',
                format_field(f, 'vcodec', default='unknown').replace('none', ''),
                format_field(f, 'vbr', '%4dk'),
                format_field(f, 'acodec', default='unknown').replace('none', ''),
                format_field(f, 'abr', '%3dk'),
                format_field(f, 'asr', '%5dHz'),
                ', '.join(filter(None, (
                    'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
                    format_field(f, 'language', '[%s]'),
                    format_field(f, 'format_note'),
                    format_field(f, 'container', ignore=(None, f.get('ext'))),
                    format_field(f, 'asr', '%5dHz')))),
            ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
        header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', ' TBR', 'PROTO',
                       '|', 'VCODEC', ' VBR', 'ACODEC', ' ABR', ' ASR', 'MORE INFO']
    else:
        # Legacy four-column layout (youtube-dl compatible).
        table = [
            [
                format_field(f, 'format_id'),
                format_field(f, 'ext'),
                self.format_resolution(f),
                self._format_note(f)]
            for f in formats
            if f.get('preference') is None or f['preference'] >= -1000]
        header_line = ['format code', 'extension', 'resolution', 'note']

    self.to_screen(
        '[info] Available formats for %s:' % info_dict['id'])
    self.to_stdout(render_table(
        header_line, table, delim=new_format, extraGap=(0 if new_format else 1), hideEmpty=new_format))
def list_thumbnails(self, info_dict):
    """Print the available thumbnails of a video as a table."""
    # Robustness fix: 'thumbnails' may be absent or None, in which case
    # list(None) would raise TypeError; fall back to an empty list.
    thumbnails = list(info_dict.get('thumbnails') or [])
    if not thumbnails:
        self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
        return

    self.to_screen(
        '[info] Thumbnails for %s:' % info_dict['id'])
    self.to_stdout(render_table(
        ['ID', 'width', 'height', 'URL'],
        [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
def list_subtitles(self, video_id, subtitles, name='subtitles'):
    """Print the available subtitle/caption tracks as a table."""
    if not subtitles:
        self.to_screen('%s has no %s' % (video_id, name))
        return
    self.to_screen(
        'Available %s for %s:' % (name, video_id))

    def _row(lang, formats):
        # Collapse identical track names; newest format first.
        exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
        if len(set(names)) == 1:
            names = [] if names[0] == 'unknown' else names[:1]
        return [lang, ', '.join(names), ', '.join(exts)]

    self.to_stdout(render_table(
        ['Language', 'Name', 'Formats'],
        [_row(lang, formats) for lang, formats in subtitles.items()],
        hideEmpty=True))
def urlopen(self, req):
    """ Start an HTTP download """
    if isinstance(req, compat_basestring):
        # Bare URL strings are wrapped into a sanitized Request first.
        req = sanitized_Request(req)
    return self._opener.open(req, timeout=self._socket_timeout)
def print_debug_header(self):
    """Write the '[debug] ...' diagnostic header (verbose mode only)."""
    if not self.params.get('verbose'):
        return

    if type('') is not compat_str:
        # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
        self.report_warning(
            'Your Python is broken! Update to a newer and supported version')

    stdout_encoding = getattr(
        sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
    encoding_str = (
        '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
            locale.getpreferredencoding(),
            sys.getfilesystemencoding(),
            stdout_encoding,
            self.get_encoding()))
    write_string(encoding_str, encoding=None)

    source = (
        '(exe)' if hasattr(sys, 'frozen')
        else '(zip)' if isinstance(globals().get('__loader__'), zipimporter)
        else '(source)' if os.path.basename(sys.argv[0]) == '__main__.py'
        else '')
    self._write_string('[debug] yt-dlp version %s %s\n' % (__version__, source))
    if _LAZY_LOADER:
        self._write_string('[debug] Lazy loading extractors enabled\n')
    if _PLUGIN_CLASSES:
        self._write_string(
            '[debug] Plugin Extractors: %s\n' % [ie.ie_key() for ie in _PLUGIN_CLASSES])
    if self.params.get('compat_opts'):
        self._write_string(
            '[debug] Compatibility options: %s\n' % ', '.join(self.params.get('compat_opts')))
    try:
        # Best effort: report the git revision when running from a checkout.
        sp = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out, err = process_communicate_or_kill(sp)
        out = out.decode().strip()
        if re.match('[0-9a-f]+', out):
            self._write_string('[debug] Git HEAD: %s\n' % out)
    except Exception:
        try:
            sys.exc_clear()  # Python 2 leftover; harmless no-op elsewhere
        except Exception:
            pass

    def python_implementation():
        impl_name = platform.python_implementation()
        if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
            return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
        return impl_name

    self._write_string('[debug] Python version %s (%s %s) - %s\n' % (
        platform.python_version(),
        python_implementation(),
        platform.architecture()[0],
        platform_name()))

    exe_versions = FFmpegPostProcessor.get_versions(self)
    exe_versions['rtmpdump'] = rtmpdump_version()
    exe_versions['phantomjs'] = PhantomJSwrapper._version()
    exe_str = ', '.join(
        '%s %s' % (exe, v)
        for exe, v in sorted(exe_versions.items())
        if v
    )
    if not exe_str:
        exe_str = 'none'
    self._write_string('[debug] exe versions: %s\n' % exe_str)

    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

    if self.params.get('call_home', False):
        ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
        self._write_string('[debug] Public IP address: %s\n' % ipaddr)
        latest_version = self.urlopen(
            'https://yt-dl.org/latest/version').read().decode('utf-8')
        if version_tuple(latest_version) > version_tuple(__version__):
            self.report_warning(
                'You are using an outdated version (newest version: %s)! '
                'See https://yt-dl.org/update if you need help updating.' %
                latest_version)
def _setup_opener(self):
    """Build the urllib opener (proxies, cookies, handlers) used by urlopen()."""
    timeout_val = self.params.get('socket_timeout')
    self._socket_timeout = 600 if timeout_val is None else float(timeout_val)

    opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
    opts_cookiefile = self.params.get('cookiefile')
    opts_proxy = self.params.get('proxy')

    self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)

    cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
    if opts_proxy is not None:
        # Empty string explicitly disables any proxy.
        proxies = {} if opts_proxy == '' else {'http': opts_proxy, 'https': opts_proxy}
    else:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    proxy_handler = PerRequestProxyHandler(proxies)

    debuglevel = 1 if self.params.get('debug_printtraffic') else 0
    https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
    redirect_handler = YoutubeDLRedirectHandler()
    data_handler = compat_urllib_request_DataHandler()

    # When passing our own FileHandler instance, build_opener won't add the
    # default FileHandler and allows us to disable the file protocol, which
    # can be used for malicious purposes (see
    # https://github.com/ytdl-org/youtube-dl/issues/8227)
    file_handler = compat_urllib_request.FileHandler()

    def file_open(*args, **kwargs):
        raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
    file_handler.file_open = file_open

    opener = compat_urllib_request.build_opener(
        proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)

    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener
def encode(self, s):
    """Encode *s* with the configured output encoding; bytes pass through."""
    if isinstance(s, bytes):
        return s  # Already encoded

    try:
        return s.encode(self.get_encoding())
    except UnicodeEncodeError as err:
        # Point the user at --encoding before re-raising.
        err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
        raise
def get_encoding(self):
    """Return the configured output encoding, falling back to the system preference."""
    configured = self.params.get('encoding')
    if configured is not None:
        return configured
    return preferredencoding()
3281 def _write_thumbnails(self
, info_dict
, filename
): # return the extensions
3282 write_all
= self
.params
.get('write_all_thumbnails', False)
3284 if write_all
or self
.params
.get('writethumbnail', False):
3285 thumbnails
= info_dict
.get('thumbnails') or []
3286 multiple
= write_all
and len(thumbnails
) > 1
3289 for t
in thumbnails
[::-1]:
3290 thumb_ext
= determine_ext(t
['url'], 'jpg')
3291 suffix
= '%s.' % t
['id'] if multiple
else ''
3292 thumb_display_id
= '%s ' % t
['id'] if multiple
else ''
3293 thumb_filename
= replace_extension(filename
, suffix
+ thumb_ext
, info_dict
.get('ext'))
3295 if not self
.params
.get('overwrites', True) and os
.path
.exists(encodeFilename(thumb_filename
)):
3296 ret
.append(suffix
+ thumb_ext
)
3297 t
['filepath'] = thumb_filename
3298 self
.to_screen('[%s] %s: Thumbnail %sis already present' %
3299 (info_dict
['extractor'], info_dict
['id'], thumb_display_id
))
3301 self
.to_screen('[%s] %s: Downloading thumbnail %s ...' %
3302 (info_dict
['extractor'], info_dict
['id'], thumb_display_id
))
3304 uf
= self
.urlopen(t
['url'])
3305 with open(encodeFilename(thumb_filename
), 'wb') as thumbf
:
3306 shutil
.copyfileobj(uf
, thumbf
)
3307 ret
.append(suffix
+ thumb_ext
)
3308 self
.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
3309 (info_dict
['extractor'], info_dict
['id'], thumb_display_id
, thumb_filename
))
3310 t
['filepath'] = thumb_filename
3311 except network_exceptions
as err
:
3312 self
.report_warning('Unable to download thumbnail "%s": %s' %
3313 (t
['url'], error_to_compat_str(err
)))
3314 if ret
and not write_all
: