#!/usr/bin/env python3
# coding: utf-8

from __future__ import absolute_import, unicode_literals

import collections
import contextlib
import copy
import datetime
import errno
import fileinput
import functools
import io
import itertools
import json
import locale
import operator
import os
import platform
import re
import shutil
import subprocess
import sys
import tempfile
import time
import tokenize
import traceback
import random
import unicodedata

from string import ascii_letters

from .compat import (
    compat_basestring,
    compat_get_terminal_size,
    compat_kwargs,
    compat_numeric_types,
    compat_os_name,
    compat_pycrypto_AES,
    compat_shlex_quote,
    compat_str,
    compat_tokenize_tokenize,
    compat_urllib_error,
    compat_urllib_request,
    compat_urllib_request_DataHandler,
    windows_enable_vt_mode,
)
from .cookies import load_cookies
from .utils import (
    age_restricted,
    args_to_str,
    ContentTooShortError,
    date_from_str,
    DateRange,
    DEFAULT_OUTTMPL,
    determine_ext,
    determine_protocol,
    DOT_DESKTOP_LINK_TEMPLATE,
    DOT_URL_LINK_TEMPLATE,
    DOT_WEBLOC_LINK_TEMPLATE,
    DownloadError,
    encode_compat_str,
    encodeFilename,
    EntryNotInPlaylist,
    error_to_compat_str,
    ExistingVideoReached,
    expand_path,
    ExtractorError,
    float_or_none,
    format_bytes,
    format_field,
    formatSeconds,
    GeoRestrictedError,
    HEADRequest,
    int_or_none,
    iri_to_uri,
    ISO3166Utils,
    LazyList,
    locked_file,
    make_dir,
    make_HTTPS_handler,
    MaxDownloadsReached,
    network_exceptions,
    orderedSet,
    OUTTMPL_TYPES,
    PagedList,
    parse_filesize,
    PerRequestProxyHandler,
    platform_name,
    PostProcessingError,
    preferredencoding,
    prepend_extension,
    process_communicate_or_kill,
    register_socks_protocols,
    RejectedVideoReached,
    render_table,
    replace_extension,
    SameFileError,
    sanitize_filename,
    sanitize_path,
    sanitize_url,
    sanitized_Request,
    std_headers,
    STR_FORMAT_RE_TMPL,
    STR_FORMAT_TYPES,
    str_or_none,
    strftime_or_none,
    subtitles_filename,
    supports_terminal_sequences,
    TERMINAL_SEQUENCES,
    ThrottledDownload,
    to_high_limit_path,
    traverse_obj,
    try_get,
    UnavailableVideoError,
    url_basename,
    variadic,
    version_tuple,
    write_json_file,
    write_string,
    YoutubeDLCookieProcessor,
    YoutubeDLHandler,
    YoutubeDLRedirectHandler,
)
from .cache import Cache
from .extractor import (
    gen_extractor_classes,
    get_info_extractor,
    _LAZY_LOADER,
    _PLUGIN_CLASSES as plugin_extractors
)
from .extractor.openload import PhantomJSwrapper
from .downloader import (
    FFmpegFD,
    get_suitable_downloader,
    shorten_protocol_name
)
from .downloader.rtmp import rtmpdump_version
from .postprocessor import (
    get_postprocessor,
    EmbedThumbnailPP,
    FFmpegFixupDurationPP,
    FFmpegFixupM3u8PP,
    FFmpegFixupM4aPP,
    FFmpegFixupStretchedPP,
    FFmpegFixupTimestampPP,
    FFmpegMergerPP,
    FFmpegPostProcessor,
    MoveFilesAfterDownloadPP,
    _PLUGIN_CLASSES as plugin_postprocessors
)
from .update import detect_variant
from .version import __version__

if compat_os_name == 'nt':
    import ctypes


class YoutubeDL(object):
    """YoutubeDL class.

    YoutubeDL objects are the ones responsible for downloading the
    actual video file and writing it to disk if the user has requested
    it, among some other tasks. In most cases there should be one per
    program. Since, given a video URL, the downloader doesn't know how to
    extract all the needed information (that is the task of the
    InfoExtractors), it has to pass the URL to one of them.

    For this, YoutubeDL objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    YoutubeDL processes the extracted information, possibly using a File
    Downloader to download the video.

    YoutubeDL objects accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The YoutubeDL also
    registers itself as the downloader in charge of the InfoExtractors
    that are added to it, so this is a "mutual registration".
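
    A minimal usage sketch (the URL is illustrative; download() is defined
    further down in this class):

        from yt_dlp import YoutubeDL

        with YoutubeDL({'format': 'best'}) as ydl:
            ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])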

    Available options:

    username:          Username for authentication purposes.
    password:          Password for authentication purposes.
    videopassword:     Password for accessing a video.
    ap_mso:            Adobe Pass multiple-system operator identifier.
    ap_username:       Multiple-system operator account username.
    ap_password:       Multiple-system operator account password.
    usenetrc:          Use netrc for authentication instead.
    verbose:           Print additional info to stdout.
    quiet:             Do not print messages to stdout.
    no_warnings:       Do not print out anything for warnings.
    forceprint:        A list of templates to force print.
    forceurl:          Force printing final URL. (Deprecated)
    forcetitle:        Force printing title. (Deprecated)
    forceid:           Force printing ID. (Deprecated)
    forcethumbnail:    Force printing thumbnail URL. (Deprecated)
    forcedescription:  Force printing description. (Deprecated)
    forcefilename:     Force printing final filename. (Deprecated)
    forceduration:     Force printing duration. (Deprecated)
    forcejson:         Force printing info_dict as JSON.
    dump_single_json:  Force printing the info_dict of the whole playlist
                       (or video) as a single JSON line.
    force_write_download_archive: Force writing download archive regardless
                       of 'skip_download' or 'simulate'.
    simulate:          Do not download the video files. If unset (or None),
                       simulate only if listsubtitles, listformats or
                       list_thumbnails is used
    format:            Video format code. See "FORMAT SELECTION" for more details.
    allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
    ignore_no_formats_error: Ignore "No video formats" error. Useful for
                       extracting metadata even if the video is not actually
                       available for download (experimental)
    format_sort:       How to sort the video formats. See "Sorting Formats"
                       for more details.
    format_sort_force: Force the given format_sort. See "Sorting Formats"
                       for more details.
    allow_multiple_video_streams: Allow multiple video streams to be merged
                       into a single file
    allow_multiple_audio_streams: Allow multiple audio streams to be merged
                       into a single file
    check_formats:     Whether to test if the formats are downloadable.
                       Can be True (check all), False (check none)
                       or None (check only if requested by extractor)
    paths:             Dictionary of output paths. The allowed keys are 'home',
                       'temp' and the keys of OUTTMPL_TYPES (in utils.py)
    outtmpl:           Dictionary of templates for output names. Allowed keys
                       are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
                       For compatibility with youtube-dl, a single string can also be used
    outtmpl_na_placeholder: Placeholder for unavailable meta fields.
    restrictfilenames: Do not allow "&" and spaces in file names
    trim_file_name:    Limit length of filename (extension excluded)
    windowsfilenames:  Force the filenames to be windows compatible
    ignoreerrors:      Do not stop on download/postprocessing errors.
                       Can be 'only_download' to ignore only download errors.
                       Default is 'only_download' for CLI, but False for API
    skip_playlist_after_errors: Number of allowed failures until the rest of
                       the playlist is skipped
    force_generic_extractor: Force downloader to use the generic extractor
    overwrites:        Overwrite all video and metadata files if True,
                       overwrite only non-video files if None
                       and don't overwrite any file if False.
                       For compatibility with youtube-dl,
                       "nooverwrites" may also be used instead
    playliststart:     Playlist item to start at.
    playlistend:       Playlist item to end at.
    playlist_items:    Specific indices of playlist to download.
    playlistreverse:   Download playlist items in reverse order.
    playlistrandom:    Download playlist items in random order.
    matchtitle:        Download only matching titles.
    rejecttitle:       Reject downloads for matching titles.
    logger:            Log messages to a logging.Logger instance.
    logtostderr:       Log messages to stderr instead of stdout.
    consoletitle:      Display progress in console window's titlebar.
    writedescription:  Write the video description to a .description file
    writeinfojson:     Write the video metadata to a .info.json file
    clean_infojson:    Remove private fields from the infojson
    getcomments:       Extract video comments. This will not be written to disk
                       unless writeinfojson is also given
    writeannotations:  Write the video annotations to a .annotations.xml file
    writethumbnail:    Write the thumbnail image to a file
    allow_playlist_files: Whether to write playlists' description, infojson etc.
                       also to disk when using the 'write*' options
    write_all_thumbnails: Write all thumbnail formats to files
    writelink:         Write an internet shortcut file, depending on the
                       current platform (.url/.webloc/.desktop)
    writeurllink:      Write a Windows internet shortcut file (.url)
    writewebloclink:   Write a macOS internet shortcut file (.webloc)
    writedesktoplink:  Write a Linux internet shortcut file (.desktop)
    writesubtitles:    Write the video subtitles to a file
    writeautomaticsub: Write the automatically generated subtitles to a file
    allsubtitles:      Deprecated - Use subtitleslangs = ['all'].
                       Downloads all the subtitles of the video
                       (requires writesubtitles or writeautomaticsub)
    listsubtitles:     Lists all available subtitles for the video
    subtitlesformat:   The format code for subtitles
    subtitleslangs:    List of languages of the subtitles to download (can be regex).
                       The list may contain "all" to refer to all the available
                       subtitles. The language can be prefixed with a "-" to
                       exclude it from the requested languages. Eg: ['all', '-live_chat']
    keepvideo:         Keep the video file after post-processing
    daterange:         A DateRange object; download only if the upload_date is in the range.
    skip_download:     Skip the actual download of the video file
    cachedir:          Location of the cache files in the filesystem.
                       False to disable filesystem cache.
    noplaylist:        Download single video instead of a playlist if in doubt.
    age_limit:         An integer representing the user's age in years.
                       Unsuitable videos for the given age are skipped.
    min_views:         An integer representing the minimum view count the video
                       must have in order to not be skipped.
                       Videos without view count information are always
                       downloaded. None for no limit.
    max_views:         An integer representing the maximum view count.
                       Videos that are more popular than that are not
                       downloaded.
                       Videos without view count information are always
                       downloaded. None for no limit.
    download_archive:  File name of a file where all downloads are recorded.
                       Videos already present in the file are not downloaded
                       again.
    break_on_existing: Stop the download process after attempting to download a
                       file that is in the archive.
    break_on_reject:   Stop the download process when encountering a video that
                       has been filtered out.
    cookiefile:        File name where cookies should be read from and dumped to
    cookiesfrombrowser: A tuple containing the name of the browser and the profile
                       name/path from where cookies are loaded.
                       Eg: ('chrome', ) or ('vivaldi', 'default')
    nocheckcertificate: Do not verify SSL certificates
    prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
                       At the moment, this is only supported by YouTube.
    proxy:             URL of the proxy server to use
    geo_verification_proxy: URL of the proxy to use for IP address verification
                       on geo-restricted sites.
    socket_timeout:    Time to wait for unresponsive hosts, in seconds
    bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
    debug_printtraffic: Print out sent and received HTTP traffic
    include_ads:       Download ads as well
    default_search:    Prepend this string if an input url is not valid.
                       'auto' for elaborate guessing
    encoding:          Use this encoding instead of the system-specified.
    extract_flat:      Do not resolve URLs, return the immediate result.
                       Pass in 'in_playlist' to only show this behavior for
                       playlist items.
    postprocessors:    A list of dictionaries, each with an entry
                       * key:  The name of the postprocessor. See
                               yt_dlp/postprocessor/__init__.py for a list.
                       * when: When to run the postprocessor. Can be one of
                               pre_process|before_dl|post_process|after_move.
                               Assumed to be 'post_process' if not given
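                       A minimal entry sketch that extracts mp3 audio after
                       download (assumes ffmpeg is available):
                           {'key': 'FFmpegExtractAudio', 'preferredcodec': 'mp3'}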
    post_hooks:        Deprecated - Register a custom postprocessor instead.
                       A list of functions that get called as the final step
                       for each video file, after all postprocessors have been
                       called. The filename will be passed as the only argument.
    progress_hooks:    A list of functions that get called on download
                       progress, with a dictionary with the entries
                       * status: One of "downloading", "error", or "finished".
                                 Check this first and ignore unknown values.
                       * info_dict: The extracted info_dict

                       If status is one of "downloading" or "finished", the
                       following properties may also be present:
                       * filename: The final filename (always present)
                       * tmpfilename: The filename we're currently writing to
                       * downloaded_bytes: Bytes on disk
                       * total_bytes: Size of the whole file, None if unknown
                       * total_bytes_estimate: Guess of the eventual file size,
                                               None if unavailable.
                       * elapsed: The number of seconds since download started.
                       * eta: The estimated time in seconds, None if unknown
                       * speed: The download speed in bytes/second, None if
                                unknown
                       * fragment_index: The counter of the currently
                                         downloaded video fragment.
                       * fragment_count: The number of fragments (= individual
                                         files that will be merged)

                       Progress hooks are guaranteed to be called at least once
                       (with status "finished") if the download is successful.
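                       A minimal hook sketch using only the fields listed above:

                           def hook(d):
                               if d['status'] == 'finished':
                                   print('Downloaded', d['filename'])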
    postprocessor_hooks: A list of functions that get called on postprocessing
                       progress, with a dictionary with the entries
                       * status: One of "started", "processing", or "finished".
                                 Check this first and ignore unknown values.
                       * postprocessor: Name of the postprocessor
                       * info_dict: The extracted info_dict

                       Progress hooks are guaranteed to be called at least twice
                       (with status "started" and "finished") if the processing is successful.
    merge_output_format: Extension to use when merging formats.
    final_ext:         Expected final extension; used to detect when the file was
                       already downloaded and converted. "merge_output_format" is
                       replaced by this extension when given
    fixup:             Automatically correct known faults of the file.
                       One of:
                       - "never": do nothing
                       - "warn": only emit a warning
                       - "detect_or_warn": check whether we can do anything
                         about it, warn otherwise (default)
    source_address:    Client-side IP address to bind to.
    call_home:         Boolean, true iff we are allowed to contact the
                       yt-dlp servers for debugging. (BROKEN)
    sleep_interval_requests: Number of seconds to sleep between requests
                       during extraction
    sleep_interval:    Number of seconds to sleep before each download when
                       used alone or a lower bound of a range for randomized
                       sleep before each download (minimum possible number
                       of seconds to sleep) when used along with
                       max_sleep_interval.
    max_sleep_interval: Upper bound of a range for randomized sleep before each
                       download (maximum possible number of seconds to sleep).
                       Must only be used along with sleep_interval.
                       Actual sleep time will be a random float from range
                       [sleep_interval; max_sleep_interval].
    sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
    listformats:       Print an overview of available video formats and exit.
    list_thumbnails:   Print a table of all thumbnails and exit.
    match_filter:      A function that gets called with the info_dict of
                       every video.
                       If it returns a message, the video is ignored.
                       If it returns None, the video is downloaded.
                       match_filter_func in utils.py is one example for this.
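                       A minimal filter sketch (the duration check is
                       illustrative; any info_dict field can be used):

                           def longer_than_a_minute(info_dict):
                               if (info_dict.get('duration') or 0) < 60:
                                   return 'Too short, skipping'
                               return None  # download the video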
    no_color:          Do not emit color codes in output.
    geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
                       HTTP header
    geo_bypass_country:
                       Two-letter ISO 3166-2 country code that will be used for
                       explicit geographic restriction bypassing via faking
                       X-Forwarded-For HTTP header
    geo_bypass_ip_block:
                       IP range in CIDR notation that will be used similarly to
                       geo_bypass_country

    The following options determine which downloader is picked:
    external_downloader: A dictionary of protocol keys and the executable of the
                       external downloader to use for it. The allowed protocols
                       are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
                       Set the value to 'native' to use the native downloader
    hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
                       or {'m3u8': 'ffmpeg'} instead.
                       Use the native HLS downloader instead of ffmpeg/avconv
                       if True, otherwise use ffmpeg/avconv if False, otherwise
                       use downloader suggested by extractor if None.
    compat_opts:       Compatibility options. See "Differences in default behavior".
                       The following options do not work when used through the API:
                       filename, abort-on-error, multistreams, no-live-chat, format-sort,
                       no-clean-infojson, no-playlist-metafiles, no-keep-subs.
                       Refer to __init__.py for their implementation
    progress_template: Dictionary of templates for progress outputs.
                       Allowed keys are 'download', 'postprocess',
                       'download-title' (console title) and 'postprocess-title'.
                       The template is mapped on a dictionary with keys 'progress' and 'info'

    The following parameters are not used by YoutubeDL itself; they are used by
    the downloader (see yt_dlp/downloader/common.py):
    nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
    max_filesize, test, noresizebuffer, retries, fragment_retries, continuedl,
    noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
    external_downloader_args.

    The following options are used by the post processors:
    prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
                       otherwise prefer ffmpeg. (avconv support is deprecated)
    ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
                       to the binary or its containing directory.
    postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
                       and a list of additional command-line arguments for the
                       postprocessor/executable. The dict can also have "PP+EXE" keys
                       which are used when the given exe is used by the given PP.
                       Use 'default' as the name for arguments to be passed to all PP.
                       For compatibility with youtube-dl, a single list of args
                       can also be used

    The following options are used by the extractors:
    extractor_retries: Number of times to retry for known errors
    dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
    hls_split_discontinuity: Split HLS playlists to different formats at
                       discontinuities such as ad breaks (default: False)
    extractor_args:    A dictionary of arguments to be passed to the extractors.
                       See "EXTRACTOR ARGUMENTS" for details.
                       Eg: {'youtube': {'skip': ['dash', 'hls']}}
    youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
                       If True (default), DASH manifests and related
                       data will be downloaded and processed by extractor.
                       You can reduce network I/O by disabling it if you don't
                       care about DASH. (only for youtube)
    youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
                       If True (default), HLS manifests and related
                       data will be downloaded and processed by extractor.
                       You can reduce network I/O by disabling it if you don't
                       care about HLS. (only for youtube)
    """

    _NUMERIC_FIELDS = set((
        'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
        'timestamp', 'release_timestamp',
        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
        'average_rating', 'comment_count', 'age_limit',
        'start_time', 'end_time',
        'chapter_number', 'season_number', 'episode_number',
        'track_number', 'disc_number', 'release_year',
    ))

    _format_selection_exts = {
        'audio': {'m4a', 'mp3', 'ogg', 'aac'},
        'video': {'mp4', 'flv', 'webm', '3gp'},
        'storyboards': {'mhtml'},
    }

    params = None
    _ies = {}
    _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
    _printed_messages = set()
    _first_webpage_request = True
    _download_retcode = None
    _num_downloads = None
    _playlist_level = 0
    _playlist_urls = set()
    _screen_file = None

    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options.
        @param auto_init    Whether to load the default extractors and print header (if verbose).
                            Set to 'no_verbose_header' to not print the header
        """
        if params is None:
            params = {}
        self._ies = {}
        self._ies_instances = {}
        self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
        self._printed_messages = set()
        self._first_webpage_request = True
        self._post_hooks = []
        self._progress_hooks = []
        self._postprocessor_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr
        self.params = params
        self.cache = Cache(self)

        windows_enable_vt_mode()
        # FIXME: This will break if we ever print color to stdout
        self.params['no_color'] = self.params.get('no_color') or not supports_terminal_sequences(self._err_file)

        if sys.version_info < (3, 6):
            self.report_warning(
                'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])

        if self.params.get('allow_unplayable_formats'):
            self.report_warning(
                f'You have asked for {self._color_text("unplayable formats", "blue")} to be listed/downloaded. '
                'This is a developer option intended for debugging. \n'
                '         If you experience any issues while using this option, '
                f'{self._color_text("DO NOT", "red")} open a bug report')

        def check_deprecated(param, option, suggestion):
            if self.params.get(param) is not None:
                self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
                return True
            return False

        if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
        check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
        check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

        for msg in self.params.get('warnings', []):
            self.report_warning(msg)

        if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
            # nooverwrites was unnecessarily changed to overwrites
            # in 0c3d0f51778b153f65c21906031c2e091fcfb641
            # This ensures compatibility with both keys
            self.params['overwrites'] = not self.params['nooverwrites']
        elif self.params.get('overwrites') is None:
            self.params.pop('overwrites', None)
        else:
            self.params['nooverwrites'] = not self.params['overwrites']

        if params.get('bidi_workaround', False):
            try:
                import pty
                master, slave = pty.openpty()
                width = compat_get_terminal_size().columns
                if width is None:
                    width_args = []
                else:
                    width_args = ['-w', str(width)]
                sp_kwargs = dict(
                    stdin=subprocess.PIPE,
                    stdout=slave,
                    stderr=self._err_file)
                try:
                    self._output_process = subprocess.Popen(
                        ['bidiv'] + width_args, **sp_kwargs
                    )
                except OSError:
                    self._output_process = subprocess.Popen(
                        ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == errno.ENOENT:
                    self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround. Make sure that fribidi is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        if (sys.platform != 'win32'
                and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not params.get('restrictfilenames', False)):
            # Unicode filesystem API will throw errors (#1474, #13027)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        self.outtmpl_dict = self.parse_outtmpl()

        # Creating format selector here allows us to catch syntax errors before the extraction
        self.format_selector = (
            None if self.params.get('format') is None
            else self.build_format_selector(self.params['format']))

        self._setup_opener()

        if auto_init:
            if auto_init != 'no_verbose_header':
                self.print_debug_header()
            self.add_default_info_extractors()

        for pp_def_raw in self.params.get('postprocessors', []):
            pp_def = dict(pp_def_raw)
            when = pp_def.pop('when', 'post_process')
            pp_class = get_postprocessor(pp_def.pop('key'))
            pp = pp_class(self, **compat_kwargs(pp_def))
            self.add_post_processor(pp, when=when)

        for ph in self.params.get('post_hooks', []):
            self.add_post_hook(ph)

        for ph in self.params.get('progress_hooks', []):
            self.add_progress_hook(ph)

        register_socks_protocols()

        def preload_download_archive(fn):
            """Preload the archive, if any is specified"""
            if fn is None:
                return False
            self.write_debug('Loading archive file %r\n' % fn)
            try:
                with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                    for line in archive_file:
                        self.archive.add(line.strip())
            except IOError as ioe:
                if ioe.errno != errno.ENOENT:
                    raise
                return False
            return True

        self.archive = set()
        preload_download_archive(self.params.get('download_archive'))

    def warn_if_short_id(self, argv):
        # short YouTube ID starting with dash?
        idxs = [
            i for i, a in enumerate(argv)
            if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
        if idxs:
            correct_argv = (
                ['yt-dlp']
                + [a for i, a in enumerate(argv) if i not in idxs]
                + ['--'] + [argv[i] for i in idxs]
            )
            self.report_warning(
                'Long argument string detected. '
                'Use -- to separate parameters and URLs, like this:\n%s\n' %
                args_to_str(correct_argv))

    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        ie_key = ie.ie_key()
        self._ies[ie_key] = ie
        if not isinstance(ie, type):
            self._ies_instances[ie_key] = ie
            ie.set_downloader(self)

    def _get_info_extractor_class(self, ie_key):
        ie = self._ies.get(ie_key)
        if ie is None:
            ie = get_info_extractor(ie_key)
            self.add_info_extractor(ie)
        return ie

    def get_info_extractor(self, ie_key):
        """
        Get an instance of an IE with name ie_key. It will try to get one from
        the _ies list; if there is no instance, it will create a new one and add
        it to the extractor list.
        """
        ie = self._ies_instances.get(ie_key)
        if ie is None:
            ie = get_info_extractor(ie_key)()
            self.add_info_extractor(ie)
        return ie

    def add_default_info_extractors(self):
        """
        Add the InfoExtractors returned by gen_extractors to the end of the list
        """
        for ie in gen_extractor_classes():
            self.add_info_extractor(ie)

    def add_post_processor(self, pp, when='post_process'):
        """Add a PostProcessor object to the end of the chain."""
        self._pps[when].append(pp)
        pp.set_downloader(self)

    def add_post_hook(self, ph):
        """Add the post hook"""
        self._post_hooks.append(ph)

    def add_progress_hook(self, ph):
        """Add the download progress hook"""
        self._progress_hooks.append(ph)

    def add_postprocessor_hook(self, ph):
        """Add the postprocessing progress hook"""
        self._postprocessor_hooks.append(ph)

    def _bidi_workaround(self, message):
        if not hasattr(self, '_output_channel'):
            return message

        assert hasattr(self, '_output_process')
        assert isinstance(message, compat_str)
        line_count = message.count('\n') + 1
        self._output_process.stdin.write((message + '\n').encode('utf-8'))
        self._output_process.stdin.flush()
        res = ''.join(self._output_channel.readline().decode('utf-8')
                      for _ in range(line_count))
        return res[:-len('\n')]

    def _write_string(self, message, out=None, only_once=False):
        if only_once:
            if message in self._printed_messages:
                return
            self._printed_messages.add(message)
        write_string(message, out=out, encoding=self.params.get('encoding'))

    def to_stdout(self, message, skip_eol=False, quiet=False):
        """Print message to stdout"""
        if self.params.get('logger'):
            self.params['logger'].debug(message)
        elif not quiet or self.params.get('verbose'):
            self._write_string(
                '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
                self._err_file if quiet else self._screen_file)

    def to_stderr(self, message, only_once=False):
        """Print message to stderr"""
        assert isinstance(message, compat_str)
        if self.params.get('logger'):
            self.params['logger'].error(message)
        else:
            self._write_string('%s\n' % self._bidi_workaround(message), self._err_file, only_once=only_once)

    def to_console_title(self, message):
        if not self.params.get('consoletitle', False):
            return
        if compat_os_name == 'nt':
            if ctypes.windll.kernel32.GetConsoleWindow():
                # c_wchar_p() might not be necessary if `message` is
                # already of type unicode()
                ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        elif 'TERM' in os.environ:
            self._write_string('\033]0;%s\007' % message, self._screen_file)

    def save_console_title(self):
        if not self.params.get('consoletitle', False):
            return
        if self.params.get('simulate'):
            return
        if compat_os_name != 'nt' and 'TERM' in os.environ:
            # Save the title on stack
            self._write_string('\033[22;0t', self._screen_file)

    def restore_console_title(self):
        if not self.params.get('consoletitle', False):
            return
        if self.params.get('simulate'):
            return
        if compat_os_name != 'nt' and 'TERM' in os.environ:
            # Restore the title from stack
            self._write_string('\033[23;0t', self._screen_file)

    def __enter__(self):
        self.save_console_title()
        return self

    def __exit__(self, *args):
        self.restore_console_title()

        if self.params.get('cookiefile') is not None:
            self.cookiejar.save(ignore_discard=True, ignore_expires=True)

    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on whether the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += encode_compat_str(traceback.format_exc())
                else:
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            if tb:
                self.to_stderr(tb)
        if not self.params.get('ignoreerrors'):
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1

    def to_screen(self, message, skip_eol=False):
        """Print message to stdout if not in quiet mode"""
        self.to_stdout(
            message, skip_eol, quiet=self.params.get('quiet', False))

    def _color_text(self, text, color):
        if self.params.get('no_color'):
            return text
        return f'{TERMINAL_SEQUENCES[color.upper()]}{text}{TERMINAL_SEQUENCES["RESET_STYLE"]}'

    def report_warning(self, message, only_once=False):
        '''
        Print the message to stderr; it will be prefixed with 'WARNING:'.
        If stderr is a tty file, the 'WARNING:' will be colored.
        '''
        if self.params.get('logger') is not None:
            self.params['logger'].warning(message)
        else:
            if self.params.get('no_warnings'):
                return
            self.to_stderr(f'{self._color_text("WARNING:", "yellow")} {message}', only_once)

    def report_error(self, message, tb=None):
        '''
        Do the same as trouble, but prefixes the message with 'ERROR:', colored
        in red if stderr is a tty file.
        '''
        self.trouble(f'{self._color_text("ERROR:", "red")} {message}', tb)

    def write_debug(self, message, only_once=False):
        '''Log a debug message to the logger, or print it to stderr'''
        if not self.params.get('verbose', False):
            return
        message = '[debug] %s' % message
        if self.params.get('logger'):
            self.params['logger'].debug(message)
        else:
            self.to_stderr(message, only_once)

    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        try:
            self.to_screen('[download] %s has already been downloaded' % file_name)
        except UnicodeEncodeError:
            self.to_screen('[download] The file has already been downloaded')

    def report_file_delete(self, file_name):
        """Report that existing file will be deleted."""
        try:
            self.to_screen('Deleting existing file %s' % file_name)
        except UnicodeEncodeError:
            self.to_screen('Deleting existing file')

    def raise_no_formats(self, info, forced=False):
        has_drm = info.get('__has_drm')
        msg = 'This video is DRM protected' if has_drm else 'No video formats found!'
        expected = self.params.get('ignore_no_formats_error')
        if forced or not expected:
            raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
                                 expected=has_drm or expected)
        else:
            self.report_warning(msg)

    def parse_outtmpl(self):
        outtmpl_dict = self.params.get('outtmpl', {})
        if not isinstance(outtmpl_dict, dict):
            outtmpl_dict = {'default': outtmpl_dict}
        # Remove spaces in the default template
        if self.params.get('restrictfilenames'):
            sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')
        else:
            sanitize = lambda x: x
        outtmpl_dict.update({
            k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items()
            if outtmpl_dict.get(k) is None})
        for key, val in outtmpl_dict.items():
            if isinstance(val, bytes):
                self.report_warning(
                    'Parameter outtmpl is bytes, but should be a unicode string. '
                    'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
        return outtmpl_dict

    def get_output_path(self, dir_type='', filename=None):
        paths = self.params.get('paths', {})
        assert isinstance(paths, dict)
        path = os.path.join(
            expand_path(paths.get('home', '').strip()),
            expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
            filename or '')

        # Temporary fix for #4787
        # 'Treat' all problem characters by passing filename through preferredencoding
        # to work around encoding issues with subprocess on python2 @ Windows
        if sys.version_info < (3, 0) and sys.platform == 'win32':
            path = encodeFilename(path, True).decode(preferredencoding())
        return sanitize_path(path, force=self.params.get('windowsfilenames'))

    @staticmethod
    def _outtmpl_expandpath(outtmpl):
        # expand_path translates '%%' into '%' and '$$' into '$'
        # correspondingly; that is not what we want, since we need to keep
        # '%%' intact for the template dict substitution step. Working around
        # with a boundary-alike separator hack.
        sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
        outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))

        # outtmpl should be expand_path'ed before template dict substitution
        # because meta fields may contain env variables we don't want to
        # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
        # title "Hello $PATH", we don't want `$PATH` to be expanded.
        return expand_path(outtmpl).replace(sep, '')

    @staticmethod
    def escape_outtmpl(outtmpl):
        ''' Escape any remaining strings like %s, %abc% etc. '''
        return re.sub(
            STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
            lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
            outtmpl)

    @classmethod
    def validate_outtmpl(cls, outtmpl):
        ''' @return None or Exception object '''
        outtmpl = re.sub(
            STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBU]'),
            lambda mobj: f'{mobj.group(0)[:-1]}s',
            cls._outtmpl_expandpath(outtmpl))
        try:
            cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
            return None
        except ValueError as err:
            return err
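
    # Illustrative use of the validator above: YoutubeDL.validate_outtmpl(tmpl)
    # returns None when `tmpl` is a usable output template, and the ValueError
    # raised during the test substitution otherwise.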

    @staticmethod
    def _copy_infodict(info_dict):
        info_dict = dict(info_dict)
        for key in ('__original_infodict', '__postprocessors'):
            info_dict.pop(key, None)
        return info_dict

    def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
        """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict """
        info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

        info_dict = self._copy_infodict(info_dict)
        info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
            formatSeconds(info_dict['duration'], '-' if sanitize else ':')
            if info_dict.get('duration', None) is not None
            else None)
        info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
        if info_dict.get('resolution') is None:
            info_dict['resolution'] = self.format_resolution(info_dict, default=None)

        # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
        # of %(field)s to %(field)0Nd for backward compatibility
        field_size_compat_map = {
            'playlist_index': len(str(info_dict.get('_last_playlist_index') or '')),
            'playlist_autonumber': len(str(info_dict.get('n_entries') or '')),
            'autonumber': self.params.get('autonumber_size') or 5,
        }

        TMPL_DICT = {}
        EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBU]'))
        MATH_FUNCTIONS = {
            '+': float.__add__,
            '-': float.__sub__,
        }
        # Field is of the form key1.key2...
        # where keys (except first) can be string, int or slice
        FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
        MATH_FIELD_RE = r'''{field}|{num}'''.format(field=FIELD_RE, num=r'-?\d+(?:\.\d+)?')
        MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
        INTERNAL_FORMAT_RE = re.compile(r'''(?x)
            (?P<negate>-)?
            (?P<fields>{field})
            (?P<maths>(?:{math_op}{math_field})*)
            (?:>(?P<strf_format>.+?))?
            (?P<alternate>(?<!\\),[^|)]+)?
            (?:\|(?P<default>.*?))?
            $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))

        def _traverse_infodict(k):
            k = k.split('.')
            if k[0] == '':
                k.pop(0)
            return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)

        def get_value(mdict):
            # Object traversal
            value = _traverse_infodict(mdict['fields'])
            # Negative
            if mdict['negate']:
                value = float_or_none(value)
                if value is not None:
                    value *= -1
            # Do maths
            offset_key = mdict['maths']
            if offset_key:
                value = float_or_none(value)
                operator = None
                while offset_key:
                    item = re.match(
                        MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                        offset_key).group(0)
                    offset_key = offset_key[len(item):]
                    if operator is None:
                        operator = MATH_FUNCTIONS[item]
                        continue
                    item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                    offset = float_or_none(item)
                    if offset is None:
                        offset = float_or_none(_traverse_infodict(item))
                    try:
                        value = operator(value, multiplier * offset)
                    except (TypeError, ZeroDivisionError):
                        return None
                    operator = None
            # Datetime formatting
            if mdict['strf_format']:
                value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

            return value

        na = self.params.get('outtmpl_na_placeholder', 'NA')

        def _dumpjson_default(obj):
            if isinstance(obj, (set, LazyList)):
                return list(obj)
            raise TypeError(f'Object of type {type(obj).__name__} is not JSON serializable')

        def create_key(outer_mobj):
            if not outer_mobj.group('has_key'):
                return outer_mobj.group(0)
            key = outer_mobj.group('key')
            mobj = re.match(INTERNAL_FORMAT_RE, key)
            initial_field = mobj.group('fields').split('.')[-1] if mobj else ''
            value, default = None, na
            while mobj:
                mobj = mobj.groupdict()
                default = mobj['default'] if mobj['default'] is not None else default
                value = get_value(mobj)
                if value is None and mobj['alternate']:
                    mobj = re.match(INTERNAL_FORMAT_RE, mobj['alternate'][1:])
                else:
                    break

            fmt = outer_mobj.group('format')
            if fmt == 's' and value is not None and key in field_size_compat_map.keys():
                fmt = '0{:d}d'.format(field_size_compat_map[key])

            value = default if value is None else value

            str_fmt = f'{fmt[:-1]}s'
            if fmt[-1] == 'l':  # list
                delim = '\n' if '#' in (outer_mobj.group('conversion') or '') else ', '
                value, fmt = delim.join(variadic(value)), str_fmt
            elif fmt[-1] == 'j':  # json
                value, fmt = json.dumps(value, default=_dumpjson_default), str_fmt
            elif fmt[-1] == 'q':  # quoted
                value, fmt = compat_shlex_quote(str(value)), str_fmt
            elif fmt[-1] == 'B':  # bytes
                value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
                value, fmt = value.decode('utf-8', 'ignore'), 's'
            elif fmt[-1] == 'U':  # unicode normalized
                opts = outer_mobj.group('conversion') or ''
                value, fmt = unicodedata.normalize(
                    # "+" = compatibility equivalence, "#" = NFD
                    'NF%s%s' % ('K' if '+' in opts else '', 'D' if '#' in opts else 'C'),
                    value), str_fmt
            elif fmt[-1] == 'c':
                if value:
                    value = str(value)[0]
                else:
                    fmt = str_fmt
            elif fmt[-1] not in 'rs':  # numeric
                value = float_or_none(value)
                if value is None:
                    value, fmt = default, 's'

            if sanitize:
                if fmt[-1] == 'r':
                    # If value is an object, sanitize might convert it to a string
                    # So we convert it to repr first
                    value, fmt = repr(value), str_fmt
                if fmt[-1] in 'csr':
                    value = sanitize(initial_field, value)

            key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
            TMPL_DICT[key] = value
            return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

        return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT

    def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
        outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
        return self.escape_outtmpl(outtmpl) % info_dict
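
    # A worked example (values are hypothetical) of the template machinery above:
    #     ydl.evaluate_outtmpl('%(title)s-%(id)s.%(ext)s',
    #                          {'title': 'a', 'id': 'b', 'ext': 'mp4'})
    # evaluates to 'a-b.mp4'; fields missing from info_dict fall back to the
    # 'outtmpl_na_placeholder' parameter ('NA' by default).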

    def _prepare_filename(self, info_dict, tmpl_type='default'):
        try:
            sanitize = lambda k, v: sanitize_filename(
                compat_str(v),
                restricted=self.params.get('restrictfilenames'),
                is_id=(k == 'id' or k.endswith('_id')))
            outtmpl = self._outtmpl_expandpath(self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default']))
            filename = self.evaluate_outtmpl(outtmpl, info_dict, sanitize)

            force_ext = OUTTMPL_TYPES.get(tmpl_type)
            if filename and force_ext is not None:
                filename = replace_extension(filename, force_ext, info_dict.get('ext'))

            # https://github.com/blackjack4494/youtube-dlc/issues/85
            trim_file_name = self.params.get('trim_file_name', False)
            if trim_file_name:
                fn_groups = filename.rsplit('.')
                ext = fn_groups[-1]
                sub_ext = ''
                if len(fn_groups) > 2:
                    sub_ext = fn_groups[-2]
                filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))

            return filename
        except ValueError as err:
            self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
            return None

    def prepare_filename(self, info_dict, dir_type='', warn=False):
        """Generate the output filename."""

        filename = self._prepare_filename(info_dict, dir_type or 'default')
        if not filename and dir_type not in ('', 'temp'):
            return ''

        if warn:
            if not self.params.get('paths'):
                pass
            elif filename == '-':
                self.report_warning('--paths is ignored when outputting to stdout', only_once=True)
            elif os.path.isabs(filename):
                self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
        if filename == '-' or not filename:
            return filename

        return self.get_output_path(dir_type, filename)

    def _match_entry(self, info_dict, incomplete=False, silent=False):
        """ Returns None if the file should be downloaded """

        video_title = info_dict.get('title', info_dict.get('id', 'video'))

        def check_filter():
            if 'title' in info_dict:
                # This can happen when we're just evaluating the playlist
                title = info_dict['title']
                matchtitle = self.params.get('matchtitle', False)
                if matchtitle:
                    if not re.search(matchtitle, title, re.IGNORECASE):
                        return '"' + title + '" title did not match pattern "' + matchtitle + '"'
                rejecttitle = self.params.get('rejecttitle', False)
                if rejecttitle:
                    if re.search(rejecttitle, title, re.IGNORECASE):
                        return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
            date = info_dict.get('upload_date')
            if date is not None:
                dateRange = self.params.get('daterange', DateRange())
                if date not in dateRange:
                    return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
            view_count = info_dict.get('view_count')
            if view_count is not None:
                min_views = self.params.get('min_views')
                if min_views is not None and view_count < min_views:
                    return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
                max_views = self.params.get('max_views')
                if max_views is not None and view_count > max_views:
                    return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
            if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
                return 'Skipping "%s" because it is age restricted' % video_title

            match_filter = self.params.get('match_filter')
            if match_filter is not None:
                try:
                    ret = match_filter(info_dict, incomplete=incomplete)
                except TypeError:
                    # For backward compatibility
                    ret = None if incomplete else match_filter(info_dict)
                if ret is not None:
                    return ret
            return None

        if self.in_download_archive(info_dict):
            reason = '%s has already been recorded in the archive' % video_title
            break_opt, break_err = 'break_on_existing', ExistingVideoReached
        else:
            reason = check_filter()
            break_opt, break_err = 'break_on_reject', RejectedVideoReached
        if reason is not None:
            if not silent:
                self.to_screen('[download] ' + reason)
            if self.params.get(break_opt, False):
                raise break_err()
        return reason

    @staticmethod
    def add_extra_info(info_dict, extra_info):
        '''Set the keys from extra_info in info dict if they are missing'''
        for key, value in extra_info.items():
            info_dict.setdefault(key, value)

    def extract_info(self, url, download=True, ie_key=None, extra_info=None,
                     process=True, force_generic_extractor=False):
        """
        Return a list with a dictionary for each video extracted.

        Arguments:
        url -- URL to extract

        Keyword arguments:
        download -- whether to download videos during extraction
        ie_key -- extractor key hint
        extra_info -- dictionary containing the extra values to add to each result
        process -- whether to resolve all unresolved references (URLs, playlist items),
                   must be True for download to work.
        force_generic_extractor -- force using the generic extractor
        """

        if extra_info is None:
            extra_info = {}

        if not ie_key and force_generic_extractor:
            ie_key = 'Generic'

        if ie_key:
            ies = {ie_key: self._get_info_extractor_class(ie_key)}
        else:
            ies = self._ies

        for ie_key, ie in ies.items():
            if not ie.suitable(url):
                continue

            if not ie.working():
                self.report_warning('The program functionality for this site has been marked as broken, '
                                    'and will probably not work.')

            temp_id = ie.get_temp_id(url)
            if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
                self.to_screen("[%s] %s: has already been recorded in archive" % (
                    ie_key, temp_id))
                break
            return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
        else:
            self.report_error('no suitable InfoExtractor for URL %s' % url)

    def __handle_extraction_exceptions(func):
        @functools.wraps(func)
        def wrapper(self, *args, **kwargs):
            try:
                return func(self, *args, **kwargs)
            except GeoRestrictedError as e:
                msg = e.msg
                if e.countries:
                    msg += '\nThis video is available in %s.' % ', '.join(
                        map(ISO3166Utils.short2full, e.countries))
                msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to work around this.'
                self.report_error(msg)
            except ExtractorError as e:  # An error we somewhat expected
                self.report_error(compat_str(e), e.format_traceback())
            except ThrottledDownload:
                self.to_stderr('\r')
                self.report_warning('The download speed is below throttle limit. Re-extracting data')
                return wrapper(self, *args, **kwargs)
            except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached, LazyList.IndexError):
                raise
            except Exception as e:
                if self.params.get('ignoreerrors'):
                    self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
                else:
                    raise
        return wrapper

    @__handle_extraction_exceptions
    def __extract_info(self, url, ie, download, extra_info, process):
        ie_result = ie.extract(url)
        if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
            return
        if isinstance(ie_result, list):
            # Backwards compatibility: old IE result format
            ie_result = {
                '_type': 'compat_list',
                'entries': ie_result,
            }
        if extra_info.get('original_url'):
            ie_result.setdefault('original_url', extra_info['original_url'])
        self.add_default_extra_info(ie_result, ie, url)
        if process:
            return self.process_ie_result(ie_result, download, extra_info)
        else:
            return ie_result

    def add_default_extra_info(self, ie_result, ie, url):
        if url is not None:
            self.add_extra_info(ie_result, {
                'webpage_url': url,
                'original_url': url,
                'webpage_url_basename': url_basename(url),
            })
        if ie is not None:
            self.add_extra_info(ie_result, {
                'extractor': ie.IE_NAME,
                'extractor_key': ie.ie_key(),
            })
1335
1336 def process_ie_result(self, ie_result, download=True, extra_info=None):
1337 """
1338 Take the result of the ie(may be modified) and resolve all unresolved
1339 references (URLs, playlist items).
1340
1341 It will also download the videos if 'download'.
1342 Returns the resolved ie_result.
1343 """
1344 if extra_info is None:
1345 extra_info = {}
1346 result_type = ie_result.get('_type', 'video')
1347
1348 if result_type in ('url', 'url_transparent'):
1349 ie_result['url'] = sanitize_url(ie_result['url'])
1350 if ie_result.get('original_url'):
1351 extra_info.setdefault('original_url', ie_result['original_url'])
1352
1353 extract_flat = self.params.get('extract_flat', False)
1354 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1355 or extract_flat is True):
1356 info_copy = ie_result.copy()
1357 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1358 if ie and not ie_result.get('id'):
1359 info_copy['id'] = ie.get_temp_id(ie_result['url'])
1360 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1361 self.add_extra_info(info_copy, extra_info)
1362 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
1363 if self.params.get('force_write_download_archive', False):
1364 self.record_download_archive(info_copy)
1365 return ie_result
1366
1367 if result_type == 'video':
1368 self.add_extra_info(ie_result, extra_info)
1369 ie_result = self.process_video_result(ie_result, download=download)
1370 additional_urls = (ie_result or {}).get('additional_urls')
1371 if additional_urls:
1372 # TODO: Improve MetadataParserPP to allow setting a list
1373 if isinstance(additional_urls, compat_str):
1374 additional_urls = [additional_urls]
1375 self.to_screen(
1376 '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1377 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1378 ie_result['additional_entries'] = [
1379 self.extract_info(
1380 url, download, extra_info,
1381 force_generic_extractor=self.params.get('force_generic_extractor'))
1382 for url in additional_urls
1383 ]
1384 return ie_result
1385 elif result_type == 'url':
1386 # We have to add extra_info to the results because it may be
1387 # contained in a playlist
1388 return self.extract_info(
1389 ie_result['url'], download,
1390 ie_key=ie_result.get('ie_key'),
1391 extra_info=extra_info)
1392 elif result_type == 'url_transparent':
1393 # Use the information from the embedding page
1394 info = self.extract_info(
1395 ie_result['url'], ie_key=ie_result.get('ie_key'),
1396 extra_info=extra_info, download=False, process=False)
1397
1398 # extract_info may return None when ignoreerrors is enabled and
1399 # extraction failed with an error, don't crash and return early
1400 # in this case
1401 if not info:
1402 return info
1403
1404 force_properties = dict(
1405 (k, v) for k, v in ie_result.items() if v is not None)
1406 for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1407 if f in force_properties:
1408 del force_properties[f]
1409 new_result = info.copy()
1410 new_result.update(force_properties)
1411
1412 # Extracted info may not be a video result (i.e.
1413 # info.get('_type', 'video') != video) but rather an url or
1414 # url_transparent. In such cases outer metadata (from ie_result)
1415 # should be propagated to inner one (info). For this to happen
1416 # _type of info should be overridden with url_transparent. This
1417 # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1418 if new_result.get('_type') == 'url':
1419 new_result['_type'] = 'url_transparent'
1420
1421 return self.process_ie_result(
1422 new_result, download=download, extra_info=extra_info)
1423 elif result_type in ('playlist', 'multi_video'):
1424 # Protect from infinite recursion due to recursively nested playlists
1425 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1426 webpage_url = ie_result['webpage_url']
1427 if webpage_url in self._playlist_urls:
1428 self.to_screen(
1429 '[download] Skipping already downloaded playlist: %s'
1430 % (ie_result.get('title') or ie_result.get('id')))
1431 return
1432
1433 self._playlist_level += 1
1434 self._playlist_urls.add(webpage_url)
1435 self._sanitize_thumbnails(ie_result)
1436 try:
1437 return self.__process_playlist(ie_result, download)
1438 finally:
1439 self._playlist_level -= 1
1440 if not self._playlist_level:
1441 self._playlist_urls.clear()
1442 elif result_type == 'compat_list':
1443 self.report_warning(
1444 'Extractor %s returned a compat_list result. '
1445 'It needs to be updated.' % ie_result.get('extractor'))
1446
1447 def _fixup(r):
1448 self.add_extra_info(r, {
1449 'extractor': ie_result['extractor'],
1450 'webpage_url': ie_result['webpage_url'],
1451 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1452 'extractor_key': ie_result['extractor_key'],
1453 })
1454 return r
1455 ie_result['entries'] = [
1456 self.process_ie_result(_fixup(r), download, extra_info)
1457 for r in ie_result['entries']
1458 ]
1459 return ie_result
1460 else:
1461 raise Exception('Invalid result type: %s' % result_type)
1462
1463 def _ensure_dir_exists(self, path):
1464 return make_dir(path, self.report_error)
1465
1466 def __process_playlist(self, ie_result, download):
1467 # We process each entry in the playlist
1468 playlist = ie_result.get('title') or ie_result.get('id')
1469 self.to_screen('[download] Downloading playlist: %s' % playlist)
1470
1471 if 'entries' not in ie_result:
1472 raise EntryNotInPlaylist()
1473 incomplete_entries = bool(ie_result.get('requested_entries'))
1474 if incomplete_entries:
1475 def fill_missing_entries(entries, indexes):
1476 ret = [None] * max(indexes)
1477 for i, entry in zip(indexes, entries):
1478 ret[i - 1] = entry
1479 return ret
1480 ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
1481
1482 playlist_results = []
1483
1484 playliststart = self.params.get('playliststart', 1)
1485 playlistend = self.params.get('playlistend')
1486 # For backwards compatibility, interpret -1 as whole list
1487 if playlistend == -1:
1488 playlistend = None
1489
1490 playlistitems_str = self.params.get('playlist_items')
1491 playlistitems = None
1492 if playlistitems_str is not None:
1493 def iter_playlistitems(format):
1494 for string_segment in format.split(','):
1495 if '-' in string_segment:
1496 start, end = string_segment.split('-')
1497 for item in range(int(start), int(end) + 1):
1498 yield int(item)
1499 else:
1500 yield int(string_segment)
1501 playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
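# e.g. '1-3,7,10-13' yields [1, 2, 3, 7, 10, 11, 12, 13]; orderedSet also
# drops duplicate indices while preserving their order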
1502
1503 ie_entries = ie_result['entries']
1504 msg = (
1505 'Downloading %d videos' if not isinstance(ie_entries, list)
1506 else 'Collected %d videos; downloading %%d of them' % len(ie_entries))
1507
1508 if isinstance(ie_entries, list):
1509 def get_entry(i):
1510 return ie_entries[i - 1]
1511 else:
1512 if not isinstance(ie_entries, PagedList):
1513 ie_entries = LazyList(ie_entries)
1514
1515 def get_entry(i):
1516 return YoutubeDL.__handle_extraction_exceptions(
1517 lambda self, i: ie_entries[i - 1]
1518 )(self, i)
1519
1520 entries = []
1521 items = playlistitems if playlistitems is not None else itertools.count(playliststart)
1522 for i in items:
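# Playlist indices are 1-based; an explicit 0 in --playlist-items is skipped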
1523 if i == 0:
1524 continue
1525 if playlistitems is None and playlistend is not None and playlistend < i:
1526 break
1527 entry = None
1528 try:
1529 entry = get_entry(i)
1530 if entry is None:
1531 raise EntryNotInPlaylist()
1532 except (IndexError, EntryNotInPlaylist):
1533 if incomplete_entries:
1534 raise EntryNotInPlaylist()
1535 elif not playlistitems:
1536 break
1537 entries.append(entry)
1538 try:
1539 if entry is not None:
1540 self._match_entry(entry, incomplete=True, silent=True)
1541 except (ExistingVideoReached, RejectedVideoReached):
1542 break
1543 ie_result['entries'] = entries
1544
1545 # Save playlist_index before re-ordering
1546 entries = [
1547 ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
1548 for i, entry in enumerate(entries, 1)
1549 if entry is not None]
1550 n_entries = len(entries)
1551
1552 if not playlistitems and (playliststart or playlistend):
1553 playlistitems = list(range(playliststart, playliststart + n_entries))
1554 ie_result['requested_entries'] = playlistitems
1555
1556 if self.params.get('allow_playlist_files', True):
1557 ie_copy = {
1558 'playlist': playlist,
1559 'playlist_id': ie_result.get('id'),
1560 'playlist_title': ie_result.get('title'),
1561 'playlist_uploader': ie_result.get('uploader'),
1562 'playlist_uploader_id': ie_result.get('uploader_id'),
1563 'playlist_index': 0,
1564 }
1565 ie_copy.update(dict(ie_result))
1566
1567 if self._write_info_json('playlist', ie_result,
1568 self.prepare_filename(ie_copy, 'pl_infojson')) is None:
1569 return
1570 if self._write_description('playlist', ie_result,
1571 self.prepare_filename(ie_copy, 'pl_description')) is None:
1572 return
1573 # TODO: This should be passed to ThumbnailsConvertor if necessary
1574 self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
1575
1576 if self.params.get('playlistreverse', False):
1577 entries = entries[::-1]
1578 if self.params.get('playlistrandom', False):
1579 random.shuffle(entries)
1580
1581 x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1582
1583 self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
1584 failures = 0
1585 max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
1586 for i, entry_tuple in enumerate(entries, 1):
1587 playlist_index, entry = entry_tuple
1588 if 'playlist-index' in self.params.get('compat_opts', []):
1589 playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
1590 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
1591 # This __x_forwarded_for_ip thing is a bit ugly but requires
1592 # minimal changes
1593 if x_forwarded_for:
1594 entry['__x_forwarded_for_ip'] = x_forwarded_for
1595 extra = {
1596 'n_entries': n_entries,
1597 '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
1598 'playlist_index': playlist_index,
1599 'playlist_autonumber': i,
1600 'playlist': playlist,
1601 'playlist_id': ie_result.get('id'),
1602 'playlist_title': ie_result.get('title'),
1603 'playlist_uploader': ie_result.get('uploader'),
1604 'playlist_uploader_id': ie_result.get('uploader_id'),
1605 'extractor': ie_result['extractor'],
1606 'webpage_url': ie_result['webpage_url'],
1607 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1608 'extractor_key': ie_result['extractor_key'],
1609 }
1610
1611 if self._match_entry(entry, incomplete=True) is not None:
1612 continue
1613
1614 entry_result = self.__process_iterable_entry(entry, download, extra)
1615 if not entry_result:
1616 failures += 1
1617 if failures >= max_failures:
1618 self.report_error(
1619 'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
1620 break
1621 # TODO: skip failed (empty) entries?
1622 playlist_results.append(entry_result)
1623 ie_result['entries'] = playlist_results
1624 self.to_screen('[download] Finished downloading playlist: %s' % playlist)
1625 return ie_result
1626
1627 @__handle_extraction_exceptions
1628 def __process_iterable_entry(self, entry, download, extra_info):
1629 return self.process_ie_result(
1630 entry, download=download, extra_info=extra_info)
1631
1632 def _build_format_filter(self, filter_spec):
1633 " Returns a function to filter the formats according to the filter_spec "
1634
1635 OPERATORS = {
1636 '<': operator.lt,
1637 '<=': operator.le,
1638 '>': operator.gt,
1639 '>=': operator.ge,
1640 '=': operator.eq,
1641 '!=': operator.ne,
1642 }
1643 operator_rex = re.compile(r'''(?x)\s*
1644 (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1645 (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1646 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
1647 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
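# e.g. 'height<=720', 'filesize>100M' or 'fps!=30?'; a trailing '?' makes the
# filter also match formats for which the field is unknown (none_inclusive)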
1648 m = operator_rex.fullmatch(filter_spec)
1649 if m:
1650 try:
1651 comparison_value = int(m.group('value'))
1652 except ValueError:
1653 comparison_value = parse_filesize(m.group('value'))
1654 if comparison_value is None:
1655 comparison_value = parse_filesize(m.group('value') + 'B')
1656 if comparison_value is None:
1657 raise ValueError(
1658 'Invalid value %r in format specification %r' % (
1659 m.group('value'), filter_spec))
1660 op = OPERATORS[m.group('op')]
1661
1662 if not m:
1663 STR_OPERATORS = {
1664 '=': operator.eq,
1665 '^=': lambda attr, value: attr.startswith(value),
1666 '$=': lambda attr, value: attr.endswith(value),
1667 '*=': lambda attr, value: value in attr,
1668 }
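# String comparisons look like e.g. 'ext=mp4', 'format_id^=http' (starts with),
# 'language$=en' (ends with) or 'format_note*=premium' (contains); prefixing
# the operator with '!' negates the comparison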
1669 str_operator_rex = re.compile(r'''(?x)\s*
1670 (?P<key>[a-zA-Z0-9._-]+)\s*
1671 (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1672 (?P<value>[a-zA-Z0-9._-]+)\s*
1673 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1674 m = str_operator_rex.fullmatch(filter_spec)
1675 if m:
1676 comparison_value = m.group('value')
1677 str_op = STR_OPERATORS[m.group('op')]
1678 if m.group('negation'):
1679 op = lambda attr, value: not str_op(attr, value)
1680 else:
1681 op = str_op
1682
1683 if not m:
1684 raise SyntaxError('Invalid filter specification %r' % filter_spec)
1685
1686 def _filter(f):
1687 actual_value = f.get(m.group('key'))
1688 if actual_value is None:
1689 return m.group('none_inclusive')
1690 return op(actual_value, comparison_value)
1691 return _filter
1692
1693 def _default_format_spec(self, info_dict, download=True):
1694
1695 def can_merge():
1696 merger = FFmpegMergerPP(self)
1697 return merger.available and merger.can_merge()
1698
1699 prefer_best = (
1700 not self.params.get('simulate')
1701 and download
1702 and (
1703 not can_merge()
1704 or info_dict.get('is_live', False)
1705 or self.outtmpl_dict['default'] == '-'))
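# Pre-merged 'best' is preferred whenever merging is impossible or pointless:
# no working ffmpeg, a live stream, or downloading to stdout ('-')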
1706 compat = (
1707 prefer_best
1708 or self.params.get('allow_multiple_audio_streams', False)
1709 or 'format-spec' in self.params.get('compat_opts', []))
1710
1711 return (
1712 'best/bestvideo+bestaudio' if prefer_best
1713 else 'bestvideo*+bestaudio/best' if not compat
1714 else 'bestvideo+bestaudio/best')
1715
1716 def build_format_selector(self, format_spec):
1717 def syntax_error(note, start):
1718 message = (
1719 'Invalid format specification: '
1720 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1721 return SyntaxError(message)
1722
1723 PICKFIRST = 'PICKFIRST'
1724 MERGE = 'MERGE'
1725 SINGLE = 'SINGLE'
1726 GROUP = 'GROUP'
1727 FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
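# A format spec is parsed into a tree of FormatSelectors; e.g.
# 'bestvideo+bestaudio/best' becomes a PICKFIRST node whose first choice is a
# MERGE of (bestvideo, bestaudio) and whose fallback is the single selector 'best'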
1728
1729 allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1730 'video': self.params.get('allow_multiple_video_streams', False)}
1731
1732 check_formats = self.params.get('check_formats')
1733
1734 def _parse_filter(tokens):
1735 filter_parts = []
1736 for type, string, start, _, _ in tokens:
1737 if type == tokenize.OP and string == ']':
1738 return ''.join(filter_parts)
1739 else:
1740 filter_parts.append(string)
1741
1742 def _remove_unused_ops(tokens):
1743 # Remove operators that we don't use and join them with the surrounding strings
1744 # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1745 ALLOWED_OPS = ('/', '+', ',', '(', ')')
1746 last_string, last_start, last_end, last_line = None, None, None, None
1747 for type, string, start, end, line in tokens:
1748 if type == tokenize.OP and string == '[':
1749 if last_string:
1750 yield tokenize.NAME, last_string, last_start, last_end, last_line
1751 last_string = None
1752 yield type, string, start, end, line
1753 # everything inside brackets will be handled by _parse_filter
1754 for type, string, start, end, line in tokens:
1755 yield type, string, start, end, line
1756 if type == tokenize.OP and string == ']':
1757 break
1758 elif type == tokenize.OP and string in ALLOWED_OPS:
1759 if last_string:
1760 yield tokenize.NAME, last_string, last_start, last_end, last_line
1761 last_string = None
1762 yield type, string, start, end, line
1763 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1764 if not last_string:
1765 last_string = string
1766 last_start = start
1767 last_end = end
1768 else:
1769 last_string += string
1770 if last_string:
1771 yield tokenize.NAME, last_string, last_start, last_end, last_line
1772
1773 def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
1774 selectors = []
1775 current_selector = None
1776 for type, string, start, _, _ in tokens:
1777 # ENCODING is only defined in python 3.x
1778 if type == getattr(tokenize, 'ENCODING', None):
1779 continue
1780 elif type in [tokenize.NAME, tokenize.NUMBER]:
1781 current_selector = FormatSelector(SINGLE, string, [])
1782 elif type == tokenize.OP:
1783 if string == ')':
1784 if not inside_group:
1785 # ')' will be handled by the parentheses group
1786 tokens.restore_last_token()
1787 break
1788 elif inside_merge and string in ['/', ',']:
1789 tokens.restore_last_token()
1790 break
1791 elif inside_choice and string == ',':
1792 tokens.restore_last_token()
1793 break
1794 elif string == ',':
1795 if not current_selector:
1796 raise syntax_error('"," must follow a format selector', start)
1797 selectors.append(current_selector)
1798 current_selector = None
1799 elif string == '/':
1800 if not current_selector:
1801 raise syntax_error('"/" must follow a format selector', start)
1802 first_choice = current_selector
1803 second_choice = _parse_format_selection(tokens, inside_choice=True)
1804 current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
1805 elif string == '[':
1806 if not current_selector:
1807 current_selector = FormatSelector(SINGLE, 'best', [])
1808 format_filter = _parse_filter(tokens)
1809 current_selector.filters.append(format_filter)
1810 elif string == '(':
1811 if current_selector:
1812 raise syntax_error('Unexpected "("', start)
1813 group = _parse_format_selection(tokens, inside_group=True)
1814 current_selector = FormatSelector(GROUP, group, [])
1815 elif string == '+':
1816 if not current_selector:
1817 raise syntax_error('Unexpected "+"', start)
1818 selector_1 = current_selector
1819 selector_2 = _parse_format_selection(tokens, inside_merge=True)
1820 if not selector_2:
1821 raise syntax_error('Expected a selector', start)
1822 current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
1823 else:
1824 raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
1825 elif type == tokenize.ENDMARKER:
1826 break
1827 if current_selector:
1828 selectors.append(current_selector)
1829 return selectors
1830
1831 def _merge(formats_pair):
1832 format_1, format_2 = formats_pair
1833
1834 formats_info = []
1835 formats_info.extend(format_1.get('requested_formats', (format_1,)))
1836 formats_info.extend(format_2.get('requested_formats', (format_2,)))
1837
1838 if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
1839 get_no_more = {'video': False, 'audio': False}
1840 for (i, fmt_info) in enumerate(formats_info):
1841 if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
1842 formats_info.pop(i)
1843 continue
1844 for aud_vid in ['audio', 'video']:
1845 if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
1846 if get_no_more[aud_vid]:
1847 formats_info.pop(i)
1848 break
1849 get_no_more[aud_vid] = True
1850
1851 if len(formats_info) == 1:
1852 return formats_info[0]
1853
1854 video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
1855 audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
1856
1857 the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
1858 the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
1859
1860 output_ext = self.params.get('merge_output_format')
1861 if not output_ext:
1862 if the_only_video:
1863 output_ext = the_only_video['ext']
1864 elif the_only_audio and not video_fmts:
1865 output_ext = the_only_audio['ext']
1866 else:
1867 output_ext = 'mkv'
1868
1869 filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
1870
1871 new_dict = {
1872 'requested_formats': formats_info,
1873 'format': '+'.join(filtered('format')),
1874 'format_id': '+'.join(filtered('format_id')),
1875 'ext': output_ext,
1876 'protocol': '+'.join(map(determine_protocol, formats_info)),
1877 'language': '+'.join(orderedSet(filtered('language'))),
1878 'format_note': '+'.join(orderedSet(filtered('format_note'))),
1879 'filesize_approx': sum(filtered('filesize', 'filesize_approx')),
1880 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
1881 }
1882
1883 if the_only_video:
1884 new_dict.update({
1885 'width': the_only_video.get('width'),
1886 'height': the_only_video.get('height'),
1887 'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
1888 'fps': the_only_video.get('fps'),
1889 'vcodec': the_only_video.get('vcodec'),
1890 'vbr': the_only_video.get('vbr'),
1891 'stretched_ratio': the_only_video.get('stretched_ratio'),
1892 })
1893
1894 if the_only_audio:
1895 new_dict.update({
1896 'acodec': the_only_audio.get('acodec'),
1897 'abr': the_only_audio.get('abr'),
1898 'asr': the_only_audio.get('asr'),
1899 })
1900
1901 return new_dict
1902
1903 def _check_formats(formats):
1904 if not check_formats:
1905 yield from formats
1906 return
1907 for f in formats:
1908 self.to_screen('[info] Testing format %s' % f['format_id'])
1909 temp_file = tempfile.NamedTemporaryFile(
1910 suffix='.tmp', delete=False,
1911 dir=self.get_output_path('temp') or None)
1912 temp_file.close()
1913 try:
1914 success, _ = self.dl(temp_file.name, f, test=True)
1915 except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
1916 success = False
1917 finally:
1918 if os.path.exists(temp_file.name):
1919 try:
1920 os.remove(temp_file.name)
1921 except OSError:
1922 self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1923 if success:
1924 yield f
1925 else:
1926 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1927
1928 def _build_selector_function(selector):
1929 if isinstance(selector, list): # ,
1930 fs = [_build_selector_function(s) for s in selector]
1931
1932 def selector_function(ctx):
1933 for f in fs:
1934 yield from f(ctx)
1935 return selector_function
1936
1937 elif selector.type == GROUP: # ()
1938 selector_function = _build_selector_function(selector.selector)
1939
1940 elif selector.type == PICKFIRST: # /
1941 fs = [_build_selector_function(s) for s in selector.selector]
1942
1943 def selector_function(ctx):
1944 for f in fs:
1945 picked_formats = list(f(ctx))
1946 if picked_formats:
1947 return picked_formats
1948 return []
1949
1950 elif selector.type == MERGE: # +
1951 selector_1, selector_2 = map(_build_selector_function, selector.selector)
1952
1953 def selector_function(ctx):
1954 for pair in itertools.product(
1955 selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
1956 yield _merge(pair)
1957
1958 elif selector.type == SINGLE: # atom
1959 format_spec = selector.selector or 'best'
1960
1961 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
1962 if format_spec == 'all':
1963 def selector_function(ctx):
1964 yield from _check_formats(ctx['formats'])
1965 elif format_spec == 'mergeall':
1966 def selector_function(ctx):
1967 formats = list(_check_formats(ctx['formats']))
1968 if not formats:
1969 return
1970 merged_format = formats[-1]
1971 for f in formats[-2::-1]:
1972 merged_format = _merge((merged_format, f))
1973 yield merged_format
1974
1975 else:
1976 format_fallback, format_reverse, format_idx = False, True, 1
1977 mobj = re.match(
1978 r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
1979 format_spec)
1980 if mobj is not None:
1981 format_idx = int_or_none(mobj.group('n'), default=1)
1982 format_reverse = mobj.group('bw')[0] == 'b'
1983 format_type = (mobj.group('type') or [None])[0]
1984 not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
1985 format_modified = mobj.group('mod') is not None
1986
1987 format_fallback = not format_type and not format_modified # for b, w
1988 _filter_f = (
1989 (lambda f: f.get('%scodec' % format_type) != 'none')
1990 if format_type and format_modified # bv*, ba*, wv*, wa*
1991 else (lambda f: f.get('%scodec' % not_format_type) == 'none')
1992 if format_type # bv, ba, wv, wa
1993 else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
1994 if not format_modified # b, w
1995 else lambda f: True) # b*, w*
1996 filter_f = lambda f: _filter_f(f) and (
1997 f.get('vcodec') != 'none' or f.get('acodec') != 'none')
1998 else:
1999 if format_spec in self._format_selection_exts['audio']:
2000 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
2001 elif format_spec in self._format_selection_exts['video']:
2002 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
2003 elif format_spec in self._format_selection_exts['storyboards']:
2004 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
2005 else:
2006 filter_f = lambda f: f.get('format_id') == format_spec # id
2007
2008 def selector_function(ctx):
2009 formats = list(ctx['formats'])
2010 matches = list(filter(filter_f, formats)) if filter_f is not None else formats
2011 if format_fallback and ctx['incomplete_formats'] and not matches:
2012 # for extractors with incomplete formats (audio only (soundcloud)
2013 # or video only (imgur)) best/worst will fall back to
2014 # best/worst {video,audio}-only format
2015 matches = formats
2016 matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
2017 try:
2018 yield matches[format_idx - 1]
2019 except IndexError:
2020 return
2021
2022 filters = [self._build_format_filter(f) for f in selector.filters]
2023
2024 def final_selector(ctx):
2025 ctx_copy = copy.deepcopy(ctx)
2026 for _filter in filters:
2027 ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
2028 return selector_function(ctx_copy)
2029 return final_selector
2030
2031 stream = io.BytesIO(format_spec.encode('utf-8'))
2032 try:
2033 tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
2034 except tokenize.TokenError:
2035 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2036
2037 class TokenIterator(object):
2038 def __init__(self, tokens):
2039 self.tokens = tokens
2040 self.counter = 0
2041
2042 def __iter__(self):
2043 return self
2044
2045 def __next__(self):
2046 if self.counter >= len(self.tokens):
2047 raise StopIteration()
2048 value = self.tokens[self.counter]
2049 self.counter += 1
2050 return value
2051
2052 next = __next__
2053
2054 def restore_last_token(self):
2055 self.counter -= 1
2056
2057 parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
2058 return _build_selector_function(parsed_selector)
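# A rough usage sketch, assuming a YoutubeDL instance `ydl` and a list of
# format dicts `formats` taken from an info_dict:
#     selector = ydl.build_format_selector('bestvideo[height<=720]+bestaudio/best')
#     chosen = list(selector({'formats': formats, 'incomplete_formats': False}))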
2059
2060 def _calc_headers(self, info_dict):
2061 res = std_headers.copy()
2062
2063 add_headers = info_dict.get('http_headers')
2064 if add_headers:
2065 res.update(add_headers)
2066
2067 cookies = self._calc_cookies(info_dict)
2068 if cookies:
2069 res['Cookie'] = cookies
2070
2071 if 'X-Forwarded-For' not in res:
2072 x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2073 if x_forwarded_for_ip:
2074 res['X-Forwarded-For'] = x_forwarded_for_ip
2075
2076 return res
2077
2078 def _calc_cookies(self, info_dict):
2079 pr = sanitized_Request(info_dict['url'])
2080 self.cookiejar.add_cookie_header(pr)
2081 return pr.get_header('Cookie')
2082
2083 def _sanitize_thumbnails(self, info_dict):
2084 thumbnails = info_dict.get('thumbnails')
2085 if thumbnails is None:
2086 thumbnail = info_dict.get('thumbnail')
2087 if thumbnail:
2088 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2089 if thumbnails:
2090 thumbnails.sort(key=lambda t: (
2091 t.get('preference') if t.get('preference') is not None else -1,
2092 t.get('width') if t.get('width') is not None else -1,
2093 t.get('height') if t.get('height') is not None else -1,
2094 t.get('id') if t.get('id') is not None else '',
2095 t.get('url')))
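# This key sorts worst-to-best (missing values count as -1 / ''), so code
# below can treat thumbnails[-1] as the preferred thumbnail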
2096
2097 def thumbnail_tester():
2098 if self.params.get('check_formats'):
2099 test_all = True
2100 to_screen = lambda msg: self.to_screen(f'[info] {msg}')
2101 else:
2102 test_all = False
2103 to_screen = self.write_debug
2104
2105 def test_thumbnail(t):
2106 if not test_all and not t.get('_test_url'):
2107 return True
2108 to_screen('Testing thumbnail %s' % t['id'])
2109 try:
2110 self.urlopen(HEADRequest(t['url']))
2111 except network_exceptions as err:
2112 to_screen('Unable to connect to thumbnail %s URL "%s" - %s. Skipping...' % (
2113 t['id'], t['url'], error_to_compat_str(err)))
2114 return False
2115 return True
2116
2117 return test_thumbnail
2118
2119 for i, t in enumerate(thumbnails):
2120 if t.get('id') is None:
2121 t['id'] = '%d' % i
2122 if t.get('width') and t.get('height'):
2123 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2124 t['url'] = sanitize_url(t['url'])
2125
2126 if self.params.get('check_formats') is not False:
2127 info_dict['thumbnails'] = LazyList(filter(thumbnail_tester(), thumbnails[::-1])).reverse()
2128 else:
2129 info_dict['thumbnails'] = thumbnails
2130
2131 def process_video_result(self, info_dict, download=True):
2132 assert info_dict.get('_type', 'video') == 'video'
2133
2134 if 'id' not in info_dict:
2135 raise ExtractorError('Missing "id" field in extractor result')
2136 if 'title' not in info_dict:
2137 raise ExtractorError('Missing "title" field in extractor result',
2138 video_id=info_dict['id'], ie=info_dict['extractor'])
2139
2140 def report_force_conversion(field, field_not, conversion):
2141 self.report_warning(
2142 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2143 % (field, field_not, conversion))
2144
2145 def sanitize_string_field(info, string_field):
2146 field = info.get(string_field)
2147 if field is None or isinstance(field, compat_str):
2148 return
2149 report_force_conversion(string_field, 'a string', 'string')
2150 info[string_field] = compat_str(field)
2151
2152 def sanitize_numeric_fields(info):
2153 for numeric_field in self._NUMERIC_FIELDS:
2154 field = info.get(numeric_field)
2155 if field is None or isinstance(field, compat_numeric_types):
2156 continue
2157 report_force_conversion(numeric_field, 'numeric', 'int')
2158 info[numeric_field] = int_or_none(field)
2159
2160 sanitize_string_field(info_dict, 'id')
2161 sanitize_numeric_fields(info_dict)
2162
2163 if 'playlist' not in info_dict:
2164 # It isn't part of a playlist
2165 info_dict['playlist'] = None
2166 info_dict['playlist_index'] = None
2167
2168 self._sanitize_thumbnails(info_dict)
2169
2170 thumbnail = info_dict.get('thumbnail')
2171 thumbnails = info_dict.get('thumbnails')
2172 if thumbnail:
2173 info_dict['thumbnail'] = sanitize_url(thumbnail)
2174 elif thumbnails:
2175 info_dict['thumbnail'] = thumbnails[-1]['url']
2176
2177 if info_dict.get('display_id') is None and 'id' in info_dict:
2178 info_dict['display_id'] = info_dict['id']
2179
2180 if info_dict.get('duration') is not None:
2181 info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2182
2183 for ts_key, date_key in (
2184 ('timestamp', 'upload_date'),
2185 ('release_timestamp', 'release_date'),
2186 ):
2187 if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2188 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2189 # see http://bugs.python.org/issue1646728)
2190 try:
2191 upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2192 info_dict[date_key] = upload_date.strftime('%Y%m%d')
2193 except (ValueError, OverflowError, OSError):
2194 pass
2195
2196 live_keys = ('is_live', 'was_live')
2197 live_status = info_dict.get('live_status')
2198 if live_status is None:
2199 for key in live_keys:
2200 if info_dict.get(key) is False:
2201 continue
2202 if info_dict.get(key):
2203 live_status = key
2204 break
2205 if all(info_dict.get(key) is False for key in live_keys):
2206 live_status = 'not_live'
2207 if live_status:
2208 info_dict['live_status'] = live_status
2209 for key in live_keys:
2210 if info_dict.get(key) is None:
2211 info_dict[key] = (live_status == key)
2212
2213 # Auto-generate title fields corresponding to the *_number fields when missing
2214 # in order to always have clean titles. This is very common for TV series.
2215 for field in ('chapter', 'season', 'episode'):
2216 if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2217 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2218
2219 for cc_kind in ('subtitles', 'automatic_captions'):
2220 cc = info_dict.get(cc_kind)
2221 if cc:
2222 for _, subtitle in cc.items():
2223 for subtitle_format in subtitle:
2224 if subtitle_format.get('url'):
2225 subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2226 if subtitle_format.get('ext') is None:
2227 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2228
2229 automatic_captions = info_dict.get('automatic_captions')
2230 subtitles = info_dict.get('subtitles')
2231
2232 info_dict['requested_subtitles'] = self.process_subtitles(
2233 info_dict['id'], subtitles, automatic_captions)
2234
2235 # We now pick which formats have to be downloaded
2236 if info_dict.get('formats') is None:
2237 # There's only one format available
2238 formats = [info_dict]
2239 else:
2240 formats = info_dict['formats']
2241
2242 info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
2243 if not self.params.get('allow_unplayable_formats'):
2244 formats = [f for f in formats if not f.get('has_drm')]
2245
2246 if not formats:
2247 self.raise_no_formats(info_dict)
2248
2249 def is_wellformed(f):
2250 url = f.get('url')
2251 if not url:
2252 self.report_warning(
2253 '"url" field is missing or empty - skipping format, '
2254 'there is an error in the extractor')
2255 return False
2256 if isinstance(url, bytes):
2257 sanitize_string_field(f, 'url')
2258 return True
2259
2260 # Filter out malformed formats for better extraction robustness
2261 formats = list(filter(is_wellformed, formats))
2262
2263 formats_dict = {}
2264
2265 # We check that all the formats have the format and format_id fields
2266 for i, format in enumerate(formats):
2267 sanitize_string_field(format, 'format_id')
2268 sanitize_numeric_fields(format)
2269 format['url'] = sanitize_url(format['url'])
2270 if not format.get('format_id'):
2271 format['format_id'] = compat_str(i)
2272 else:
2273 # Sanitize format_id from characters used in format selector expression
2274 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
2275 format_id = format['format_id']
2276 if format_id not in formats_dict:
2277 formats_dict[format_id] = []
2278 formats_dict[format_id].append(format)
2279
2280 # Make sure all formats have unique format_id
2281 common_exts = set(itertools.chain(*self._format_selection_exts.values()))
2282 for format_id, ambiguous_formats in formats_dict.items():
2283 ambiguous_id = len(ambiguous_formats) > 1
2284 for i, format in enumerate(ambiguous_formats):
2285 if ambiguous_id:
2286 format['format_id'] = '%s-%d' % (format_id, i)
2287 if format.get('ext') is None:
2288 format['ext'] = determine_ext(format['url']).lower()
2289 # Ensure there is no conflict between id and ext in format selection
2290 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2291 if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
2292 format['format_id'] = 'f%s' % format['format_id']
2293
2294 for i, format in enumerate(formats):
2295 if format.get('format') is None:
2296 format['format'] = '{id} - {res}{note}'.format(
2297 id=format['format_id'],
2298 res=self.format_resolution(format),
2299 note=format_field(format, 'format_note', ' (%s)'),
2300 )
2301 if format.get('protocol') is None:
2302 format['protocol'] = determine_protocol(format)
2303 if format.get('resolution') is None:
2304 format['resolution'] = self.format_resolution(format, default=None)
2305 # Add HTTP headers, so that external programs can use them from the
2306 # json output
2307 full_format_info = info_dict.copy()
2308 full_format_info.update(format)
2309 format['http_headers'] = self._calc_headers(full_format_info)
2310 # Remove private housekeeping stuff
2311 if '__x_forwarded_for_ip' in info_dict:
2312 del info_dict['__x_forwarded_for_ip']
2313
2314 # TODO Central sorting goes here
2315
2316 if not formats or formats[0] is not info_dict:
2317 # only set the 'formats' field if the original info_dict lists them;
2318 # otherwise we end up with a circular reference: the first (and only)
2319 # element of the 'formats' field in info_dict would be info_dict itself,
2320 # which can't be exported to json
2321 info_dict['formats'] = formats
2322
2323 info_dict, _ = self.pre_process(info_dict)
2324
2325 if self.params.get('list_thumbnails'):
2326 self.list_thumbnails(info_dict)
2327 if self.params.get('listformats'):
2328 if not info_dict.get('formats') and not info_dict.get('url'):
2329 self.to_screen('%s has no formats' % info_dict['id'])
2330 else:
2331 self.list_formats(info_dict)
2332 if self.params.get('listsubtitles'):
2333 if 'automatic_captions' in info_dict:
2334 self.list_subtitles(
2335 info_dict['id'], automatic_captions, 'automatic captions')
2336 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
2337 list_only = self.params.get('simulate') is None and (
2338 self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
2339 if list_only:
2340 # Without this printing, -F --print-json will not work
2341 self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
2342 return
2343
2344 format_selector = self.format_selector
2345 if format_selector is None:
2346 req_format = self._default_format_spec(info_dict, download=download)
2347 self.write_debug('Default format spec: %s' % req_format)
2348 format_selector = self.build_format_selector(req_format)
2349
2350 # While in format selection we may need to have access to the original
2351 # format set in order to calculate some metrics or do some processing.
2352 # For now we need to be able to guess whether original formats provided
2353 # by extractor are incomplete or not (i.e. whether extractor provides only
2354 # video-only or audio-only formats) for proper formats selection for
2355 # extractors with such incomplete formats (see
2356 # https://github.com/ytdl-org/youtube-dl/pull/5556).
2357 # Since formats may be filtered during format selection and may not match
2358 # the original formats the results may be incorrect. Thus original formats
2359 # or pre-calculated metrics should be passed to format selection routines
2360 # as well.
2361 # We will pass a context object containing all necessary additional data
2362 # instead of just formats.
2363 # This fixes incorrect format selection issue (see
2364 # https://github.com/ytdl-org/youtube-dl/issues/10083).
2365 incomplete_formats = (
2366 # All formats are video-only or
2367 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
2368 # all formats are audio-only
2369 or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
2370
2371 ctx = {
2372 'formats': formats,
2373 'incomplete_formats': incomplete_formats,
2374 }
2375
2376 formats_to_download = list(format_selector(ctx))
2377 if not formats_to_download:
2378 if not self.params.get('ignore_no_formats_error'):
2379 raise ExtractorError('Requested format is not available', expected=True,
2380 video_id=info_dict['id'], ie=info_dict['extractor'])
2381 else:
2382 self.report_warning('Requested format is not available')
2383 # Process what we can, even without any available formats.
2384 self.process_info(dict(info_dict))
2385 elif download:
2386 self.to_screen(
2387 '[info] %s: Downloading %d format(s): %s' % (
2388 info_dict['id'], len(formats_to_download),
2389 ", ".join([f['format_id'] for f in formats_to_download])))
2390 for fmt in formats_to_download:
2391 new_info = dict(info_dict)
2392 # Save a reference to the original info_dict so that it can be modified in process_info if needed
2393 new_info['__original_infodict'] = info_dict
2394 new_info.update(fmt)
2395 self.process_info(new_info)
2396 # We update the info dict with the best quality format (backwards compatibility)
2397 if formats_to_download:
2398 info_dict.update(formats_to_download[-1])
2399 return info_dict
2400
2401 def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2402 """Select the requested subtitles and their format"""
2403 available_subs = {}
2404 if normal_subtitles and self.params.get('writesubtitles'):
2405 available_subs.update(normal_subtitles)
2406 if automatic_captions and self.params.get('writeautomaticsub'):
2407 for lang, cap_info in automatic_captions.items():
2408 if lang not in available_subs:
2409 available_subs[lang] = cap_info
2410
2411 if ((not self.params.get('writesubtitles')
2412 and not self.params.get('writeautomaticsub'))
2413 or not available_subs):
2414 return None
2415
2416 all_sub_langs = available_subs.keys()
2417 if self.params.get('allsubtitles', False):
2418 requested_langs = all_sub_langs
2419 elif self.params.get('subtitleslangs', False):
2420 # A list is used so that the order of languages will be the same as
2421 # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
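# e.g. ['all', '-live_chat'] requests every available language except
# live_chat, while ['en.*', 'ja'] keeps all English variants plus Japanese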
2422 requested_langs = []
2423 for lang_re in self.params.get('subtitleslangs'):
2424 if lang_re == 'all':
2425 requested_langs.extend(all_sub_langs)
2426 continue
2427 discard = lang_re[0] == '-'
2428 if discard:
2429 lang_re = lang_re[1:]
2430 current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
2431 if discard:
2432 for lang in current_langs:
2433 while lang in requested_langs:
2434 requested_langs.remove(lang)
2435 else:
2436 requested_langs.extend(current_langs)
2437 requested_langs = orderedSet(requested_langs)
2438 elif 'en' in available_subs:
2439 requested_langs = ['en']
2440 else:
2441 requested_langs = [list(all_sub_langs)[0]]
2442 if requested_langs:
2443 self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
2444
2445 formats_query = self.params.get('subtitlesformat', 'best')
2446 formats_preference = formats_query.split('/') if formats_query else []
2447 subs = {}
2448 for lang in requested_langs:
2449 formats = available_subs.get(lang)
2450 if formats is None:
2451 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
2452 continue
2453 for ext in formats_preference:
2454 if ext == 'best':
2455 f = formats[-1]
2456 break
2457 matches = list(filter(lambda f: f['ext'] == ext, formats))
2458 if matches:
2459 f = matches[-1]
2460 break
2461 else:
2462 f = formats[-1]
2463 self.report_warning(
2464 'No subtitle format found matching "%s" for language %s, '
2465 'using %s' % (formats_query, lang, f['ext']))
2466 subs[lang] = f
2467 return subs
2468
2469 def __forced_printings(self, info_dict, filename, incomplete):
2470 def print_mandatory(field, actual_field=None):
2471 if actual_field is None:
2472 actual_field = field
2473 if (self.params.get('force%s' % field, False)
2474 and (not incomplete or info_dict.get(actual_field) is not None)):
2475 self.to_stdout(info_dict[actual_field])
2476
2477 def print_optional(field):
2478 if (self.params.get('force%s' % field, False)
2479 and info_dict.get(field) is not None):
2480 self.to_stdout(info_dict[field])
2481
2482 info_dict = info_dict.copy()
2483 if filename is not None:
2484 info_dict['filename'] = filename
2485 if info_dict.get('requested_formats') is not None:
2486 # For RTMP URLs, also include the playpath
2487 info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2488 elif 'url' in info_dict:
2489 info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2490
2491 if self.params.get('forceprint') or self.params.get('forcejson'):
2492 self.post_extract(info_dict)
2493 for tmpl in self.params.get('forceprint', []):
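# A bare field name like 'title' expands to '%(title)s', while 'title='
# expands to 'title = %(title)s'; anything else is evaluated as a full
# output template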
2494 mobj = re.match(r'\w+(=?)$', tmpl)
2495 if mobj and mobj.group(1):
2496 tmpl = f'{tmpl[:-1]} = %({tmpl[:-1]})s'
2497 elif mobj:
2498 tmpl = '%({})s'.format(tmpl)
2499 self.to_stdout(self.evaluate_outtmpl(tmpl, info_dict))
2500
2501 print_mandatory('title')
2502 print_mandatory('id')
2503 print_mandatory('url', 'urls')
2504 print_optional('thumbnail')
2505 print_optional('description')
2506 print_optional('filename')
2507 if self.params.get('forceduration') and info_dict.get('duration') is not None:
2508 self.to_stdout(formatSeconds(info_dict['duration']))
2509 print_mandatory('format')
2510
2511 if self.params.get('forcejson'):
2512 self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
2513
2514 def dl(self, name, info, subtitle=False, test=False):
2515 if not info.get('url'):
2516 self.raise_no_formats(info, True)
2517
2518 if test:
2519 verbose = self.params.get('verbose')
2520 params = {
2521 'test': True,
2522 'quiet': self.params.get('quiet') or not verbose,
2523 'verbose': verbose,
2524 'noprogress': not verbose,
2525 'nopart': True,
2526 'skip_unavailable_fragments': False,
2527 'keep_fragments': False,
2528 'overwrites': True,
2529 '_no_ytdl_file': True,
2530 }
2531 else:
2532 params = self.params
2533 fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
2534 if not test:
2535 for ph in self._progress_hooks:
2536 fd.add_progress_hook(ph)
2537 urls = '", "'.join([f['url'] for f in info.get('requested_formats', [])] or [info['url']])
2538 self.write_debug('Invoking downloader on "%s"' % urls)
2539
2540 new_info = copy.deepcopy(self._copy_infodict(info))
2541 if new_info.get('http_headers') is None:
2542 new_info['http_headers'] = self._calc_headers(new_info)
2543 return fd.download(name, new_info, subtitle)
2544
2545 def process_info(self, info_dict):
2546 """Process a single resolved IE result."""
2547
2548 assert info_dict.get('_type', 'video') == 'video'
2549
2550 max_downloads = self.params.get('max_downloads')
2551 if max_downloads is not None:
2552 if self._num_downloads >= int(max_downloads):
2553 raise MaxDownloadsReached()
2554
2555 # TODO: backward compatibility, to be removed
2556 info_dict['fulltitle'] = info_dict['title']
2557
2558 if 'format' not in info_dict and 'ext' in info_dict:
2559 info_dict['format'] = info_dict['ext']
2560
2561 if self._match_entry(info_dict) is not None:
2562 return
2563
2564 self.post_extract(info_dict)
2565 self._num_downloads += 1
2566
2567 # info_dict['_filename'] needs to be set for backward compatibility
2568 info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2569 temp_filename = self.prepare_filename(info_dict, 'temp')
2570 files_to_move = {}
2571
2572 # Forced printings
2573 self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
2574
2575 if self.params.get('simulate'):
2576 if self.params.get('force_write_download_archive', False):
2577 self.record_download_archive(info_dict)
2578 # Do nothing else if in simulate mode
2579 return
2580
2581 if full_filename is None:
2582 return
2583 if not self._ensure_dir_exists(encodeFilename(full_filename)):
2584 return
2585 if not self._ensure_dir_exists(encodeFilename(temp_filename)):
2586 return
2587
2588 if self._write_description('video', info_dict,
2589 self.prepare_filename(info_dict, 'description')) is None:
2590 return
2591
2592 sub_files = self._write_subtitles(info_dict, temp_filename)
2593 if sub_files is None:
2594 return
2595 files_to_move.update(dict(sub_files))
2596
2597 thumb_files = self._write_thumbnails(
2598 'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
2599 if thumb_files is None:
2600 return
2601 files_to_move.update(dict(thumb_files))
2602
2603 infofn = self.prepare_filename(info_dict, 'infojson')
2604 _infojson_written = self._write_info_json('video', info_dict, infofn)
2605 if _infojson_written:
2606 info_dict['__infojson_filename'] = infofn
2607 elif _infojson_written is None:
2608 return
2609
2610 # Note: Annotations are deprecated
2611 annofn = None
2612 if self.params.get('writeannotations', False):
2613 annofn = self.prepare_filename(info_dict, 'annotation')
2614 if annofn:
2615 if not self._ensure_dir_exists(encodeFilename(annofn)):
2616 return
2617 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2618 self.to_screen('[info] Video annotations are already present')
2619 elif not info_dict.get('annotations'):
2620 self.report_warning('There are no annotations to write.')
2621 else:
2622 try:
2623 self.to_screen('[info] Writing video annotations to: ' + annofn)
2624 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2625 annofile.write(info_dict['annotations'])
2626 except (KeyError, TypeError):
2627 self.report_warning('There are no annotations to write.')
2628 except (OSError, IOError):
2629 self.report_error('Cannot write annotations file: ' + annofn)
2630 return
2631
2632 # Write internet shortcut files
2633 url_link = webloc_link = desktop_link = False
2634 if self.params.get('writelink', False):
2635 if sys.platform == "darwin": # macOS.
2636 webloc_link = True
2637 elif sys.platform.startswith("linux"):
2638 desktop_link = True
2639 else: # if sys.platform in ['win32', 'cygwin']:
2640 url_link = True
2641 if self.params.get('writeurllink', False):
2642 url_link = True
2643 if self.params.get('writewebloclink', False):
2644 webloc_link = True
2645 if self.params.get('writedesktoplink', False):
2646 desktop_link = True
2647
2648 if url_link or webloc_link or desktop_link:
2649 if 'webpage_url' not in info_dict:
2650 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2651 return
2652 ascii_url = iri_to_uri(info_dict['webpage_url'])
2653
2654 def _write_link_file(extension, template, newline, embed_filename):
2655 linkfn = replace_extension(full_filename, extension, info_dict.get('ext'))
2656 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
2657 self.to_screen('[info] Internet shortcut is already present')
2658 else:
2659 try:
2660 self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
2661 with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
2662 template_vars = {'url': ascii_url}
2663 if embed_filename:
2664 template_vars['filename'] = linkfn[:-(len(extension) + 1)]
2665 linkfile.write(template % template_vars)
2666 except (OSError, IOError):
2667 self.report_error('Cannot write internet shortcut ' + linkfn)
2668 return False
2669 return True
2670
2671 if url_link:
2672 if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
2673 return
2674 if webloc_link:
2675 if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
2676 return
2677 if desktop_link:
2678 if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
2679 return
2680
2681 try:
2682 info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2683 except PostProcessingError as err:
2684 self.report_error('Preprocessing: %s' % str(err))
2685 return
2686
2687 must_record_download_archive = False
2688 if self.params.get('skip_download', False):
2689 info_dict['filepath'] = temp_filename
2690 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2691 info_dict['__files_to_move'] = files_to_move
2692 info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
2693 else:
2694 # Download
2695 info_dict.setdefault('__postprocessors', [])
2696 try:
2697
2698 def existing_file(*filepaths):
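# Look for both the given paths and their post-processed (final_ext)
# variants, so that e.g. an already converted file is not downloaded again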
2699 ext = info_dict.get('ext')
2700 final_ext = self.params.get('final_ext', ext)
2701 existing_files = []
2702 for file in orderedSet(filepaths):
2703 if final_ext != ext:
2704 converted = replace_extension(file, final_ext, ext)
2705 if os.path.exists(encodeFilename(converted)):
2706 existing_files.append(converted)
2707 if os.path.exists(encodeFilename(file)):
2708 existing_files.append(file)
2709
2710 if not existing_files or self.params.get('overwrites', False):
2711 for file in orderedSet(existing_files):
2712 self.report_file_delete(file)
2713 os.remove(encodeFilename(file))
2714 return None
2715
2716 info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
2717 return existing_files[0]
2718
2719 success = True
2720 if info_dict.get('requested_formats') is not None:
2721
2722 def compatible_formats(formats):
2723 # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2724 video_formats = [format for format in formats if format.get('vcodec') != 'none']
2725 audio_formats = [format for format in formats if format.get('acodec') != 'none']
2726 if len(video_formats) > 1 or len(audio_formats) > 1:
2727 return False
2728
2729 # Check extension
2730 exts = set(format.get('ext') for format in formats)
2731 COMPATIBLE_EXTS = (
2732 set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2733 set(('webm',)),
2734 )
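# e.g. an mp4 video stream merged with m4a audio is compatible, while
# mp4 video with webm audio is not (such a merge is remuxed to mkv below)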
2735 for ext_sets in COMPATIBLE_EXTS:
2736 if ext_sets.issuperset(exts):
2737 return True
2738 # TODO: Check acodec/vcodec
2739 return False
2740
2741 requested_formats = info_dict['requested_formats']
2742 old_ext = info_dict['ext']
2743 if self.params.get('merge_output_format') is None:
2744 if not compatible_formats(requested_formats):
2745 info_dict['ext'] = 'mkv'
2746 self.report_warning(
2747 'Requested formats are incompatible for merge and will be merged into mkv')
2748 if (info_dict['ext'] == 'webm'
2749 and info_dict.get('thumbnails')
2750 # check with type instead of pp_key, __name__, or isinstance
2751 # since we don't want any custom PPs to trigger this
2752 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):
2753 info_dict['ext'] = 'mkv'
2754 self.report_warning(
2755 'webm doesn\'t support embedding a thumbnail, mkv will be used')
2756 new_ext = info_dict['ext']
2757
2758 def correct_ext(filename, ext=new_ext):
2759 if filename == '-':
2760 return filename
2761 filename_real_ext = os.path.splitext(filename)[1][1:]
2762 filename_wo_ext = (
2763 os.path.splitext(filename)[0]
2764 if filename_real_ext in (old_ext, new_ext)
2765 else filename)
2766 return '%s.%s' % (filename_wo_ext, ext)
2767
2768 # Ensure filename always has a correct extension for successful merge
2769 full_filename = correct_ext(full_filename)
2770 temp_filename = correct_ext(temp_filename)
2771 dl_filename = existing_file(full_filename, temp_filename)
2772 info_dict['__real_download'] = False
2773
2774 if dl_filename is not None:
2775 self.report_file_already_downloaded(dl_filename)
2776 elif get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-'):
2777 info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
2778 success, real_download = self.dl(temp_filename, info_dict)
2779 info_dict['__real_download'] = real_download
2780 else:
2781 downloaded = []
2782 merger = FFmpegMergerPP(self)
2783 if self.params.get('allow_unplayable_formats'):
2784 self.report_warning(
2785 'You have requested merging of multiple formats '
2786 'while also allowing unplayable formats to be downloaded. '
2787 'The formats won\'t be merged to prevent data corruption.')
2788 elif not merger.available:
2789 self.report_warning(
2790 'You have requested merging of multiple formats but ffmpeg is not installed. '
2791 'The formats won\'t be merged.')
2792
2793 if temp_filename == '-':
2794 reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict)
2795 else 'but the formats are incompatible for simultaneous download' if merger.available
2796 else 'but ffmpeg is not installed')
2797 self.report_warning(
2798 f'You have requested downloading multiple formats to stdout {reason}. '
2799 'The formats will be streamed one after the other')
2800 fname = temp_filename
2801 for f in requested_formats:
2802 new_info = dict(info_dict)
2803 del new_info['requested_formats']
2804 new_info.update(f)
2805 if temp_filename != '-':
2806 fname = prepend_extension(
2807 correct_ext(temp_filename, new_info['ext']),
2808 'f%s' % f['format_id'], new_info['ext'])
2809 if not self._ensure_dir_exists(fname):
2810 return
2811 f['filepath'] = fname
2812 downloaded.append(fname)
2813 partial_success, real_download = self.dl(fname, new_info)
2814 info_dict['__real_download'] = info_dict['__real_download'] or real_download
2815 success = success and partial_success
2816 if merger.available and not self.params.get('allow_unplayable_formats'):
2817 info_dict['__postprocessors'].append(merger)
2818 info_dict['__files_to_merge'] = downloaded
2819 # Even if there were no downloads, it is being merged only now
2820 info_dict['__real_download'] = True
2821 else:
2822 for file in downloaded:
2823 files_to_move[file] = None
2824 else:
2825 # Just a single file
2826 dl_filename = existing_file(full_filename, temp_filename)
2827 if dl_filename is None or dl_filename == temp_filename:
2828 # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
2829 # So we should try to resume the download
2830 success, real_download = self.dl(temp_filename, info_dict)
2831 info_dict['__real_download'] = real_download
2832 else:
2833 self.report_file_already_downloaded(dl_filename)
2834
2835 dl_filename = dl_filename or temp_filename
2836 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2837
2838 except network_exceptions as err:
2839 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
2840 return
2841 except (OSError, IOError) as err:
2842 raise UnavailableVideoError(err)
2843 except (ContentTooShortError, ) as err:
2844 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
2845 return
2846
2847 if success and full_filename != '-':
2848
2849 def fixup():
2850 do_fixup = True
2851 fixup_policy = self.params.get('fixup')
2852 vid = info_dict['id']
2853
2854 if fixup_policy in ('ignore', 'never'):
2855 return
2856 elif fixup_policy == 'warn':
2857 do_fixup = False
2858 elif fixup_policy != 'force':
2859 assert fixup_policy in ('detect_or_warn', None)
2860 if not info_dict.get('__real_download'):
2861 do_fixup = False
2862
2863 def ffmpeg_fixup(cndn, msg, cls):
2864 if not cndn:
2865 return
2866 if not do_fixup:
2867 self.report_warning(f'{vid}: {msg}')
2868 return
2869 pp = cls(self)
2870 if pp.available:
2871 info_dict['__postprocessors'].append(pp)
2872 else:
2873 self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
2874
2875 stretched_ratio = info_dict.get('stretched_ratio')
2876 ffmpeg_fixup(
2877 stretched_ratio not in (1, None),
2878 f'Non-uniform pixel ratio {stretched_ratio}',
2879 FFmpegFixupStretchedPP)
2880
2881 ffmpeg_fixup(
2882 (info_dict.get('requested_formats') is None
2883 and info_dict.get('container') == 'm4a_dash'
2884 and info_dict.get('ext') == 'm4a'),
2885 'writing DASH m4a. Only some players support this container',
2886 FFmpegFixupM4aPP)
2887
2888 downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
2889 downloader = downloader.__name__ if downloader else None
2890 ffmpeg_fixup(info_dict.get('requested_formats') is None and downloader == 'HlsFD',
2891 'malformed AAC bitstream detected', FFmpegFixupM3u8PP)
2892 ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed timestamps detected', FFmpegFixupTimestampPP)
2893 ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed duration detected', FFmpegFixupDurationPP)
2894
2895 fixup()
2896 try:
2897 info_dict = self.post_process(dl_filename, info_dict, files_to_move)
2898 except PostProcessingError as err:
2899 self.report_error('Postprocessing: %s' % str(err))
2900 return
2901 try:
2902 for ph in self._post_hooks:
2903 ph(info_dict['filepath'])
2904 except Exception as err:
2905 self.report_error('post hooks: %s' % str(err))
2906 return
2907 must_record_download_archive = True
2908
2909 if must_record_download_archive or self.params.get('force_write_download_archive', False):
2910 self.record_download_archive(info_dict)
2911 max_downloads = self.params.get('max_downloads')
2912 if max_downloads is not None and self._num_downloads >= int(max_downloads):
2913 raise MaxDownloadsReached()
2914
2915 def download(self, url_list):
2916 """Download a given list of URLs."""
2917 outtmpl = self.outtmpl_dict['default']
2918 if (len(url_list) > 1
2919 and outtmpl != '-'
2920 and '%' not in outtmpl
2921 and self.params.get('max_downloads') != 1):
2922 raise SameFileError(outtmpl)
2923
2924 for url in url_list:
2925 try:
2926                 # This call also downloads the videos, since extract_info defaults to download=True
2927 res = self.extract_info(
2928 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
2929 except UnavailableVideoError:
2930 self.report_error('unable to download video')
2931 except MaxDownloadsReached:
2932 self.to_screen('[info] Maximum number of downloads reached')
2933 raise
2934 except ExistingVideoReached:
2935 self.to_screen('[info] Encountered a video that is already in the archive, stopping due to --break-on-existing')
2936 raise
2937 except RejectedVideoReached:
2938                 self.to_screen('[info] Encountered a video that did not match the filter, stopping due to --break-on-reject')
2939 raise
2940 else:
2941 if self.params.get('dump_single_json', False):
2942 self.post_extract(res)
2943 self.to_stdout(json.dumps(self.sanitize_info(res)))
2944
2945 return self._download_retcode
2946
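    # Illustrative usage sketch (not part of the class; the URL and params are
    # placeholders):
    #
    #   from yt_dlp import YoutubeDL
    #   with YoutubeDL({'format': 'bestvideo+bestaudio/best'}) as ydl:
    #       retcode = ydl.download(['https://example.com/some-video'])
    #
    # download() returns the accumulated retcode (0 if everything succeeded);
    # MaxDownloadsReached, ExistingVideoReached and RejectedVideoReached are
    # re-raised so callers can stop multi-URL runs early.
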
2947 def download_with_info_file(self, info_filename):
2948 with contextlib.closing(fileinput.FileInput(
2949 [info_filename], mode='r',
2950 openhook=fileinput.hook_encoded('utf-8'))) as f:
2951             # FileInput doesn't have a read method, so json.load can't be used directly
2952 info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
2953 try:
2954 self.process_ie_result(info, download=True)
2955 except (DownloadError, EntryNotInPlaylist, ThrottledDownload):
2956 webpage_url = info.get('webpage_url')
2957 if webpage_url is not None:
2958                 self.report_warning('The info failed to download; retrying with "%s"' % webpage_url)
2959 return self.download([webpage_url])
2960 else:
2961 raise
2962 return self._download_retcode
2963
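    # A possible round trip through an info file (sketch; the .info.json
    # filename is hypothetical):
    #
    #   with YoutubeDL({'writeinfojson': True, 'skip_download': True}) as ydl:
    #       ydl.download(['https://example.com/some-video'])
    #   with YoutubeDL() as ydl:
    #       ydl.download_with_info_file('Some Video [abc123].info.json')
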
2964 @staticmethod
2965 def sanitize_info(info_dict, remove_private_keys=False):
2966 ''' Sanitize the infodict for converting to json '''
2967 if info_dict is None:
2968 return info_dict
2969 info_dict.setdefault('epoch', int(time.time()))
2970 remove_keys = {'__original_infodict'} # Always remove this since this may contain a copy of the entire dict
2971         keep_keys = {'_type'}  # Always keep this to facilitate load-info-json
2972 if remove_private_keys:
2973 remove_keys |= {
2974 'requested_formats', 'requested_subtitles', 'requested_entries',
2975 'filepath', 'entries', 'original_url', 'playlist_autonumber',
2976 }
2977 empty_values = (None, {}, [], set(), tuple())
2978 reject = lambda k, v: k not in keep_keys and (
2979 k.startswith('_') or k in remove_keys or v in empty_values)
2980 else:
2981 reject = lambda k, v: k in remove_keys
2982 filter_fn = lambda obj: (
2983 list(map(filter_fn, obj)) if isinstance(obj, (LazyList, list, tuple, set))
2984 else obj if not isinstance(obj, dict)
2985 else dict((k, filter_fn(v)) for k, v in obj.items() if not reject(k, v)))
2986 return filter_fn(info_dict)
2987
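    # E.g. to dump an extracted info dict as JSON without internal keys
    # (sketch; assumes `ydl` is a configured YoutubeDL instance and `url` is
    # any supported URL):
    #
    #   info = ydl.extract_info(url, download=False)
    #   print(json.dumps(YoutubeDL.sanitize_info(info, remove_private_keys=True)))
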
2988 @staticmethod
2989 def filter_requested_info(info_dict, actually_filter=True):
2990 ''' Alias of sanitize_info for backward compatibility '''
2991 return YoutubeDL.sanitize_info(info_dict, actually_filter)
2992
2993 def run_pp(self, pp, infodict):
2994 files_to_delete = []
2995 if '__files_to_move' not in infodict:
2996 infodict['__files_to_move'] = {}
2997 try:
2998 files_to_delete, infodict = pp.run(infodict)
2999 except PostProcessingError as e:
3000             # `ignoreerrors` must be exactly True (not 'only_download') for PP errors to be ignored
3001 if self.params.get('ignoreerrors') is True:
3002 self.report_error(e)
3003 return infodict
3004 raise
3005
3006 if not files_to_delete:
3007 return infodict
3008 if self.params.get('keepvideo', False):
3009 for f in files_to_delete:
3010 infodict['__files_to_move'].setdefault(f, '')
3011 else:
3012 for old_filename in set(files_to_delete):
3013 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
3014 try:
3015 os.remove(encodeFilename(old_filename))
3016 except (IOError, OSError):
3017 self.report_warning('Unable to remove downloaded original file')
3018 if old_filename in infodict['__files_to_move']:
3019 del infodict['__files_to_move'][old_filename]
3020 return infodict
3021
3022 @staticmethod
3023 def post_extract(info_dict):
3024 def actual_post_extract(info_dict):
3025 if info_dict.get('_type') in ('playlist', 'multi_video'):
3026                 for video_dict in info_dict.get('entries') or []:
3027 actual_post_extract(video_dict or {})
3028 return
3029
3030 post_extractor = info_dict.get('__post_extractor') or (lambda: {})
3031 extra = post_extractor().items()
3032 info_dict.update(extra)
3033 info_dict.pop('__post_extractor', None)
3034
3035 original_infodict = info_dict.get('__original_infodict') or {}
3036 original_infodict.update(extra)
3037 original_infodict.pop('__post_extractor', None)
3038
3039 actual_post_extract(info_dict or {})
3040
3041 def pre_process(self, ie_info, key='pre_process', files_to_move=None):
3042 info = dict(ie_info)
3043 info['__files_to_move'] = files_to_move or {}
3044 for pp in self._pps[key]:
3045 info = self.run_pp(pp, info)
3046 return info, info.pop('__files_to_move', None)
3047
3048 def post_process(self, filename, ie_info, files_to_move=None):
3049 """Run all the postprocessors on the given file."""
3050 info = dict(ie_info)
3051 info['filepath'] = filename
3052 info['__files_to_move'] = files_to_move or {}
3053
3054 for pp in ie_info.get('__postprocessors', []) + self._pps['post_process']:
3055 info = self.run_pp(pp, info)
3056 info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3057 del info['__files_to_move']
3058 for pp in self._pps['after_move']:
3059 info = self.run_pp(pp, info)
3060 return info
3061
3062 def _make_archive_id(self, info_dict):
3063 video_id = info_dict.get('id')
3064 if not video_id:
3065 return
3066         # Lower-case the extractor key to be future-proof against any change in case
3067         # and for backwards compatibility with prior versions
3068 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
3069 if extractor is None:
3070 url = str_or_none(info_dict.get('url'))
3071 if not url:
3072 return
3073 # Try to find matching extractor for the URL and take its ie_key
3074 for ie_key, ie in self._ies.items():
3075 if ie.suitable(url):
3076 extractor = ie_key
3077 break
3078 else:
3079 return
3080 return '%s %s' % (extractor.lower(), video_id)
3081
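    # The archive id is the lower-cased extractor key followed by the video id,
    # which is also the line format of the --download-archive file. A sketch
    # with a made-up video id:
    #
    #   ydl._make_archive_id({'id': 'abc123', 'extractor_key': 'Youtube'})
    #   # -> 'youtube abc123'
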
3082 def in_download_archive(self, info_dict):
3083 fn = self.params.get('download_archive')
3084 if fn is None:
3085 return False
3086
3087 vid_id = self._make_archive_id(info_dict)
3088 if not vid_id:
3089 return False # Incomplete video information
3090
3091 return vid_id in self.archive
3092
3093 def record_download_archive(self, info_dict):
3094 fn = self.params.get('download_archive')
3095 if fn is None:
3096 return
3097 vid_id = self._make_archive_id(info_dict)
3098 assert vid_id
3099 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3100 archive_file.write(vid_id + '\n')
3101 self.archive.add(vid_id)
3102
3103 @staticmethod
3104 def format_resolution(format, default='unknown'):
3105 is_images = format.get('vcodec') == 'none' and format.get('acodec') == 'none'
3106 if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
3107 return 'audio only'
3108 if format.get('resolution') is not None:
3109 return format['resolution']
3110 if format.get('width') and format.get('height'):
3111 res = '%dx%d' % (format['width'], format['height'])
3112 elif format.get('height'):
3113 res = '%sp' % format['height']
3114 elif format.get('width'):
3115 res = '%dx?' % format['width']
3116 elif is_images:
3117 return 'images'
3118 else:
3119 return default
3120 return f'{res} images' if is_images else res
3121
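    # What the branches above resolve to, for a few sketched format dicts:
    #
    #   {'width': 1920, 'height': 1080}            -> '1920x1080'
    #   {'height': 720}                            -> '720p'
    #   {'vcodec': 'none', 'acodec': 'mp4a.40.2'}  -> 'audio only'
    #   {'vcodec': 'none', 'acodec': 'none'}       -> 'images'
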
3122 def _format_note(self, fdict):
3123 res = ''
3124 if fdict.get('ext') in ['f4f', 'f4m']:
3125 res += '(unsupported) '
3126 if fdict.get('language'):
3127 if res:
3128 res += ' '
3129 res += '[%s] ' % fdict['language']
3130 if fdict.get('format_note') is not None:
3131 res += fdict['format_note'] + ' '
3132 if fdict.get('tbr') is not None:
3133 res += '%4dk ' % fdict['tbr']
3134 if fdict.get('container') is not None:
3135 if res:
3136 res += ', '
3137 res += '%s container' % fdict['container']
3138 if (fdict.get('vcodec') is not None
3139 and fdict.get('vcodec') != 'none'):
3140 if res:
3141 res += ', '
3142 res += fdict['vcodec']
3143 if fdict.get('vbr') is not None:
3144 res += '@'
3145 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3146 res += 'video@'
3147 if fdict.get('vbr') is not None:
3148 res += '%4dk' % fdict['vbr']
3149 if fdict.get('fps') is not None:
3150 if res:
3151 res += ', '
3152 res += '%sfps' % fdict['fps']
3153 if fdict.get('acodec') is not None:
3154 if res:
3155 res += ', '
3156 if fdict['acodec'] == 'none':
3157 res += 'video only'
3158 else:
3159 res += '%-5s' % fdict['acodec']
3160 elif fdict.get('abr') is not None:
3161 if res:
3162 res += ', '
3163 res += 'audio'
3164 if fdict.get('abr') is not None:
3165 res += '@%3dk' % fdict['abr']
3166 if fdict.get('asr') is not None:
3167 res += ' (%5dHz)' % fdict['asr']
3168 if fdict.get('filesize') is not None:
3169 if res:
3170 res += ', '
3171 res += format_bytes(fdict['filesize'])
3172 elif fdict.get('filesize_approx') is not None:
3173 if res:
3174 res += ', '
3175 res += '~' + format_bytes(fdict['filesize_approx'])
3176 return res
3177
3178 def list_formats(self, info_dict):
3179 formats = info_dict.get('formats', [info_dict])
3180 new_format = (
3181 'list-formats' not in self.params.get('compat_opts', [])
3182 and self.params.get('listformats_table', True) is not False)
3183 if new_format:
3184 table = [
3185 [
3186 format_field(f, 'format_id'),
3187 format_field(f, 'ext'),
3188 self.format_resolution(f),
3189 format_field(f, 'fps', '%d'),
3190 '|',
3191 format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
3192 format_field(f, 'tbr', '%4dk'),
3193                     shorten_protocol_name(f.get('protocol', '').replace('native', 'n')),
3194 '|',
3195 format_field(f, 'vcodec', default='unknown').replace('none', ''),
3196 format_field(f, 'vbr', '%4dk'),
3197 format_field(f, 'acodec', default='unknown').replace('none', ''),
3198 format_field(f, 'abr', '%3dk'),
3199 format_field(f, 'asr', '%5dHz'),
3200 ', '.join(filter(None, (
3201 'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
3202 format_field(f, 'language', '[%s]'),
3203 format_field(f, 'format_note'),
3204 format_field(f, 'container', ignore=(None, f.get('ext'))),
3205 ))),
3206 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3207 header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', ' TBR', 'PROTO',
3208 '|', 'VCODEC', ' VBR', 'ACODEC', ' ABR', ' ASR', 'MORE INFO']
3209 else:
3210 table = [
3211 [
3212 format_field(f, 'format_id'),
3213 format_field(f, 'ext'),
3214 self.format_resolution(f),
3215 self._format_note(f)]
3216 for f in formats
3217 if f.get('preference') is None or f['preference'] >= -1000]
3218 header_line = ['format code', 'extension', 'resolution', 'note']
3219
3220 self.to_screen(
3221 '[info] Available formats for %s:' % info_dict['id'])
3222 self.to_stdout(render_table(
3223 header_line, table, delim=new_format, extraGap=(0 if new_format else 1), hideEmpty=new_format))
3224
3225 def list_thumbnails(self, info_dict):
3226         thumbnails = list(info_dict.get('thumbnails') or [])
3227 if not thumbnails:
3228 self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
3229 return
3230
3231 self.to_screen(
3232 '[info] Thumbnails for %s:' % info_dict['id'])
3233 self.to_stdout(render_table(
3234 ['ID', 'width', 'height', 'URL'],
3235 [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
3236
3237 def list_subtitles(self, video_id, subtitles, name='subtitles'):
3238 if not subtitles:
3239 self.to_screen('%s has no %s' % (video_id, name))
3240 return
3241 self.to_screen(
3242 'Available %s for %s:' % (name, video_id))
3243
3244 def _row(lang, formats):
3245 exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3246 if len(set(names)) == 1:
3247 names = [] if names[0] == 'unknown' else names[:1]
3248 return [lang, ', '.join(names), ', '.join(exts)]
3249
3250 self.to_stdout(render_table(
3251 ['Language', 'Name', 'Formats'],
3252 [_row(lang, formats) for lang, formats in subtitles.items()],
3253 hideEmpty=True))
3254
3255 def urlopen(self, req):
3256 """ Start an HTTP download """
3257 if isinstance(req, compat_basestring):
3258 req = sanitized_Request(req)
3259 return self._opener.open(req, timeout=self._socket_timeout)
3260
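    # Both forms are accepted (sketch; the URL and header are placeholders):
    #
    #   ydl.urlopen('https://example.com')
    #   ydl.urlopen(sanitized_Request('https://example.com', headers={'X-Debug': '1'}))
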
3261 def print_debug_header(self):
3262 if not self.params.get('verbose'):
3263 return
3264 get_encoding = lambda stream: getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__)
3265 encoding_str = (
3266 '[debug] Encodings: locale %s, fs %s, stdout %s, stderr %s, pref %s\n' % (
3267 locale.getpreferredencoding(),
3268 sys.getfilesystemencoding(),
3269 get_encoding(self._screen_file), get_encoding(self._err_file),
3270 self.get_encoding()))
3271
3272 logger = self.params.get('logger')
3273 if logger:
3274 write_debug = lambda msg: logger.debug(f'[debug] {msg}')
3275 write_debug(encoding_str)
3276 else:
3277 write_debug = lambda msg: self._write_string(f'[debug] {msg}')
3278 write_string(encoding_str, encoding=None)
3279
3280 source = detect_variant()
3281 write_debug('yt-dlp version %s%s\n' % (__version__, '' if source == 'unknown' else f' ({source})'))
3282 if _LAZY_LOADER:
3283 write_debug('Lazy loading extractors enabled\n')
3284 if plugin_extractors or plugin_postprocessors:
3285 write_debug('Plugins: %s\n' % [
3286 '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
3287 for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
3288 if self.params.get('compat_opts'):
3289 write_debug('Compatibility options: %s\n' % ', '.join(self.params.get('compat_opts')))
3290 try:
3291 sp = subprocess.Popen(
3292 ['git', 'rev-parse', '--short', 'HEAD'],
3293 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
3294 cwd=os.path.dirname(os.path.abspath(__file__)))
3295 out, err = process_communicate_or_kill(sp)
3296 out = out.decode().strip()
3297             if re.fullmatch('[0-9a-f]+', out):
3298 write_debug('Git HEAD: %s\n' % out)
3299         except Exception:
3300             pass  # the Python 2 `sys.exc_clear()` workaround is unnecessary on Python 3
3304
3305 def python_implementation():
3306 impl_name = platform.python_implementation()
3307 if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
3308 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
3309 return impl_name
3310
3311 write_debug('Python version %s (%s %s) - %s\n' % (
3312 platform.python_version(),
3313 python_implementation(),
3314 platform.architecture()[0],
3315 platform_name()))
3316
3317 exe_versions = FFmpegPostProcessor.get_versions(self)
3318 exe_versions['rtmpdump'] = rtmpdump_version()
3319 exe_versions['phantomjs'] = PhantomJSwrapper._version()
3320 exe_str = ', '.join(
3321 f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
3322 ) or 'none'
3323 write_debug('exe versions: %s\n' % exe_str)
3324
3325 from .downloader.websocket import has_websockets
3326 from .postprocessor.embedthumbnail import has_mutagen
3327 from .cookies import SQLITE_AVAILABLE, KEYRING_AVAILABLE
3328
3329 lib_str = ', '.join(sorted(filter(None, (
3330 compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
3331 has_websockets and 'websockets',
3332 has_mutagen and 'mutagen',
3333 SQLITE_AVAILABLE and 'sqlite',
3334 KEYRING_AVAILABLE and 'keyring',
3335 )))) or 'none'
3336 write_debug('Optional libraries: %s\n' % lib_str)
3337 write_debug('ANSI escape support: stdout = %s, stderr = %s\n' % (
3338 supports_terminal_sequences(self._screen_file),
3339 supports_terminal_sequences(self._err_file)))
3340
3341 proxy_map = {}
3342 for handler in self._opener.handlers:
3343 if hasattr(handler, 'proxies'):
3344 proxy_map.update(handler.proxies)
3345 write_debug('Proxy map: ' + compat_str(proxy_map) + '\n')
3346
3347 if self.params.get('call_home', False):
3348 ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
3349 write_debug('Public IP address: %s\n' % ipaddr)
3350 return
3358
3359 def _setup_opener(self):
3360 timeout_val = self.params.get('socket_timeout')
3361 self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
3362
3363 opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
3364 opts_cookiefile = self.params.get('cookiefile')
3365 opts_proxy = self.params.get('proxy')
3366
3367 self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
3368
3369 cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
3370 if opts_proxy is not None:
3371 if opts_proxy == '':
3372 proxies = {}
3373 else:
3374 proxies = {'http': opts_proxy, 'https': opts_proxy}
3375 else:
3376 proxies = compat_urllib_request.getproxies()
3377 # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
3378 if 'http' in proxies and 'https' not in proxies:
3379 proxies['https'] = proxies['http']
3380 proxy_handler = PerRequestProxyHandler(proxies)
3381
3382 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
3383 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3384 ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
3385 redirect_handler = YoutubeDLRedirectHandler()
3386 data_handler = compat_urllib_request_DataHandler()
3387
3388 # When passing our own FileHandler instance, build_opener won't add the
3389 # default FileHandler and allows us to disable the file protocol, which
3390 # can be used for malicious purposes (see
3391 # https://github.com/ytdl-org/youtube-dl/issues/8227)
3392 file_handler = compat_urllib_request.FileHandler()
3393
3394 def file_open(*args, **kwargs):
3395 raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
3396 file_handler.file_open = file_open
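        # With this override, any file:// request made through the opener
        # (e.g. self.urlopen('file:///etc/passwd')) raises URLError instead of
        # reading the local file.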
3397
3398 opener = compat_urllib_request.build_opener(
3399 proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
3400
3401 # Delete the default user-agent header, which would otherwise apply in
3402 # cases where our custom HTTP handler doesn't come into play
3403 # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
3404 opener.addheaders = []
3405 self._opener = opener
3406
3407 def encode(self, s):
3408 if isinstance(s, bytes):
3409 return s # Already encoded
3410
3411 try:
3412 return s.encode(self.get_encoding())
3413 except UnicodeEncodeError as err:
3414 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3415 raise
3416
3417 def get_encoding(self):
3418 encoding = self.params.get('encoding')
3419 if encoding is None:
3420 encoding = preferredencoding()
3421 return encoding
3422
3423 def _write_info_json(self, label, ie_result, infofn):
3424         ''' Write infojson and return True = written, False = skipped, None = error '''
3425 if not self.params.get('writeinfojson'):
3426 return False
3427 elif not infofn:
3428 self.write_debug(f'Skipping writing {label} infojson')
3429 return False
3430 elif not self._ensure_dir_exists(infofn):
3431 return None
3432 elif not self.params.get('overwrites', True) and os.path.exists(infofn):
3433 self.to_screen(f'[info] {label.title()} metadata is already present')
3434 else:
3435 self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3436 try:
3437 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3438 except (OSError, IOError):
3439 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3440 return None
3441 return True
3442
3443 def _write_description(self, label, ie_result, descfn):
3444         ''' Write description and return True = written, False = skipped, None = error '''
3445 if not self.params.get('writedescription'):
3446 return False
3447 elif not descfn:
3448 self.write_debug(f'Skipping writing {label} description')
3449 return False
3450 elif not self._ensure_dir_exists(descfn):
3451 return None
3452 elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3453 self.to_screen(f'[info] {label.title()} description is already present')
3454 elif ie_result.get('description') is None:
3455 self.report_warning(f'There\'s no {label} description to write')
3456 return False
3457 else:
3458 try:
3459 self.to_screen(f'[info] Writing {label} description to: {descfn}')
3460 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
3461 descfile.write(ie_result['description'])
3462 except (OSError, IOError):
3463 self.report_error(f'Cannot write {label} description file {descfn}')
3464 return None
3465 return True
3466
3467 def _write_subtitles(self, info_dict, filename):
3468 ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
3469 ret = []
3470 subtitles = info_dict.get('requested_subtitles')
3471 if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
3472             # Subtitle download errors are already handled as trouble in the relevant IE,
3473             # so processing silently continues when the IE doesn't support subtitles
3474 return ret
3475
3476 sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
3477 if not sub_filename_base:
3478 self.to_screen('[info] Skipping writing video subtitles')
3479 return ret
3480 for sub_lang, sub_info in subtitles.items():
3481 sub_format = sub_info['ext']
3482 sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
3483 sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
3484 if not self.params.get('overwrites', True) and os.path.exists(sub_filename):
3485 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
3486 sub_info['filepath'] = sub_filename
3487 ret.append((sub_filename, sub_filename_final))
3488 continue
3489
3490 self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
3491 if sub_info.get('data') is not None:
3492 try:
3493 # Use newline='' to prevent conversion of newline characters
3494 # See https://github.com/ytdl-org/youtube-dl/issues/10268
3495 with io.open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
3496 subfile.write(sub_info['data'])
3497 sub_info['filepath'] = sub_filename
3498 ret.append((sub_filename, sub_filename_final))
3499 continue
3500 except (OSError, IOError):
3501 self.report_error(f'Cannot write video subtitles file {sub_filename}')
3502 return None
3503
3504 try:
3505 sub_copy = sub_info.copy()
3506 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
3507 self.dl(sub_filename, sub_copy, subtitle=True)
3508 sub_info['filepath'] = sub_filename
3509 ret.append((sub_filename, sub_filename_final))
3510 except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
3511 self.report_warning(f'Unable to download video subtitles for {sub_lang!r}: {err}')
3512 continue
3513 return ret
3514
3515 def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
3516 ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
3517 write_all = self.params.get('write_all_thumbnails', False)
3518 thumbnails, ret = [], []
3519 if write_all or self.params.get('writethumbnail', False):
3520 thumbnails = info_dict.get('thumbnails') or []
3521 multiple = write_all and len(thumbnails) > 1
3522
3523 if thumb_filename_base is None:
3524 thumb_filename_base = filename
3525 if thumbnails and not thumb_filename_base:
3526 self.write_debug(f'Skipping writing {label} thumbnail')
3527 return ret
3528
3529 for t in thumbnails[::-1]:
3530 thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
3531 thumb_display_id = f'{label} thumbnail' + (f' {t["id"]}' if multiple else '')
3532 thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
3533 thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
3534
3535 if not self.params.get('overwrites', True) and os.path.exists(thumb_filename):
3536 ret.append((thumb_filename, thumb_filename_final))
3537 t['filepath'] = thumb_filename
3538 self.to_screen(f'[info] {thumb_display_id.title()} is already present')
3539 else:
3540 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
3541 try:
3542 uf = self.urlopen(t['url'])
3543 self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
3544 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
3545 shutil.copyfileobj(uf, thumbf)
3546 ret.append((thumb_filename, thumb_filename_final))
3547 t['filepath'] = thumb_filename
3548 except network_exceptions as err:
3549 self.report_warning(f'Unable to download {thumb_display_id}: {err}')
3550 if ret and not write_all:
3551 break
3552 return ret