]> jfr.im git - yt-dlp.git/blob - yt_dlp/YoutubeDL.py
[cleanup] misc
[yt-dlp.git] / yt_dlp / YoutubeDL.py
1 #!/usr/bin/env python3
2 # coding: utf-8
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import contextlib
8 import copy
9 import datetime
10 import errno
11 import fileinput
12 import functools
13 import io
14 import itertools
15 import json
16 import locale
17 import operator
18 import os
19 import platform
20 import re
21 import shutil
22 import subprocess
23 import sys
24 import tempfile
25 import time
26 import tokenize
27 import traceback
28 import random
29 import unicodedata
30
31 from string import ascii_letters
32
33 from .compat import (
34 compat_basestring,
35 compat_get_terminal_size,
36 compat_kwargs,
37 compat_numeric_types,
38 compat_os_name,
39 compat_pycrypto_AES,
40 compat_shlex_quote,
41 compat_str,
42 compat_tokenize_tokenize,
43 compat_urllib_error,
44 compat_urllib_request,
45 compat_urllib_request_DataHandler,
46 windows_enable_vt_mode,
47 )
48 from .cookies import load_cookies
49 from .utils import (
50 age_restricted,
51 args_to_str,
52 ContentTooShortError,
53 date_from_str,
54 DateRange,
55 DEFAULT_OUTTMPL,
56 determine_ext,
57 determine_protocol,
58 DOT_DESKTOP_LINK_TEMPLATE,
59 DOT_URL_LINK_TEMPLATE,
60 DOT_WEBLOC_LINK_TEMPLATE,
61 DownloadError,
62 encode_compat_str,
63 encodeFilename,
64 EntryNotInPlaylist,
65 error_to_compat_str,
66 ExistingVideoReached,
67 expand_path,
68 ExtractorError,
69 float_or_none,
70 format_bytes,
71 format_field,
72 formatSeconds,
73 GeoRestrictedError,
74 HEADRequest,
75 int_or_none,
76 iri_to_uri,
77 ISO3166Utils,
78 LazyList,
79 locked_file,
80 make_dir,
81 make_HTTPS_handler,
82 MaxDownloadsReached,
83 network_exceptions,
84 orderedSet,
85 OUTTMPL_TYPES,
86 PagedList,
87 parse_filesize,
88 PerRequestProxyHandler,
89 platform_name,
90 Popen,
91 PostProcessingError,
92 preferredencoding,
93 prepend_extension,
94 register_socks_protocols,
95 RejectedVideoReached,
96 render_table,
97 replace_extension,
98 SameFileError,
99 sanitize_filename,
100 sanitize_path,
101 sanitize_url,
102 sanitized_Request,
103 std_headers,
104 STR_FORMAT_RE_TMPL,
105 STR_FORMAT_TYPES,
106 str_or_none,
107 strftime_or_none,
108 subtitles_filename,
109 supports_terminal_sequences,
110 TERMINAL_SEQUENCES,
111 ThrottledDownload,
112 to_high_limit_path,
113 traverse_obj,
114 try_get,
115 UnavailableVideoError,
116 url_basename,
117 variadic,
118 version_tuple,
119 write_json_file,
120 write_string,
121 YoutubeDLCookieProcessor,
122 YoutubeDLHandler,
123 YoutubeDLRedirectHandler,
124 )
125 from .cache import Cache
126 from .extractor import (
127 gen_extractor_classes,
128 get_info_extractor,
129 _LAZY_LOADER,
130 _PLUGIN_CLASSES as plugin_extractors
131 )
132 from .extractor.openload import PhantomJSwrapper
133 from .downloader import (
134 FFmpegFD,
135 get_suitable_downloader,
136 shorten_protocol_name
137 )
138 from .downloader.rtmp import rtmpdump_version
139 from .postprocessor import (
140 get_postprocessor,
141 EmbedThumbnailPP,
142 FFmpegFixupDurationPP,
143 FFmpegFixupM3u8PP,
144 FFmpegFixupM4aPP,
145 FFmpegFixupStretchedPP,
146 FFmpegFixupTimestampPP,
147 FFmpegMergerPP,
148 FFmpegPostProcessor,
149 MoveFilesAfterDownloadPP,
150 _PLUGIN_CLASSES as plugin_postprocessors
151 )
152 from .update import detect_variant
153 from .version import __version__
154
155 if compat_os_name == 'nt':
156 import ctypes
157
158
159 class YoutubeDL(object):
160 """YoutubeDL class.
161
162 YoutubeDL objects are the ones responsible of downloading the
163 actual video file and writing it to disk if the user has requested
164 it, among some other tasks. In most cases there should be one per
165 program. As, given a video URL, the downloader doesn't know how to
166 extract all the needed information, task that InfoExtractors do, it
167 has to pass the URL to one of them.
168
169 For this, YoutubeDL objects have a method that allows
170 InfoExtractors to be registered in a given order. When it is passed
171 a URL, the YoutubeDL object handles it to the first InfoExtractor it
172 finds that reports being able to handle it. The InfoExtractor extracts
173 all the information about the video or videos the URL refers to, and
174 YoutubeDL process the extracted information, possibly using a File
175 Downloader to download the video.
176
177 YoutubeDL objects accept a lot of parameters. In order not to saturate
178 the object constructor with arguments, it receives a dictionary of
179 options instead. These options are available through the params
180 attribute for the InfoExtractors to use. The YoutubeDL also
181 registers itself as the downloader in charge for the InfoExtractors
182 that are added to it, so this is a "mutual registration".
183
184 Available options:
185
186 username: Username for authentication purposes.
187 password: Password for authentication purposes.
188 videopassword: Password for accessing a video.
189 ap_mso: Adobe Pass multiple-system operator identifier.
190 ap_username: Multiple-system operator account username.
191 ap_password: Multiple-system operator account password.
192 usenetrc: Use netrc for authentication instead.
193 verbose: Print additional info to stdout.
194 quiet: Do not print messages to stdout.
195 no_warnings: Do not print out anything for warnings.
196 forceprint: A list of templates to force print
197 forceurl: Force printing final URL. (Deprecated)
198 forcetitle: Force printing title. (Deprecated)
199 forceid: Force printing ID. (Deprecated)
200 forcethumbnail: Force printing thumbnail URL. (Deprecated)
201 forcedescription: Force printing description. (Deprecated)
202 forcefilename: Force printing final filename. (Deprecated)
203 forceduration: Force printing duration. (Deprecated)
204 forcejson: Force printing info_dict as JSON.
205 dump_single_json: Force printing the info_dict of the whole playlist
206 (or video) as a single JSON line.
207 force_write_download_archive: Force writing download archive regardless
208 of 'skip_download' or 'simulate'.
209 simulate: Do not download the video files. If unset (or None),
210 simulate only if listsubtitles, listformats or list_thumbnails is used
211 format: Video format code. see "FORMAT SELECTION" for more details.
212 allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
213     ignore_no_formats_error: Ignore "No video formats" error. Useful for
214 extracting metadata even if the video is not actually
215 available for download (experimental)
216 format_sort: How to sort the video formats. see "Sorting Formats"
217 for more details.
218 format_sort_force: Force the given format_sort. see "Sorting Formats"
219 for more details.
220 allow_multiple_video_streams: Allow multiple video streams to be merged
221 into a single file
222 allow_multiple_audio_streams: Allow multiple audio streams to be merged
223 into a single file
224 check_formats Whether to test if the formats are downloadable.
225 Can be True (check all), False (check none)
226 or None (check only if requested by extractor)
227 paths: Dictionary of output paths. The allowed keys are 'home'
228 'temp' and the keys of OUTTMPL_TYPES (in utils.py)
229 outtmpl: Dictionary of templates for output names. Allowed keys
230 are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
231 For compatibility with youtube-dl, a single string can also be used
232 outtmpl_na_placeholder: Placeholder for unavailable meta fields.
233 restrictfilenames: Do not allow "&" and spaces in file names
234 trim_file_name: Limit length of filename (extension excluded)
235 windowsfilenames: Force the filenames to be windows compatible
236 ignoreerrors: Do not stop on download/postprocessing errors.
237 Can be 'only_download' to ignore only download errors.
238 Default is 'only_download' for CLI, but False for API
239 skip_playlist_after_errors: Number of allowed failures until the rest of
240 the playlist is skipped
241 force_generic_extractor: Force downloader to use the generic extractor
242 overwrites: Overwrite all video and metadata files if True,
243 overwrite only non-video files if None
244 and don't overwrite any file if False
245 For compatibility with youtube-dl,
246 "nooverwrites" may also be used instead
247 playliststart: Playlist item to start at.
248 playlistend: Playlist item to end at.
249 playlist_items: Specific indices of playlist to download.
250 playlistreverse: Download playlist items in reverse order.
251 playlistrandom: Download playlist items in random order.
252 matchtitle: Download only matching titles.
253 rejecttitle: Reject downloads for matching titles.
254 logger: Log messages to a logging.Logger instance.
255 logtostderr: Log messages to stderr instead of stdout.
256 consoletitle: Display progress in console window's titlebar.
257 writedescription: Write the video description to a .description file
258 writeinfojson: Write the video description to a .info.json file
259 clean_infojson: Remove private fields from the infojson
260 getcomments: Extract video comments. This will not be written to disk
261 unless writeinfojson is also given
262 writeannotations: Write the video annotations to a .annotations.xml file
263 writethumbnail: Write the thumbnail image to a file
264 allow_playlist_files: Whether to write playlists' description, infojson etc
265 also to disk when using the 'write*' options
266 write_all_thumbnails: Write all thumbnail formats to files
267 writelink: Write an internet shortcut file, depending on the
268 current platform (.url/.webloc/.desktop)
269 writeurllink: Write a Windows internet shortcut file (.url)
270 writewebloclink: Write a macOS internet shortcut file (.webloc)
271 writedesktoplink: Write a Linux internet shortcut file (.desktop)
272 writesubtitles: Write the video subtitles to a file
273 writeautomaticsub: Write the automatically generated subtitles to a file
274 allsubtitles: Deprecated - Use subtitleslangs = ['all']
275 Downloads all the subtitles of the video
276 (requires writesubtitles or writeautomaticsub)
277 listsubtitles: Lists all available subtitles for the video
278 subtitlesformat: The format code for subtitles
279 subtitleslangs: List of languages of the subtitles to download (can be regex).
280 The list may contain "all" to refer to all the available
281 subtitles. The language can be prefixed with a "-" to
282 exclude it from the requested languages. Eg: ['all', '-live_chat']
283 keepvideo: Keep the video file after post-processing
284 daterange: A DateRange object, download only if the upload_date is in the range.
285 skip_download: Skip the actual download of the video file
286 cachedir: Location of the cache files in the filesystem.
287 False to disable filesystem cache.
288 noplaylist: Download single video instead of a playlist if in doubt.
289 age_limit: An integer representing the user's age in years.
290 Unsuitable videos for the given age are skipped.
291 min_views: An integer representing the minimum view count the video
292 must have in order to not be skipped.
293 Videos without view count information are always
294 downloaded. None for no limit.
295 max_views: An integer representing the maximum view count.
296 Videos that are more popular than that are not
297 downloaded.
298 Videos without view count information are always
299 downloaded. None for no limit.
300 download_archive: File name of a file where all downloads are recorded.
301 Videos already present in the file are not downloaded
302 again.
303 break_on_existing: Stop the download process after attempting to download a
304 file that is in the archive.
305 break_on_reject: Stop the download process when encountering a video that
306 has been filtered out.
307 cookiefile: File name where cookies should be read from and dumped to
308 cookiesfrombrowser: A tuple containing the name of the browser and the profile
309 name/path from where cookies are loaded.
310 Eg: ('chrome', ) or ('vivaldi', 'default')
311 nocheckcertificate:Do not verify SSL certificates
312 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
313 At the moment, this is only supported by YouTube.
314 proxy: URL of the proxy server to use
315 geo_verification_proxy: URL of the proxy to use for IP address verification
316 on geo-restricted sites.
317 socket_timeout: Time to wait for unresponsive hosts, in seconds
318 bidi_workaround: Work around buggy terminals without bidirectional text
319                        support, using fribidi
320 debug_printtraffic:Print out sent and received HTTP traffic
321 include_ads: Download ads as well
322 default_search: Prepend this string if an input url is not valid.
323 'auto' for elaborate guessing
324 encoding: Use this encoding instead of the system-specified.
325 extract_flat: Do not resolve URLs, return the immediate result.
326 Pass in 'in_playlist' to only show this behavior for
327 playlist items.
328 postprocessors: A list of dictionaries, each with an entry
329 * key: The name of the postprocessor. See
330 yt_dlp/postprocessor/__init__.py for a list.
331 * when: When to run the postprocessor. Can be one of
332 pre_process|before_dl|post_process|after_move.
333 Assumed to be 'post_process' if not given
334 post_hooks: Deprecated - Register a custom postprocessor instead
335 A list of functions that get called as the final step
336 for each video file, after all postprocessors have been
337 called. The filename will be passed as the only argument.
338 progress_hooks: A list of functions that get called on download
339 progress, with a dictionary with the entries
340 * status: One of "downloading", "error", or "finished".
341 Check this first and ignore unknown values.
342 * info_dict: The extracted info_dict
343
344 If status is one of "downloading", or "finished", the
345 following properties may also be present:
346 * filename: The final filename (always present)
347 * tmpfilename: The filename we're currently writing to
348 * downloaded_bytes: Bytes on disk
349 * total_bytes: Size of the whole file, None if unknown
350 * total_bytes_estimate: Guess of the eventual file size,
351 None if unavailable.
352 * elapsed: The number of seconds since download started.
353 * eta: The estimated time in seconds, None if unknown
354 * speed: The download speed in bytes/second, None if
355 unknown
356 * fragment_index: The counter of the currently
357 downloaded video fragment.
358 * fragment_count: The number of fragments (= individual
359 files that will be merged)
360
361 Progress hooks are guaranteed to be called at least once
362 (with status "finished") if the download is successful.
363 postprocessor_hooks: A list of functions that get called on postprocessing
364 progress, with a dictionary with the entries
365 * status: One of "started", "processing", or "finished".
366 Check this first and ignore unknown values.
367 * postprocessor: Name of the postprocessor
368 * info_dict: The extracted info_dict
369
370 Progress hooks are guaranteed to be called at least twice
371 (with status "started" and "finished") if the processing is successful.
372 merge_output_format: Extension to use when merging formats.
373 final_ext: Expected final extension; used to detect when the file was
374 already downloaded and converted. "merge_output_format" is
375 replaced by this extension when given
376 fixup: Automatically correct known faults of the file.
377 One of:
378 - "never": do nothing
379 - "warn": only emit a warning
380 - "detect_or_warn": check whether we can do anything
381 about it, warn otherwise (default)
382 source_address: Client-side IP address to bind to.
383 call_home: Boolean, true iff we are allowed to contact the
384 yt-dlp servers for debugging. (BROKEN)
385 sleep_interval_requests: Number of seconds to sleep between requests
386 during extraction
387 sleep_interval: Number of seconds to sleep before each download when
388 used alone or a lower bound of a range for randomized
389 sleep before each download (minimum possible number
390 of seconds to sleep) when used along with
391 max_sleep_interval.
392 max_sleep_interval:Upper bound of a range for randomized sleep before each
393 download (maximum possible number of seconds to sleep).
394 Must only be used along with sleep_interval.
395 Actual sleep time will be a random float from range
396 [sleep_interval; max_sleep_interval].
397 sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
398 listformats: Print an overview of available video formats and exit.
399 list_thumbnails: Print a table of all thumbnails and exit.
400 match_filter: A function that gets called with the info_dict of
401 every video.
402 If it returns a message, the video is ignored.
403 If it returns None, the video is downloaded.
404 match_filter_func in utils.py is one example for this.
405 no_color: Do not emit color codes in output.
406 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
407 HTTP header
408 geo_bypass_country:
409 Two-letter ISO 3166-2 country code that will be used for
410 explicit geographic restriction bypassing via faking
411 X-Forwarded-For HTTP header
412 geo_bypass_ip_block:
413 IP range in CIDR notation that will be used similarly to
414 geo_bypass_country
415
416 The following options determine which downloader is picked:
417 external_downloader: A dictionary of protocol keys and the executable of the
418 external downloader to use for it. The allowed protocols
419 are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
420 Set the value to 'native' to use the native downloader
421 hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
422 or {'m3u8': 'ffmpeg'} instead.
423 Use the native HLS downloader instead of ffmpeg/avconv
424 if True, otherwise use ffmpeg/avconv if False, otherwise
425 use downloader suggested by extractor if None.
426 compat_opts: Compatibility options. See "Differences in default behavior".
427 The following options do not work when used through the API:
428 filename, abort-on-error, multistreams, no-live-chat, format-sort
429 no-clean-infojson, no-playlist-metafiles, no-keep-subs.
430 Refer __init__.py for their implementation
431 progress_template: Dictionary of templates for progress outputs.
432 Allowed keys are 'download', 'postprocess',
433 'download-title' (console title) and 'postprocess-title'.
434 The template is mapped on a dictionary with keys 'progress' and 'info'
435
436 The following parameters are not used by YoutubeDL itself, they are used by
437 the downloader (see yt_dlp/downloader/common.py):
438 nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
439 max_filesize, test, noresizebuffer, retries, fragment_retries, continuedl,
440 noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
441 external_downloader_args.
442
443 The following options are used by the post processors:
444 prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
445 otherwise prefer ffmpeg. (avconv support is deprecated)
446 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
447 to the binary or its containing directory.
448 postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
449 and a list of additional command-line arguments for the
450 postprocessor/executable. The dict can also have "PP+EXE" keys
451 which are used when the given exe is used by the given PP.
452 Use 'default' as the name for arguments to passed to all PP
453 For compatibility with youtube-dl, a single list of args
454 can also be used
455
456 The following options are used by the extractors:
457 extractor_retries: Number of times to retry for known errors
458 dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
459 hls_split_discontinuity: Split HLS playlists to different formats at
460 discontinuities such as ad breaks (default: False)
461 extractor_args: A dictionary of arguments to be passed to the extractors.
462 See "EXTRACTOR ARGUMENTS" for details.
463 Eg: {'youtube': {'skip': ['dash', 'hls']}}
464 youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
465 If True (default), DASH manifests and related
466 data will be downloaded and processed by extractor.
467 You can reduce network I/O by disabling it if you don't
468 care about DASH. (only for youtube)
469 youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
470 If True (default), HLS manifests and related
471 data will be downloaded and processed by extractor.
472 You can reduce network I/O by disabling it if you don't
473 care about HLS. (only for youtube)
474 """
475
    # Info-dict fields expected to hold numeric values; presumably consulted
    # when sanitizing/filtering metadata elsewhere in this class — the
    # consumers are not in view here, so verify before relying on this note.
    _NUMERIC_FIELDS = set((
        'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
        'timestamp', 'release_timestamp',
        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
        'average_rating', 'comment_count', 'age_limit',
        'start_time', 'end_time',
        'chapter_number', 'season_number', 'episode_number',
        'track_number', 'disc_number', 'release_year',
    ))

    # File extensions grouped by track kind, used by format selection
    _format_selection_exts = {
        'audio': {'m4a', 'mp3', 'ogg', 'aac'},
        'video': {'mp4', 'flv', 'webm', '3gp'},
        'storyboards': {'mhtml'},
    }

    # Class-level defaults; each is re-assigned per instance in __init__
    params = None
    _ies = {}
    _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
    _printed_messages = set()
    _first_webpage_request = True
    _download_retcode = None
    _num_downloads = None
    _playlist_level = 0
    _playlist_urls = set()
    _screen_file = None
502
    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options.

        @param params     Dictionary of options (see the class docstring);
                          stored as self.params and shared with extractors
        @param auto_init  Whether to load the default extractors and print header (if verbose).
                          Set to 'no_verbose_header' to not print the header
        """
        if params is None:
            params = {}
        # Per-instance state (shadows the class-level defaults)
        self._ies = {}
        self._ies_instances = {}
        self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
        self._printed_messages = set()
        self._first_webpage_request = True
        self._post_hooks = []
        self._progress_hooks = []
        self._postprocessor_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        # Screen output goes to stderr instead of stdout when logtostderr is set
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr
        self.params = params
        self.cache = Cache(self)

        windows_enable_vt_mode()
        # FIXME: This will break if we ever print color to stdout
        self.params['no_color'] = self.params.get('no_color') or not supports_terminal_sequences(self._err_file)

        if sys.version_info < (3, 6):
            self.report_warning(
                'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])

        if self.params.get('allow_unplayable_formats'):
            self.report_warning(
                f'You have asked for {self._color_text("unplayable formats", "blue")} to be listed/downloaded. '
                'This is a developer option intended for debugging. \n'
                ' If you experience any issues while using this option, '
                f'{self._color_text("DO NOT", "red")} open a bug report')

        def check_deprecated(param, option, suggestion):
            # Warn when a deprecated option is present; return whether it was set
            if self.params.get(param) is not None:
                self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
                return True
            return False

        # cn_verification_proxy falls back into geo_verification_proxy if the latter is unset
        if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
        check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
        check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

        for msg in self.params.get('_warnings', []):
            self.report_warning(msg)

        if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
            # nooverwrites was unnecessarily changed to overwrites
            # in 0c3d0f51778b153f65c21906031c2e091fcfb641
            # This ensures compatibility with both keys
            self.params['overwrites'] = not self.params['nooverwrites']
        elif self.params.get('overwrites') is None:
            self.params.pop('overwrites', None)
        else:
            self.params['nooverwrites'] = not self.params['overwrites']

        if params.get('bidi_workaround', False):
            # Pipe output through an external bidi reordering tool (bidiv or fribidi)
            try:
                import pty
                master, slave = pty.openpty()
                width = compat_get_terminal_size().columns
                if width is None:
                    width_args = []
                else:
                    width_args = ['-w', str(width)]
                sp_kwargs = dict(
                    stdin=subprocess.PIPE,
                    stdout=slave,
                    stderr=self._err_file)
                try:
                    self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
                except OSError:
                    # bidiv unavailable: fall back to fribidi
                    self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == errno.ENOENT:
                    self.report_warning(
                        'Could not find fribidi executable, ignoring --bidi-workaround. '
                        'Make sure that fribidi is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        if (sys.platform != 'win32'
                and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not params.get('restrictfilenames', False)):
            # Unicode filesystem API will throw errors (#1474, #13027)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        self.outtmpl_dict = self.parse_outtmpl()

        # Creating format selector here allows us to catch syntax errors before the extraction
        self.format_selector = (
            None if self.params.get('format') is None
            else self.build_format_selector(self.params['format']))

        self._setup_opener()

        if auto_init:
            if auto_init != 'no_verbose_header':
                self.print_debug_header()
            self.add_default_info_extractors()

        # Instantiate and register the configured postprocessors
        for pp_def_raw in self.params.get('postprocessors', []):
            pp_def = dict(pp_def_raw)
            when = pp_def.pop('when', 'post_process')
            pp_class = get_postprocessor(pp_def.pop('key'))
            pp = pp_class(self, **compat_kwargs(pp_def))
            self.add_post_processor(pp, when=when)

        for ph in self.params.get('post_hooks', []):
            self.add_post_hook(ph)

        for ph in self.params.get('progress_hooks', []):
            self.add_progress_hook(ph)

        register_socks_protocols()

        def preload_download_archive(fn):
            """Preload the archive, if any is specified"""
            if fn is None:
                return False
            self.write_debug(f'Loading archive file {fn!r}')
            try:
                with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                    for line in archive_file:
                        self.archive.add(line.strip())
            except IOError as ioe:
                # A missing archive file is fine; anything else is a real error
                if ioe.errno != errno.ENOENT:
                    raise
                return False
            return True

        self.archive = set()
        preload_download_archive(self.params.get('download_archive'))
649
650 def warn_if_short_id(self, argv):
651 # short YouTube ID starting with dash?
652 idxs = [
653 i for i, a in enumerate(argv)
654 if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
655 if idxs:
656 correct_argv = (
657 ['yt-dlp']
658 + [a for i, a in enumerate(argv) if i not in idxs]
659 + ['--'] + [argv[i] for i in idxs]
660 )
661 self.report_warning(
662 'Long argument string detected. '
663 'Use -- to separate parameters and URLs, like this:\n%s' %
664 args_to_str(correct_argv))
665
666 def add_info_extractor(self, ie):
667 """Add an InfoExtractor object to the end of the list."""
668 ie_key = ie.ie_key()
669 self._ies[ie_key] = ie
670 if not isinstance(ie, type):
671 self._ies_instances[ie_key] = ie
672 ie.set_downloader(self)
673
674 def _get_info_extractor_class(self, ie_key):
675 ie = self._ies.get(ie_key)
676 if ie is None:
677 ie = get_info_extractor(ie_key)
678 self.add_info_extractor(ie)
679 return ie
680
681 def get_info_extractor(self, ie_key):
682 """
683 Get an instance of an IE with name ie_key, it will try to get one from
684 the _ies list, if there's no instance it will create a new one and add
685 it to the extractor list.
686 """
687 ie = self._ies_instances.get(ie_key)
688 if ie is None:
689 ie = get_info_extractor(ie_key)()
690 self.add_info_extractor(ie)
691 return ie
692
693 def add_default_info_extractors(self):
694 """
695 Add the InfoExtractors returned by gen_extractors to the end of the list
696 """
697 for ie in gen_extractor_classes():
698 self.add_info_extractor(ie)
699
700 def add_post_processor(self, pp, when='post_process'):
701 """Add a PostProcessor object to the end of the chain."""
702 self._pps[when].append(pp)
703 pp.set_downloader(self)
704
705 def add_post_hook(self, ph):
706 """Add the post hook"""
707 self._post_hooks.append(ph)
708
709 def add_progress_hook(self, ph):
710 """Add the download progress hook"""
711 self._progress_hooks.append(ph)
712
713 def add_postprocessor_hook(self, ph):
714 """Add the postprocessing progress hook"""
715 self._postprocessor_hooks.append(ph)
716
717 def _bidi_workaround(self, message):
718 if not hasattr(self, '_output_channel'):
719 return message
720
721 assert hasattr(self, '_output_process')
722 assert isinstance(message, compat_str)
723 line_count = message.count('\n') + 1
724 self._output_process.stdin.write((message + '\n').encode('utf-8'))
725 self._output_process.stdin.flush()
726 res = ''.join(self._output_channel.readline().decode('utf-8')
727 for _ in range(line_count))
728 return res[:-len('\n')]
729
730 def _write_string(self, message, out=None, only_once=False):
731 if only_once:
732 if message in self._printed_messages:
733 return
734 self._printed_messages.add(message)
735 write_string(message, out=out, encoding=self.params.get('encoding'))
736
737 def to_stdout(self, message, skip_eol=False, quiet=False):
738 """Print message to stdout"""
739 if self.params.get('logger'):
740 self.params['logger'].debug(message)
741 elif not quiet or self.params.get('verbose'):
742 self._write_string(
743 '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
744 self._err_file if quiet else self._screen_file)
745
746 def to_stderr(self, message, only_once=False):
747 """Print message to stderr"""
748 assert isinstance(message, compat_str)
749 if self.params.get('logger'):
750 self.params['logger'].error(message)
751 else:
752 self._write_string('%s\n' % self._bidi_workaround(message), self._err_file, only_once=only_once)
753
754 def to_console_title(self, message):
755 if not self.params.get('consoletitle', False):
756 return
757 if compat_os_name == 'nt':
758 if ctypes.windll.kernel32.GetConsoleWindow():
759 # c_wchar_p() might not be necessary if `message` is
760 # already of type unicode()
761 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
762 elif 'TERM' in os.environ:
763 self._write_string('\033]0;%s\007' % message, self._screen_file)
764
765 def save_console_title(self):
766 if not self.params.get('consoletitle', False):
767 return
768 if self.params.get('simulate'):
769 return
770 if compat_os_name != 'nt' and 'TERM' in os.environ:
771 # Save the title on stack
772 self._write_string('\033[22;0t', self._screen_file)
773
774 def restore_console_title(self):
775 if not self.params.get('consoletitle', False):
776 return
777 if self.params.get('simulate'):
778 return
779 if compat_os_name != 'nt' and 'TERM' in os.environ:
780 # Restore the title from stack
781 self._write_string('\033[23;0t', self._screen_file)
782
783 def __enter__(self):
784 self.save_console_title()
785 return self
786
787 def __exit__(self, *args):
788 self.restore_console_title()
789
790 if self.params.get('cookiefile') is not None:
791 self.cookiejar.save(ignore_discard=True, ignore_expires=True)
792
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    # The active exception may itself carry a wrapped exc_info
                    # (e.g. a DownloadError) -- include that traceback first
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += encode_compat_str(traceback.format_exc())
                else:
                    # Not inside an except block: show the current call stack instead
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            if tb:
                self.to_stderr(tb)
        if not self.params.get('ignoreerrors'):
            # Re-raise as DownloadError, preferring the wrapped exc_info when present
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        # Errors are being ignored: record failure via the process return code
        self._download_retcode = 1
823
824 def to_screen(self, message, skip_eol=False):
825 """Print message to stdout if not in quiet mode"""
826 self.to_stdout(
827 message, skip_eol, quiet=self.params.get('quiet', False))
828
829 def _color_text(self, text, color):
830 if self.params.get('no_color'):
831 return text
832 return f'{TERMINAL_SEQUENCES[color.upper()]}{text}{TERMINAL_SEQUENCES["RESET_STYLE"]}'
833
834 def report_warning(self, message, only_once=False):
835 '''
836 Print the message to stderr, it will be prefixed with 'WARNING:'
837 If stderr is a tty file the 'WARNING:' will be colored
838 '''
839 if self.params.get('logger') is not None:
840 self.params['logger'].warning(message)
841 else:
842 if self.params.get('no_warnings'):
843 return
844 self.to_stderr(f'{self._color_text("WARNING:", "yellow")} {message}', only_once)
845
846 def report_error(self, message, tb=None):
847 '''
848 Do the same as trouble, but prefixes the message with 'ERROR:', colored
849 in red if stderr is a tty file.
850 '''
851 self.trouble(f'{self._color_text("ERROR:", "red")} {message}', tb)
852
853 def write_debug(self, message, only_once=False):
854 '''Log debug message or Print message to stderr'''
855 if not self.params.get('verbose', False):
856 return
857 message = '[debug] %s' % message
858 if self.params.get('logger'):
859 self.params['logger'].debug(message)
860 else:
861 self.to_stderr(message, only_once)
862
863 def report_file_already_downloaded(self, file_name):
864 """Report file has already been fully downloaded."""
865 try:
866 self.to_screen('[download] %s has already been downloaded' % file_name)
867 except UnicodeEncodeError:
868 self.to_screen('[download] The file has already been downloaded')
869
870 def report_file_delete(self, file_name):
871 """Report that existing file will be deleted."""
872 try:
873 self.to_screen('Deleting existing file %s' % file_name)
874 except UnicodeEncodeError:
875 self.to_screen('Deleting existing file')
876
877 def raise_no_formats(self, info, forced=False):
878 has_drm = info.get('__has_drm')
879 msg = 'This video is DRM protected' if has_drm else 'No video formats found!'
880 expected = self.params.get('ignore_no_formats_error')
881 if forced or not expected:
882 raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
883 expected=has_drm or expected)
884 else:
885 self.report_warning(msg)
886
887 def parse_outtmpl(self):
888 outtmpl_dict = self.params.get('outtmpl', {})
889 if not isinstance(outtmpl_dict, dict):
890 outtmpl_dict = {'default': outtmpl_dict}
891 # Remove spaces in the default template
892 if self.params.get('restrictfilenames'):
893 sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')
894 else:
895 sanitize = lambda x: x
896 outtmpl_dict.update({
897 k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items()
898 if outtmpl_dict.get(k) is None})
899 for key, val in outtmpl_dict.items():
900 if isinstance(val, bytes):
901 self.report_warning(
902 'Parameter outtmpl is bytes, but should be a unicode string. '
903 'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
904 return outtmpl_dict
905
906 def get_output_path(self, dir_type='', filename=None):
907 paths = self.params.get('paths', {})
908 assert isinstance(paths, dict)
909 path = os.path.join(
910 expand_path(paths.get('home', '').strip()),
911 expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
912 filename or '')
913
914 # Temporary fix for #4787
915 # 'Treat' all problem characters by passing filename through preferredencoding
916 # to workaround encoding issues with subprocess on python2 @ Windows
917 if sys.version_info < (3, 0) and sys.platform == 'win32':
918 path = encodeFilename(path, True).decode(preferredencoding())
919 return sanitize_path(path, force=self.params.get('windowsfilenames'))
920
921 @staticmethod
922 def _outtmpl_expandpath(outtmpl):
923 # expand_path translates '%%' into '%' and '$$' into '$'
924 # correspondingly that is not what we want since we need to keep
925 # '%%' intact for template dict substitution step. Working around
926 # with boundary-alike separator hack.
927 sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
928 outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
929
930 # outtmpl should be expand_path'ed before template dict substitution
931 # because meta fields may contain env variables we don't want to
932 # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
933 # title "Hello $PATH", we don't want `$PATH` to be expanded.
934 return expand_path(outtmpl).replace(sep, '')
935
936 @staticmethod
937 def escape_outtmpl(outtmpl):
938 ''' Escape any remaining strings like %s, %abc% etc. '''
939 return re.sub(
940 STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
941 lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
942 outtmpl)
943
944 @classmethod
945 def validate_outtmpl(cls, outtmpl):
946 ''' @return None or Exception object '''
947 outtmpl = re.sub(
948 STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBU]'),
949 lambda mobj: f'{mobj.group(0)[:-1]}s',
950 cls._outtmpl_expandpath(outtmpl))
951 try:
952 cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
953 return None
954 except ValueError as err:
955 return err
956
957 @staticmethod
958 def _copy_infodict(info_dict):
959 info_dict = dict(info_dict)
960 for key in ('__original_infodict', '__postprocessors'):
961 info_dict.pop(key, None)
962 return info_dict
963
964 def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
965 """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict """
966 info_dict.setdefault('epoch', int(time.time())) # keep epoch consistent once set
967
968 info_dict = self._copy_infodict(info_dict)
969 info_dict['duration_string'] = ( # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
970 formatSeconds(info_dict['duration'], '-' if sanitize else ':')
971 if info_dict.get('duration', None) is not None
972 else None)
973 info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
974 if info_dict.get('resolution') is None:
975 info_dict['resolution'] = self.format_resolution(info_dict, default=None)
976
977 # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
978 # of %(field)s to %(field)0Nd for backward compatibility
979 field_size_compat_map = {
980 'playlist_index': len(str(info_dict.get('_last_playlist_index') or '')),
981 'playlist_autonumber': len(str(info_dict.get('n_entries') or '')),
982 'autonumber': self.params.get('autonumber_size') or 5,
983 }
984
985 TMPL_DICT = {}
986 EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBU]'))
987 MATH_FUNCTIONS = {
988 '+': float.__add__,
989 '-': float.__sub__,
990 }
991 # Field is of the form key1.key2...
992 # where keys (except first) can be string, int or slice
993 FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
994 MATH_FIELD_RE = r'''{field}|{num}'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
995 MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
996 INTERNAL_FORMAT_RE = re.compile(r'''(?x)
997 (?P<negate>-)?
998 (?P<fields>{field})
999 (?P<maths>(?:{math_op}{math_field})*)
1000 (?:>(?P<strf_format>.+?))?
1001 (?P<alternate>(?<!\\),[^|)]+)?
1002 (?:\|(?P<default>.*?))?
1003 $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))
1004
1005 def _traverse_infodict(k):
1006 k = k.split('.')
1007 if k[0] == '':
1008 k.pop(0)
1009 return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)
1010
1011 def get_value(mdict):
1012 # Object traversal
1013 value = _traverse_infodict(mdict['fields'])
1014 # Negative
1015 if mdict['negate']:
1016 value = float_or_none(value)
1017 if value is not None:
1018 value *= -1
1019 # Do maths
1020 offset_key = mdict['maths']
1021 if offset_key:
1022 value = float_or_none(value)
1023 operator = None
1024 while offset_key:
1025 item = re.match(
1026 MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
1027 offset_key).group(0)
1028 offset_key = offset_key[len(item):]
1029 if operator is None:
1030 operator = MATH_FUNCTIONS[item]
1031 continue
1032 item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
1033 offset = float_or_none(item)
1034 if offset is None:
1035 offset = float_or_none(_traverse_infodict(item))
1036 try:
1037 value = operator(value, multiplier * offset)
1038 except (TypeError, ZeroDivisionError):
1039 return None
1040 operator = None
1041 # Datetime formatting
1042 if mdict['strf_format']:
1043 value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))
1044
1045 return value
1046
1047 na = self.params.get('outtmpl_na_placeholder', 'NA')
1048
1049 def _dumpjson_default(obj):
1050 if isinstance(obj, (set, LazyList)):
1051 return list(obj)
1052 raise TypeError(f'Object of type {type(obj).__name__} is not JSON serializable')
1053
1054 def create_key(outer_mobj):
1055 if not outer_mobj.group('has_key'):
1056 return outer_mobj.group(0)
1057 key = outer_mobj.group('key')
1058 mobj = re.match(INTERNAL_FORMAT_RE, key)
1059 initial_field = mobj.group('fields').split('.')[-1] if mobj else ''
1060 value, default = None, na
1061 while mobj:
1062 mobj = mobj.groupdict()
1063 default = mobj['default'] if mobj['default'] is not None else default
1064 value = get_value(mobj)
1065 if value is None and mobj['alternate']:
1066 mobj = re.match(INTERNAL_FORMAT_RE, mobj['alternate'][1:])
1067 else:
1068 break
1069
1070 fmt = outer_mobj.group('format')
1071 if fmt == 's' and value is not None and key in field_size_compat_map.keys():
1072 fmt = '0{:d}d'.format(field_size_compat_map[key])
1073
1074 value = default if value is None else value
1075
1076 str_fmt = f'{fmt[:-1]}s'
1077 if fmt[-1] == 'l': # list
1078 delim = '\n' if '#' in (outer_mobj.group('conversion') or '') else ', '
1079 value, fmt = delim.join(variadic(value)), str_fmt
1080 elif fmt[-1] == 'j': # json
1081 value, fmt = json.dumps(value, default=_dumpjson_default), str_fmt
1082 elif fmt[-1] == 'q': # quoted
1083 value, fmt = compat_shlex_quote(str(value)), str_fmt
1084 elif fmt[-1] == 'B': # bytes
1085 value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
1086 value, fmt = value.decode('utf-8', 'ignore'), 's'
1087 elif fmt[-1] == 'U': # unicode normalized
1088 opts = outer_mobj.group('conversion') or ''
1089 value, fmt = unicodedata.normalize(
1090 # "+" = compatibility equivalence, "#" = NFD
1091 'NF%s%s' % ('K' if '+' in opts else '', 'D' if '#' in opts else 'C'),
1092 value), str_fmt
1093 elif fmt[-1] == 'c':
1094 if value:
1095 value = str(value)[0]
1096 else:
1097 fmt = str_fmt
1098 elif fmt[-1] not in 'rs': # numeric
1099 value = float_or_none(value)
1100 if value is None:
1101 value, fmt = default, 's'
1102
1103 if sanitize:
1104 if fmt[-1] == 'r':
1105 # If value is an object, sanitize might convert it to a string
1106 # So we convert it to repr first
1107 value, fmt = repr(value), str_fmt
1108 if fmt[-1] in 'csr':
1109 value = sanitize(initial_field, value)
1110
1111 key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
1112 TMPL_DICT[key] = value
1113 return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))
1114
1115 return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
1116
1117 def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
1118 outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
1119 return self.escape_outtmpl(outtmpl) % info_dict
1120
    def _prepare_filename(self, info_dict, tmpl_type='default'):
        """Render the output template of *tmpl_type* for *info_dict*.

        Returns the filename (without the --paths directory applied), or
        None when the template could not be evaluated.
        """
        try:
            # Per-field sanitizer: id-like fields keep more characters
            sanitize = lambda k, v: sanitize_filename(
                compat_str(v),
                restricted=self.params.get('restrictfilenames'),
                is_id=(k == 'id' or k.endswith('_id')))
            outtmpl = self._outtmpl_expandpath(self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default']))
            filename = self.evaluate_outtmpl(outtmpl, info_dict, sanitize)

            # Some template types (e.g. infojson/description) force an extension
            force_ext = OUTTMPL_TYPES.get(tmpl_type)
            if filename and force_ext is not None:
                filename = replace_extension(filename, force_ext, info_dict.get('ext'))

            # https://github.com/blackjack4494/youtube-dlc/issues/85
            trim_file_name = self.params.get('trim_file_name', False)
            if trim_file_name:
                # NOTE(review): rsplit('.') splits on EVERY dot, and only the
                # last two groups are re-joined — middle dot-groups of a
                # multi-dot name are dropped; confirm this is intended
                fn_groups = filename.rsplit('.')
                ext = fn_groups[-1]
                sub_ext = ''
                if len(fn_groups) > 2:
                    sub_ext = fn_groups[-2]
                filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))

            return filename
        except ValueError as err:
            self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
            return None
1148
1149 def prepare_filename(self, info_dict, dir_type='', warn=False):
1150 """Generate the output filename."""
1151
1152 filename = self._prepare_filename(info_dict, dir_type or 'default')
1153 if not filename and dir_type not in ('', 'temp'):
1154 return ''
1155
1156 if warn:
1157 if not self.params.get('paths'):
1158 pass
1159 elif filename == '-':
1160 self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
1161 elif os.path.isabs(filename):
1162 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
1163 if filename == '-' or not filename:
1164 return filename
1165
1166 return self.get_output_path(dir_type, filename)
1167
    def _match_entry(self, info_dict, incomplete=False, silent=False):
        """Decide whether a video should be downloaded.

        Returns None if the file should be downloaded; otherwise a string
        describing why it is skipped. May raise ExistingVideoReached /
        RejectedVideoReached when the corresponding break_on_* option is set.

        @param incomplete  info_dict may be partial (flat playlist entry)
        @param silent      do not print the skip reason to the screen
        """

        video_title = info_dict.get('title', info_dict.get('id', 'video'))

        def check_filter():
            # Returns a skip-reason string, or None to accept the video
            if 'title' in info_dict:
                # This can happen when we're just evaluating the playlist
                title = info_dict['title']
                matchtitle = self.params.get('matchtitle', False)
                if matchtitle:
                    if not re.search(matchtitle, title, re.IGNORECASE):
                        return '"' + title + '" title did not match pattern "' + matchtitle + '"'
                rejecttitle = self.params.get('rejecttitle', False)
                if rejecttitle:
                    if re.search(rejecttitle, title, re.IGNORECASE):
                        return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
            date = info_dict.get('upload_date')
            if date is not None:
                dateRange = self.params.get('daterange', DateRange())
                if date not in dateRange:
                    return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
            view_count = info_dict.get('view_count')
            if view_count is not None:
                min_views = self.params.get('min_views')
                if min_views is not None and view_count < min_views:
                    return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
                max_views = self.params.get('max_views')
                if max_views is not None and view_count > max_views:
                    return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
            if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
                return 'Skipping "%s" because it is age restricted' % video_title

            match_filter = self.params.get('match_filter')
            if match_filter is not None:
                try:
                    ret = match_filter(info_dict, incomplete=incomplete)
                except TypeError:
                    # For backward compatibility
                    ret = None if incomplete else match_filter(info_dict)
                if ret is not None:
                    return ret
            return None

        # The archive check takes precedence over the filters, and each has
        # its own break-on option / control-flow exception
        if self.in_download_archive(info_dict):
            reason = '%s has already been recorded in the archive' % video_title
            break_opt, break_err = 'break_on_existing', ExistingVideoReached
        else:
            reason = check_filter()
            break_opt, break_err = 'break_on_reject', RejectedVideoReached
        if reason is not None:
            if not silent:
                self.to_screen('[download] ' + reason)
            if self.params.get(break_opt, False):
                raise break_err()
        return reason
1224
1225 @staticmethod
1226 def add_extra_info(info_dict, extra_info):
1227 '''Set the keys from extra_info in info dict if they are missing'''
1228 for key, value in extra_info.items():
1229 info_dict.setdefault(key, value)
1230
    def extract_info(self, url, download=True, ie_key=None, extra_info=None,
                     process=True, force_generic_extractor=False):
        """
        Return a list with a dictionary for each video extracted.

        Arguments:
        url -- URL to extract

        Keyword arguments:
        download -- whether to download videos during extraction
        ie_key -- extractor key hint
        extra_info -- dictionary containing the extra values to add to each result
        process -- whether to resolve all unresolved references (URLs, playlist items),
                   must be True for download to work.
        force_generic_extractor -- force using the generic extractor
        """

        if extra_info is None:
            extra_info = {}

        if not ie_key and force_generic_extractor:
            ie_key = 'Generic'

        if ie_key:
            # A hint was given: consider only that single extractor
            ies = {ie_key: self._get_info_extractor_class(ie_key)}
        else:
            ies = self._ies

        # First extractor that declares the URL suitable wins
        for ie_key, ie in ies.items():
            if not ie.suitable(url):
                continue

            if not ie.working():
                self.report_warning('The program functionality for this site has been marked as broken, '
                                    'and will probably not work.')

            temp_id = ie.get_temp_id(url)
            if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
                self.to_screen("[%s] %s: has already been recorded in archive" % (
                    ie_key, temp_id))
                # Already archived: skip extraction (function returns None)
                break
            return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
        else:
            # for-else: no extractor accepted the URL
            self.report_error('no suitable InfoExtractor for URL %s' % url)
1275
    def __handle_extraction_exceptions(func):
        # Decorator (applied to YoutubeDL methods) that converts the
        # exceptions extraction may raise into reported errors, retries,
        # or re-raised control-flow signals.
        @functools.wraps(func)
        def wrapper(self, *args, **kwargs):
            try:
                return func(self, *args, **kwargs)
            except GeoRestrictedError as e:
                msg = e.msg
                if e.countries:
                    msg += '\nThis video is available in %s.' % ', '.join(
                        map(ISO3166Utils.short2full, e.countries))
                msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
                self.report_error(msg)
            except ExtractorError as e:  # An error we somewhat expected
                self.report_error(compat_str(e), e.format_traceback())
            except ThrottledDownload:
                self.to_stderr('\r')
                self.report_warning('The download speed is below throttle limit. Re-extracting data')
                # NOTE(review): unbounded retry — re-extracts for as long as
                # the download stays below the throttle limit
                return wrapper(self, *args, **kwargs)
            except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached, LazyList.IndexError):
                # Control-flow exceptions: outer callers rely on catching these
                raise
            except Exception as e:
                if self.params.get('ignoreerrors'):
                    self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
                else:
                    raise
        return wrapper
1302
    @__handle_extraction_exceptions
    def __extract_info(self, url, ie, download, extra_info, process):
        # Run the actual extraction with extractor *ie* and hand the result
        # over to process_ie_result() (unless *process* is False)
        ie_result = ie.extract(url)
        if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
            return
        if isinstance(ie_result, list):
            # Backwards compatibility: old IE result format
            ie_result = {
                '_type': 'compat_list',
                'entries': ie_result,
            }
        if extra_info.get('original_url'):
            ie_result.setdefault('original_url', extra_info['original_url'])
        self.add_default_extra_info(ie_result, ie, url)
        if process:
            return self.process_ie_result(ie_result, download, extra_info)
        else:
            return ie_result
1321
1322 def add_default_extra_info(self, ie_result, ie, url):
1323 if url is not None:
1324 self.add_extra_info(ie_result, {
1325 'webpage_url': url,
1326 'original_url': url,
1327 'webpage_url_basename': url_basename(url),
1328 })
1329 if ie is not None:
1330 self.add_extra_info(ie_result, {
1331 'extractor': ie.IE_NAME,
1332 'extractor_key': ie.ie_key(),
1333 })
1334
1335 def process_ie_result(self, ie_result, download=True, extra_info=None):
1336 """
1337 Take the result of the ie(may be modified) and resolve all unresolved
1338 references (URLs, playlist items).
1339
1340 It will also download the videos if 'download'.
1341 Returns the resolved ie_result.
1342 """
1343 if extra_info is None:
1344 extra_info = {}
1345 result_type = ie_result.get('_type', 'video')
1346
1347 if result_type in ('url', 'url_transparent'):
1348 ie_result['url'] = sanitize_url(ie_result['url'])
1349 if ie_result.get('original_url'):
1350 extra_info.setdefault('original_url', ie_result['original_url'])
1351
1352 extract_flat = self.params.get('extract_flat', False)
1353 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1354 or extract_flat is True):
1355 info_copy = ie_result.copy()
1356 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1357 if ie and not ie_result.get('id'):
1358 info_copy['id'] = ie.get_temp_id(ie_result['url'])
1359 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1360 self.add_extra_info(info_copy, extra_info)
1361 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
1362 if self.params.get('force_write_download_archive', False):
1363 self.record_download_archive(info_copy)
1364 return ie_result
1365
1366 if result_type == 'video':
1367 self.add_extra_info(ie_result, extra_info)
1368 ie_result = self.process_video_result(ie_result, download=download)
1369 additional_urls = (ie_result or {}).get('additional_urls')
1370 if additional_urls:
1371 # TODO: Improve MetadataParserPP to allow setting a list
1372 if isinstance(additional_urls, compat_str):
1373 additional_urls = [additional_urls]
1374 self.to_screen(
1375 '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1376 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1377 ie_result['additional_entries'] = [
1378 self.extract_info(
1379 url, download, extra_info,
1380 force_generic_extractor=self.params.get('force_generic_extractor'))
1381 for url in additional_urls
1382 ]
1383 return ie_result
1384 elif result_type == 'url':
1385 # We have to add extra_info to the results because it may be
1386 # contained in a playlist
1387 return self.extract_info(
1388 ie_result['url'], download,
1389 ie_key=ie_result.get('ie_key'),
1390 extra_info=extra_info)
1391 elif result_type == 'url_transparent':
1392 # Use the information from the embedding page
1393 info = self.extract_info(
1394 ie_result['url'], ie_key=ie_result.get('ie_key'),
1395 extra_info=extra_info, download=False, process=False)
1396
1397 # extract_info may return None when ignoreerrors is enabled and
1398 # extraction failed with an error, don't crash and return early
1399 # in this case
1400 if not info:
1401 return info
1402
1403 force_properties = dict(
1404 (k, v) for k, v in ie_result.items() if v is not None)
1405 for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1406 if f in force_properties:
1407 del force_properties[f]
1408 new_result = info.copy()
1409 new_result.update(force_properties)
1410
1411 # Extracted info may not be a video result (i.e.
1412 # info.get('_type', 'video') != video) but rather an url or
1413 # url_transparent. In such cases outer metadata (from ie_result)
1414 # should be propagated to inner one (info). For this to happen
1415 # _type of info should be overridden with url_transparent. This
1416 # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1417 if new_result.get('_type') == 'url':
1418 new_result['_type'] = 'url_transparent'
1419
1420 return self.process_ie_result(
1421 new_result, download=download, extra_info=extra_info)
1422 elif result_type in ('playlist', 'multi_video'):
1423 # Protect from infinite recursion due to recursively nested playlists
1424 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1425 webpage_url = ie_result['webpage_url']
1426 if webpage_url in self._playlist_urls:
1427 self.to_screen(
1428 '[download] Skipping already downloaded playlist: %s'
1429 % ie_result.get('title') or ie_result.get('id'))
1430 return
1431
1432 self._playlist_level += 1
1433 self._playlist_urls.add(webpage_url)
1434 self._sanitize_thumbnails(ie_result)
1435 try:
1436 return self.__process_playlist(ie_result, download)
1437 finally:
1438 self._playlist_level -= 1
1439 if not self._playlist_level:
1440 self._playlist_urls.clear()
1441 elif result_type == 'compat_list':
1442 self.report_warning(
1443 'Extractor %s returned a compat_list result. '
1444 'It needs to be updated.' % ie_result.get('extractor'))
1445
1446 def _fixup(r):
1447 self.add_extra_info(r, {
1448 'extractor': ie_result['extractor'],
1449 'webpage_url': ie_result['webpage_url'],
1450 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1451 'extractor_key': ie_result['extractor_key'],
1452 })
1453 return r
1454 ie_result['entries'] = [
1455 self.process_ie_result(_fixup(r), download, extra_info)
1456 for r in ie_result['entries']
1457 ]
1458 return ie_result
1459 else:
1460 raise Exception('Invalid result type: %s' % result_type)
1461
1462 def _ensure_dir_exists(self, path):
1463 return make_dir(path, self.report_error)
1464
1465 def __process_playlist(self, ie_result, download):
1466 # We process each entry in the playlist
1467 playlist = ie_result.get('title') or ie_result.get('id')
1468 self.to_screen('[download] Downloading playlist: %s' % playlist)
1469
1470 if 'entries' not in ie_result:
1471 raise EntryNotInPlaylist()
1472 incomplete_entries = bool(ie_result.get('requested_entries'))
1473 if incomplete_entries:
1474 def fill_missing_entries(entries, indexes):
1475 ret = [None] * max(*indexes)
1476 for i, entry in zip(indexes, entries):
1477 ret[i - 1] = entry
1478 return ret
1479 ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
1480
1481 playlist_results = []
1482
1483 playliststart = self.params.get('playliststart', 1)
1484 playlistend = self.params.get('playlistend')
1485 # For backwards compatibility, interpret -1 as whole list
1486 if playlistend == -1:
1487 playlistend = None
1488
1489 playlistitems_str = self.params.get('playlist_items')
1490 playlistitems = None
1491 if playlistitems_str is not None:
1492 def iter_playlistitems(format):
1493 for string_segment in format.split(','):
1494 if '-' in string_segment:
1495 start, end = string_segment.split('-')
1496 for item in range(int(start), int(end) + 1):
1497 yield int(item)
1498 else:
1499 yield int(string_segment)
1500 playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
1501
1502 ie_entries = ie_result['entries']
1503 msg = (
1504 'Downloading %d videos' if not isinstance(ie_entries, list)
1505 else 'Collected %d videos; downloading %%d of them' % len(ie_entries))
1506
1507 if isinstance(ie_entries, list):
1508 def get_entry(i):
1509 return ie_entries[i - 1]
1510 else:
1511 if not isinstance(ie_entries, PagedList):
1512 ie_entries = LazyList(ie_entries)
1513
1514 def get_entry(i):
1515 return YoutubeDL.__handle_extraction_exceptions(
1516 lambda self, i: ie_entries[i - 1]
1517 )(self, i)
1518
1519 entries = []
1520 items = playlistitems if playlistitems is not None else itertools.count(playliststart)
1521 for i in items:
1522 if i == 0:
1523 continue
1524 if playlistitems is None and playlistend is not None and playlistend < i:
1525 break
1526 entry = None
1527 try:
1528 entry = get_entry(i)
1529 if entry is None:
1530 raise EntryNotInPlaylist()
1531 except (IndexError, EntryNotInPlaylist):
1532 if incomplete_entries:
1533 raise EntryNotInPlaylist()
1534 elif not playlistitems:
1535 break
1536 entries.append(entry)
1537 try:
1538 if entry is not None:
1539 self._match_entry(entry, incomplete=True, silent=True)
1540 except (ExistingVideoReached, RejectedVideoReached):
1541 break
1542 ie_result['entries'] = entries
1543
1544 # Save playlist_index before re-ordering
1545 entries = [
1546 ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
1547 for i, entry in enumerate(entries, 1)
1548 if entry is not None]
1549 n_entries = len(entries)
1550
1551 if not playlistitems and (playliststart or playlistend):
1552 playlistitems = list(range(playliststart, playliststart + n_entries))
1553 ie_result['requested_entries'] = playlistitems
1554
1555 if not self.params.get('simulate') and self.params.get('allow_playlist_files', True):
1556 ie_copy = {
1557 'playlist': playlist,
1558 'playlist_id': ie_result.get('id'),
1559 'playlist_title': ie_result.get('title'),
1560 'playlist_uploader': ie_result.get('uploader'),
1561 'playlist_uploader_id': ie_result.get('uploader_id'),
1562 'playlist_index': 0,
1563 'n_entries': n_entries,
1564 }
1565 ie_copy.update(dict(ie_result))
1566
1567 if self._write_info_json('playlist', ie_result,
1568 self.prepare_filename(ie_copy, 'pl_infojson')) is None:
1569 return
1570 if self._write_description('playlist', ie_result,
1571 self.prepare_filename(ie_copy, 'pl_description')) is None:
1572 return
1573 # TODO: This should be passed to ThumbnailsConvertor if necessary
1574 self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
1575
1576 if self.params.get('playlistreverse', False):
1577 entries = entries[::-1]
1578 if self.params.get('playlistrandom', False):
1579 random.shuffle(entries)
1580
1581 x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1582
1583 self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
1584 failures = 0
1585 max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
1586 for i, entry_tuple in enumerate(entries, 1):
1587 playlist_index, entry = entry_tuple
1588 if 'playlist-index' in self.params.get('compat_opts', []):
1589 playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
1590 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
1591 # This __x_forwarded_for_ip thing is a bit ugly but requires
1592 # minimal changes
1593 if x_forwarded_for:
1594 entry['__x_forwarded_for_ip'] = x_forwarded_for
1595 extra = {
1596 'n_entries': n_entries,
1597 '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
1598 'playlist_index': playlist_index,
1599 'playlist_autonumber': i,
1600 'playlist': playlist,
1601 'playlist_id': ie_result.get('id'),
1602 'playlist_title': ie_result.get('title'),
1603 'playlist_uploader': ie_result.get('uploader'),
1604 'playlist_uploader_id': ie_result.get('uploader_id'),
1605 'extractor': ie_result['extractor'],
1606 'webpage_url': ie_result['webpage_url'],
1607 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1608 'extractor_key': ie_result['extractor_key'],
1609 }
1610
1611 if self._match_entry(entry, incomplete=True) is not None:
1612 continue
1613
1614 entry_result = self.__process_iterable_entry(entry, download, extra)
1615 if not entry_result:
1616 failures += 1
1617 if failures >= max_failures:
1618 self.report_error(
1619 'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
1620 break
1621 # TODO: skip failed (empty) entries?
1622 playlist_results.append(entry_result)
1623 ie_result['entries'] = playlist_results
1624 self.to_screen('[download] Finished downloading playlist: %s' % playlist)
1625 return ie_result
1626
    @__handle_extraction_exceptions
    def __process_iterable_entry(self, entry, download, extra_info):
        """Process a single playlist entry via process_ie_result.

        Wrapped by __handle_extraction_exceptions (defined elsewhere in the
        class), which presumably converts extraction failures into a falsy
        return value for the playlist loop — confirm against its definition.
        """
        return self.process_ie_result(
            entry, download=download, extra_info=extra_info)
1631
1632 def _build_format_filter(self, filter_spec):
1633 " Returns a function to filter the formats according to the filter_spec "
1634
1635 OPERATORS = {
1636 '<': operator.lt,
1637 '<=': operator.le,
1638 '>': operator.gt,
1639 '>=': operator.ge,
1640 '=': operator.eq,
1641 '!=': operator.ne,
1642 }
1643 operator_rex = re.compile(r'''(?x)\s*
1644 (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1645 (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1646 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
1647 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1648 m = operator_rex.fullmatch(filter_spec)
1649 if m:
1650 try:
1651 comparison_value = int(m.group('value'))
1652 except ValueError:
1653 comparison_value = parse_filesize(m.group('value'))
1654 if comparison_value is None:
1655 comparison_value = parse_filesize(m.group('value') + 'B')
1656 if comparison_value is None:
1657 raise ValueError(
1658 'Invalid value %r in format specification %r' % (
1659 m.group('value'), filter_spec))
1660 op = OPERATORS[m.group('op')]
1661
1662 if not m:
1663 STR_OPERATORS = {
1664 '=': operator.eq,
1665 '^=': lambda attr, value: attr.startswith(value),
1666 '$=': lambda attr, value: attr.endswith(value),
1667 '*=': lambda attr, value: value in attr,
1668 }
1669 str_operator_rex = re.compile(r'''(?x)\s*
1670 (?P<key>[a-zA-Z0-9._-]+)\s*
1671 (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1672 (?P<value>[a-zA-Z0-9._-]+)\s*
1673 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1674 m = str_operator_rex.fullmatch(filter_spec)
1675 if m:
1676 comparison_value = m.group('value')
1677 str_op = STR_OPERATORS[m.group('op')]
1678 if m.group('negation'):
1679 op = lambda attr, value: not str_op(attr, value)
1680 else:
1681 op = str_op
1682
1683 if not m:
1684 raise SyntaxError('Invalid filter specification %r' % filter_spec)
1685
1686 def _filter(f):
1687 actual_value = f.get(m.group('key'))
1688 if actual_value is None:
1689 return m.group('none_inclusive')
1690 return op(actual_value, comparison_value)
1691 return _filter
1692
1693 def _default_format_spec(self, info_dict, download=True):
1694
1695 def can_merge():
1696 merger = FFmpegMergerPP(self)
1697 return merger.available and merger.can_merge()
1698
1699 prefer_best = (
1700 not self.params.get('simulate')
1701 and download
1702 and (
1703 not can_merge()
1704 or info_dict.get('is_live', False)
1705 or self.outtmpl_dict['default'] == '-'))
1706 compat = (
1707 prefer_best
1708 or self.params.get('allow_multiple_audio_streams', False)
1709 or 'format-spec' in self.params.get('compat_opts', []))
1710
1711 return (
1712 'best/bestvideo+bestaudio' if prefer_best
1713 else 'bestvideo*+bestaudio/best' if not compat
1714 else 'bestvideo+bestaudio/best')
1715
    def build_format_selector(self, format_spec):
        """Compile *format_spec* (e.g. 'bestvideo*+bestaudio/best') into a
        selector function.

        The spec is tokenized with the stdlib tokenizer, parsed into a tree
        of FormatSelector nodes and compiled into a function that takes a
        ctx dict (keys 'formats' and 'incomplete_formats') and yields the
        format dicts chosen for download.
        """
        def syntax_error(note, start):
            # Build (not raise) a SyntaxError pointing at the offending column
            message = (
                'Invalid format specification: '
                '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
            return SyntaxError(message)

        # Node types of the parsed selector tree
        PICKFIRST = 'PICKFIRST'
        MERGE = 'MERGE'
        SINGLE = 'SINGLE'
        GROUP = 'GROUP'
        FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

        allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
                                  'video': self.params.get('allow_multiple_video_streams', False)}

        check_formats = self.params.get('check_formats')

        def _parse_filter(tokens):
            # Consume tokens up to the matching ']' and return the raw
            # filter expression as a single string
            filter_parts = []
            for type, string, start, _, _ in tokens:
                if type == tokenize.OP and string == ']':
                    return ''.join(filter_parts)
                else:
                    filter_parts.append(string)

        def _remove_unused_ops(tokens):
            # Remove operators that we don't use and join them with the surrounding strings
            # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
            ALLOWED_OPS = ('/', '+', ',', '(', ')')
            last_string, last_start, last_end, last_line = None, None, None, None
            for type, string, start, end, line in tokens:
                if type == tokenize.OP and string == '[':
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                    # everything inside brackets will be handled by _parse_filter
                    for type, string, start, end, line in tokens:
                        yield type, string, start, end, line
                        if type == tokenize.OP and string == ']':
                            break
                elif type == tokenize.OP and string in ALLOWED_OPS:
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                    # Glue adjacent name/number/other-op tokens back together
                    if not last_string:
                        last_string = string
                        last_start = start
                        last_end = end
                    else:
                        last_string += string
            if last_string:
                yield tokenize.NAME, last_string, last_start, last_end, last_line

        def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
            # Recursive-descent parse of a comma-separated selector list;
            # the inside_* flags decide which operators end the current level
            selectors = []
            current_selector = None
            for type, string, start, _, _ in tokens:
                # ENCODING is only defined in python 3.x
                if type == getattr(tokenize, 'ENCODING', None):
                    continue
                elif type in [tokenize.NAME, tokenize.NUMBER]:
                    current_selector = FormatSelector(SINGLE, string, [])
                elif type == tokenize.OP:
                    if string == ')':
                        if not inside_group:
                            # ')' will be handled by the parentheses group
                            tokens.restore_last_token()
                        break
                    elif inside_merge and string in ['/', ',']:
                        tokens.restore_last_token()
                        break
                    elif inside_choice and string == ',':
                        tokens.restore_last_token()
                        break
                    elif string == ',':
                        if not current_selector:
                            raise syntax_error('"," must follow a format selector', start)
                        selectors.append(current_selector)
                        current_selector = None
                    elif string == '/':
                        if not current_selector:
                            raise syntax_error('"/" must follow a format selector', start)
                        first_choice = current_selector
                        second_choice = _parse_format_selection(tokens, inside_choice=True)
                        current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                    elif string == '[':
                        if not current_selector:
                            current_selector = FormatSelector(SINGLE, 'best', [])
                        format_filter = _parse_filter(tokens)
                        current_selector.filters.append(format_filter)
                    elif string == '(':
                        if current_selector:
                            raise syntax_error('Unexpected "("', start)
                        group = _parse_format_selection(tokens, inside_group=True)
                        current_selector = FormatSelector(GROUP, group, [])
                    elif string == '+':
                        if not current_selector:
                            raise syntax_error('Unexpected "+"', start)
                        selector_1 = current_selector
                        selector_2 = _parse_format_selection(tokens, inside_merge=True)
                        if not selector_2:
                            raise syntax_error('Expected a selector', start)
                        current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                    else:
                        raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
                elif type == tokenize.ENDMARKER:
                    break
            if current_selector:
                selectors.append(current_selector)
            return selectors

        def _merge(formats_pair):
            # Combine two selected formats (each possibly already merged)
            # into one synthetic format dict with 'requested_formats'
            format_1, format_2 = formats_pair

            formats_info = []
            formats_info.extend(format_1.get('requested_formats', (format_1,)))
            formats_info.extend(format_2.get('requested_formats', (format_2,)))

            if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
                # Drop storyboard-like formats and surplus audio/video streams
                # when multiple streams of a kind are not allowed
                get_no_more = {'video': False, 'audio': False}
                for (i, fmt_info) in enumerate(formats_info):
                    if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
                        formats_info.pop(i)
                        continue
                    for aud_vid in ['audio', 'video']:
                        if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
                            if get_no_more[aud_vid]:
                                formats_info.pop(i)
                                break
                            get_no_more[aud_vid] = True

            if len(formats_info) == 1:
                return formats_info[0]

            video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
            audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']

            the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
            the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None

            output_ext = self.params.get('merge_output_format')
            if not output_ext:
                if the_only_video:
                    output_ext = the_only_video['ext']
                elif the_only_audio and not video_fmts:
                    output_ext = the_only_audio['ext']
                else:
                    output_ext = 'mkv'

            filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))

            new_dict = {
                'requested_formats': formats_info,
                'format': '+'.join(filtered('format')),
                'format_id': '+'.join(filtered('format_id')),
                'ext': output_ext,
                'protocol': '+'.join(map(determine_protocol, formats_info)),
                'language': '+'.join(orderedSet(filtered('language'))),
                'format_note': '+'.join(orderedSet(filtered('format_note'))),
                'filesize_approx': sum(filtered('filesize', 'filesize_approx')),
                'tbr': sum(filtered('tbr', 'vbr', 'abr')),
            }

            if the_only_video:
                new_dict.update({
                    'width': the_only_video.get('width'),
                    'height': the_only_video.get('height'),
                    'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
                    'fps': the_only_video.get('fps'),
                    'dynamic_range': the_only_video.get('dynamic_range'),
                    'vcodec': the_only_video.get('vcodec'),
                    'vbr': the_only_video.get('vbr'),
                    'stretched_ratio': the_only_video.get('stretched_ratio'),
                })

            if the_only_audio:
                new_dict.update({
                    'acodec': the_only_audio.get('acodec'),
                    'abr': the_only_audio.get('abr'),
                    'asr': the_only_audio.get('asr'),
                })

            return new_dict

        def _check_formats(formats):
            # When --check-formats is set, keep only formats whose URL
            # survives a small test download; otherwise pass everything through
            if not check_formats:
                yield from formats
                return
            for f in formats:
                self.to_screen('[info] Testing format %s' % f['format_id'])
                temp_file = tempfile.NamedTemporaryFile(
                    suffix='.tmp', delete=False,
                    dir=self.get_output_path('temp') or None)
                temp_file.close()
                try:
                    success, _ = self.dl(temp_file.name, f, test=True)
                except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
                    success = False
                finally:
                    if os.path.exists(temp_file.name):
                        try:
                            os.remove(temp_file.name)
                        except OSError:
                            self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
                if success:
                    yield f
                else:
                    self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])

        def _build_selector_function(selector):
            # Recursively compile a selector tree node into a function
            # ctx -> iterator of format dicts
            if isinstance(selector, list):  # ,
                fs = [_build_selector_function(s) for s in selector]

                def selector_function(ctx):
                    for f in fs:
                        yield from f(ctx)
                return selector_function

            elif selector.type == GROUP:  # ()
                selector_function = _build_selector_function(selector.selector)

            elif selector.type == PICKFIRST:  # /
                fs = [_build_selector_function(s) for s in selector.selector]

                def selector_function(ctx):
                    for f in fs:
                        picked_formats = list(f(ctx))
                        if picked_formats:
                            return picked_formats
                    return []

            elif selector.type == MERGE:  # +
                selector_1, selector_2 = map(_build_selector_function, selector.selector)

                def selector_function(ctx):
                    # deepcopy so each side of '+' filters an untouched ctx
                    for pair in itertools.product(
                            selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
                        yield _merge(pair)

            elif selector.type == SINGLE:  # atom
                format_spec = selector.selector or 'best'

                # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
                if format_spec == 'all':
                    def selector_function(ctx):
                        yield from _check_formats(ctx['formats'])
                elif format_spec == 'mergeall':
                    def selector_function(ctx):
                        formats = list(_check_formats(ctx['formats']))
                        if not formats:
                            return
                        merged_format = formats[-1]
                        for f in formats[-2::-1]:
                            merged_format = _merge((merged_format, f))
                        yield merged_format

                else:
                    format_fallback, format_reverse, format_idx = False, True, 1
                    mobj = re.match(
                        r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
                        format_spec)
                    if mobj is not None:
                        format_idx = int_or_none(mobj.group('n'), default=1)
                        format_reverse = mobj.group('bw')[0] == 'b'
                        format_type = (mobj.group('type') or [None])[0]
                        not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
                        format_modified = mobj.group('mod') is not None

                        format_fallback = not format_type and not format_modified  # for b, w
                        _filter_f = (
                            (lambda f: f.get('%scodec' % format_type) != 'none')
                            if format_type and format_modified  # bv*, ba*, wv*, wa*
                            else (lambda f: f.get('%scodec' % not_format_type) == 'none')
                            if format_type  # bv, ba, wv, wa
                            else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                            if not format_modified  # b, w
                            else lambda f: True)  # b*, w*
                        filter_f = lambda f: _filter_f(f) and (
                            f.get('vcodec') != 'none' or f.get('acodec') != 'none')
                    else:
                        # Not a best/worst atom: match by extension group or format_id
                        if format_spec in self._format_selection_exts['audio']:
                            filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
                        elif format_spec in self._format_selection_exts['video']:
                            filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
                        elif format_spec in self._format_selection_exts['storyboards']:
                            filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
                        else:
                            filter_f = lambda f: f.get('format_id') == format_spec  # id

                    def selector_function(ctx):
                        formats = list(ctx['formats'])
                        matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                        if format_fallback and ctx['incomplete_formats'] and not matches:
                            # for extractors with incomplete formats (audio only (soundcloud)
                            # or video only (imgur)) best/worst will fallback to
                            # best/worst {video,audio}-only format
                            matches = formats
                        matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
                        try:
                            yield matches[format_idx - 1]
                        except IndexError:
                            return

            filters = [self._build_format_filter(f) for f in selector.filters]

            def final_selector(ctx):
                # Apply '[...]' filters to a copy of the ctx before selecting
                ctx_copy = copy.deepcopy(ctx)
                for _filter in filters:
                    ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
                return selector_function(ctx_copy)
            return final_selector

        stream = io.BytesIO(format_spec.encode('utf-8'))
        try:
            tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
        except tokenize.TokenError:
            raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

        class TokenIterator(object):
            # Iterator over the token list with one-token push-back,
            # needed by the recursive parser
            def __init__(self, tokens):
                self.tokens = tokens
                self.counter = 0

            def __iter__(self):
                return self

            def __next__(self):
                if self.counter >= len(self.tokens):
                    raise StopIteration()
                value = self.tokens[self.counter]
                self.counter += 1
                return value

            next = __next__

            def restore_last_token(self):
                self.counter -= 1

        parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
        return _build_selector_function(parsed_selector)
2060
2061 def _calc_headers(self, info_dict):
2062 res = std_headers.copy()
2063
2064 add_headers = info_dict.get('http_headers')
2065 if add_headers:
2066 res.update(add_headers)
2067
2068 cookies = self._calc_cookies(info_dict)
2069 if cookies:
2070 res['Cookie'] = cookies
2071
2072 if 'X-Forwarded-For' not in res:
2073 x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2074 if x_forwarded_for_ip:
2075 res['X-Forwarded-For'] = x_forwarded_for_ip
2076
2077 return res
2078
2079 def _calc_cookies(self, info_dict):
2080 pr = sanitized_Request(info_dict['url'])
2081 self.cookiejar.add_cookie_header(pr)
2082 return pr.get_header('Cookie')
2083
2084 def _sanitize_thumbnails(self, info_dict):
2085 thumbnails = info_dict.get('thumbnails')
2086 if thumbnails is None:
2087 thumbnail = info_dict.get('thumbnail')
2088 if thumbnail:
2089 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2090 if thumbnails:
2091 thumbnails.sort(key=lambda t: (
2092 t.get('preference') if t.get('preference') is not None else -1,
2093 t.get('width') if t.get('width') is not None else -1,
2094 t.get('height') if t.get('height') is not None else -1,
2095 t.get('id') if t.get('id') is not None else '',
2096 t.get('url')))
2097
2098 def thumbnail_tester():
2099 def test_thumbnail(t):
2100 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2101 try:
2102 self.urlopen(HEADRequest(t['url']))
2103 except network_exceptions as err:
2104 self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2105 return False
2106 return True
2107 return test_thumbnail
2108
2109 for i, t in enumerate(thumbnails):
2110 if t.get('id') is None:
2111 t['id'] = '%d' % i
2112 if t.get('width') and t.get('height'):
2113 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2114 t['url'] = sanitize_url(t['url'])
2115
2116 if self.params.get('check_formats'):
2117 info_dict['thumbnails'] = LazyList(filter(thumbnail_tester(), thumbnails[::-1])).reverse()
2118 else:
2119 info_dict['thumbnails'] = thumbnails
2120
    def process_video_result(self, info_dict, download=True):
        """Sanitize a single extracted video result, run format selection and
        hand every selected format to process_info.

        Returns info_dict updated with the last selected format (kept for
        backwards compatibility).
        """
        assert info_dict.get('_type', 'video') == 'video'

        # 'id' and 'title' are mandatory extractor output
        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result',
                                 video_id=info_dict['id'], ie=info_dict['extractor'])

        def report_force_conversion(field, field_not, conversion):
            self.report_warning(
                '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
                % (field, field_not, conversion))

        def sanitize_string_field(info, string_field):
            # Coerce a non-string field to str, warning about the extractor bug
            field = info.get(string_field)
            if field is None or isinstance(field, compat_str):
                return
            report_force_conversion(string_field, 'a string', 'string')
            info[string_field] = compat_str(field)

        def sanitize_numeric_fields(info):
            # Coerce non-numeric values of the known numeric fields to int
            for numeric_field in self._NUMERIC_FIELDS:
                field = info.get(numeric_field)
                if field is None or isinstance(field, compat_numeric_types):
                    continue
                report_force_conversion(numeric_field, 'numeric', 'int')
                info[numeric_field] = int_or_none(field)

        sanitize_string_field(info_dict, 'id')
        sanitize_numeric_fields(info_dict)

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        self._sanitize_thumbnails(info_dict)

        thumbnail = info_dict.get('thumbnail')
        thumbnails = info_dict.get('thumbnails')
        if thumbnail:
            info_dict['thumbnail'] = sanitize_url(thumbnail)
        elif thumbnails:
            # thumbnails are sorted worst-to-best, so the last is the best one
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if info_dict.get('display_id') is None and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        if info_dict.get('duration') is not None:
            info_dict['duration_string'] = formatSeconds(info_dict['duration'])

        # Derive the date fields from their timestamps when missing
        for ts_key, date_key in (
                ('timestamp', 'upload_date'),
                ('release_timestamp', 'release_date'),
        ):
            if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
                # Working around out-of-range timestamp values (e.g. negative ones on Windows,
                # see http://bugs.python.org/issue1646728)
                try:
                    upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
                    info_dict[date_key] = upload_date.strftime('%Y%m%d')
                except (ValueError, OverflowError, OSError):
                    pass

        # Reconcile live_status with the is_live/was_live flags, in both directions
        live_keys = ('is_live', 'was_live')
        live_status = info_dict.get('live_status')
        if live_status is None:
            for key in live_keys:
                if info_dict.get(key) is False:
                    continue
                if info_dict.get(key):
                    live_status = key
                break
            if all(info_dict.get(key) is False for key in live_keys):
                live_status = 'not_live'
        if live_status:
            info_dict['live_status'] = live_status
            for key in live_keys:
                if info_dict.get(key) is None:
                    info_dict[key] = (live_status == key)

        # Auto generate title fields corresponding to the *_number fields when missing
        # in order to always have clean titles. This is very common for TV series.
        for field in ('chapter', 'season', 'episode'):
            if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
                info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])

        # Sanitize subtitle URLs and fill in missing subtitle extensions
        for cc_kind in ('subtitles', 'automatic_captions'):
            cc = info_dict.get(cc_kind)
            if cc:
                for _, subtitle in cc.items():
                    for subtitle_format in subtitle:
                        if subtitle_format.get('url'):
                            subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                        if subtitle_format.get('ext') is None:
                            subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

        automatic_captions = info_dict.get('automatic_captions')
        subtitles = info_dict.get('subtitles')

        info_dict['requested_subtitles'] = self.process_subtitles(
            info_dict['id'], subtitles, automatic_captions)

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
        if not self.params.get('allow_unplayable_formats'):
            formats = [f for f in formats if not f.get('has_drm')]

        if not formats:
            self.raise_no_formats(info_dict)

        def is_wellformed(f):
            url = f.get('url')
            if not url:
                self.report_warning(
                    '"url" field is missing or empty - skipping format, '
                    'there is an error in extractor')
                return False
            if isinstance(url, bytes):
                sanitize_string_field(f, 'url')
            return True

        # Filter out malformed formats for better extraction robustness
        formats = list(filter(is_wellformed, formats))

        formats_dict = {}

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            sanitize_string_field(format, 'format_id')
            sanitize_numeric_fields(format)
            format['url'] = sanitize_url(format['url'])
            if not format.get('format_id'):
                format['format_id'] = compat_str(i)
            else:
                # Sanitize format_id from characters used in format selector expression
                format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
            format_id = format['format_id']
            if format_id not in formats_dict:
                formats_dict[format_id] = []
            formats_dict[format_id].append(format)

        # Make sure all formats have unique format_id
        common_exts = set(itertools.chain(*self._format_selection_exts.values()))
        for format_id, ambiguous_formats in formats_dict.items():
            ambigious_id = len(ambiguous_formats) > 1
            for i, format in enumerate(ambiguous_formats):
                if ambigious_id:
                    format['format_id'] = '%s-%d' % (format_id, i)
                if format.get('ext') is None:
                    format['ext'] = determine_ext(format['url']).lower()
                # Ensure there is no conflict between id and ext in format selection
                # See https://github.com/yt-dlp/yt-dlp/issues/1282
                if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
                    format['format_id'] = 'f%s' % format['format_id']

        # Fill in the remaining derived per-format fields
        for i, format in enumerate(formats):
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=format_field(format, 'format_note', ' (%s)'),
                )
            if format.get('protocol') is None:
                format['protocol'] = determine_protocol(format)
            if format.get('resolution') is None:
                format['resolution'] = self.format_resolution(format, default=None)
            if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
                format['dynamic_range'] = 'SDR'
            # Add HTTP headers, so that external programs can use them from the
            # json output
            full_format_info = info_dict.copy()
            full_format_info.update(format)
            format['http_headers'] = self._calc_headers(full_format_info)
        # Remove private housekeeping stuff
        if '__x_forwarded_for_ip' in info_dict:
            del info_dict['__x_forwarded_for_ip']

        # TODO Central sorting goes here

        if not formats or formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats

        info_dict, _ = self.pre_process(info_dict)

        # Handle the listing options (-F, --list-thumbnails, --list-subs)
        if self.params.get('list_thumbnails'):
            self.list_thumbnails(info_dict)
        if self.params.get('listformats'):
            if not info_dict.get('formats') and not info_dict.get('url'):
                self.to_screen('%s has no formats' % info_dict['id'])
            else:
                self.list_formats(info_dict)
        if self.params.get('listsubtitles'):
            if 'automatic_captions' in info_dict:
                self.list_subtitles(
                    info_dict['id'], automatic_captions, 'automatic captions')
            self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
        list_only = self.params.get('simulate') is None and (
            self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
        if list_only:
            # Without this printing, -F --print-json will not work
            self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
            return

        format_selector = self.format_selector
        if format_selector is None:
            req_format = self._default_format_spec(info_dict, download=download)
            self.write_debug('Default format spec: %s' % req_format)
            format_selector = self.build_format_selector(req_format)

        # While in format selection we may need to have an access to the original
        # format set in order to calculate some metrics or do some processing.
        # For now we need to be able to guess whether original formats provided
        # by extractor are incomplete or not (i.e. whether extractor provides only
        # video-only or audio-only formats) for proper formats selection for
        # extractors with such incomplete formats (see
        # https://github.com/ytdl-org/youtube-dl/pull/5556).
        # Since formats may be filtered during format selection and may not match
        # the original formats the results may be incorrect. Thus original formats
        # or pre-calculated metrics should be passed to format selection routines
        # as well.
        # We will pass a context object containing all necessary additional data
        # instead of just formats.
        # This fixes incorrect format selection issue (see
        # https://github.com/ytdl-org/youtube-dl/issues/10083).
        incomplete_formats = (
            # All formats are video-only or
            all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
            # all formats are audio-only
            or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))

        ctx = {
            'formats': formats,
            'incomplete_formats': incomplete_formats,
        }

        formats_to_download = list(format_selector(ctx))
        if not formats_to_download:
            if not self.params.get('ignore_no_formats_error'):
                raise ExtractorError('Requested format is not available', expected=True,
                                     video_id=info_dict['id'], ie=info_dict['extractor'])
            else:
                self.report_warning('Requested format is not available')
                # Process what we can, even without any available formats.
                self.process_info(dict(info_dict))
        elif download:
            self.to_screen(
                '[info] %s: Downloading %d format(s): %s' % (
                    info_dict['id'], len(formats_to_download),
                    ", ".join([f['format_id'] for f in formats_to_download])))
            for fmt in formats_to_download:
                new_info = dict(info_dict)
                # Save a reference to the original info_dict so that it can be modified in process_info if needed
                new_info['__original_infodict'] = info_dict
                new_info.update(fmt)
                self.process_info(new_info)
        # We update the info dict with the selected best quality format (backwards compatibility)
        if formats_to_download:
            info_dict.update(formats_to_download[-1])
        return info_dict
2392
def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
    """Select the requested subtitles and their format"""
    available_subs = {}
    if normal_subtitles and self.params.get('writesubtitles'):
        available_subs.update(normal_subtitles)
    if automatic_captions and self.params.get('writeautomaticsub'):
        # Automatic captions never shadow a real subtitle track of the same language
        for lang, cap_info in automatic_captions.items():
            available_subs.setdefault(lang, cap_info)

    if not available_subs or not (
            self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
        return None

    all_sub_langs = available_subs.keys()
    if self.params.get('allsubtitles', False):
        requested_langs = all_sub_langs
    elif self.params.get('subtitleslangs', False):
        # A list is used so that the order of languages will be the same as
        # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
        requested_langs = []
        for lang_re in self.params.get('subtitleslangs'):
            if lang_re == 'all':
                requested_langs.extend(all_sub_langs)
                continue
            discard = lang_re[0] == '-'
            if discard:
                lang_re = lang_re[1:]
            pattern = re.compile(lang_re + '$')
            current_langs = [lang for lang in all_sub_langs if pattern.match(lang)]
            if discard:
                # Remove every occurrence of each matched language
                requested_langs = [lang for lang in requested_langs if lang not in current_langs]
            else:
                requested_langs.extend(current_langs)
        requested_langs = orderedSet(requested_langs)
    elif 'en' in available_subs:
        requested_langs = ['en']
    else:
        requested_langs = [next(iter(all_sub_langs))]
    if requested_langs:
        self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))

    formats_query = self.params.get('subtitlesformat', 'best')
    formats_preference = formats_query.split('/') if formats_query else []
    subs = {}
    for lang in requested_langs:
        formats = available_subs.get(lang)
        if formats is None:
            self.report_warning('%s subtitles not available for %s' % (lang, video_id))
            continue
        for ext in formats_preference:
            if ext == 'best':
                chosen = formats[-1]
                break
            matches = [f for f in formats if f['ext'] == ext]
            if matches:
                chosen = matches[-1]
                break
        else:
            # No stated preference matched; fall back to the last (best) format
            chosen = formats[-1]
            self.report_warning(
                'No subtitle format found matching "%s" for language %s, '
                'using %s' % (formats_query, lang, chosen['ext']))
        subs[lang] = chosen
    return subs
2460
def __forced_printings(self, info_dict, filename, incomplete):
    """Handle the --force-* and --print options: write selected fields to stdout."""
    info_dict = info_dict.copy()
    if filename is not None:
        info_dict['filename'] = filename
    if info_dict.get('requested_formats') is not None:
        # For RTMP URLs, also include the playpath
        info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
    elif 'url' in info_dict:
        info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')

    def print_mandatory(field, actual_field=None):
        # Printed even for incomplete entries, unless the value is missing
        if actual_field is None:
            actual_field = field
        if self.params.get('force%s' % field, False) and (
                not incomplete or info_dict.get(actual_field) is not None):
            self.to_stdout(info_dict[actual_field])

    def print_optional(field):
        if self.params.get('force%s' % field, False) and info_dict.get(field) is not None:
            self.to_stdout(info_dict[field])

    if self.params.get('forceprint') or self.params.get('forcejson'):
        self.post_extract(info_dict)
    for tmpl in self.params.get('forceprint', []):
        # 'field' and 'field=' shorthands are expanded into output templates
        mobj = re.match(r'\w+(=?)$', tmpl)
        if mobj and mobj.group(1):
            tmpl = f'{tmpl[:-1]} = %({tmpl[:-1]})s'
        elif mobj:
            tmpl = '%({})s'.format(tmpl)
        self.to_stdout(self.evaluate_outtmpl(tmpl, info_dict))

    print_mandatory('title')
    print_mandatory('id')
    print_mandatory('url', 'urls')
    print_optional('thumbnail')
    print_optional('description')
    print_optional('filename')
    if self.params.get('forceduration') and info_dict.get('duration') is not None:
        self.to_stdout(formatSeconds(info_dict['duration']))
    print_mandatory('format')

    if self.params.get('forcejson'):
        self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
2505
def dl(self, name, info, subtitle=False, test=False):
    """Invoke a suitable downloader for ``info``, writing to ``name``.

    When ``test`` is true, a throw-away parameter set is used so that only
    a small part of the media is fetched and no .ytdl file is written.
    Returns whatever the downloader's ``download`` returns.
    """
    if not info.get('url'):
        self.raise_no_formats(info, True)

    if not test:
        params = self.params
    else:
        verbose = self.params.get('verbose')
        params = {
            'test': True,
            'quiet': self.params.get('quiet') or not verbose,
            'verbose': verbose,
            'noprogress': not verbose,
            'nopart': True,
            'skip_unavailable_fragments': False,
            'keep_fragments': False,
            'overwrites': True,
            '_no_ytdl_file': True,
        }

    downloader = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
    if not test:
        for hook in self._progress_hooks:
            downloader.add_progress_hook(hook)

    requested_urls = [f['url'] for f in info.get('requested_formats', [])]
    urls = '", "'.join(requested_urls or [info['url']])
    self.write_debug('Invoking downloader on "%s"' % urls)

    # Work on a deep copy so the downloader cannot mutate the caller's dict
    new_info = copy.deepcopy(self._copy_infodict(info))
    if new_info.get('http_headers') is None:
        new_info['http_headers'] = self._calc_headers(new_info)
    return downloader.download(name, new_info, subtitle)
2536
def process_info(self, info_dict):
    """Process a single resolved IE result.

    Everything that happens to one video after format selection: forced
    printing, writing description/subtitles/thumbnails/info-json/annotations/
    internet shortcuts, the actual download (including merging of multiple
    requested formats), ffmpeg fixups, postprocessing, and recording into
    the download archive. Raises MaxDownloadsReached when --max-downloads
    is hit.
    """

    assert info_dict.get('_type', 'video') == 'video'

    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None:
        if self._num_downloads >= int(max_downloads):
            raise MaxDownloadsReached()

    # TODO: backward compatibility, to be removed
    info_dict['fulltitle'] = info_dict['title']

    if 'format' not in info_dict and 'ext' in info_dict:
        info_dict['format'] = info_dict['ext']

    # A non-None return value means the video is to be skipped (filters etc.)
    if self._match_entry(info_dict) is not None:
        return

    self.post_extract(info_dict)
    self._num_downloads += 1

    # info_dict['_filename'] needs to be set for backward compatibility
    info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
    temp_filename = self.prepare_filename(info_dict, 'temp')
    files_to_move = {}

    # Forced printings
    self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))

    if self.params.get('simulate'):
        if self.params.get('force_write_download_archive', False):
            self.record_download_archive(info_dict)
        # Do nothing else if in simulate mode
        return

    if full_filename is None:
        return
    if not self._ensure_dir_exists(encodeFilename(full_filename)):
        return
    if not self._ensure_dir_exists(encodeFilename(temp_filename)):
        return

    # Write sidecar files. Each _write_* helper returns None on fatal error.
    if self._write_description('video', info_dict,
                               self.prepare_filename(info_dict, 'description')) is None:
        return

    sub_files = self._write_subtitles(info_dict, temp_filename)
    if sub_files is None:
        return
    files_to_move.update(dict(sub_files))

    thumb_files = self._write_thumbnails(
        'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
    if thumb_files is None:
        return
    files_to_move.update(dict(thumb_files))

    infofn = self.prepare_filename(info_dict, 'infojson')
    _infojson_written = self._write_info_json('video', info_dict, infofn)
    if _infojson_written:
        info_dict['__infojson_filename'] = infofn
    elif _infojson_written is None:
        # None (as opposed to False) signals a fatal write error
        return

    # Note: Annotations are deprecated
    annofn = None
    if self.params.get('writeannotations', False):
        annofn = self.prepare_filename(info_dict, 'annotation')
    if annofn:
        if not self._ensure_dir_exists(encodeFilename(annofn)):
            return
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
            self.to_screen('[info] Video annotations are already present')
        elif not info_dict.get('annotations'):
            self.report_warning('There are no annotations to write.')
        else:
            try:
                self.to_screen('[info] Writing video annotations to: ' + annofn)
                with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                    annofile.write(info_dict['annotations'])
            except (KeyError, TypeError):
                self.report_warning('There are no annotations to write.')
            except (OSError, IOError):
                self.report_error('Cannot write annotations file: ' + annofn)
                return

    # Write internet shortcut files
    url_link = webloc_link = desktop_link = False
    if self.params.get('writelink', False):
        if sys.platform == "darwin":  # macOS.
            webloc_link = True
        elif sys.platform.startswith("linux"):
            desktop_link = True
        else:  # if sys.platform in ['win32', 'cygwin']:
            url_link = True
    if self.params.get('writeurllink', False):
        url_link = True
    if self.params.get('writewebloclink', False):
        webloc_link = True
    if self.params.get('writedesktoplink', False):
        desktop_link = True

    if url_link or webloc_link or desktop_link:
        if 'webpage_url' not in info_dict:
            self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
            return
        ascii_url = iri_to_uri(info_dict['webpage_url'])

    def _write_link_file(extension, template, newline, embed_filename):
        # Returns False on write failure, True otherwise
        linkfn = replace_extension(full_filename, extension, info_dict.get('ext'))
        if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
            # NOTE(review): this condition looks inverted -- with the default
            # overwrites=True an existing shortcut is *skipped* rather than
            # rewritten; `not self.params.get('overwrites', True)` seems
            # intended. Confirm before changing.
            self.to_screen('[info] Internet shortcut is already present')
        else:
            try:
                self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
                with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
                    template_vars = {'url': ascii_url}
                    if embed_filename:
                        template_vars['filename'] = linkfn[:-(len(extension) + 1)]
                    linkfile.write(template % template_vars)
            except (OSError, IOError):
                self.report_error('Cannot write internet shortcut ' + linkfn)
                return False
        return True

    if url_link:
        if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
            return
    if webloc_link:
        if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
            return
    if desktop_link:
        if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
            return

    try:
        info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
    except PostProcessingError as err:
        self.report_error('Preprocessing: %s' % str(err))
        return

    must_record_download_archive = False
    if self.params.get('skip_download', False):
        info_dict['filepath'] = temp_filename
        info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
        info_dict['__files_to_move'] = files_to_move
        info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
    else:
        # Download
        info_dict.setdefault('__postprocessors', [])
        try:

            def existing_file(*filepaths):
                # Return an already-downloaded file to reuse, or None after
                # deleting stale copies (when overwrites is enabled)
                ext = info_dict.get('ext')
                final_ext = self.params.get('final_ext', ext)
                existing_files = []
                for file in orderedSet(filepaths):
                    if final_ext != ext:
                        # A postprocessed (converted) copy also counts
                        converted = replace_extension(file, final_ext, ext)
                        if os.path.exists(encodeFilename(converted)):
                            existing_files.append(converted)
                    if os.path.exists(encodeFilename(file)):
                        existing_files.append(file)

                if not existing_files or self.params.get('overwrites', False):
                    for file in orderedSet(existing_files):
                        self.report_file_delete(file)
                        os.remove(encodeFilename(file))
                    return None

                info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
                return existing_files[0]

            success = True
            if info_dict.get('requested_formats') is not None:

                def compatible_formats(formats):
                    # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
                    video_formats = [format for format in formats if format.get('vcodec') != 'none']
                    audio_formats = [format for format in formats if format.get('acodec') != 'none']
                    if len(video_formats) > 2 or len(audio_formats) > 2:
                        return False

                    # Check extension
                    exts = set(format.get('ext') for format in formats)
                    COMPATIBLE_EXTS = (
                        set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
                        set(('webm',)),
                    )
                    for ext_sets in COMPATIBLE_EXTS:
                        if ext_sets.issuperset(exts):
                            return True
                    # TODO: Check acodec/vcodec
                    return False

                requested_formats = info_dict['requested_formats']
                old_ext = info_dict['ext']
                if self.params.get('merge_output_format') is None:
                    if not compatible_formats(requested_formats):
                        info_dict['ext'] = 'mkv'
                        self.report_warning(
                            'Requested formats are incompatible for merge and will be merged into mkv')
                    if (info_dict['ext'] == 'webm'
                            and info_dict.get('thumbnails')
                            # check with type instead of pp_key, __name__, or isinstance
                            # since we dont want any custom PPs to trigger this
                            and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):
                        info_dict['ext'] = 'mkv'
                        self.report_warning(
                            'webm doesn\'t support embedding a thumbnail, mkv will be used')
                new_ext = info_dict['ext']

                def correct_ext(filename, ext=new_ext):
                    if filename == '-':
                        return filename
                    filename_real_ext = os.path.splitext(filename)[1][1:]
                    filename_wo_ext = (
                        os.path.splitext(filename)[0]
                        if filename_real_ext in (old_ext, new_ext)
                        else filename)
                    return '%s.%s' % (filename_wo_ext, ext)

                # Ensure filename always has a correct extension for successful merge
                full_filename = correct_ext(full_filename)
                temp_filename = correct_ext(temp_filename)
                dl_filename = existing_file(full_filename, temp_filename)
                info_dict['__real_download'] = False

                if dl_filename is not None:
                    self.report_file_already_downloaded(dl_filename)
                elif get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-'):
                    # A single downloader (e.g. ffmpeg) can fetch all formats at once
                    info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
                    success, real_download = self.dl(temp_filename, info_dict)
                    info_dict['__real_download'] = real_download
                else:
                    # Download each format separately, then merge with ffmpeg
                    downloaded = []
                    merger = FFmpegMergerPP(self)
                    if self.params.get('allow_unplayable_formats'):
                        self.report_warning(
                            'You have requested merging of multiple formats '
                            'while also allowing unplayable formats to be downloaded. '
                            'The formats won\'t be merged to prevent data corruption.')
                    elif not merger.available:
                        self.report_warning(
                            'You have requested merging of multiple formats but ffmpeg is not installed. '
                            'The formats won\'t be merged.')

                    if temp_filename == '-':
                        reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict)
                                  else 'but the formats are incompatible for simultaneous download' if merger.available
                                  else 'but ffmpeg is not installed')
                        self.report_warning(
                            f'You have requested downloading multiple formats to stdout {reason}. '
                            'The formats will be streamed one after the other')
                    fname = temp_filename
                    for f in requested_formats:
                        new_info = dict(info_dict)
                        del new_info['requested_formats']
                        new_info.update(f)
                        if temp_filename != '-':
                            # Per-format intermediate file, e.g. name.f137.mp4
                            fname = prepend_extension(
                                correct_ext(temp_filename, new_info['ext']),
                                'f%s' % f['format_id'], new_info['ext'])
                            if not self._ensure_dir_exists(fname):
                                return
                            f['filepath'] = fname
                            downloaded.append(fname)
                        partial_success, real_download = self.dl(fname, new_info)
                        info_dict['__real_download'] = info_dict['__real_download'] or real_download
                        success = success and partial_success
                    if merger.available and not self.params.get('allow_unplayable_formats'):
                        info_dict['__postprocessors'].append(merger)
                        info_dict['__files_to_merge'] = downloaded
                        # Even if there were no downloads, it is being merged only now
                        info_dict['__real_download'] = True
                    else:
                        for file in downloaded:
                            files_to_move[file] = None
            else:
                # Just a single file
                dl_filename = existing_file(full_filename, temp_filename)
                if dl_filename is None or dl_filename == temp_filename:
                    # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
                    # So we should try to resume the download
                    success, real_download = self.dl(temp_filename, info_dict)
                    info_dict['__real_download'] = real_download
                else:
                    self.report_file_already_downloaded(dl_filename)

            dl_filename = dl_filename or temp_filename
            info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))

        except network_exceptions as err:
            self.report_error('unable to download video data: %s' % error_to_compat_str(err))
            return
        except (OSError, IOError) as err:
            raise UnavailableVideoError(err)
        except (ContentTooShortError, ) as err:
            self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
            return

    if success and full_filename != '-':

        def fixup():
            # Queue ffmpeg-based fixups (or just warn) per the 'fixup' policy
            do_fixup = True
            fixup_policy = self.params.get('fixup')
            vid = info_dict['id']

            if fixup_policy in ('ignore', 'never'):
                return
            elif fixup_policy == 'warn':
                do_fixup = False
            elif fixup_policy != 'force':
                assert fixup_policy in ('detect_or_warn', None)
                if not info_dict.get('__real_download'):
                    do_fixup = False

            def ffmpeg_fixup(cndn, msg, cls):
                if not cndn:
                    return
                if not do_fixup:
                    self.report_warning(f'{vid}: {msg}')
                    return
                pp = cls(self)
                if pp.available:
                    info_dict['__postprocessors'].append(pp)
                else:
                    self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')

            stretched_ratio = info_dict.get('stretched_ratio')
            ffmpeg_fixup(
                stretched_ratio not in (1, None),
                f'Non-uniform pixel ratio {stretched_ratio}',
                FFmpegFixupStretchedPP)

            ffmpeg_fixup(
                (info_dict.get('requested_formats') is None
                 and info_dict.get('container') == 'm4a_dash'
                 and info_dict.get('ext') == 'm4a'),
                'writing DASH m4a. Only some players support this container',
                FFmpegFixupM4aPP)

            downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
            downloader = downloader.__name__ if downloader else None
            ffmpeg_fixup(info_dict.get('requested_formats') is None and downloader == 'HlsFD',
                         'malformed AAC bitstream detected', FFmpegFixupM3u8PP)
            ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed timestamps detected', FFmpegFixupTimestampPP)
            ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed duration detected', FFmpegFixupDurationPP)

        fixup()
        try:
            info_dict = self.post_process(dl_filename, info_dict, files_to_move)
        except PostProcessingError as err:
            self.report_error('Postprocessing: %s' % str(err))
            return
        try:
            for ph in self._post_hooks:
                ph(info_dict['filepath'])
        except Exception as err:
            self.report_error('post hooks: %s' % str(err))
            return
        must_record_download_archive = True

    if must_record_download_archive or self.params.get('force_write_download_archive', False):
        self.record_download_archive(info_dict)
    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None and self._num_downloads >= int(max_downloads):
        raise MaxDownloadsReached()
2906
def download(self, url_list):
    """Download a given list of URLs."""
    outtmpl = self.outtmpl_dict['default']
    # A fixed (non-template) output filename can only ever hold one download
    if (len(url_list) > 1
            and outtmpl != '-'
            and '%' not in outtmpl
            and self.params.get('max_downloads') != 1):
        raise SameFileError(outtmpl)

    for url in url_list:
        try:
            # It also downloads the videos
            res = self.extract_info(
                url, force_generic_extractor=self.params.get('force_generic_extractor', False))
        except UnavailableVideoError:
            self.report_error('unable to download video')
        except MaxDownloadsReached:
            self.to_screen('[info] Maximum number of downloads reached')
            raise
        except ExistingVideoReached:
            self.to_screen('[info] Encountered a video that is already in the archive, stopping due to --break-on-existing')
            raise
        except RejectedVideoReached:
            self.to_screen('[info] Encountered a video that did not match filter, stopping due to --break-on-reject')
            raise
        else:
            if not self.params.get('dump_single_json', False):
                continue
            self.post_extract(res)
            self.to_stdout(json.dumps(self.sanitize_info(res)))

    return self._download_retcode
2938
def download_with_info_file(self, info_filename):
    """Download from a previously written --write-info-json file.

    Falls back to re-extracting from 'webpage_url' if the stored info
    can no longer be downloaded directly.
    """
    with contextlib.closing(fileinput.FileInput(
            [info_filename], mode='r',
            openhook=fileinput.hook_encoded('utf-8'))) as f:
        # FileInput doesn't have a read method, we can't call json.load
        info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
    try:
        self.process_ie_result(info, download=True)
    except (DownloadError, EntryNotInPlaylist, ThrottledDownload):
        webpage_url = info.get('webpage_url')
        if webpage_url is None:
            raise
        self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
        return self.download([webpage_url])
    return self._download_retcode
2955
@staticmethod
def sanitize_info(info_dict, remove_private_keys=False):
    ''' Sanitize the infodict for converting to json.

    Recursively drops internal keys (and, when remove_private_keys is set,
    underscore-prefixed/private/empty fields) so the result can be safely
    serialized with json.dumps. Note: 'epoch' is added to info_dict in-place.
    '''
    if info_dict is None:
        return info_dict
    info_dict.setdefault('epoch', int(time.time()))
    remove_keys = {'__original_infodict'}  # Always remove this since this may contain a copy of the entire dict
    # Must be a set of key names. The previous `['_type'],` (trailing comma)
    # was a tuple CONTAINING a list, so `'_type' not in keep_keys` was always
    # true and '_type' got stripped, breaking --load-info-json round-trips.
    keep_keys = {'_type'}  # Always keep this to facilitate load-info-json
    if remove_private_keys:
        remove_keys |= {
            'requested_formats', 'requested_subtitles', 'requested_entries',
            'filepath', 'entries', 'original_url', 'playlist_autonumber',
        }
        empty_values = (None, {}, [], set(), tuple())
        reject = lambda k, v: k not in keep_keys and (
            k.startswith('_') or k in remove_keys or v in empty_values)
    else:
        reject = lambda k, v: k in remove_keys
    filter_fn = lambda obj: (
        list(map(filter_fn, obj)) if isinstance(obj, (LazyList, list, tuple, set))
        else obj if not isinstance(obj, dict)
        else dict((k, filter_fn(v)) for k, v in obj.items() if not reject(k, v)))
    return filter_fn(info_dict)
2979
@staticmethod
def filter_requested_info(info_dict, actually_filter=True):
    ''' Alias of sanitize_info for backward compatibility '''
    # Deprecated: kept so code written against youtube-dl's
    # filter_requested_info keeps working; prefer sanitize_info().
    return YoutubeDL.sanitize_info(info_dict, actually_filter)
2984
def run_pp(self, pp, infodict):
    """Run a single postprocessor and handle the files it asks to delete.

    Returns the (possibly replaced) infodict. Errors are re-raised unless
    'ignoreerrors' is exactly True.
    """
    files_to_delete = []
    infodict.setdefault('__files_to_move', {})
    try:
        files_to_delete, infodict = pp.run(infodict)
    except PostProcessingError as e:
        # Must be True and not 'only_download'
        if self.params.get('ignoreerrors') is True:
            self.report_error(e)
            return infodict
        raise

    if not files_to_delete:
        return infodict

    if self.params.get('keepvideo', False):
        # -k: keep originals; just make sure they get moved with the result
        for f in files_to_delete:
            infodict['__files_to_move'].setdefault(f, '')
        return infodict

    for old_filename in set(files_to_delete):
        self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
        try:
            os.remove(encodeFilename(old_filename))
        except (IOError, OSError):
            self.report_warning('Unable to remove downloaded original file')
        if old_filename in infodict['__files_to_move']:
            del infodict['__files_to_move'][old_filename]
    return infodict
3013
@staticmethod
def post_extract(info_dict):
    """Merge any deferred '__post_extractor' data into the infodict,
    recursing into playlist/multi_video entries."""
    def _apply(d):
        if d.get('_type') in ('playlist', 'multi_video'):
            # Playlists carry no post-extractor of their own
            for entry in d.get('entries', {}):
                _apply(entry or {})
            return

        fetch_extra = d.get('__post_extractor') or (lambda: {})
        extra = fetch_extra().items()
        d.update(extra)
        d.pop('__post_extractor', None)

        # Keep the pristine original copy (if any) in sync too
        original = d.get('__original_infodict') or {}
        original.update(extra)
        original.pop('__post_extractor', None)

    _apply(info_dict or {})
3032
def pre_process(self, ie_info, key='pre_process', files_to_move=None):
    """Run the pre-processors of the given key on a copy of ie_info.

    Returns (processed_info, files_to_move).
    """
    info = dict(ie_info, __files_to_move=files_to_move or {})
    for pp in self._pps[key]:
        info = self.run_pp(pp, info)
    return info, info.pop('__files_to_move', None)
3039
def post_process(self, filename, ie_info, files_to_move=None):
    """Run all the postprocessors on the given file."""
    info = dict(ie_info)
    info['filepath'] = filename
    info['__files_to_move'] = files_to_move or {}

    # Per-video postprocessors (e.g. the merger) run before the global ones
    pps = ie_info.get('__postprocessors', []) + self._pps['post_process']
    for pp in pps:
        info = self.run_pp(pp, info)
    # Move everything into its final directory before the after_move stage
    info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
    del info['__files_to_move']
    for pp in self._pps['after_move']:
        info = self.run_pp(pp, info)
    return info
3053
def _make_archive_id(self, info_dict):
    """Build the '<extractor> <id>' key used in the download archive,
    or None if the info is too incomplete."""
    video_id = info_dict.get('id')
    if not video_id:
        return
    # Future-proof against any change in case
    # and backwards compatibility with prior versions
    extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
    if extractor is None:
        url = str_or_none(info_dict.get('url'))
        if not url:
            return
        # Try to find matching extractor for the URL and take its ie_key
        extractor = next(
            (key for key, ie in self._ies.items() if ie.suitable(url)), None)
        if extractor is None:
            return
    return '%s %s' % (extractor.lower(), video_id)
3073
def in_download_archive(self, info_dict):
    """Whether this video is already recorded in the --download-archive file."""
    if self.params.get('download_archive') is None:
        return False

    archive_id = self._make_archive_id(info_dict)
    # Falsy id means the video information is incomplete
    return bool(archive_id) and archive_id in self.archive
3084
def record_download_archive(self, info_dict):
    """Append this video's archive id to the --download-archive file."""
    fn = self.params.get('download_archive')
    if fn is None:
        return
    archive_id = self._make_archive_id(info_dict)
    assert archive_id
    # locked_file tolerates concurrent yt-dlp instances sharing one archive
    with locked_file(fn, 'a', encoding='utf-8') as archive_file:
        archive_file.write(archive_id + '\n')
    self.archive.add(archive_id)
3094
@staticmethod
def format_resolution(format, default='unknown'):
    """Return a human-readable resolution string for a format dict."""
    vcodec, acodec = format.get('vcodec'), format.get('acodec')
    is_images = vcodec == 'none' and acodec == 'none'
    if vcodec == 'none' and acodec != 'none':
        return 'audio only'
    if format.get('resolution') is not None:
        return format['resolution']
    width, height = format.get('width'), format.get('height')
    if width and height:
        res = '%dx%d' % (width, height)
    elif height:
        res = '%sp' % height
    elif width:
        res = '%dx?' % width
    else:
        return 'images' if is_images else default
    return f'{res} images' if is_images else res
3113
def _format_note(self, fdict):
    # Build the short free-form description column used by the legacy
    # (non-table) --list-formats output. Pieces are appended with ' '/', '
    # separators, so the order of the checks below is significant.
    res = ''
    if fdict.get('ext') in ['f4f', 'f4m']:
        res += '(unsupported) '
    if fdict.get('language'):
        if res:
            res += ' '
        res += '[%s] ' % fdict['language']
    if fdict.get('format_note') is not None:
        res += fdict['format_note'] + ' '
    if fdict.get('tbr') is not None:
        res += '%4dk ' % fdict['tbr']
    if fdict.get('container') is not None:
        if res:
            res += ', '
        res += '%s container' % fdict['container']
    if (fdict.get('vcodec') is not None
            and fdict.get('vcodec') != 'none'):
        if res:
            res += ', '
        res += fdict['vcodec']
        # '@' glues the codec name to the video bitrate appended below
        if fdict.get('vbr') is not None:
            res += '@'
    elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
        # Bitrates without a known codec still get a 'video@<vbr>' prefix
        res += 'video@'
    if fdict.get('vbr') is not None:
        res += '%4dk' % fdict['vbr']
    if fdict.get('fps') is not None:
        if res:
            res += ', '
        res += '%sfps' % fdict['fps']
    if fdict.get('acodec') is not None:
        if res:
            res += ', '
        if fdict['acodec'] == 'none':
            res += 'video only'
        else:
            res += '%-5s' % fdict['acodec']
    elif fdict.get('abr') is not None:
        if res:
            res += ', '
        res += 'audio'
    if fdict.get('abr') is not None:
        res += '@%3dk' % fdict['abr']
    if fdict.get('asr') is not None:
        res += ' (%5dHz)' % fdict['asr']
    if fdict.get('filesize') is not None:
        if res:
            res += ', '
        res += format_bytes(fdict['filesize'])
    elif fdict.get('filesize_approx') is not None:
        if res:
            res += ', '
        res += '~' + format_bytes(fdict['filesize_approx'])
    return res
3169
def list_formats(self, info_dict):
    """Print the table of available formats (--list-formats)."""
    formats = info_dict.get('formats', [info_dict])
    # The new table layout can be disabled via the 'list-formats' compat
    # option or listformats_table=False
    new_format = (
        'list-formats' not in self.params.get('compat_opts', [])
        and self.params.get('listformats_table', True) is not False)
    if new_format:
        table = [
            [
                format_field(f, 'format_id'),
                format_field(f, 'ext'),
                self.format_resolution(f),
                format_field(f, 'fps', '%d'),
                format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
                '|',
                format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
                format_field(f, 'tbr', '%4dk'),
                shorten_protocol_name(f.get('protocol', '').replace("native", "n")),
                '|',
                format_field(f, 'vcodec', default='unknown').replace('none', ''),
                format_field(f, 'vbr', '%4dk'),
                format_field(f, 'acodec', default='unknown').replace('none', ''),
                format_field(f, 'abr', '%3dk'),
                format_field(f, 'asr', '%5dHz'),
                ', '.join(filter(None, (
                    'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
                    format_field(f, 'language', '[%s]'),
                    format_field(f, 'format_note'),
                    format_field(f, 'container', ignore=(None, f.get('ext'))),
                ))),
            ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
        header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', 'HDR', '|', ' FILESIZE', ' TBR', 'PROTO',
                       '|', 'VCODEC', ' VBR', 'ACODEC', ' ABR', ' ASR', 'MORE INFO']
    else:
        # Legacy 4-column layout (youtube-dl compatible)
        table = [
            [
                format_field(f, 'format_id'),
                format_field(f, 'ext'),
                self.format_resolution(f),
                self._format_note(f)]
            for f in formats
            if f.get('preference') is None or f['preference'] >= -1000]
        header_line = ['format code', 'extension', 'resolution', 'note']

    self.to_screen(
        '[info] Available formats for %s:' % info_dict['id'])
    self.to_stdout(render_table(
        header_line, table, delim=new_format, extraGap=(0 if new_format else 1), hideEmpty=new_format))
3217
def list_thumbnails(self, info_dict):
    """Print a table of the available thumbnails (--list-thumbnails)."""
    thumbnails = list(info_dict.get('thumbnails'))
    if not thumbnails:
        self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
        return

    self.to_screen(
        '[info] Thumbnails for %s:' % info_dict['id'])
    rows = [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']]
            for t in thumbnails]
    self.to_stdout(render_table(['ID', 'width', 'height', 'URL'], rows))
3229
def list_subtitles(self, video_id, subtitles, name='subtitles'):
    """Print a table of the available subtitles/captions (--list-subs)."""
    if not subtitles:
        self.to_screen('%s has no %s' % (video_id, name))
        return
    self.to_screen(
        'Available %s for %s:' % (name, video_id))

    def _row(lang, formats):
        # Newest format first; collapse identical names into one entry
        exts, names = zip(*((fmt['ext'], fmt.get('name') or 'unknown') for fmt in reversed(formats)))
        if len(set(names)) == 1:
            names = [] if names[0] == 'unknown' else names[:1]
        return [lang, ', '.join(names), ', '.join(exts)]

    rows = [_row(lang, formats) for lang, formats in subtitles.items()]
    self.to_stdout(render_table(
        ['Language', 'Name', 'Formats'],
        rows,
        hideEmpty=True))
3247
def urlopen(self, req):
    """ Start an HTTP download """
    # Bare URL strings are wrapped into a sanitized Request first
    prepared = sanitized_Request(req) if isinstance(req, compat_basestring) else req
    return self._opener.open(prepared, timeout=self._socket_timeout)
3253
def print_debug_header(self):
    """Write verbose debug information to the logger/screen.

    Reports encodings, yt-dlp version and variant, lazy-loader status,
    plugins, compat options, git HEAD (best effort), Python and platform
    details, external program versions, optional libraries and the proxy
    map. No-op unless the 'verbose' option is set.
    """
    if not self.params.get('verbose'):
        return

    def get_encoding(stream):
        # Report the stream's encoding, noting streams without ANSI support
        ret = getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__)
        if not supports_terminal_sequences(stream):
            ret += ' (No ANSI)'
        return ret

    encoding_str = 'Encodings: locale %s, fs %s, out %s, err %s, pref %s' % (
        locale.getpreferredencoding(),
        sys.getfilesystemencoding(),
        get_encoding(self._screen_file), get_encoding(self._err_file),
        self.get_encoding())

    logger = self.params.get('logger')
    if logger:
        write_debug = lambda msg: logger.debug(f'[debug] {msg}')
        write_debug(encoding_str)
    else:
        # Write the encoding line raw (encoding=None) since it describes
        # the very encodings that would be used for normal output
        write_string(f'[debug] {encoding_str}', encoding=None)
        write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')

    source = detect_variant()
    write_debug('yt-dlp version %s%s' % (__version__, '' if source == 'unknown' else f' ({source})'))
    if not _LAZY_LOADER:
        if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
            write_debug('Lazy loading extractors is forcibly disabled')
        else:
            write_debug('Lazy loading extractors is disabled')
    if plugin_extractors or plugin_postprocessors:
        write_debug('Plugins: %s' % [
            '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
            for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
    if self.params.get('compat_opts'):
        write_debug('Compatibility options: %s' % ', '.join(self.params.get('compat_opts')))
    try:
        sp = Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out, err = sp.communicate_or_kill()
        out = out.decode().strip()
        if re.match('[0-9a-f]+', out):
            write_debug('Git HEAD: %s' % out)
    except Exception:
        # Best effort only: a missing git binary or a non-git checkout is
        # fine. (A Python-2-only `sys.exc_clear()` call that used to live
        # here was dead code on Python 3 and has been removed.)
        pass

    def python_implementation():
        impl_name = platform.python_implementation()
        if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
            return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
        return impl_name

    write_debug('Python version %s (%s %s) - %s' % (
        platform.python_version(),
        python_implementation(),
        platform.architecture()[0],
        platform_name()))

    exe_versions = FFmpegPostProcessor.get_versions(self)
    exe_versions['rtmpdump'] = rtmpdump_version()
    exe_versions['phantomjs'] = PhantomJSwrapper._version()
    exe_str = ', '.join(
        f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
    ) or 'none'
    write_debug('exe versions: %s' % exe_str)

    from .downloader.websocket import has_websockets
    from .postprocessor.embedthumbnail import has_mutagen
    from .cookies import SQLITE_AVAILABLE, KEYRING_AVAILABLE

    lib_str = ', '.join(sorted(filter(None, (
        compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
        has_websockets and 'websockets',
        has_mutagen and 'mutagen',
        SQLITE_AVAILABLE and 'sqlite',
        KEYRING_AVAILABLE and 'keyring',
    )))) or 'none'
    write_debug('Optional libraries: %s' % lib_str)

    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    write_debug(f'Proxy map: {proxy_map}')

    # Not implemented
    if False and self.params.get('call_home'):
        ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
        write_debug('Public IP address: %s' % ipaddr)
        latest_version = self.urlopen(
            'https://yt-dl.org/latest/version').read().decode('utf-8')
        if version_tuple(latest_version) > version_tuple(__version__):
            self.report_warning(
                'You are using an outdated version (newest version: %s)! '
                'See https://yt-dl.org/update if you need help updating.' %
                latest_version)
3356
def _setup_opener(self):
    """Build and install the urllib opener used for all HTTP(S) requests.

    Sets self._socket_timeout, self.cookiejar and self._opener from the
    'socket_timeout', 'cookiefile'/'cookiesfrombrowser' and 'proxy'
    options.
    """
    timeout_val = self.params.get('socket_timeout')
    # Default to a 20-second socket timeout when none is configured
    self._socket_timeout = 20 if timeout_val is None else float(timeout_val)

    opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
    opts_cookiefile = self.params.get('cookiefile')
    opts_proxy = self.params.get('proxy')

    self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)

    cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
    if opts_proxy is not None:
        if opts_proxy == '':
            # An explicitly empty proxy option disables all proxies
            proxies = {}
        else:
            proxies = {'http': opts_proxy, 'https': opts_proxy}
    else:
        # No proxy option given: fall back to the environment's proxies
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    proxy_handler = PerRequestProxyHandler(proxies)

    # debuglevel=1 makes urllib print the raw HTTP traffic
    debuglevel = 1 if self.params.get('debug_printtraffic') else 0
    https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
    redirect_handler = YoutubeDLRedirectHandler()
    data_handler = compat_urllib_request_DataHandler()

    # When passing our own FileHandler instance, build_opener won't add the
    # default FileHandler and allows us to disable the file protocol, which
    # can be used for malicious purposes (see
    # https://github.com/ytdl-org/youtube-dl/issues/8227)
    file_handler = compat_urllib_request.FileHandler()

    def file_open(*args, **kwargs):
        raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
    file_handler.file_open = file_open

    opener = compat_urllib_request.build_opener(
        proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)

    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener
3404
def encode(self, s):
    """Encode *s* to bytes using the configured output encoding.

    bytes input is returned unchanged. On encoding failure the
    UnicodeEncodeError is re-raised with a configuration hint appended
    to its reason.
    """
    if isinstance(s, bytes):
        # Nothing to do - input is already encoded
        return s
    try:
        return s.encode(self.get_encoding())
    except UnicodeEncodeError as err:
        err.reason += '. Check your system encoding configuration or use the --encoding option.'
        raise
3414
def get_encoding(self):
    """Return the output encoding: the 'encoding' option if set,
    otherwise the system's preferred encoding."""
    configured = self.params.get('encoding')
    return preferredencoding() if configured is None else configured
3420
3421 def _write_info_json(self, label, ie_result, infofn):
3422 ''' Write infojson and returns True = written, False = skip, None = error '''
3423 if not self.params.get('writeinfojson'):
3424 return False
3425 elif not infofn:
3426 self.write_debug(f'Skipping writing {label} infojson')
3427 return False
3428 elif not self._ensure_dir_exists(infofn):
3429 return None
3430 elif not self.params.get('overwrites', True) and os.path.exists(infofn):
3431 self.to_screen(f'[info] {label.title()} metadata is already present')
3432 else:
3433 self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3434 try:
3435 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3436 except (OSError, IOError):
3437 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3438 return None
3439 return True
3440
3441 def _write_description(self, label, ie_result, descfn):
3442 ''' Write description and returns True = written, False = skip, None = error '''
3443 if not self.params.get('writedescription'):
3444 return False
3445 elif not descfn:
3446 self.write_debug(f'Skipping writing {label} description')
3447 return False
3448 elif not self._ensure_dir_exists(descfn):
3449 return None
3450 elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3451 self.to_screen(f'[info] {label.title()} description is already present')
3452 elif ie_result.get('description') is None:
3453 self.report_warning(f'There\'s no {label} description to write')
3454 return False
3455 else:
3456 try:
3457 self.to_screen(f'[info] Writing {label} description to: {descfn}')
3458 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
3459 descfile.write(ie_result['description'])
3460 except (OSError, IOError):
3461 self.report_error(f'Cannot write {label} description file {descfn}')
3462 return None
3463 return True
3464
def _write_subtitles(self, info_dict, filename):
    ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
    ret = []
    subtitles = info_dict.get('requested_subtitles')
    if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        return ret

    sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
    if not sub_filename_base:
        self.to_screen('[info] Skipping writing video subtitles')
        return ret
    for sub_lang, sub_info in subtitles.items():
        sub_format = sub_info['ext']
        # sub_filename: where the file is written now; sub_filename_final:
        # where it should end up after post-processing/moving
        sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
        sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
        if not self.params.get('overwrites', True) and os.path.exists(sub_filename):
            # Keep the existing file but still record it in the results
            self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
            sub_info['filepath'] = sub_filename
            ret.append((sub_filename, sub_filename_final))
            continue

        self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
        if sub_info.get('data') is not None:
            # Subtitle content was delivered inline by the extractor
            try:
                # Use newline='' to prevent conversion of newline characters
                # See https://github.com/ytdl-org/youtube-dl/issues/10268
                with io.open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
                    subfile.write(sub_info['data'])
                sub_info['filepath'] = sub_filename
                ret.append((sub_filename, sub_filename_final))
                continue
            except (OSError, IOError):
                # A local write failure aborts the whole operation
                self.report_error(f'Cannot write video subtitles file {sub_filename}')
                return None

        # No inline data: download the subtitle from its URL
        try:
            sub_copy = sub_info.copy()
            sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
            self.dl(sub_filename, sub_copy, subtitle=True)
            sub_info['filepath'] = sub_filename
            ret.append((sub_filename, sub_filename_final))
        except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
            # Download failures are only warnings - continue with other languages
            self.report_warning(f'Unable to download video subtitles for {sub_lang!r}: {err}')
            continue
    return ret
3512
def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
    ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
    write_all = self.params.get('write_all_thumbnails', False)
    thumbnails, ret = [], []
    if write_all or self.params.get('writethumbnail', False):
        thumbnails = info_dict.get('thumbnails') or []
    # Only disambiguate filenames by thumbnail id when several will be written
    multiple = write_all and len(thumbnails) > 1

    if thumb_filename_base is None:
        thumb_filename_base = filename
    if thumbnails and not thumb_filename_base:
        self.write_debug(f'Skipping writing {label} thumbnail')
        return ret

    # Iterate in reverse so that, when not writing all thumbnails, the
    # loop stops after the last (presumably preferred) entry succeeds
    for t in thumbnails[::-1]:
        thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
        thumb_display_id = f'{label} thumbnail' + (f' {t["id"]}' if multiple else '')
        # thumb_filename: written now; thumb_filename_final: destination
        # after any later move/post-processing
        thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
        thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))

        if not self.params.get('overwrites', True) and os.path.exists(thumb_filename):
            # Keep the existing file but still record it in the results
            ret.append((thumb_filename, thumb_filename_final))
            t['filepath'] = thumb_filename
            self.to_screen(f'[info] {thumb_display_id.title()} is already present')
        else:
            self.to_screen(f'[info] Downloading {thumb_display_id} ...')
            try:
                uf = self.urlopen(t['url'])
                self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
                with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                    shutil.copyfileobj(uf, thumbf)
                ret.append((thumb_filename, thumb_filename_final))
                t['filepath'] = thumb_filename
            except network_exceptions as err:
                # Failure to fetch one thumbnail is not fatal
                self.report_warning(f'Unable to download {thumb_display_id}: {err}')
        if ret and not write_all:
            # One successful thumbnail is enough unless all were requested
            break
    return ret